From d3b8f889a220aed825accc28eb64ce283a0d51ac Mon Sep 17 00:00:00 2001 From: john stultz Date: Mon, 17 Aug 2009 16:40:47 -0700 Subject: x86: Make tsc=reliable override boot time stability checks This patch makes the tsc=reliable option disable the boot time stability checks. Currently the option only disables the runtime watchdog checks. This change allows folks who want to override the boot time TSC stability checks and use the TSC when the system would otherwise disqualify it. There still are some situations that the TSC will be disqualified, such as cpufreq scaling. But these are situations where the box will hang if allowed. Patch also includes a fix for an issue found by Thomas Gleixner, where the TSC disqualification message wouldn't be printed after a call to unsynchronized_tsc(). Signed-off-by: John Stultz Cc: Andrew Morton Cc: akataria@vmware.com Cc: Stephen Hemminger LKML-Reference: <1250552447.7212.92.camel@localhost.localdomain> Signed-off-by: Thomas Gleixner diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 7936b80..4c6b415 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2484,12 +2484,13 @@ and is between 256 and 4096 characters. It is defined in the file Format: ,,,,,,,, - tsc= Disable clocksource-must-verify flag for TSC. + tsc= Disable clocksource stability checks for TSC. Format: [x86] reliable: mark tsc clocksource as reliable, this - disables clocksource verification at runtime. - Used to enable high-resolution timer mode on older - hardware, and in virtualized environment. + disables clocksource verification at runtime, as well + as the stability checks done at bootup. Used to enable + high-resolution timer mode on older hardware, and in + virtualized environment. 
turbografx.map[2|3]= [HW,JOY] TurboGraFX parallel port interface diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 71f4368..648fb26 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -825,6 +825,9 @@ __cpuinit int unsynchronized_tsc(void) if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) return 0; + + if (tsc_clocksource_reliable) + return 0; /* * Intel systems are normally all synchronized. * Exceptions must mark TSC as unstable: @@ -832,10 +835,10 @@ __cpuinit int unsynchronized_tsc(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { /* assume multi socket systems are not synchronized: */ if (num_possible_cpus() > 1) - tsc_unstable = 1; + return 1; } - return tsc_unstable; + return 0; } static void __init init_tsc_clocksource(void) -- cgit v0.10.2 From e3cc067b0a79d3a3672bfe7cfba12f2e8ae56039 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 18 Sep 2009 16:31:22 -0700 Subject: xen/evtchn: track enabled state for each port enable/disable_irq() complain if the enables/disables are unbalanced, so keep track of the state and avoid redundant enables. Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index af03195..4356a9a 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -69,10 +69,36 @@ struct per_user_data { const char *name; }; -/* Who's bound to each port? */ -static struct per_user_data *port_user[NR_EVENT_CHANNELS]; +/* + * Who's bound to each port? This is logically an array of struct + * per_user_data *, but we encode the current enabled-state in bit 0. 
+ */ +static unsigned long port_user[NR_EVENT_CHANNELS]; static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */ +static inline struct per_user_data *get_port_user(unsigned port) +{ + return (struct per_user_data *)(port_user[port] & ~1); +} + +static inline void set_port_user(unsigned port, struct per_user_data *u) +{ + port_user[port] = (unsigned long)u; +} + +static inline bool get_port_enabled(unsigned port) +{ + return port_user[port] & 1; +} + +static inline void set_port_enabled(unsigned port, bool enabled) +{ + if (enabled) + port_user[port] |= 1; + else + port_user[port] &= ~1; +} + irqreturn_t evtchn_interrupt(int irq, void *data) { unsigned int port = (unsigned long)data; @@ -80,9 +106,15 @@ irqreturn_t evtchn_interrupt(int irq, void *data) spin_lock(&port_user_lock); - u = port_user[port]; + u = get_port_user(port); + + if (WARN(!get_port_enabled(port), + "Interrupt for port %d, but apparently not enabled; per-user %p\n", + port, u)) + goto out; disable_irq_nosync(irq); + set_port_enabled(port, false); if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) { u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port; @@ -92,10 +124,10 @@ irqreturn_t evtchn_interrupt(int irq, void *data) kill_fasync(&u->evtchn_async_queue, SIGIO, POLL_IN); } - } else { + } else u->ring_overflow = 1; - } +out: spin_unlock(&port_user_lock); return IRQ_HANDLED; @@ -198,9 +230,18 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf, goto out; spin_lock_irq(&port_user_lock); - for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) - if ((kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u)) - enable_irq(irq_from_evtchn(kbuf[i])); + + for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) { + unsigned port = kbuf[i]; + + if (port < NR_EVENT_CHANNELS && + get_port_user(port) == u && + !get_port_enabled(port)) { + set_port_enabled(port, true); + enable_irq(irq_from_evtchn(port)); + } + } + spin_unlock_irq(&port_user_lock); rc = count; @@ -222,8 
+263,8 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port) * interrupt handler yet, and our caller has already * serialized bind operations.) */ - BUG_ON(port_user[port] != NULL); - port_user[port] = u; + BUG_ON(get_port_user(port) != NULL); + set_port_user(port, u); rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED, u->name, (void *)(unsigned long)port); @@ -242,7 +283,7 @@ static void evtchn_unbind_from_user(struct per_user_data *u, int port) /* make sure we unbind the irq handler before clearing the port */ barrier(); - port_user[port] = NULL; + set_port_user(port, NULL); } static long evtchn_ioctl(struct file *file, @@ -333,7 +374,7 @@ static long evtchn_ioctl(struct file *file, spin_lock_irq(&port_user_lock); rc = -ENOTCONN; - if (port_user[unbind.port] != u) { + if (get_port_user(unbind.port) != u) { spin_unlock_irq(&port_user_lock); break; } @@ -355,7 +396,7 @@ static long evtchn_ioctl(struct file *file, if (notify.port >= NR_EVENT_CHANNELS) { rc = -EINVAL; - } else if (port_user[notify.port] != u) { + } else if (get_port_user(notify.port) != u) { rc = -ENOTCONN; } else { notify_remote_via_evtchn(notify.port); @@ -444,10 +485,10 @@ static int evtchn_release(struct inode *inode, struct file *filp) free_page((unsigned long)u->ring); for (i = 0; i < NR_EVENT_CHANNELS; i++) { - if (port_user[i] != u) + if (get_port_user(i) != u) continue; - evtchn_unbind_from_user(port_user[i], i); + evtchn_unbind_from_user(get_port_user(i), i); } spin_unlock_irq(&port_user_lock); -- cgit v0.10.2 From 93afe0b75ef3675ca05320919a57de8b9bbb159c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 18 Sep 2009 16:36:58 -0700 Subject: xen/evtchn: dynamically allocate port_user array We only need the array when running as a Xen domain, so dynamically allocate it as needed to save on bss space. 
Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 4356a9a..709c32d 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -73,7 +73,7 @@ struct per_user_data { * Who's bound to each port? This is logically an array of struct * per_user_data *, but we encode the current enabled-state in bit 0. */ -static unsigned long port_user[NR_EVENT_CHANNELS]; +static unsigned long *port_user; static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */ static inline struct per_user_data *get_port_user(unsigned port) @@ -522,8 +522,11 @@ static int __init evtchn_init(void) if (!xen_domain()) return -ENODEV; + port_user = kcalloc(NR_EVENT_CHANNELS, sizeof(*port_user), GFP_KERNEL); + if (port_user == NULL) + return -ENOMEM; + spin_lock_init(&port_user_lock); - memset(port_user, 0, sizeof(port_user)); /* Create '/dev/misc/evtchn'. */ err = misc_register(&evtchn_miscdev); @@ -539,6 +542,9 @@ static int __init evtchn_init(void) static void __exit evtchn_cleanup(void) { + kfree(port_user); + port_user = NULL; + misc_deregister(&evtchn_miscdev); } -- cgit v0.10.2 From 0edce91dcd83160019867a00746c679344dc0bbd Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 18 Sep 2009 16:55:29 -0700 Subject: xen/evtchn: ports start enabled Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 709c32d..72dc7f2 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -108,10 +108,9 @@ irqreturn_t evtchn_interrupt(int irq, void *data) u = get_port_user(port); - if (WARN(!get_port_enabled(port), - "Interrupt for port %d, but apparently not enabled; per-user %p\n", - port, u)) - goto out; + WARN(!get_port_enabled(port), + "Interrupt for port %d, but apparently not enabled; per-user %p\n", + port, u); disable_irq_nosync(irq); set_port_enabled(port, false); @@ -127,7 +126,6 @@ irqreturn_t evtchn_interrupt(int irq, void *data) } else u->ring_overflow = 1; -out: 
spin_unlock(&port_user_lock); return IRQ_HANDLED; @@ -265,6 +263,7 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port) */ BUG_ON(get_port_user(port) != NULL); set_port_user(port, u); + set_port_enabled(port, true); /* start enabled */ rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED, u->name, (void *)(unsigned long)port); -- cgit v0.10.2 From 1a1a17cddbfb1f81222b3f18ee8530c41fbc3b82 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 18 Sep 2009 17:13:41 -0700 Subject: xen/evtchn: remove spurious barrier evtchn_unbind_from_user() is called under a lock, so there's no need to worry about the ordering of unbind_from_irqhandler vs clearing the port per-user data. Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 72dc7f2..f79ac5c 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -279,9 +279,6 @@ static void evtchn_unbind_from_user(struct per_user_data *u, int port) unbind_from_irqhandler(irq, (void *)(unsigned long)port); - /* make sure we unbind the irq handler before clearing the port */ - barrier(); - set_port_user(port, NULL); } -- cgit v0.10.2 From 3f5e554f669098c84c82ce75e7577f7e0f3fccde Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 28 May 2010 15:28:27 -0700 Subject: xen/evtchn: don't do unbind_from_irqhandler under spinlock unbind_from_irqhandler can end up doing /proc operations, which can't happen under a spinlock. So before removing the IRQ handler, disable the irq under the port_user lock (masking the underlying event channel and making sure the irq handler isn't running concurrently and won't start running), then remove the handler without the lock. 
Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index f79ac5c..6a3a129 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -375,10 +375,12 @@ static long evtchn_ioctl(struct file *file, break; } - evtchn_unbind_from_user(u, unbind.port); + disable_irq(irq_from_evtchn(unbind.port)); spin_unlock_irq(&port_user_lock); + evtchn_unbind_from_user(u, unbind.port); + rc = 0; break; } @@ -484,11 +486,18 @@ static int evtchn_release(struct inode *inode, struct file *filp) if (get_port_user(i) != u) continue; - evtchn_unbind_from_user(get_port_user(i), i); + disable_irq(irq_from_evtchn(i)); } spin_unlock_irq(&port_user_lock); + for (i = 0; i < NR_EVENT_CHANNELS; i++) { + if (get_port_user(i) != u) + continue; + + evtchn_unbind_from_user(get_port_user(i), i); + } + kfree(u->name); kfree(u); -- cgit v0.10.2 From 376d908f52427591cef4acd172db9c3ef28676ec Mon Sep 17 00:00:00 2001 From: Bastian Blank Date: Fri, 28 May 2010 15:43:49 -0700 Subject: xen/evtchn: Fix name of Xen event-channel device The Xen event-channel device is named evtchn in the kernel but always used as /dev/xen/evtchn in userspace. This patch fixes the name. 
Signed-off-by: Bastian Blank Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 6a3a129..68119f6 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -517,7 +517,7 @@ static const struct file_operations evtchn_fops = { static struct miscdevice evtchn_miscdev = { .minor = MISC_DYNAMIC_MINOR, - .name = "evtchn", + .name = "xen/evtchn", .fops = &evtchn_fops, }; static int __init evtchn_init(void) -- cgit v0.10.2 From 70697d540c0598ad023a391d4c895044db9a6624 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 5 Oct 2010 11:13:44 -0700 Subject: xen/evtchn: add missing static Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 68119f6..f3594ec 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -99,7 +99,7 @@ static inline void set_port_enabled(unsigned port, bool enabled) port_user[port] &= ~1; } -irqreturn_t evtchn_interrupt(int irq, void *data) +static irqreturn_t evtchn_interrupt(int irq, void *data) { unsigned int port = (unsigned long)data; struct per_user_data *u; -- cgit v0.10.2 From 8e8be45e8e55daa381028aec339829929ddb53a5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 2 Sep 2010 16:16:14 -0700 Subject: rcu: add priority-inversion testing to rcutorture Add an optional test to force long-term preemption of RCU read-side critical sections, controlled by new test_boost, test_boost_interval, and test_boost_duration module parameters. This is to be used to test RCU priority boosting. Signed-off-by: Paul E. McKenney diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 9d8e8fb..89613f9 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -47,6 +47,7 @@ #include #include #include +#include MODULE_LICENSE("GPL"); MODULE_AUTHOR("Paul E. McKenney and " @@ -64,6 +65,9 @@ static int irqreader = 1; /* RCU readers from irq (timers). */ static int fqs_duration = 0; /* Duration of bursts (us), 0 to disable. 
*/ static int fqs_holdoff = 0; /* Hold time within burst (us). */ static int fqs_stutter = 3; /* Wait time between bursts (s). */ +static int test_boost = 1; /* Test RCU prio boost: 0=no, 1=maybe, 2=yes. */ +static int test_boost_interval = 7; /* Interval between boost tests, seconds. */ +static int test_boost_duration = 4; /* Duration of each boost test, seconds. */ static char *torture_type = "rcu"; /* What RCU implementation to torture. */ module_param(nreaders, int, 0444); @@ -88,6 +92,12 @@ module_param(fqs_holdoff, int, 0444); MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)"); module_param(fqs_stutter, int, 0444); MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)"); +module_param(test_boost, int, 0444); +MODULE_PARM_DESC(test_boost, "Test RCU prio boost: 0=no, 1=maybe, 2=yes."); +module_param(test_boost_interval, int, 0444); +MODULE_PARM_DESC(test_boost_interval, "Interval between boost tests, seconds."); +module_param(test_boost_duration, int, 0444); +MODULE_PARM_DESC(test_boost_duration, "Duration of each boost test, seconds."); module_param(torture_type, charp, 0444); MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)"); @@ -109,6 +119,7 @@ static struct task_struct *stats_task; static struct task_struct *shuffler_task; static struct task_struct *stutter_task; static struct task_struct *fqs_task; +static struct task_struct *boost_tasks[NR_CPUS]; #define RCU_TORTURE_PIPE_LEN 10 @@ -134,6 +145,12 @@ static atomic_t n_rcu_torture_alloc_fail; static atomic_t n_rcu_torture_free; static atomic_t n_rcu_torture_mberror; static atomic_t n_rcu_torture_error; +static long n_rcu_torture_boost_ktrerror; +static long n_rcu_torture_boost_rterror; +static long n_rcu_torture_boost_allocerror; +static long n_rcu_torture_boost_afferror; +static long n_rcu_torture_boost_failure; +static long n_rcu_torture_boosts; static long n_rcu_torture_timers; static struct list_head rcu_torture_removed; static cpumask_var_t 
shuffle_tmp_mask; @@ -147,6 +164,16 @@ static int stutter_pause_test; #endif int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; +#ifdef CONFIG_RCU_BOOST +#define rcu_can_boost() 1 +#else /* #ifdef CONFIG_RCU_BOOST */ +#define rcu_can_boost() 0 +#endif /* #else #ifdef CONFIG_RCU_BOOST */ + +static unsigned long boost_starttime; /* jiffies of next boost test start. */ +DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ + /* and boost task create/destroy. */ + /* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */ #define FULLSTOP_DONTSTOP 0 /* Normal operation. */ @@ -277,6 +304,7 @@ struct rcu_torture_ops { void (*fqs)(void); int (*stats)(char *page); int irq_capable; + int can_boost; char *name; }; @@ -366,6 +394,7 @@ static struct rcu_torture_ops rcu_ops = { .fqs = rcu_force_quiescent_state, .stats = NULL, .irq_capable = 1, + .can_boost = rcu_can_boost(), .name = "rcu" }; @@ -408,6 +437,7 @@ static struct rcu_torture_ops rcu_sync_ops = { .fqs = rcu_force_quiescent_state, .stats = NULL, .irq_capable = 1, + .can_boost = rcu_can_boost(), .name = "rcu_sync" }; @@ -424,6 +454,7 @@ static struct rcu_torture_ops rcu_expedited_ops = { .fqs = rcu_force_quiescent_state, .stats = NULL, .irq_capable = 1, + .can_boost = rcu_can_boost(), .name = "rcu_expedited" }; @@ -684,6 +715,110 @@ static struct rcu_torture_ops sched_expedited_ops = { }; /* + * RCU torture priority-boost testing. Runs one real-time thread per + * CPU for moderate bursts, repeatedly registering RCU callbacks and + * spinning waiting for them to be invoked. If a given callback takes + * too long to be invoked, we assume that priority inversion has occurred. 
+ */ + +struct rcu_boost_inflight { + struct rcu_head rcu; + int inflight; +}; + +static void rcu_torture_boost_cb(struct rcu_head *head) +{ + struct rcu_boost_inflight *rbip = + container_of(head, struct rcu_boost_inflight, rcu); + + smp_mb(); /* Ensure RCU-core accesses precede clearing ->inflight */ + rbip->inflight = 0; +} + +static int rcu_torture_boost(void *arg) +{ + unsigned long call_rcu_time; + unsigned long endtime; + unsigned long oldstarttime; + struct rcu_boost_inflight rbi = { .inflight = 0 }; + struct sched_param sp; + + VERBOSE_PRINTK_STRING("rcu_torture_boost started"); + + /* Set real-time priority. */ + sp.sched_priority = 1; + if (sched_setscheduler(current, SCHED_FIFO, &sp) < 0) { + VERBOSE_PRINTK_STRING("rcu_torture_boost RT prio failed!"); + n_rcu_torture_boost_rterror++; + } + + /* Each pass through the following loop does one boost-test cycle. */ + do { + /* Wait for the next test interval. */ + oldstarttime = boost_starttime; + while (jiffies - oldstarttime > ULONG_MAX / 2) { + schedule_timeout_uninterruptible(1); + rcu_stutter_wait("rcu_torture_boost"); + if (kthread_should_stop() || + fullstop != FULLSTOP_DONTSTOP) + goto checkwait; + } + + /* Do one boost-test interval. */ + endtime = oldstarttime + test_boost_duration * HZ; + call_rcu_time = jiffies; + while (jiffies - endtime > ULONG_MAX / 2) { + /* If we don't have a callback in flight, post one. */ + if (!rbi.inflight) { + smp_mb(); /* RCU core before ->inflight = 1. */ + rbi.inflight = 1; + call_rcu(&rbi.rcu, rcu_torture_boost_cb); + if (jiffies - call_rcu_time > + test_boost_duration * HZ - HZ / 2) { + VERBOSE_PRINTK_STRING("rcu_torture_boost boosting failed"); + n_rcu_torture_boost_failure++; + } + call_rcu_time = jiffies; + } + cond_resched(); + rcu_stutter_wait("rcu_torture_boost"); + if (kthread_should_stop() || + fullstop != FULLSTOP_DONTSTOP) + goto checkwait; + } + + /* + * Set the start time of the next test interval. 
+ * Yes, this is vulnerable to long delays, but such + * delays simply cause a false negative for the next + * interval. Besides, we are running at RT priority, + * so delays should be relatively rare. + */ + while (oldstarttime == boost_starttime) { + if (mutex_trylock(&boost_mutex)) { + boost_starttime = jiffies + + test_boost_interval * HZ; + n_rcu_torture_boosts++; + mutex_unlock(&boost_mutex); + break; + } + schedule_timeout_uninterruptible(1); + } + + /* Go do the stutter. */ +checkwait: rcu_stutter_wait("rcu_torture_boost"); + } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); + + /* Clean up and exit. */ + VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping"); + rcutorture_shutdown_absorb("rcu_torture_boost"); + while (!kthread_should_stop() || rbi.inflight) + schedule_timeout_uninterruptible(1); + smp_mb(); /* order accesses to ->inflight before stack-frame death. */ + return 0; +} + +/* * RCU torture force-quiescent-state kthread. Repeatedly induces * bursts of calls to force_quiescent_state(), increasing the probability * of occurrence of some important types of race conditions. 
@@ -933,7 +1068,8 @@ rcu_torture_printk(char *page) cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG); cnt += sprintf(&page[cnt], "rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d " - "rtmbe: %d nt: %ld", + "rtmbe: %d rtbke: %ld rtbre: %ld rtbae: %ld rtbafe: %ld " + "rtbf: %ld rtb: %ld nt: %ld", rcu_torture_current, rcu_torture_current_version, list_empty(&rcu_torture_freelist), @@ -941,8 +1077,19 @@ rcu_torture_printk(char *page) atomic_read(&n_rcu_torture_alloc_fail), atomic_read(&n_rcu_torture_free), atomic_read(&n_rcu_torture_mberror), + n_rcu_torture_boost_ktrerror, + n_rcu_torture_boost_rterror, + n_rcu_torture_boost_allocerror, + n_rcu_torture_boost_afferror, + n_rcu_torture_boost_failure, + n_rcu_torture_boosts, n_rcu_torture_timers); - if (atomic_read(&n_rcu_torture_mberror) != 0) + if (atomic_read(&n_rcu_torture_mberror) != 0 || + n_rcu_torture_boost_ktrerror != 0 || + n_rcu_torture_boost_rterror != 0 || + n_rcu_torture_boost_allocerror != 0 || + n_rcu_torture_boost_afferror != 0 || + n_rcu_torture_boost_failure != 0) cnt += sprintf(&page[cnt], " !!!"); cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG); if (i > 1) { @@ -1094,22 +1241,91 @@ rcu_torture_stutter(void *arg) } static inline void -rcu_torture_print_module_parms(char *tag) +rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, char *tag) { printk(KERN_ALERT "%s" TORTURE_FLAG "--- %s: nreaders=%d nfakewriters=%d " "stat_interval=%d verbose=%d test_no_idle_hz=%d " "shuffle_interval=%d stutter=%d irqreader=%d " - "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d\n", + "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d " + "test_boost=%d/%d test_boost_interval=%d " + "test_boost_duration=%d\n", torture_type, tag, nrealreaders, nfakewriters, stat_interval, verbose, test_no_idle_hz, shuffle_interval, - stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter); + stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter, + test_boost, cur_ops->can_boost, + 
test_boost_interval, test_boost_duration); } -static struct notifier_block rcutorture_nb = { +static struct notifier_block rcutorture_shutdown_nb = { .notifier_call = rcutorture_shutdown_notify, }; +static void rcutorture_booster_cleanup(int cpu) +{ + struct task_struct *t; + + if (boost_tasks[cpu] == NULL) + return; + mutex_lock(&boost_mutex); + VERBOSE_PRINTK_STRING("Stopping rcu_torture_boost task"); + t = boost_tasks[cpu]; + boost_tasks[cpu] = NULL; + mutex_unlock(&boost_mutex); + + /* This must be outside of the mutex, otherwise deadlock! */ + kthread_stop(t); +} + +static int rcutorture_booster_init(int cpu) +{ + int retval; + + if (boost_tasks[cpu] != NULL) + return 0; /* Already created, nothing more to do. */ + + /* Don't allow time recalculation while creating a new task. */ + mutex_lock(&boost_mutex); + VERBOSE_PRINTK_STRING("Creating rcu_torture_boost task"); + boost_tasks[cpu] = kthread_create(rcu_torture_boost, NULL, + "rcu_torture_boost"); + if (IS_ERR(boost_tasks[cpu])) { + retval = PTR_ERR(boost_tasks[cpu]); + VERBOSE_PRINTK_STRING("rcu_torture_boost task create failed"); + n_rcu_torture_boost_ktrerror++; + boost_tasks[cpu] = NULL; + mutex_unlock(&boost_mutex); + return retval; + } + kthread_bind(boost_tasks[cpu], cpu); + wake_up_process(boost_tasks[cpu]); + mutex_unlock(&boost_mutex); + return 0; +} + +static int rcutorture_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + long cpu = (long)hcpu; + + switch (action) { + case CPU_ONLINE: + case CPU_DOWN_FAILED: + (void)rcutorture_booster_init(cpu); + break; + case CPU_DOWN_PREPARE: + rcutorture_booster_cleanup(cpu); + break; + default: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block rcutorture_cpu_nb = { + .notifier_call = rcutorture_cpu_notify, +}; + static void rcu_torture_cleanup(void) { @@ -1127,7 +1343,7 @@ rcu_torture_cleanup(void) } fullstop = FULLSTOP_RMMOD; mutex_unlock(&fullstop_mutex); - unregister_reboot_notifier(&rcutorture_nb); + 
unregister_reboot_notifier(&rcutorture_shutdown_nb); if (stutter_task) { VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task"); kthread_stop(stutter_task); @@ -1184,6 +1400,12 @@ rcu_torture_cleanup(void) kthread_stop(fqs_task); } fqs_task = NULL; + if ((test_boost == 1 && cur_ops->can_boost) || + test_boost == 2) { + unregister_cpu_notifier(&rcutorture_cpu_nb); + for_each_possible_cpu(i) + rcutorture_booster_cleanup(i); + } /* Wait for all RCU callbacks to fire. */ @@ -1195,9 +1417,9 @@ rcu_torture_cleanup(void) if (cur_ops->cleanup) cur_ops->cleanup(); if (atomic_read(&n_rcu_torture_error)) - rcu_torture_print_module_parms("End of test: FAILURE"); + rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE"); else - rcu_torture_print_module_parms("End of test: SUCCESS"); + rcu_torture_print_module_parms(cur_ops, "End of test: SUCCESS"); } static int __init @@ -1242,7 +1464,7 @@ rcu_torture_init(void) nrealreaders = nreaders; else nrealreaders = 2 * num_online_cpus(); - rcu_torture_print_module_parms("Start of test"); + rcu_torture_print_module_parms(cur_ops, "Start of test"); fullstop = FULLSTOP_DONTSTOP; /* Set up the freelist. 
*/ @@ -1263,6 +1485,12 @@ rcu_torture_init(void) atomic_set(&n_rcu_torture_free, 0); atomic_set(&n_rcu_torture_mberror, 0); atomic_set(&n_rcu_torture_error, 0); + n_rcu_torture_boost_ktrerror = 0; + n_rcu_torture_boost_rterror = 0; + n_rcu_torture_boost_allocerror = 0; + n_rcu_torture_boost_afferror = 0; + n_rcu_torture_boost_failure = 0; + n_rcu_torture_boosts = 0; for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) atomic_set(&rcu_torture_wcount[i], 0); for_each_possible_cpu(cpu) { @@ -1376,7 +1604,27 @@ rcu_torture_init(void) goto unwind; } } - register_reboot_notifier(&rcutorture_nb); + if (test_boost_interval < 1) + test_boost_interval = 1; + if (test_boost_duration < 2) + test_boost_duration = 2; + if ((test_boost == 1 && cur_ops->can_boost) || + test_boost == 2) { + int retval; + + boost_starttime = jiffies + test_boost_interval * HZ; + register_cpu_notifier(&rcutorture_cpu_nb); + for_each_possible_cpu(i) { + if (cpu_is_offline(i)) + continue; /* Heuristic: CPU can go offline. */ + retval = rcutorture_booster_init(i); + if (retval < 0) { + firsterr = retval; + goto unwind; + } + } + } + register_reboot_notifier(&rcutorture_shutdown_nb); mutex_unlock(&fullstop_mutex); return 0; -- cgit v0.10.2 From a386b5af8edda1c742ce9f77891e112eefffc005 Mon Sep 17 00:00:00 2001 From: Kasper Pedersen Date: Wed, 20 Oct 2010 15:55:15 -0700 Subject: time: Compensate for rounding on odd-frequency clocksources When the clocksource is not a multiple of HZ, the clock will be off. For acpi_pm, HZ=1000 the error is 127.111 ppm: The rounding of cycle_interval ends up generating a false error term in ntp_error accumulation since xtime_interval is not exactly 1/HZ. So, we subtract out the error caused by the rounding. This has been visible since 2.6.32-rc2 commit a092ff0f90cae22b2ac8028ecd2c6f6c1a9e4601 time: Implement logarithmic time accumulation That commit raised NTP_INTERVAL_FREQ and exposed the rounding error. 
testing tool: http://n1.taur.dk/permanent/testpmt.c Also tested with ntpd and a frequency counter. Signed-off-by: Kasper Pedersen Acked-by: john stultz Cc: John Kacur Cc: Clark Williams Cc: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 49010d8..5bb86da 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -32,6 +32,8 @@ struct timekeeper { cycle_t cycle_interval; /* Number of clock shifted nano seconds in one NTP interval. */ u64 xtime_interval; + /* shifted nano seconds left over when rounding cycle_interval */ + s64 xtime_remainder; /* Raw nano seconds accumulated per NTP interval. */ u32 raw_interval; @@ -62,7 +64,7 @@ struct timekeeper timekeeper; static void timekeeper_setup_internals(struct clocksource *clock) { cycle_t interval; - u64 tmp; + u64 tmp, ntpinterval; timekeeper.clock = clock; clock->cycle_last = clock->read(clock); @@ -70,6 +72,7 @@ static void timekeeper_setup_internals(struct clocksource *clock) /* Do the ns -> cycle conversion first, using original mult */ tmp = NTP_INTERVAL_LENGTH; tmp <<= clock->shift; + ntpinterval = tmp; tmp += clock->mult/2; do_div(tmp, clock->mult); if (tmp == 0) @@ -80,6 +83,7 @@ static void timekeeper_setup_internals(struct clocksource *clock) /* Go back from cycles -> shifted ns */ timekeeper.xtime_interval = (u64) interval * clock->mult; + timekeeper.xtime_remainder = ntpinterval - timekeeper.xtime_interval; timekeeper.raw_interval = ((u64) interval * clock->mult) >> clock->shift; @@ -719,7 +723,8 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift) /* Accumulate error between NTP and clock interval */ timekeeper.ntp_error += tick_length << shift; - timekeeper.ntp_error -= timekeeper.xtime_interval << + timekeeper.ntp_error -= + (timekeeper.xtime_interval + timekeeper.xtime_remainder) << (timekeeper.ntp_error_shift + shift); return offset; -- cgit v0.10.2 From 
d0959024d8fb6555ba8bfdc6624cc7b7c2e675fd Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Wed, 20 Oct 2010 15:57:30 -0700 Subject: timer_list: Remove alignment padding on 64 bit when CONFIG_TIMER_STATS Reorder struct timer_list to remove 8 bytes of alignment padding on 64 bit builds when CONFIG_TIMER_STATS is selected. timer_list is widely used across the kernel so many structures will benefit and shrink in size. For example, with my config on x86_64 per_cpu_dm_data shrinks from 136 to 128 bytes and ahci_port_priv shrinks from 1032 to 968 bytes. Signed-off-by: Richard Kennedy Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner diff --git a/include/linux/timer.h b/include/linux/timer.h index 38cf093..f3dccdb 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -24,9 +24,9 @@ struct timer_list { int slack; #ifdef CONFIG_TIMER_STATS + int start_pid; void *start_site; char start_comm[16]; - int start_pid; #endif #ifdef CONFIG_LOCKDEP struct lockdep_map lockdep_map; -- cgit v0.10.2 From aaabe31c25a439b92cc281b14ca18b85bae7e7a6 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Wed, 20 Oct 2010 15:57:30 -0700 Subject: timer: Initialize the field slack of timer_list TIMER_INITIALIZER() should initialize the field slack of timer_list as __init_timer() does. 
Signed-off-by: Changli Gao Cc: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner diff --git a/include/linux/timer.h b/include/linux/timer.h index f3dccdb..1794674 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -54,6 +54,7 @@ extern struct tvec_base boot_tvec_bases; .expires = (_expires), \ .data = (_data), \ .base = &boot_tvec_bases, \ + .slack = -1, \ __TIMER_LOCKDEP_MAP_INITIALIZER( \ __FILE__ ":" __stringify(__LINE__)) \ } -- cgit v0.10.2 From 2bf1c05e3c406925e498d06da66b4828f0209ea6 Mon Sep 17 00:00:00 2001 From: Nikitas Angelinas Date: Wed, 20 Oct 2010 15:57:31 -0700 Subject: time: Use ARRAY_SIZE macro in timecompare.c Replace sizeof(buffer)/sizeof(buffer[0]) with ARRAY_SIZE(buffer) in kernel/time/timecompare.c Signed-off-by: Nikitas Angelinas Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner diff --git a/kernel/time/timecompare.c b/kernel/time/timecompare.c index ac38fbb..a9ae369 100644 --- a/kernel/time/timecompare.c +++ b/kernel/time/timecompare.c @@ -21,6 +21,7 @@ #include #include #include +#include /* * fixed point arithmetic scale factor for skew @@ -57,11 +58,11 @@ int timecompare_offset(struct timecompare *sync, int index; int num_samples = sync->num_samples; - if (num_samples > sizeof(buffer)/sizeof(buffer[0])) { + if (num_samples > ARRAY_SIZE(buffer)) { samples = kmalloc(sizeof(*samples) * num_samples, GFP_ATOMIC); if (!samples) { samples = buffer; - num_samples = sizeof(buffer)/sizeof(buffer[0]); + num_samples = ARRAY_SIZE(buffer); } } else { samples = buffer; -- cgit v0.10.2 From dd6414b50fa2b1cd247a8aa8f8bd42414b7453e1 Mon Sep 17 00:00:00 2001 From: Phil Carmody Date: Wed, 20 Oct 2010 15:57:33 -0700 Subject: timer: Permit statically-declared work with deferrable timers Currently, you have to just define a delayed_work uninitialised, and then initialise it before first use. That's a tad clumsy. 
At risk of playing mind-games with the compiler, fooling it into doing pointer arithmetic with compile-time-constants, this lets clients properly initialise delayed work with deferrable timers statically. This patch was inspired by the issues which lead Artem Bityutskiy to commit 8eab945c5616fc984 ("sunrpc: make the cache cleaner workqueue deferrable"). Signed-off-by: Phil Carmody Acked-by: Artem Bityutskiy Cc: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner diff --git a/include/linux/timer.h b/include/linux/timer.h index 1794674..cbfb7a3 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -48,6 +48,18 @@ extern struct tvec_base boot_tvec_bases; #define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn) #endif +/* + * Note that all tvec_bases are 2 byte aligned and lower bit of + * base in timer_list is guaranteed to be zero. Use the LSB to + * indicate whether the timer is deferrable. + * + * A deferrable timer will work normally when the system is busy, but + * will not cause a CPU to come out of idle just to service it; instead, + * the timer will be serviced when the CPU eventually wakes up with a + * subsequent non-deferrable timer. 
+ */ +#define TBASE_DEFERRABLE_FLAG (0x1) + #define TIMER_INITIALIZER(_function, _expires, _data) { \ .entry = { .prev = TIMER_ENTRY_STATIC }, \ .function = (_function), \ @@ -59,6 +71,19 @@ extern struct tvec_base boot_tvec_bases; __FILE__ ":" __stringify(__LINE__)) \ } +#define TBASE_MAKE_DEFERRED(ptr) ((struct tvec_base *) \ + ((unsigned char *)(ptr) + TBASE_DEFERRABLE_FLAG)) + +#define TIMER_DEFERRED_INITIALIZER(_function, _expires, _data) {\ + .entry = { .prev = TIMER_ENTRY_STATIC }, \ + .function = (_function), \ + .expires = (_expires), \ + .data = (_data), \ + .base = TBASE_MAKE_DEFERRED(&boot_tvec_bases), \ + __TIMER_LOCKDEP_MAP_INITIALIZER( \ + __FILE__ ":" __stringify(__LINE__)) \ + } + #define DEFINE_TIMER(_name, _function, _expires, _data) \ struct timer_list _name = \ TIMER_INITIALIZER(_function, _expires, _data) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index f11100f..88238c1 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -127,12 +127,20 @@ struct execute_work { .timer = TIMER_INITIALIZER(NULL, 0, 0), \ } +#define __DEFERRED_WORK_INITIALIZER(n, f) { \ + .work = __WORK_INITIALIZER((n).work, (f)), \ + .timer = TIMER_DEFERRED_INITIALIZER(NULL, 0, 0), \ + } + #define DECLARE_WORK(n, f) \ struct work_struct n = __WORK_INITIALIZER(n, f) #define DECLARE_DELAYED_WORK(n, f) \ struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f) +#define DECLARE_DEFERRED_WORK(n, f) \ + struct delayed_work n = __DEFERRED_WORK_INITIALIZER(n, f) + /* * initialize a work item's function pointer */ diff --git a/kernel/timer.c b/kernel/timer.c index 97bf05b..72853b2 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -88,18 +88,6 @@ struct tvec_base boot_tvec_bases; EXPORT_SYMBOL(boot_tvec_bases); static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases; -/* - * Note that all tvec_bases are 2 byte aligned and lower bit of - * base in timer_list is guaranteed to be zero. 
Use the LSB to - * indicate whether the timer is deferrable. - * - * A deferrable timer will work normally when the system is busy, but - * will not cause a CPU to come out of idle just to service it; instead, - * the timer will be serviced when the CPU eventually wakes up with a - * subsequent non-deferrable timer. - */ -#define TBASE_DEFERRABLE_FLAG (0x1) - /* Functions below help us manage 'deferrable' flag */ static inline unsigned int tbase_get_deferrable(struct tvec_base *base) { @@ -113,8 +101,7 @@ static inline struct tvec_base *tbase_get_base(struct tvec_base *base) static inline void timer_set_deferrable(struct timer_list *timer) { - timer->base = ((struct tvec_base *)((unsigned long)(timer->base) | - TBASE_DEFERRABLE_FLAG)); + timer->base = TBASE_MAKE_DEFERRED(timer->base); } static inline void -- cgit v0.10.2 From 20f33a03f0cf87e51165f7084f697acfb68e865b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 20 Oct 2010 15:57:34 -0700 Subject: posix-timers: Annotate lock_timer() lock_timer() conditionally grabs it_lock in case of returning non-NULL but unlock_timer() releases it unconditionally. This leads sparse to complain about the lock context imbalance. Rename and wrap lock_timer using __cond_lock() macro to make sparse happy. 
Signed-off-by: Namhyung Kim Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 9ca4973..93bd2eb 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -145,7 +145,13 @@ static int common_timer_del(struct k_itimer *timer); static enum hrtimer_restart posix_timer_fn(struct hrtimer *data); -static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags); +static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags); + +#define lock_timer(tid, flags) \ +({ struct k_itimer *__timr; \ + __cond_lock(&__timr->it_lock, __timr = __lock_timer(tid, flags)); \ + __timr; \ +}) static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) { @@ -619,7 +625,7 @@ out: * the find to the timer lock. To avoid a dead lock, the timer id MUST * be release with out holding the timer lock. */ -static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags) +static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags) { struct k_itimer *timr; /* -- cgit v0.10.2 From 6f1bc451e6a79470b122a37ee1fc6bbca450f444 Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Wed, 20 Oct 2010 15:57:31 -0700 Subject: timer: Make try_to_del_timer_sync() the same on SMP and UP On UP try_to_del_timer_sync() is mapped to del_timer() which does not take the running timer callback into account, so it has different semantics. Remove the SMP dependency of try_to_del_timer_sync() by using base->running_timer in the UP case as well. 
[ tglx: Removed set_running_timer() inline and tweaked the changelog ] Signed-off-by: Yong Zhang Cc: Ingo Molnar Cc: Peter Zijlstra Acked-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner diff --git a/include/linux/timer.h b/include/linux/timer.h index cbfb7a3..6abd913 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -274,11 +274,11 @@ static inline void timer_stats_timer_clear_start_info(struct timer_list *timer) extern void add_timer(struct timer_list *timer); +extern int try_to_del_timer_sync(struct timer_list *timer); + #ifdef CONFIG_SMP - extern int try_to_del_timer_sync(struct timer_list *timer); extern int del_timer_sync(struct timer_list *timer); #else -# define try_to_del_timer_sync(t) del_timer(t) # define del_timer_sync(t) del_timer(t) #endif diff --git a/kernel/timer.c b/kernel/timer.c index 72853b2..47b86c1 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -330,15 +330,6 @@ void set_timer_slack(struct timer_list *timer, int slack_hz) } EXPORT_SYMBOL_GPL(set_timer_slack); - -static inline void set_running_timer(struct tvec_base *base, - struct timer_list *timer) -{ -#ifdef CONFIG_SMP - base->running_timer = timer; -#endif -} - static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) { unsigned long expires = timer->expires; @@ -923,15 +914,12 @@ int del_timer(struct timer_list *timer) } EXPORT_SYMBOL(del_timer); -#ifdef CONFIG_SMP /** * try_to_del_timer_sync - Try to deactivate a timer * @timer: timer do del * * This function tries to deactivate a timer. Upon successful (ret >= 0) * exit the timer is not queued and the handler is not running on any CPU. - * - * It must not be called from interrupt contexts. */ int try_to_del_timer_sync(struct timer_list *timer) { @@ -960,6 +948,7 @@ out: } EXPORT_SYMBOL(try_to_del_timer_sync); +#ifdef CONFIG_SMP /** * del_timer_sync - deactivate a timer and wait for the handler to finish. 
* @timer: the timer to be deactivated @@ -1098,7 +1087,7 @@ static inline void __run_timers(struct tvec_base *base) timer_stats_account_timer(timer); - set_running_timer(base, timer); + base->running_timer = timer; detach_timer(timer, 1); spin_unlock_irq(&base->lock); @@ -1106,7 +1095,7 @@ static inline void __run_timers(struct tvec_base *base) spin_lock_irq(&base->lock); } } - set_running_timer(base, NULL); + base->running_timer = NULL; spin_unlock_irq(&base->lock); } -- cgit v0.10.2 From 1118e2cd33d47254854e1ba3ba8e32802ff14fdf Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Wed, 20 Oct 2010 15:57:32 -0700 Subject: timer: Del_timer_sync() can be used in softirq context Actually we have used del_timer_sync() in softirq context for a long time, e.g. in __dst_free()::cancel_delayed_work(). So change the comments of it to warn on hardirq context only, and make lockdep know about this change. Signed-off-by: Yong Zhang Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner diff --git a/kernel/timer.c b/kernel/timer.c index 47b86c1..612de03 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -959,7 +959,7 @@ EXPORT_SYMBOL(try_to_del_timer_sync); * * Synchronization rules: Callers must prevent restarting of the timer, * otherwise this function is meaningless. It must not be called from - * interrupt contexts. The caller must not hold locks which would prevent + * hardirq contexts. The caller must not hold locks which would prevent * completion of the timer's handler. The timer's handler must not call * add_timer_on(). Upon exit the timer is not queued and the handler is * not running on any CPU. 
@@ -969,12 +969,10 @@ EXPORT_SYMBOL(try_to_del_timer_sync); int del_timer_sync(struct timer_list *timer) { #ifdef CONFIG_LOCKDEP - unsigned long flags; - - local_irq_save(flags); + local_bh_disable(); lock_map_acquire(&timer->lockdep_map); lock_map_release(&timer->lockdep_map); - local_irq_restore(flags); + local_bh_enable(); #endif for (;;) { -- cgit v0.10.2 From 466bd3030973910118ca601da8072be97a1e2209 Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Wed, 20 Oct 2010 15:57:33 -0700 Subject: timer: Warn when del_timer_sync() is called in hardirq context Add explicit warning when del_timer_sync() is called in hardirq context. Signed-off-by: Yong Zhang Cc: Ingo Molnar Cc: Peter Zijlstra Acked-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner diff --git a/kernel/timer.c b/kernel/timer.c index 612de03..483e54b 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -974,7 +974,11 @@ int del_timer_sync(struct timer_list *timer) lock_map_release(&timer->lockdep_map); local_bh_enable(); #endif - + /* + * don't use it in hardirq context, because it + * could lead to deadlock. + */ + WARN_ON(in_irq()); for (;;) { int ret = try_to_del_timer_sync(timer); if (ret >= 0) -- cgit v0.10.2 From fd35fbcdd1b2579a6e00a1545f7124e4005d0474 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 22 Oct 2010 15:33:38 -0700 Subject: x86-64, asm: Use fxsaveq/fxrestorq in more places Checkin d7acb92fea932ad2e7846480aeacddc2c03c8485 made use of fxsaveq in fpu_fxsave() if the assembler supports it; this adds fxsaveq/fxrstorq to fxrstor_checking() and fxsave_user() as well. Reported-by: Linus Torvalds LKML-Reference: Signed-off-by: H. Peter Anvin diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 4aa2bb3..ef32890 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -93,6 +93,17 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) int err; /* See comment in fxsave() below.
*/ +#ifdef CONFIG_AS_FXSAVEQ + asm volatile("1: fxrstorq %[fx]\n\t" + "2:\n" + ".section .fixup,\"ax\"\n" + "3: movl $-1,%[err]\n" + " jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) + : [err] "=r" (err) + : [fx] "m" (*fx), "0" (0)); +#else asm volatile("1: rex64/fxrstor (%[fx])\n\t" "2:\n" ".section .fixup,\"ax\"\n" @@ -102,6 +113,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) _ASM_EXTABLE(1b, 3b) : [err] "=r" (err) : [fx] "R" (fx), "m" (*fx), "0" (0)); +#endif return err; } @@ -119,6 +131,17 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx) return -EFAULT; /* See comment in fxsave() below. */ +#ifdef CONFIG_AS_FXSAVEQ + asm volatile("1: fxsaveq %[fx]\n\t" + "2:\n" + ".section .fixup,\"ax\"\n" + "3: movl $-1,%[err]\n" + " jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) + : [err] "=r" (err), [fx] "=m" (*fx) + : "0" (0)); +#else asm volatile("1: rex64/fxsave (%[fx])\n\t" "2:\n" ".section .fixup,\"ax\"\n" @@ -128,6 +151,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx) _ASM_EXTABLE(1b, 3b) : [err] "=r" (err), "=m" (*fx) : [fx] "R" (fx), "0" (0)); +#endif if (unlikely(err) && __clear_user(fx, sizeof(struct i387_fxsave_struct))) err = -EFAULT; -- cgit v0.10.2 From fe7de49f9d4e53f24ec9ef762a503f70b562341c Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 20 Oct 2010 16:01:12 -0700 Subject: sched: Make sched_param argument static in sched_setscheduler() callers Andrew Morton pointed out almost all sched_setscheduler() callers are using fixed parameters and can be converted to static. It reduces runtime memory use a little. 
Signed-off-by: KOSAKI Motohiro Reported-by: Andrew Morton Acked-by: James Morris Cc: Ingo Molnar Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/include/linux/sched.h b/include/linux/sched.h index 0383601..849c867 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1942,9 +1942,10 @@ extern int task_nice(const struct task_struct *p); extern int can_nice(const struct task_struct *p, const int nice); extern int task_curr(const struct task_struct *p); extern int idle_cpu(int cpu); -extern int sched_setscheduler(struct task_struct *, int, struct sched_param *); +extern int sched_setscheduler(struct task_struct *, int, + const struct sched_param *); extern int sched_setscheduler_nocheck(struct task_struct *, int, - struct sched_param *); + const struct sched_param *); extern struct task_struct *idle_task(int cpu); extern struct task_struct *curr_task(int cpu); extern void set_curr_task(int cpu, struct task_struct *p); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 644e8d5..850f030 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -573,7 +573,9 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { } */ static int irq_thread(void *data) { - struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, }; + static struct sched_param param = { + .sched_priority = MAX_USER_RT_PRIO/2, + }; struct irqaction *action = data; struct irq_desc *desc = irq_to_desc(action->irq); int wake, oneshot = desc->status & IRQ_ONESHOT; diff --git a/kernel/kthread.c b/kernel/kthread.c index 2dc3786..74cf6f5 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -148,7 +148,7 @@ struct task_struct *kthread_create(int (*threadfn)(void *data), wait_for_completion(&create.done); if (!IS_ERR(create.result)) { - struct sched_param param = { .sched_priority = 0 }; + static struct sched_param param = { .sched_priority = 0 }; va_list args; va_start(args, 
namefmt); diff --git a/kernel/sched.c b/kernel/sched.c index d42992b..51944e8 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4701,7 +4701,7 @@ static bool check_same_owner(struct task_struct *p) } static int __sched_setscheduler(struct task_struct *p, int policy, - struct sched_param *param, bool user) + const struct sched_param *param, bool user) { int retval, oldprio, oldpolicy = -1, on_rq, running; unsigned long flags; @@ -4856,7 +4856,7 @@ recheck: * NOTE that the task may be already dead. */ int sched_setscheduler(struct task_struct *p, int policy, - struct sched_param *param) + const struct sched_param *param) { return __sched_setscheduler(p, policy, param, true); } @@ -4874,7 +4874,7 @@ EXPORT_SYMBOL_GPL(sched_setscheduler); * but our caller might not have that capability. */ int sched_setscheduler_nocheck(struct task_struct *p, int policy, - struct sched_param *param) + const struct sched_param *param) { return __sched_setscheduler(p, policy, param, false); } diff --git a/kernel/softirq.c b/kernel/softirq.c index fc97888..081869e 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -851,7 +851,9 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, cpumask_any(cpu_online_mask)); case CPU_DEAD: case CPU_DEAD_FROZEN: { - struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; + static struct sched_param param = { + .sched_priority = MAX_RT_PRIO-1 + }; p = per_cpu(ksoftirqd, hotcpu); per_cpu(ksoftirqd, hotcpu) = NULL; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 155a415..562c56e 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -558,7 +558,7 @@ trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr) static int trace_wakeup_test_thread(void *data) { /* Make this a RT thread, doesn't need to be too high */ - struct sched_param param = { .sched_priority = 5 }; + static struct sched_param param = { .sched_priority = 5 }; struct completion *x = data; 
sched_setscheduler(current, SCHED_FIFO, &param); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index bafba68..94ca779 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -307,7 +307,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) */ static int watchdog(void *unused) { - struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; + static struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); sched_setscheduler(current, SCHED_FIFO, &param); -- cgit v0.10.2 From 9c37c9d89773ee9da9f6af28ee37d931bd045711 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 25 Oct 2010 16:03:35 +0200 Subject: mce, amd: Implement mce_threshold_block_init() helper function This patch adds a helper function for the initial setup of an mce threshold block. The LVT offset is passed as argument. Also making variable threshold_defaults local as it is only used in function mce_amd_feature_init(). Function threshold_restart_bank() is extended to setup the LVT offset, the change is backward compatible. Thus, now there is only a single wrmsrl() to setup the block.
Signed-off-by: Robert Richter Acked-by: Borislav Petkov LKML-Reference: <1288015419-29543-2-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 80c4823..f438318 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -59,12 +59,6 @@ struct threshold_block { struct list_head miscj; }; -/* defaults used early on boot */ -static struct threshold_block threshold_defaults = { - .interrupt_enable = 0, - .threshold_limit = THRESHOLD_MAX, -}; - struct threshold_bank { struct kobject *kobj; struct threshold_block *blocks; @@ -89,6 +83,8 @@ static void amd_threshold_interrupt(void); struct thresh_restart { struct threshold_block *b; int reset; + int set_lvt_off; + int lvt_off; u16 old_limit; }; @@ -116,6 +112,12 @@ static void threshold_restart_bank(void *_tr) (new_count & THRESHOLD_MAX); } + if (tr->set_lvt_off) { + /* set new lvt offset */ + mci_misc_hi &= ~MASK_LVTOFF_HI; + mci_misc_hi |= tr->lvt_off << 20; + } + tr->b->interrupt_enable ? 
(mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : (mci_misc_hi &= ~MASK_INT_TYPE_HI); @@ -124,13 +126,25 @@ static void threshold_restart_bank(void *_tr) wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi); } +static void mce_threshold_block_init(struct threshold_block *b, int offset) +{ + struct thresh_restart tr = { + .b = b, + .set_lvt_off = 1, + .lvt_off = offset, + }; + + b->threshold_limit = THRESHOLD_MAX; + threshold_restart_bank(&tr); +}; + /* cpu init entry point, called from mce.c with preempt off */ void mce_amd_feature_init(struct cpuinfo_x86 *c) { + struct threshold_block b; unsigned int cpu = smp_processor_id(); u32 low = 0, high = 0, address = 0; unsigned int bank, block; - struct thresh_restart tr; int lvt_off = -1; u8 offset; @@ -186,16 +200,13 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) continue; } - high &= ~MASK_LVTOFF_HI; - high |= lvt_off << 20; - wrmsr(address, low, high); - - threshold_defaults.address = address; - tr.b = &threshold_defaults; - tr.reset = 0; - tr.old_limit = 0; - threshold_restart_bank(&tr); + memset(&b, 0, sizeof(b)); + b.cpu = cpu; + b.bank = bank; + b.block = block; + b.address = address; + mce_threshold_block_init(&b, offset); mce_threshold_vector = amd_threshold_interrupt; } } @@ -298,9 +309,8 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size) b->interrupt_enable = !!new; + memset(&tr, 0, sizeof(tr)); tr.b = b; - tr.reset = 0; - tr.old_limit = 0; smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); @@ -321,10 +331,10 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size) if (new < 1) new = 1; + memset(&tr, 0, sizeof(tr)); tr.old_limit = b->threshold_limit; b->threshold_limit = new; tr.b = b; - tr.reset = 0; smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); -- cgit v0.10.2 From 7203a0494084541575bac6dfc4e153f9e28869b8 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 25 Oct 2010 16:03:36 +0200 Subject: 
mce, amd: Shorten local variables mci_misc_{hi,lo} Shorten these variables to make later changes more readable. Signed-off-by: Robert Richter Acked-by: Borislav Petkov LKML-Reference: <1288015419-29543-3-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index f438318..eb771b9 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -93,37 +93,37 @@ struct thresh_restart { static void threshold_restart_bank(void *_tr) { struct thresh_restart *tr = _tr; - u32 mci_misc_hi, mci_misc_lo; + u32 hi, lo; - rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi); + rdmsr(tr->b->address, lo, hi); - if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) + if (tr->b->threshold_limit < (hi & THRESHOLD_MAX)) tr->reset = 1; /* limit cannot be lower than err count */ if (tr->reset) { /* reset err count and overflow bit */ - mci_misc_hi = - (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | + hi = + (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | (THRESHOLD_MAX - tr->b->threshold_limit); } else if (tr->old_limit) { /* change limit w/o reset */ - int new_count = (mci_misc_hi & THRESHOLD_MAX) + + int new_count = (hi & THRESHOLD_MAX) + (tr->old_limit - tr->b->threshold_limit); - mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | + hi = (hi & ~MASK_ERR_COUNT_HI) | (new_count & THRESHOLD_MAX); } if (tr->set_lvt_off) { /* set new lvt offset */ - mci_misc_hi &= ~MASK_LVTOFF_HI; - mci_misc_hi |= tr->lvt_off << 20; + hi &= ~MASK_LVTOFF_HI; + hi |= tr->lvt_off << 20; } tr->b->interrupt_enable ?
- (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : - (mci_misc_hi &= ~MASK_INT_TYPE_HI); + (hi = (hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : + (hi &= ~MASK_INT_TYPE_HI); - mci_misc_hi |= MASK_COUNT_EN_HI; - wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi); + hi |= MASK_COUNT_EN_HI; + wrmsr(tr->b->address, lo, hi); } static void mce_threshold_block_init(struct threshold_block *b, int offset) -- cgit v0.10.2 From bbaff08dca3c34d0fb6b4c4051354184e33e3df8 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 25 Oct 2010 16:03:37 +0200 Subject: mce, amd: Add helper functions to setup APIC This patch reworks and cleans up mce_amd_feature_init() by introducing helper functions to setup and check the LVT offset. It also fixes line endings in pr_err() calls. Signed-off-by: Robert Richter Acked-by: Borislav Petkov LKML-Reference: <1288015419-29543-4-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index eb771b9..e316684 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -31,8 +31,6 @@ #include #include -#define PFX "mce_threshold: " -#define VERSION "version 1.1.1" #define NR_BANKS 6 #define NR_BLOCKS 9 #define THRESHOLD_MAX 0xFFF @@ -88,6 +86,27 @@ struct thresh_restart { u16 old_limit; }; +static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) +{ + int msr = (hi & MASK_LVTOFF_HI) >> 20; + + if (apic < 0) { + pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt " + "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu, + b->bank, b->block, b->address, hi, lo); + return 0; + } + + if (apic != msr) { + pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d " + "for bank %d, block %d (MSR%08X=0x%x%08x)\n", + b->cpu, apic, b->bank, b->block, b->address, hi, lo); + return 0; + } + + return 1; +}; + /* must be called with correct cpu affinity */ /* Called via 
smp_call_function_single() */ static void threshold_restart_bank(void *_tr) @@ -113,9 +132,11 @@ static void threshold_restart_bank(void *_tr) } if (tr->set_lvt_off) { - /* set new lvt offset */ - hi &= ~MASK_LVTOFF_HI; - hi |= tr->lvt_off << 20; + if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) { + /* set new lvt offset */ + hi &= ~MASK_LVTOFF_HI; + hi |= tr->lvt_off << 20; + } } tr->b->interrupt_enable ? @@ -138,6 +159,15 @@ static void mce_threshold_block_init(struct threshold_block *b, int offset) threshold_restart_bank(&tr); }; +static int setup_APIC_mce(int reserved, int new) +{ + if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR, + APIC_EILVT_MSG_FIX, 0)) + return new; + + return reserved; +} + /* cpu init entry point, called from mce.c with preempt off */ void mce_amd_feature_init(struct cpuinfo_x86 *c) { @@ -145,8 +175,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) unsigned int cpu = smp_processor_id(); u32 low = 0, high = 0, address = 0; unsigned int bank, block; - int lvt_off = -1; - u8 offset; + int offset = -1; for (bank = 0; bank < NR_BANKS; ++bank) { for (block = 0; block < NR_BLOCKS; ++block) { @@ -177,28 +206,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) if (shared_bank[bank] && c->cpu_core_id) break; #endif - offset = (high & MASK_LVTOFF_HI) >> 20; - if (lvt_off < 0) { - if (setup_APIC_eilvt(offset, - THRESHOLD_APIC_VECTOR, - APIC_EILVT_MSG_FIX, 0)) { - pr_err(FW_BUG "cpu %d, failed to " - "setup threshold interrupt " - "for bank %d, block %d " - "(MSR%08X=0x%x%08x)", - smp_processor_id(), bank, block, - address, high, low); - continue; - } - lvt_off = offset; - } else if (lvt_off != offset) { - pr_err(FW_BUG "cpu %d, invalid threshold " - "interrupt offset %d for bank %d," - "block %d (MSR%08X=0x%x%08x)", - smp_processor_id(), lvt_off, bank, - block, address, high, low); - continue; - } + offset = setup_APIC_mce(offset, + (high & MASK_LVTOFF_HI) >> 20); memset(&b, 0, sizeof(b)); b.cpu = cpu; -- cgit v0.10.2 From 
0a17941e71f089b128514f7b5b486e20072ca7dc Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 25 Oct 2010 16:03:38 +0200 Subject: mce, amd: Remove goto in threshold_create_device() Removing the goto in threshold_create_device(). Signed-off-by: Robert Richter Acked-by: Borislav Petkov LKML-Reference: <1288015419-29543-5-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index e316684..5bf2fac 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -622,9 +622,9 @@ static __cpuinit int threshold_create_device(unsigned int cpu) continue; err = threshold_create_bank(cpu, bank); if (err) - goto out; + return err; } -out: + return err; } -- cgit v0.10.2 From eb48c9cb2053e7bb5f7f8f0371cb578a0d439450 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 25 Oct 2010 16:03:39 +0200 Subject: apic, amd: Make firmware bug messages more meaningful This improves error messages in case the BIOS was setting up wrong LVT offsets. 
Signed-off-by: Robert Richter Acked-by: Borislav Petkov LKML-Reference: <1288015419-29543-6-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 850657d..cb13048 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -433,17 +433,18 @@ int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask) reserved = reserve_eilvt_offset(offset, new); if (reserved != new) { - pr_err(FW_BUG "cpu %d, try to setup vector 0x%x, but " - "vector 0x%x was already reserved by another core, " - "APIC%lX=0x%x\n", - smp_processor_id(), new, reserved, reg, old); + pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for " + "vector 0x%x, but the register is already in use for " + "vector 0x%x on another cpu\n", + smp_processor_id(), reg, offset, new, reserved); return -EINVAL; } if (!eilvt_entry_is_changeable(old, new)) { - pr_err(FW_BUG "cpu %d, try to setup vector 0x%x but " - "register already in use, APIC%lX=0x%x\n", - smp_processor_id(), new, reg, old); + pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for " + "vector 0x%x, but the register is already in use for " + "vector 0x%x on this cpu\n", + smp_processor_id(), reg, offset, new, old); return -EBUSY; } diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 42fb46f..08de254 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -566,6 +566,7 @@ static int force_ibs_eilvt_setup(void) ret = setup_ibs_ctl(i); if (ret) return ret; + pr_err(FW_BUG "using offset %d for IBS interrupts\n", i); return 0; } -- cgit v0.10.2 From 2f56f56ad991edd51ffd0baf1182245ee1277a04 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 27 Oct 2010 20:59:49 -0700 Subject: Revert "ceph: update issue_seq on cap grant" This reverts commit d91f2438d881514e4a923fd786dbd94b764a9440. 
The intent of issue_seq is to distinguish between mds->client messages that (re)create the cap and those that do not, which means we should _only_ be updating that value in the create paths. By updating it in handle_cap_grant, we reset it to zero, which then breaks release. The larger question is what workload/problem made me think it should be updated here... Signed-off-by: Sage Weil diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 98ab13e..6e0942f 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2273,8 +2273,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, { struct ceph_inode_info *ci = ceph_inode(inode); int mds = session->s_mds; - unsigned seq = le32_to_cpu(grant->seq); - unsigned issue_seq = le32_to_cpu(grant->issue_seq); + int seq = le32_to_cpu(grant->seq); int newcaps = le32_to_cpu(grant->caps); int issued, implemented, used, wanted, dirty; u64 size = le64_to_cpu(grant->size); @@ -2286,8 +2285,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, int revoked_rdcache = 0; int queue_invalidate = 0; - dout("handle_cap_grant inode %p cap %p mds%d seq %u/%u %s\n", - inode, cap, mds, seq, issue_seq, ceph_cap_string(newcaps)); + dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", + inode, cap, mds, seq, ceph_cap_string(newcaps)); dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, inode->i_size); @@ -2383,7 +2382,6 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, } cap->seq = seq; - cap->issue_seq = issue_seq; /* file layout may have changed */ ci->i_layout = grant->layout; -- cgit v0.10.2 From 313e74412105c670ff8900ec8099a3a5df1fa83c Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Thu, 28 Oct 2010 15:39:02 +0400 Subject: xen: xenfs: privcmd: check put_user() return code put_user() may fail. In this case propagate error code from privcmd_ioctl_mmap_batch(). 
Signed-off-by: Vasiliy Kulikov Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/xenfs/privcmd.c b/drivers/xen/xenfs/privcmd.c index f80be7f..2eb04c8 100644 --- a/drivers/xen/xenfs/privcmd.c +++ b/drivers/xen/xenfs/privcmd.c @@ -266,9 +266,7 @@ static int mmap_return_errors(void *data, void *state) xen_pfn_t *mfnp = data; struct mmap_batch_state *st = state; - put_user(*mfnp, st->user++); - - return 0; + return put_user(*mfnp, st->user++); } static struct vm_operations_struct privcmd_vm_ops; @@ -323,10 +321,8 @@ static long privcmd_ioctl_mmap_batch(void __user *udata) up_write(&mm->mmap_sem); if (state.err > 0) { - ret = 0; - state.user = m.arr; - traverse_pages(m.num, sizeof(xen_pfn_t), + ret = traverse_pages(m.num, sizeof(xen_pfn_t), &pagelist, mmap_return_errors, &state); } -- cgit v0.10.2 From b690c425fe07c725e7f1f7d40303588416cba67f Mon Sep 17 00:00:00 2001 From: Daniel Hellstrom Date: Fri, 29 Oct 2010 13:25:24 -0700 Subject: SPARC/LEON: removed constant timer initialization as if HZ=100, now it reflects the value of HZ Signed-off-by: Daniel Hellstrom Signed-off-by: David S. 
Miller diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c index 2d51527..f01c426 100644 --- a/arch/sparc/kernel/leon_kernel.c +++ b/arch/sparc/kernel/leon_kernel.c @@ -114,7 +114,7 @@ void __init leon_init_timers(irq_handler_t counter_fn) if (leon3_gptimer_regs && leon3_irqctrl_regs) { LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[0].val, 0); LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[0].rld, - (((1000000 / 100) - 1))); + (((1000000 / HZ) - 1))); LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[0].ctrl, 0); #ifdef CONFIG_SMP @@ -128,7 +128,7 @@ void __init leon_init_timers(irq_handler_t counter_fn) } LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[1].val, 0); - LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[1].rld, (((1000000/100) - 1))); + LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[1].rld, (((1000000/HZ) - 1))); LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[1].ctrl, 0); # endif -- cgit v0.10.2 From df9f86faf3ee610527ed02031fe7dd3c8b752e44 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 1 Nov 2010 15:49:23 -0700 Subject: ceph: fix small seq message skipping If the client gets out of sync with the server message sequence number, we normally skip low seq messages (ones we already received). The skip code was also incrementing the expected seq, such that all subsequent messages also appeared old and got skipped, and an eventual timeout on the osd connection. 
This resulted in some lagging requests and console messages like [233480.882885] ceph: skipping osd22 10.138.138.13:6804 seq 2016, expected 2017 [233480.882919] ceph: skipping osd22 10.138.138.13:6804 seq 2017, expected 2018 [233480.882963] ceph: skipping osd22 10.138.138.13:6804 seq 2018, expected 2019 [233480.883488] ceph: skipping osd22 10.138.138.13:6804 seq 2019, expected 2020 [233485.219558] ceph: skipping osd22 10.138.138.13:6804 seq 2020, expected 2021 [233485.906595] ceph: skipping osd22 10.138.138.13:6804 seq 2021, expected 2022 [233490.379536] ceph: skipping osd22 10.138.138.13:6804 seq 2022, expected 2023 [233495.523260] ceph: skipping osd22 10.138.138.13:6804 seq 2023, expected 2024 [233495.923194] ceph: skipping osd22 10.138.138.13:6804 seq 2024, expected 2025 [233500.534614] ceph: tid 6023602 timed out on osd22, will reset osd Reported-by: Theodore Ts'o Signed-off-by: Sage Weil diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 0e8157e..d379abf 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1532,14 +1532,13 @@ static int read_partial_message(struct ceph_connection *con) /* verify seq# */ seq = le64_to_cpu(con->in_hdr.seq); if ((s64)seq - (s64)con->in_seq < 1) { - pr_info("skipping %s%lld %s seq %lld, expected %lld\n", + pr_info("skipping %s%lld %s seq %lld expected %lld\n", ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr.in_addr), seq, con->in_seq + 1); con->in_base_pos = -front_len - middle_len - data_len - sizeof(m->footer); con->in_tag = CEPH_MSGR_TAG_READY; - con->in_seq++; return 0; } else if ((s64)seq - (s64)con->in_seq > 1) { pr_err("read_partial_message bad seq %lld expected %lld\n", -- cgit v0.10.2 From f0573e6db1d1e637e20011f40264b2f5b5880587 Mon Sep 17 00:00:00 2001 From: Alberto Panizzo Date: Mon, 1 Nov 2010 18:00:03 +0100 Subject: mach-pcm037_eet: Fix section mismatch for eet_init_devices() This function should be marked as __init because it is used only in the init phase. 
This fixes the compiler warning: LD arch/arm/mach-mx3/built-in.o WARNING: arch/arm/mach-mx3/built-in.o(.text+0x1328): Section mismatch in reference from the function eet_init_devices() to the (unknown reference) .init.rodata:(unknown) The function eet_init_devices() references the (unknown reference) __initconst (unknown). This is often because eet_init_devices lacks a __initconst annotation or the annotation of (unknown) is wrong. Signed-off-by: Alberto Panizzo Signed-off-by: Sascha Hauer diff --git a/arch/arm/mach-mx3/mach-pcm037_eet.c b/arch/arm/mach-mx3/mach-pcm037_eet.c index 99e0894..3392812 100644 --- a/arch/arm/mach-mx3/mach-pcm037_eet.c +++ b/arch/arm/mach-mx3/mach-pcm037_eet.c @@ -171,7 +171,7 @@ static struct platform_device pcm037_gpio_keys_device = { }, }; -static int eet_init_devices(void) +static int __init eet_init_devices(void) { if (!machine_is_pcm037() || pcm037_variant() != PCM037_EET) return 0; -- cgit v0.10.2 From c64e38ea17a81721da0393584fd807f8434050fa Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 1 Nov 2010 14:32:27 -0400 Subject: xen/blkfront: map REQ_FLUSH into a full barrier Implement a flush as a full barrier, since we have nothing weaker. Signed-off-by: Jeremy Fitzhardinge Acked-by: Christoph Hellwig diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 06e2812..3a318d8 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -245,14 +245,11 @@ static int blkif_ioctl(struct block_device *bdev, fmode_t mode, } /* - * blkif_queue_request + * Generate a Xen blkfront IO request from a blk layer request. Reads + * and writes are handled as expected. Since we lack a loose flush + * request, we map flushes into a full ordered barrier. * - * request block io - * - * id: for guest use only. - * operation: BLKIF_OP_{READ,WRITE,PROBE} - * buffer: buffer to read/write into. this should be a - * virtual address in the guest os.
+ * @req: a request struct */ static int blkif_queue_request(struct request *req) { @@ -289,7 +286,7 @@ static int blkif_queue_request(struct request *req) ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE : BLKIF_OP_READ; - if (req->cmd_flags & REQ_HARDBARRIER) + if (req->cmd_flags & REQ_FLUSH) ring_req->operation = BLKIF_OP_WRITE_BARRIER; ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); @@ -1069,14 +1066,8 @@ static void blkfront_connect(struct blkfront_info *info) */ info->feature_flush = 0; - /* - * The driver doesn't properly handled empty flushes, so - * lets disable barrier support for now. - */ -#if 0 if (!err && barrier) info->feature_flush = REQ_FLUSH; -#endif err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); if (err) { -- cgit v0.10.2 From a945b9801a9bfd4a98bcfd9f6656b5027b254e3f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 1 Nov 2010 17:03:14 -0400 Subject: xen/blkfront: change blk_shadow.request to proper pointer Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 3a318d8..31c8a64 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -65,7 +65,7 @@ enum blkif_state { struct blk_shadow { struct blkif_request req; - unsigned long request; + struct request *request; unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; }; @@ -136,7 +136,7 @@ static void add_id_to_freelist(struct blkfront_info *info, unsigned long id) { info->shadow[id].req.id = info->shadow_free; - info->shadow[id].request = 0; + info->shadow[id].request = NULL; info->shadow_free = id; } @@ -278,7 +278,7 @@ static int blkif_queue_request(struct request *req) /* Fill out a communications ring structure. 
*/ ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); id = get_id_from_freelist(info); - info->shadow[id].request = (unsigned long)req; + info->shadow[id].request = req; ring_req->id = id; ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req); @@ -633,7 +633,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) bret = RING_GET_RESPONSE(&info->ring, i); id = bret->id; - req = (struct request *)info->shadow[id].request; + req = info->shadow[id].request; blkif_completion(&info->shadow[id]); @@ -898,7 +898,7 @@ static int blkif_recover(struct blkfront_info *info) /* Stage 3: Find pending requests and requeue them. */ for (i = 0; i < BLK_RING_SIZE; i++) { /* Not in use? */ - if (copy[i].request == 0) + if (!copy[i].request) continue; /* Grab a request slot and copy shadow state into it. */ @@ -915,9 +915,7 @@ static int blkif_recover(struct blkfront_info *info) req->seg[j].gref, info->xbdev->otherend_id, pfn_to_mfn(info->shadow[req->id].frame[j]), - rq_data_dir( - (struct request *) - info->shadow[req->id].request)); + rq_data_dir(info->shadow[req->id].request)); info->shadow[req->id].req = *req; info->ring.req_prod_pvt++; -- cgit v0.10.2 From be2f8373c188ed1f5d36003c9928e4d695213080 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 2 Nov 2010 10:38:33 -0400 Subject: xen/blkfront: Implement FUA with BLKIF_OP_WRITE_BARRIER The BLKIF_OP_WRITE_BARRIER is a full ordered barrier, so we can use it to implement FUA as well as a plain FLUSH. Signed-off-by: Jeremy Fitzhardinge Acked-by: Christoph Hellwig diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 31c8a64..76b874a 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -286,8 +286,18 @@ static int blkif_queue_request(struct request *req) ring_req->operation = rq_data_dir(req) ? 
BLKIF_OP_WRITE : BLKIF_OP_READ; - if (req->cmd_flags & REQ_FLUSH) + + if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) { + /* + * Ideally we could just do an unordered + * flush-to-disk, but all we have is a full write + * barrier at the moment. However, a barrier write is + * a superset of FUA, so we can implement it the same + * way. (It's also a FLUSH+FUA, since it is + * guaranteed ordered WRT previous writes.) + */ ring_req->operation = BLKIF_OP_WRITE_BARRIER; + } ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); @@ -1065,7 +1075,7 @@ static void blkfront_connect(struct blkfront_info *info) info->feature_flush = 0; if (!err && barrier) - info->feature_flush = REQ_FLUSH; + info->feature_flush = REQ_FLUSH | REQ_FUA; err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); if (err) { -- cgit v0.10.2 From 239b0b441449b2c70492880e6c6a4a885afa74ba Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 2 Nov 2010 13:15:53 -0400 Subject: MAINTAINERS: add drivers/char/hvc_tile.c as maintained by tile Signed-off-by: Chris Metcalf diff --git a/MAINTAINERS b/MAINTAINERS index 0094224..2525b04 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5827,6 +5827,7 @@ M: Chris Metcalf W: http://www.tilera.com/scm/ S: Supported F: arch/tile/ +F: drivers/char/hvc_tile.c TLAN NETWORK DRIVER M: Samuel Chessman -- cgit v0.10.2 From dcb8baeceaa1c629bbd06f472cea023ad08a0c33 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 2 Nov 2010 11:55:58 -0400 Subject: xen/blkfront: cope with backend that fail empty BLKIF_OP_WRITE_BARRIER requests Some(?) Xen block backends fail BLKIF_OP_WRITE_BARRIER requests, which Linux uses as a cache flush operation. In that case, disable use of FLUSH. 
Signed-off-by: Jeremy Fitzhardinge Cc: Daniel Stodden diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 76b874a..4f9e22f 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -656,6 +656,16 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) printk(KERN_WARNING "blkfront: %s: write barrier op failed\n", info->gd->disk_name); error = -EOPNOTSUPP; + } + if (unlikely(bret->status == BLKIF_RSP_ERROR && + info->shadow[id].req.nr_segments == 0)) { + printk(KERN_WARNING "blkfront: %s: empty write barrier op failed\n", + info->gd->disk_name); + error = -EOPNOTSUPP; + } + if (unlikely(error)) { + if (error == -EOPNOTSUPP) + error = 0; info->feature_flush = 0; xlvbd_flush(info); } -- cgit v0.10.2 From c46e0079cec40b49fbdb86a088cfd50b250fef47 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 3 Nov 2010 15:04:45 +0800 Subject: ASoC: Fix snd_soc_register_dais error handling kzalloc for dai may fail at any iteration of the for loop, thus properly unregister already registered DAIs before return error. The error handling code in snd_soc_register_dais() already ensure all the DAIs are unregistered before return error, we can remove the error handling code to unregister DAIs in snd_soc_register_codec(). 
Signed-off-by: Axel Lin Signed-off-by: Mark Brown diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 614a8b3..441285a 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -3043,8 +3043,10 @@ int snd_soc_register_dais(struct device *dev, for (i = 0; i < count; i++) { dai = kzalloc(sizeof(struct snd_soc_dai), GFP_KERNEL); - if (dai == NULL) - return -ENOMEM; + if (dai == NULL) { + ret = -ENOMEM; + goto err; + } /* create DAI component name */ dai->name = fmt_multiple_name(dev, &dai_drv[i]); @@ -3263,9 +3265,6 @@ int snd_soc_register_codec(struct device *dev, return 0; error: - for (i--; i >= 0; i--) - snd_soc_unregister_dai(dev); - if (codec->reg_cache) kfree(codec->reg_cache); kfree(codec->name); -- cgit v0.10.2 From 233538501f707b0176f09af7039fec1e3fcac6e7 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Tue, 2 Nov 2010 15:50:32 +0100 Subject: ASoC: OMAP: fix OMAP1 compilation problem In the new code introduced with commit cf4c87abe238ec17cd0255b4e21abd949d7f811e, "OMAP: McBSP: implement McBSP CLKR and FSR signal muxing via mach-omap2/mcbsp.c", the way omap1 build is supposed to bypass omap2 specific functionality doesn't optimize out all omap2 specific stuff. This breaks linking phase for omap1 machines, giving "undefined reference to `omap2_mcbsp1_mux_clkr_src'" and "undefined reference to `omap2_mcbsp1_mux_fsr_src'" errors. Fix it. Created and tested against linux-2.6.37-rc1. 
Signed-off-by: Janusz Krzysztofik Acked-by: Mark Brown Acked-by: Paul Walmsley Acked-by: Jarkko Nikula Signed-off-by: Liam Girdwood diff --git a/sound/soc/omap/omap-mcbsp.c b/sound/soc/omap/omap-mcbsp.c index d211c9f..7e84f24 100644 --- a/sound/soc/omap/omap-mcbsp.c +++ b/sound/soc/omap/omap-mcbsp.c @@ -644,15 +644,23 @@ static int omap_mcbsp_dai_set_dai_sysclk(struct snd_soc_dai *cpu_dai, case OMAP_MCBSP_CLKR_SRC_CLKR: + if (cpu_class_is_omap1()) + break; omap2_mcbsp1_mux_clkr_src(CLKR_SRC_CLKR); break; case OMAP_MCBSP_CLKR_SRC_CLKX: + if (cpu_class_is_omap1()) + break; omap2_mcbsp1_mux_clkr_src(CLKR_SRC_CLKX); break; case OMAP_MCBSP_FSR_SRC_FSR: + if (cpu_class_is_omap1()) + break; omap2_mcbsp1_mux_fsr_src(FSR_SRC_FSR); break; case OMAP_MCBSP_FSR_SRC_FSX: + if (cpu_class_is_omap1()) + break; omap2_mcbsp1_mux_fsr_src(FSR_SRC_FSX); break; default: -- cgit v0.10.2 From 587d145200f26758940099fbbc301fdd43d3f391 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 25 Oct 2010 19:44:21 -0700 Subject: HID: Remove KERN_DEBUG from dbg_hid use Signed-off-by: Joe Perches Signed-off-by: Jiri Kosina diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 834ef47..76e1f64 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -136,7 +136,8 @@ static int hidinput_setkeycode(struct input_dev *dev, clear_bit(old_keycode, dev->keybit); set_bit(usage->code, dev->keybit); - dbg_hid(KERN_DEBUG "Assigned keycode %d to HID usage code %x\n", keycode, scancode); + dbg_hid("Assigned keycode %d to HID usage code %x\n", + keycode, scancode); /* Set the keybit for the old keycode if the old keycode is used * by another key */ if (hidinput_find_key (hid, 0, old_keycode)) -- cgit v0.10.2 From 74a557e27ff86a5a1f8d5f24c178c70b98367b12 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 3 Nov 2010 09:37:06 -0400 Subject: ASoC: Check return value of strict_strtoul() in WM8962 strict_strtoul() has been made __must_check so do so. 
Signed-off-by: Mark Brown Acked-by: Liam Girdwood diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index 894d0cd..e809274 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -3500,8 +3500,11 @@ static ssize_t wm8962_beep_set(struct device *dev, { struct wm8962_priv *wm8962 = dev_get_drvdata(dev); long int time; + int ret; - strict_strtol(buf, 10, &time); + ret = strict_strtol(buf, 10, &time); + if (ret != 0) + return ret; input_event(wm8962->beep, EV_SND, SND_TONE, time); -- cgit v0.10.2 From 5a39ce5b491a10f4a15bd30b26e55d3533b5f587 Mon Sep 17 00:00:00 2001 From: Henrik Rydberg Date: Wed, 13 Oct 2010 15:58:17 +0200 Subject: HID: egalax: Use kzalloc To avoid unnecessary explicit initialization, allocate zeroed memory. Signed-off-by: Henrik Rydberg Acked-by: Chase Douglas Signed-off-by: Jiri Kosina diff --git a/drivers/hid/hid-egalax.c b/drivers/hid/hid-egalax.c index 54b017a..5a1b52e 100644 --- a/drivers/hid/hid-egalax.c +++ b/drivers/hid/hid-egalax.c @@ -221,7 +221,7 @@ static int egalax_probe(struct hid_device *hdev, const struct hid_device_id *id) struct egalax_data *td; struct hid_report *report; - td = kmalloc(sizeof(struct egalax_data), GFP_KERNEL); + td = kzalloc(sizeof(struct egalax_data), GFP_KERNEL); if (!td) { dev_err(&hdev->dev, "cannot allocate eGalax data\n"); return -ENOMEM; -- cgit v0.10.2 From add330ec29cb00b26cf45ffb4773bb9094a48368 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Thu, 4 Nov 2010 17:05:40 +0100 Subject: ASoC i.MX eukrea tlv320: Fix for multicomponent Signed-off-by: Sascha Hauer Acked-by: Liam Girdwood Signed-off-by: Mark Brown diff --git a/sound/soc/imx/eukrea-tlv320.c b/sound/soc/imx/eukrea-tlv320.c index b596752..dd4fffd 100644 --- a/sound/soc/imx/eukrea-tlv320.c +++ b/sound/soc/imx/eukrea-tlv320.c @@ -34,8 +34,8 @@ static int eukrea_tlv320_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { struct snd_soc_pcm_runtime *rtd = substream->private_data; - struct 
snd_soc_dai *codec_dai = rtd->dai->codec_dai; - struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai; + struct snd_soc_dai *codec_dai = rtd->codec_dai; + struct snd_soc_dai *cpu_dai = rtd->cpu_dai; int ret; ret = snd_soc_dai_set_fmt(cpu_dai, SND_SOC_DAIFMT_I2S | @@ -79,10 +79,10 @@ static struct snd_soc_ops eukrea_tlv320_snd_ops = { static struct snd_soc_dai_link eukrea_tlv320_dai = { .name = "tlv320aic23", .stream_name = "TLV320AIC23", - .codec_dai = "tlv320aic23-hifi", + .codec_dai_name = "tlv320aic23-hifi", .platform_name = "imx-pcm-audio.0", .codec_name = "tlv320aic23-codec.0-001a", - .cpu_dai = "imx-ssi.0", + .cpu_dai_name = "imx-ssi.0", .ops = &eukrea_tlv320_snd_ops, }; -- cgit v0.10.2 From bf0199b7a5085e8d1908d2b0a9c530ed8d142fb8 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Thu, 4 Nov 2010 17:05:41 +0100 Subject: ASoC i.MX phycore ac97: remove unnecessary includes Signed-off-by: Sascha Hauer Acked-by: Liam Girdwood Signed-off-by: Mark Brown diff --git a/sound/soc/imx/phycore-ac97.c b/sound/soc/imx/phycore-ac97.c index 6a65dd7..cf46a17 100644 --- a/sound/soc/imx/phycore-ac97.c +++ b/sound/soc/imx/phycore-ac97.c @@ -20,9 +20,6 @@ #include #include -#include "../codecs/wm9712.h" -#include "imx-ssi.h" - static struct snd_soc_card imx_phycore; static struct snd_soc_ops imx_phycore_hifi_ops = { -- cgit v0.10.2 From f562be51fe9021c913e661c46681cb5bae70f369 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Thu, 4 Nov 2010 17:05:42 +0100 Subject: ASoC i.MX: register dma audio device We have two different transfer methods on i.MX: FIQ and DMA. Since the merge of the ASoC multicomponent support the DMA device is lost. Add it again. Also, imx_ssi_dai_probe has to be called for !AC97 as well.
Signed-off-by: Sascha Hauer Acked-by: Liam Girdwood Signed-off-by: Mark Brown diff --git a/sound/soc/imx/imx-ssi.c b/sound/soc/imx/imx-ssi.c index d4bd345..d2d98c7 100644 --- a/sound/soc/imx/imx-ssi.c +++ b/sound/soc/imx/imx-ssi.c @@ -439,7 +439,22 @@ void imx_pcm_free(struct snd_pcm *pcm) } EXPORT_SYMBOL_GPL(imx_pcm_free); +static int imx_ssi_dai_probe(struct snd_soc_dai *dai) +{ + struct imx_ssi *ssi = dev_get_drvdata(dai->dev); + uint32_t val; + + snd_soc_dai_set_drvdata(dai, ssi); + + val = SSI_SFCSR_TFWM0(ssi->dma_params_tx.burstsize) | + SSI_SFCSR_RFWM0(ssi->dma_params_rx.burstsize); + writel(val, ssi->base + SSI_SFCSR); + + return 0; +} + static struct snd_soc_dai_driver imx_ssi_dai = { + .probe = imx_ssi_dai_probe, .playback = { .channels_min = 2, .channels_max = 2, @@ -455,20 +470,6 @@ static struct snd_soc_dai_driver imx_ssi_dai = { .ops = &imx_ssi_pcm_dai_ops, }; -static int imx_ssi_dai_probe(struct snd_soc_dai *dai) -{ - struct imx_ssi *ssi = dev_get_drvdata(dai->dev); - uint32_t val; - - snd_soc_dai_set_drvdata(dai, ssi); - - val = SSI_SFCSR_TFWM0(ssi->dma_params_tx.burstsize) | - SSI_SFCSR_RFWM0(ssi->dma_params_rx.burstsize); - writel(val, ssi->base + SSI_SFCSR); - - return 0; -} - static struct snd_soc_dai_driver imx_ac97_dai = { .probe = imx_ssi_dai_probe, .ac97_control = 1, @@ -677,7 +678,17 @@ static int imx_ssi_probe(struct platform_device *pdev) goto failed_register; } - ssi->soc_platform_pdev = platform_device_alloc("imx-fiq-pcm-audio", pdev->id); + ssi->soc_platform_pdev_fiq = platform_device_alloc("imx-fiq-pcm-audio", pdev->id); + if (!ssi->soc_platform_pdev_fiq) + goto failed_pdev_fiq_alloc; + platform_set_drvdata(ssi->soc_platform_pdev_fiq, ssi); + ret = platform_device_add(ssi->soc_platform_pdev_fiq); + if (ret) { + dev_err(&pdev->dev, "failed to add platform device\n"); + goto failed_pdev_fiq_add; + } + + ssi->soc_platform_pdev = platform_device_alloc("imx-pcm-audio", pdev->id); if (!ssi->soc_platform_pdev) goto failed_pdev_alloc; 
platform_set_drvdata(ssi->soc_platform_pdev, ssi); @@ -692,6 +703,9 @@ static int imx_ssi_probe(struct platform_device *pdev) failed_pdev_add: platform_device_put(ssi->soc_platform_pdev); failed_pdev_alloc: +failed_pdev_fiq_add: + platform_device_put(ssi->soc_platform_pdev_fiq); +failed_pdev_fiq_alloc: snd_soc_unregister_dai(&pdev->dev); failed_register: failed_ac97: diff --git a/sound/soc/imx/imx-ssi.h b/sound/soc/imx/imx-ssi.h index 53b780d..4fc17da 100644 --- a/sound/soc/imx/imx-ssi.h +++ b/sound/soc/imx/imx-ssi.h @@ -212,6 +212,7 @@ struct imx_ssi { int enabled; struct platform_device *soc_platform_pdev; + struct platform_device *soc_platform_pdev_fiq; }; struct snd_soc_platform *imx_ssi_fiq_init(struct platform_device *pdev, -- cgit v0.10.2 From bf974a0d77a318a733a47c18a47fa6ff8960c361 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Thu, 4 Nov 2010 17:05:43 +0100 Subject: ASoC i.MX: switch to new DMA api Signed-off-by: Sascha Hauer Acked-by: Liam Girdwood Signed-off-by: Mark Brown diff --git a/sound/soc/imx/imx-pcm-dma-mx2.c b/sound/soc/imx/imx-pcm-dma-mx2.c index fd493ee..671ef8d 100644 --- a/sound/soc/imx/imx-pcm-dma-mx2.c +++ b/sound/soc/imx/imx-pcm-dma-mx2.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -27,165 +28,146 @@ #include #include -#include +#include #include "imx-ssi.h" struct imx_pcm_runtime_data { - int sg_count; - struct scatterlist *sg_list; - int period; + int period_bytes; int periods; - unsigned long dma_addr; int dma; - struct snd_pcm_substream *substream; unsigned long offset; unsigned long size; - unsigned long period_cnt; void *buf; int period_time; + struct dma_async_tx_descriptor *desc; + struct dma_chan *dma_chan; + struct imx_dma_data dma_data; }; -/* Called by the DMA framework when a period has elapsed */ -static void imx_ssi_dma_progression(int channel, void *data, - struct scatterlist *sg) +static void audio_dma_irq(void *data) { - struct snd_pcm_substream *substream = data; + struct 
snd_pcm_substream *substream = (struct snd_pcm_substream *)data; struct snd_pcm_runtime *runtime = substream->runtime; struct imx_pcm_runtime_data *iprtd = runtime->private_data; - if (!sg) - return; - - runtime = iprtd->substream->runtime; + iprtd->offset += iprtd->period_bytes; + iprtd->offset %= iprtd->period_bytes * iprtd->periods; - iprtd->offset = sg->dma_address - runtime->dma_addr; - - snd_pcm_period_elapsed(iprtd->substream); + snd_pcm_period_elapsed(substream); } -static void imx_ssi_dma_callback(int channel, void *data) +static bool filter(struct dma_chan *chan, void *param) { - pr_err("%s shouldn't be called\n", __func__); -} + struct imx_pcm_runtime_data *iprtd = param; -static void snd_imx_dma_err_callback(int channel, void *data, int err) -{ - struct snd_pcm_substream *substream = data; - struct snd_soc_pcm_runtime *rtd = substream->private_data; - struct imx_pcm_dma_params *dma_params = - snd_soc_dai_get_dma_data(rtd->dai->cpu_dai, substream); - struct snd_pcm_runtime *runtime = substream->runtime; - struct imx_pcm_runtime_data *iprtd = runtime->private_data; - int ret; + if (!imx_dma_is_general_purpose(chan)) + return false; - pr_err("DMA timeout on channel %d -%s%s%s%s\n", - channel, - err & IMX_DMA_ERR_BURST ? " burst" : "", - err & IMX_DMA_ERR_REQUEST ? " request" : "", - err & IMX_DMA_ERR_TRANSFER ? " transfer" : "", - err & IMX_DMA_ERR_BUFFER ? " buffer" : ""); + chan->private = &iprtd->dma_data; - imx_dma_disable(iprtd->dma); - ret = imx_dma_setup_sg(iprtd->dma, iprtd->sg_list, iprtd->sg_count, - IMX_DMA_LENGTH_LOOP, dma_params->dma_addr, - substream->stream == SNDRV_PCM_STREAM_PLAYBACK ? 
- DMA_MODE_WRITE : DMA_MODE_READ); - if (!ret) - imx_dma_enable(iprtd->dma); + return true; } -static int imx_ssi_dma_alloc(struct snd_pcm_substream *substream) +static int imx_ssi_dma_alloc(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct imx_pcm_dma_params *dma_params; struct snd_pcm_runtime *runtime = substream->runtime; struct imx_pcm_runtime_data *iprtd = runtime->private_data; + struct dma_slave_config slave_config; + dma_cap_mask_t mask; + enum dma_slave_buswidth buswidth; int ret; dma_params = snd_soc_dai_get_dma_data(rtd->cpu_dai, substream); - iprtd->dma = imx_dma_request_by_prio(DRV_NAME, DMA_PRIO_HIGH); - if (iprtd->dma < 0) { - pr_err("Failed to claim the audio DMA\n"); - return -ENODEV; - } + iprtd->dma_data.peripheral_type = IMX_DMATYPE_SSI; + iprtd->dma_data.priority = DMA_PRIO_HIGH; + iprtd->dma_data.dma_request = dma_params->dma; - ret = imx_dma_setup_handlers(iprtd->dma, - imx_ssi_dma_callback, - snd_imx_dma_err_callback, substream); - if (ret) - goto out; + /* Try to grab a DMA channel */ + dma_cap_zero(mask); + dma_cap_set(DMA_SLAVE, mask); + iprtd->dma_chan = dma_request_channel(mask, filter, iprtd); + if (!iprtd->dma_chan) + return -EINVAL; - ret = imx_dma_setup_progression_handler(iprtd->dma, - imx_ssi_dma_progression); - if (ret) { - pr_err("Failed to setup the DMA handler\n"); - goto out; + switch (params_format(params)) { + case SNDRV_PCM_FORMAT_S16_LE: + buswidth = DMA_SLAVE_BUSWIDTH_2_BYTES; + break; + case SNDRV_PCM_FORMAT_S20_3LE: + case SNDRV_PCM_FORMAT_S24_LE: + buswidth = DMA_SLAVE_BUSWIDTH_4_BYTES; + break; + default: + return 0; } - ret = imx_dma_config_channel(iprtd->dma, - IMX_DMA_MEMSIZE_16 | IMX_DMA_TYPE_FIFO, - IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR, - dma_params->dma, 1); - if (ret < 0) { - pr_err("Cannot configure DMA channel: %d\n", ret); - goto out; + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { + slave_config.direction 
= DMA_TO_DEVICE; + slave_config.dst_addr = dma_params->dma_addr; + slave_config.dst_addr_width = buswidth; + slave_config.dst_maxburst = dma_params->burstsize; + } else { + slave_config.direction = DMA_FROM_DEVICE; + slave_config.src_addr = dma_params->dma_addr; + slave_config.src_addr_width = buswidth; + slave_config.src_maxburst = dma_params->burstsize; } - imx_dma_config_burstlen(iprtd->dma, dma_params->burstsize * 2); + ret = dmaengine_slave_config(iprtd->dma_chan, &slave_config); + if (ret) + return ret; return 0; -out: - imx_dma_free(iprtd->dma); - return ret; } static int snd_imx_pcm_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { + struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_pcm_runtime *runtime = substream->runtime; struct imx_pcm_runtime_data *iprtd = runtime->private_data; - int i; unsigned long dma_addr; + struct dma_chan *chan; + struct imx_pcm_dma_params *dma_params; + int ret; - imx_ssi_dma_alloc(substream); + dma_params = snd_soc_dai_get_dma_data(rtd->cpu_dai, substream); + ret = imx_ssi_dma_alloc(substream, params); + if (ret) + return ret; + chan = iprtd->dma_chan; iprtd->size = params_buffer_bytes(params); iprtd->periods = params_periods(params); - iprtd->period = params_period_bytes(params); + iprtd->period_bytes = params_period_bytes(params); iprtd->offset = 0; iprtd->period_time = HZ / (params_rate(params) / params_period_size(params)); snd_pcm_set_runtime_buffer(substream, &substream->dma_buffer); - if (iprtd->sg_count != iprtd->periods) { - kfree(iprtd->sg_list); - - iprtd->sg_list = kcalloc(iprtd->periods + 1, - sizeof(struct scatterlist), GFP_KERNEL); - if (!iprtd->sg_list) - return -ENOMEM; - iprtd->sg_count = iprtd->periods + 1; - } - - sg_init_table(iprtd->sg_list, iprtd->sg_count); dma_addr = runtime->dma_addr; - for (i = 0; i < iprtd->periods; i++) { - iprtd->sg_list[i].page_link = 0; - iprtd->sg_list[i].offset = 0; - iprtd->sg_list[i].dma_address = dma_addr; - 
iprtd->sg_list[i].length = iprtd->period; - dma_addr += iprtd->period; + iprtd->buf = (unsigned int *)substream->dma_buffer.area; + + iprtd->desc = chan->device->device_prep_dma_cyclic(chan, dma_addr, + iprtd->period_bytes * iprtd->periods, + iprtd->period_bytes, + substream->stream == SNDRV_PCM_STREAM_PLAYBACK ? + DMA_TO_DEVICE : DMA_FROM_DEVICE); + if (!iprtd->desc) { + dev_err(&chan->dev->device, "cannot prepare slave dma\n"); + return -EINVAL; } - /* close the loop */ - iprtd->sg_list[iprtd->sg_count - 1].offset = 0; - iprtd->sg_list[iprtd->sg_count - 1].length = 0; - iprtd->sg_list[iprtd->sg_count - 1].page_link = - ((unsigned long) iprtd->sg_list | 0x01) & ~0x02; + iprtd->desc->callback = audio_dma_irq; + iprtd->desc->callback_param = substream; + return 0; } @@ -194,41 +176,21 @@ static int snd_imx_pcm_hw_free(struct snd_pcm_substream *substream) struct snd_pcm_runtime *runtime = substream->runtime; struct imx_pcm_runtime_data *iprtd = runtime->private_data; - if (iprtd->dma >= 0) { - imx_dma_free(iprtd->dma); - iprtd->dma = -EINVAL; + if (iprtd->dma_chan) { + dma_release_channel(iprtd->dma_chan); + iprtd->dma_chan = NULL; } - kfree(iprtd->sg_list); - iprtd->sg_list = NULL; - return 0; } static int snd_imx_pcm_prepare(struct snd_pcm_substream *substream) { - struct snd_pcm_runtime *runtime = substream->runtime; struct snd_soc_pcm_runtime *rtd = substream->private_data; struct imx_pcm_dma_params *dma_params; - struct imx_pcm_runtime_data *iprtd = runtime->private_data; - int err; dma_params = snd_soc_dai_get_dma_data(rtd->cpu_dai, substream); - iprtd->substream = substream; - iprtd->buf = (unsigned int *)substream->dma_buffer.area; - iprtd->period_cnt = 0; - - pr_debug("%s: buf: %p period: %d periods: %d\n", - __func__, iprtd->buf, iprtd->period, iprtd->periods); - - err = imx_dma_setup_sg(iprtd->dma, iprtd->sg_list, iprtd->sg_count, - IMX_DMA_LENGTH_LOOP, dma_params->dma_addr, - substream->stream == SNDRV_PCM_STREAM_PLAYBACK ? 
- DMA_MODE_WRITE : DMA_MODE_READ); - if (err) - return err; - return 0; } @@ -241,14 +203,14 @@ static int snd_imx_pcm_trigger(struct snd_pcm_substream *substream, int cmd) case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_RESUME: case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: - imx_dma_enable(iprtd->dma); + dmaengine_submit(iprtd->desc); break; case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - imx_dma_disable(iprtd->dma); + dmaengine_terminate_all(iprtd->dma_chan); break; default: @@ -263,6 +225,9 @@ static snd_pcm_uframes_t snd_imx_pcm_pointer(struct snd_pcm_substream *substream struct snd_pcm_runtime *runtime = substream->runtime; struct imx_pcm_runtime_data *iprtd = runtime->private_data; + pr_debug("%s: %ld %ld\n", __func__, iprtd->offset, + bytes_to_frames(substream->runtime, iprtd->offset)); + return bytes_to_frames(substream->runtime, iprtd->offset); } @@ -279,7 +244,7 @@ static struct snd_pcm_hardware snd_imx_hardware = { .channels_max = 2, .buffer_bytes_max = IMX_SSI_DMABUF_SIZE, .period_bytes_min = 128, - .period_bytes_max = 16 * 1024, + .period_bytes_max = 65535, /* Limited by SDMA engine */ .periods_min = 2, .periods_max = 255, .fifo_size = 0, @@ -304,11 +269,23 @@ static int snd_imx_open(struct snd_pcm_substream *substream) } snd_soc_set_runtime_hwparams(substream, &snd_imx_hardware); + + return 0; +} + +static int snd_imx_close(struct snd_pcm_substream *substream) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct imx_pcm_runtime_data *iprtd = runtime->private_data; + + kfree(iprtd); + return 0; } static struct snd_pcm_ops imx_pcm_ops = { .open = snd_imx_open, + .close = snd_imx_close, .ioctl = snd_pcm_lib_ioctl, .hw_params = snd_imx_pcm_hw_params, .hw_free = snd_imx_pcm_hw_free, @@ -340,7 +317,6 @@ static struct platform_driver imx_pcm_driver = { .name = "imx-pcm-audio", .owner = THIS_MODULE, }, - .probe = imx_soc_platform_probe, .remove = __devexit_p(imx_soc_platform_remove), }; @@ -356,4 
+332,3 @@ static void __exit snd_imx_pcm_exit(void) platform_driver_unregister(&imx_pcm_driver); } module_exit(snd_imx_pcm_exit); - diff --git a/sound/soc/imx/imx-ssi.h b/sound/soc/imx/imx-ssi.h index 4fc17da..a4406a1 100644 --- a/sound/soc/imx/imx-ssi.h +++ b/sound/soc/imx/imx-ssi.h @@ -185,6 +185,9 @@ #define DRV_NAME "imx-ssi" +#include +#include + struct imx_pcm_dma_params { int dma; unsigned long dma_addr; -- cgit v0.10.2 From 6424dca23e6b5a2f7a19a69cf7c0990b11717b00 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Thu, 4 Nov 2010 17:05:44 +0100 Subject: phycore-ac97: add ac97 to cardname We have different codecs on the pcm038 (ac97 wm9712 and mc13783). To make alsactl restore work correctly these should have different names. Signed-off-by: Sascha Hauer Acked-by: Liam Girdwood Signed-off-by: Mark Brown diff --git a/sound/soc/imx/phycore-ac97.c b/sound/soc/imx/phycore-ac97.c index cf46a17..39f2373 100644 --- a/sound/soc/imx/phycore-ac97.c +++ b/sound/soc/imx/phycore-ac97.c @@ -38,7 +38,7 @@ static struct snd_soc_dai_link imx_phycore_dai_ac97[] = { }; static struct snd_soc_card imx_phycore = { - .name = "PhyCORE-audio", + .name = "PhyCORE-ac97-audio", .dai_link = imx_phycore_dai_ac97, .num_links = ARRAY_SIZE(imx_phycore_dai_ac97), }; -- cgit v0.10.2 From 71a295602ed967fa22d96d57a2e38bb86de24db7 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 5 Nov 2010 13:50:48 -0400 Subject: ASoC: Lock the CODEC in PXA external jack controls When doing anything with the system, especially DAPM, we need to hold the CODEC mutex. 
Signed-off-by: Mark Brown Acked-by: Liam Girdwood diff --git a/sound/soc/pxa/corgi.c b/sound/soc/pxa/corgi.c index 97e9423..f451acd 100644 --- a/sound/soc/pxa/corgi.c +++ b/sound/soc/pxa/corgi.c @@ -100,8 +100,13 @@ static int corgi_startup(struct snd_pcm_substream *substream) struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_codec *codec = rtd->codec; + mutex_lock(&codec->mutex); + /* check the jack status at stream startup */ corgi_ext_control(codec); + + mutex_unlock(&codec->mutex); + return 0; } diff --git a/sound/soc/pxa/magician.c b/sound/soc/pxa/magician.c index b8207ce..5ef0526 100644 --- a/sound/soc/pxa/magician.c +++ b/sound/soc/pxa/magician.c @@ -72,9 +72,13 @@ static int magician_startup(struct snd_pcm_substream *substream) struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_codec *codec = rtd->codec; + mutex_lock(&codec->mutex); + /* check the jack status at stream startup */ magician_ext_control(codec); + mutex_unlock(&codec->mutex); + return 0; } diff --git a/sound/soc/pxa/poodle.c b/sound/soc/pxa/poodle.c index af84ee9..84edd03 100644 --- a/sound/soc/pxa/poodle.c +++ b/sound/soc/pxa/poodle.c @@ -77,8 +77,13 @@ static int poodle_startup(struct snd_pcm_substream *substream) struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_codec *codec = rtd->codec; + mutex_lock(&codec->mutex); + /* check the jack status at stream startup */ poodle_ext_control(codec); + + mutex_unlock(&codec->mutex); + return 0; } diff --git a/sound/soc/pxa/spitz.c b/sound/soc/pxa/spitz.c index f470f36..0b30d7d 100644 --- a/sound/soc/pxa/spitz.c +++ b/sound/soc/pxa/spitz.c @@ -108,8 +108,13 @@ static int spitz_startup(struct snd_pcm_substream *substream) struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_codec *codec = rtd->codec; + mutex_lock(&codec->mutex); + /* check the jack status at stream startup */ spitz_ext_control(codec); + + mutex_unlock(&codec->mutex); + return 0; } diff --git 
a/sound/soc/pxa/tosa.c b/sound/soc/pxa/tosa.c index 73d0edd..7b983f9 100644 --- a/sound/soc/pxa/tosa.c +++ b/sound/soc/pxa/tosa.c @@ -81,8 +81,13 @@ static int tosa_startup(struct snd_pcm_substream *substream) struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_codec *codec = rtd->codec; + mutex_lock(&codec->mutex); + /* check the jack status at stream startup */ tosa_ext_control(codec); + + mutex_unlock(&codec->mutex); + return 0; } -- cgit v0.10.2 From 197ebd4053c42351e3737d83aebb33ed97ed2dd8 Mon Sep 17 00:00:00 2001 From: Dimitris Papastamos Date: Fri, 5 Nov 2010 10:36:24 +0000 Subject: ASoC: WM8776: Removed unneeded struct member The member reg_cache is not used at all and therefore it should be removed. This member was usually needed for older versions of ASoC that did not handle caching automatically and had to be done in the driver itself. Signed-off-by: Dimitris Papastamos Acked-by: Liam Girdwood Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/wm8776.c b/sound/soc/codecs/wm8776.c index 04182c4..0132a27 100644 --- a/sound/soc/codecs/wm8776.c +++ b/sound/soc/codecs/wm8776.c @@ -34,7 +34,6 @@ /* codec private data */ struct wm8776_priv { enum snd_soc_control_type control_type; - u16 reg_cache[WM8776_CACHEREGNUM]; int sysclk[2]; }; -- cgit v0.10.2 From d8b16b3d1c9d8d9124d647d05797383d35e2d645 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sat, 6 Nov 2010 12:41:16 -0700 Subject: ceph: fix bad pointer dereference in ceph_fill_trace We dereference *in a few lines down, but only set it on rename. It is apparently pretty rare for this to trigger, but I have been hitting it with a clustered MDSs. 
Signed-off-by: Sage Weil diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 1d6a45b..cd0432c 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1055,7 +1055,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, ininfo = rinfo->targeti.in; vino.ino = le64_to_cpu(ininfo->ino); vino.snap = le64_to_cpu(ininfo->snapid); - if (!dn->d_inode) { + in = dn->d_inode; + if (!in) { in = ceph_get_inode(sb, vino); if (IS_ERR(in)) { pr_err("fill_trace bad get_inode " -- cgit v0.10.2 From 7421ab8041d98363edfb85955fa3b9849ffae366 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sun, 7 Nov 2010 09:07:15 -0800 Subject: ceph: fix open for write on clustered mds Normally when we open a file we already have a cap, and simply update the wanted set. However, if we open a file for write, but don't have an auth cap, that doesn't work; we need to open a new cap with the auth MDS. Only reuse existing caps if we are opening for read or the existing cap is auth. Signed-off-by: Sage Weil diff --git a/fs/ceph/file.c b/fs/ceph/file.c index e77c28c..87ee944 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -154,11 +154,13 @@ int ceph_open(struct inode *inode, struct file *file) } /* - * No need to block if we have any caps. Update wanted set + * No need to block if we have caps on the auth MDS (for + * write) or any MDS (for read). Update wanted set * asynchronously. */ spin_lock(&inode->i_lock); - if (__ceph_is_any_real_caps(ci)) { + if (__ceph_is_any_real_caps(ci) && + (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) { int mds_wanted = __ceph_caps_mds_wanted(ci); int issued = __ceph_caps_issued(ci, NULL); -- cgit v0.10.2 From 912a9b0319a8eb9e0834b19a25e01013ab2d6a9f Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sun, 7 Nov 2010 09:37:25 -0800 Subject: ceph: only let auth caps update max_size Only the auth MDS has a meaningful max_size value for us, so only update it in fill_inode if we're being issued an auth cap. 
Otherwise, a random stat result from a non-auth MDS can clobber a meaningful max_size, get the client<->mds cap state out of sync, and make writes hang. Specifically, even if the client re-requests a larger max_size (which it will), the MDS won't respond because as far as it knows we already have a sufficiently large value. Signed-off-by: Sage Weil diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index cd0432c..0a49ffd 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -606,7 +606,14 @@ static int fill_inode(struct inode *inode, le32_to_cpu(info->time_warp_seq), &ctime, &mtime, &atime); - ci->i_max_size = le64_to_cpu(info->max_size); + /* only update max_size on auth cap */ + if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && + ci->i_max_size != le64_to_cpu(info->max_size)) { + dout("max_size %lld -> %llu\n", ci->i_max_size, + le64_to_cpu(info->max_size)); + ci->i_max_size = le64_to_cpu(info->max_size); + } + ci->i_layout = info->layout; inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; -- cgit v0.10.2 From feb4cc9bb433bf1491ac5ffbba133f3258dacf06 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sun, 7 Nov 2010 09:39:00 -0800 Subject: ceph: re-request max_size if cap auth changes If the auth cap migrates to another MDS, clear requested_max_size so that we resend any pending max_size increase requests. This fixes potential hangs on writes that extend a file and race with a cap migration between MDSs. 
Signed-off-by: Sage Weil diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 6e0942f..04b207b 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2689,6 +2689,11 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, NULL /* no caps context */); try_flush_caps(inode, session, NULL); up_read(&mdsc->snap_rwsem); + + /* make sure we re-request max_size, if necessary */ + spin_lock(&inode->i_lock); + ci->i_requested_max_size = 0; + spin_unlock(&inode->i_lock); } /* -- cgit v0.10.2 From 25d5c699f983a2da51f5165eb9a8fc6338124b6c Mon Sep 17 00:00:00 2001 From: Philip Rakity Date: Sun, 7 Nov 2010 16:22:28 -0500 Subject: mmc: Fix printing of card DDR type We should not call mmc_card_set_ddr_mode() if we are in single data mode. This sets DDR and causes the kernel log to say the card is DDR when it is not. Explicitly set ddr to 0 rather than rely on MMC_SDR_MODE being 0 when doing the checks. Signed-off-by: Philip Rakity Acked-by: Linus Walleij Acked-by: Kyungmin Park Signed-off-by: Chris Ball diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 995261f..77f93c3 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -375,7 +375,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, struct mmc_card *oldcard) { struct mmc_card *card; - int err, ddr = MMC_SDR_MODE; + int err, ddr = 0; u32 cid[4]; unsigned int max_dtr; @@ -562,7 +562,11 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, 1 << bus_width, ddr); err = 0; } else { - mmc_card_set_ddr_mode(card); + if (ddr) + mmc_card_set_ddr_mode(card); + else + ddr = MMC_SDR_MODE; + mmc_set_bus_width_ddr(card->host, bus_width, ddr); } } -- cgit v0.10.2 From 14d4031d21d8a63ad84e5ab9198d0503efabc780 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Thu, 4 Nov 2010 13:59:11 +0800 Subject: mmc: ushc: Return proper error code for ushc_probe() Improves error handling in the ushc driver. 
Signed-off-by: Axel Lin Acked-by: David Vrabel Signed-off-by: Chris Ball diff --git a/drivers/mmc/host/ushc.c b/drivers/mmc/host/ushc.c index b4ead4a..f8f65df 100644 --- a/drivers/mmc/host/ushc.c +++ b/drivers/mmc/host/ushc.c @@ -425,7 +425,7 @@ static int ushc_probe(struct usb_interface *intf, const struct usb_device_id *id struct usb_device *usb_dev = interface_to_usbdev(intf); struct mmc_host *mmc; struct ushc_data *ushc; - int ret = -ENOMEM; + int ret; mmc = mmc_alloc_host(sizeof(struct ushc_data), &intf->dev); if (mmc == NULL) @@ -462,11 +462,15 @@ static int ushc_probe(struct usb_interface *intf, const struct usb_device_id *id mmc->max_blk_count = 511; ushc->int_urb = usb_alloc_urb(0, GFP_KERNEL); - if (ushc->int_urb == NULL) + if (ushc->int_urb == NULL) { + ret = -ENOMEM; goto err; + } ushc->int_data = kzalloc(sizeof(struct ushc_int_data), GFP_KERNEL); - if (ushc->int_data == NULL) + if (ushc->int_data == NULL) { + ret = -ENOMEM; goto err; + } usb_fill_int_urb(ushc->int_urb, ushc->usb_dev, usb_rcvintpipe(usb_dev, intf->cur_altsetting->endpoint[0].desc.bEndpointAddress), @@ -475,11 +479,15 @@ static int ushc_probe(struct usb_interface *intf, const struct usb_device_id *id intf->cur_altsetting->endpoint[0].desc.bInterval); ushc->cbw_urb = usb_alloc_urb(0, GFP_KERNEL); - if (ushc->cbw_urb == NULL) + if (ushc->cbw_urb == NULL) { + ret = -ENOMEM; goto err; + } ushc->cbw = kzalloc(sizeof(struct ushc_cbw), GFP_KERNEL); - if (ushc->cbw == NULL) + if (ushc->cbw == NULL) { + ret = -ENOMEM; goto err; + } ushc->cbw->signature = USHC_CBW_SIGNATURE; usb_fill_bulk_urb(ushc->cbw_urb, ushc->usb_dev, usb_sndbulkpipe(usb_dev, 2), @@ -487,15 +495,21 @@ static int ushc_probe(struct usb_interface *intf, const struct usb_device_id *id cbw_callback, ushc); ushc->data_urb = usb_alloc_urb(0, GFP_KERNEL); - if (ushc->data_urb == NULL) + if (ushc->data_urb == NULL) { + ret = -ENOMEM; goto err; + } ushc->csw_urb = usb_alloc_urb(0, GFP_KERNEL); - if (ushc->csw_urb == NULL) + if 
(ushc->csw_urb == NULL) { + ret = -ENOMEM; goto err; + } ushc->csw = kzalloc(sizeof(struct ushc_cbw), GFP_KERNEL); - if (ushc->csw == NULL) + if (ushc->csw == NULL) { + ret = -ENOMEM; goto err; + } usb_fill_bulk_urb(ushc->csw_urb, ushc->usb_dev, usb_rcvbulkpipe(usb_dev, 6), ushc->csw, sizeof(struct ushc_csw), csw_callback, ushc); -- cgit v0.10.2 From 5f619704d18b93869d045abc49e09cdba109b04b Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Thu, 4 Nov 2010 22:20:39 +0000 Subject: mmc: sdhci: Properly enable SDIO IRQ wakeups A little more work was needed for SDIO IRQ wakeups to be functional. Wake-on-WLAN on the SD WiFi adapter in the XO-1.5 laptop is now working. Signed-off-by: Daniel Drake Signed-off-by: Chris Ball diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c index 55746ba..d196e77 100644 --- a/drivers/mmc/host/sdhci-pci.c +++ b/drivers/mmc/host/sdhci-pci.c @@ -637,6 +637,7 @@ static int sdhci_pci_suspend (struct pci_dev *pdev, pm_message_t state) { struct sdhci_pci_chip *chip; struct sdhci_pci_slot *slot; + mmc_pm_flag_t slot_pm_flags; mmc_pm_flag_t pm_flags = 0; int i, ret; @@ -657,7 +658,11 @@ static int sdhci_pci_suspend (struct pci_dev *pdev, pm_message_t state) return ret; } - pm_flags |= slot->host->mmc->pm_flags; + slot_pm_flags = slot->host->mmc->pm_flags; + if (slot_pm_flags & MMC_PM_WAKE_SDIO_IRQ) + sdhci_enable_irq_wakeups(slot->host); + + pm_flags |= slot_pm_flags; } if (chip->fixes && chip->fixes->suspend) { @@ -671,8 +676,10 @@ static int sdhci_pci_suspend (struct pci_dev *pdev, pm_message_t state) pci_save_state(pdev); if (pm_flags & MMC_PM_KEEP_POWER) { - if (pm_flags & MMC_PM_WAKE_SDIO_IRQ) + if (pm_flags & MMC_PM_WAKE_SDIO_IRQ) { + pci_pme_active(pdev, true); pci_enable_wake(pdev, PCI_D3hot, 1); + } pci_set_power_state(pdev, PCI_D3hot); } else { pci_enable_wake(pdev, pci_choose_state(pdev, state), 0); diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 782c0ee..154cbf8 100644 --- 
a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1681,6 +1681,16 @@ int sdhci_resume_host(struct sdhci_host *host) EXPORT_SYMBOL_GPL(sdhci_resume_host); +void sdhci_enable_irq_wakeups(struct sdhci_host *host) +{ + u8 val; + val = sdhci_readb(host, SDHCI_WAKE_UP_CONTROL); + val |= SDHCI_WAKE_ON_INT; + sdhci_writeb(host, val, SDHCI_WAKE_UP_CONTROL); +} + +EXPORT_SYMBOL_GPL(sdhci_enable_irq_wakeups); + #endif /* CONFIG_PM */ /*****************************************************************************\ diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index b7b8a3b..d52a716 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -87,6 +87,9 @@ #define SDHCI_BLOCK_GAP_CONTROL 0x2A #define SDHCI_WAKE_UP_CONTROL 0x2B +#define SDHCI_WAKE_ON_INT 0x01 +#define SDHCI_WAKE_ON_INSERT 0x02 +#define SDHCI_WAKE_ON_REMOVE 0x04 #define SDHCI_CLOCK_CONTROL 0x2C #define SDHCI_DIVIDER_SHIFT 8 @@ -317,6 +320,7 @@ extern void sdhci_remove_host(struct sdhci_host *host, int dead); #ifdef CONFIG_PM extern int sdhci_suspend_host(struct sdhci_host *host, pm_message_t state); extern int sdhci_resume_host(struct sdhci_host *host); +extern void sdhci_enable_irq_wakeups(struct sdhci_host *host); #endif #endif /* __SDHCI_HW_H */ -- cgit v0.10.2 From 37865fe91582582a6f6c00652f6a2b1ff71f8a78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eric=20B=C3=A9nard?= Date: Sat, 23 Oct 2010 01:57:21 +0200 Subject: mmc: sdhci-esdhc-imx: fix timeout on i.MX's sdhci MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes timeout problems on i.MX's sdhci as suggested by Richard Zhu. 
Tested on: - i.MX257: not needed - i.MX357: needed - i.MX515: needed More details can be found here: http://lists.infradead.org/pipermail/linux-arm-kernel/2010-October/029748.html Signed-off-by: Eric Bénard Tested-by: Shawn Guo Acked-by: Wolfram Sang Signed-off-by: Chris Ball diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 2e9cca1..28e63ef 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "sdhci.h" #include "sdhci-pltfm.h" #include "sdhci-esdhc.h" @@ -112,6 +113,9 @@ static int esdhc_pltfm_init(struct sdhci_host *host, struct sdhci_pltfm_data *pd clk_enable(clk); pltfm_host->clk = clk; + if (cpu_is_mx35() || cpu_is_mx51()) + host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL; + return 0; } -- cgit v0.10.2 From 16a790bcce87740d219b7227eaa4df72804097ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eric=20B=C3=A9nard?= Date: Sat, 23 Oct 2010 01:57:22 +0200 Subject: mmc: sdhci-esdhc-imx: enable QUIRK_NO_MULTIBLOCK only for i.MX25 and i.MX35 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only these CPUs list the bug in their errata. 
Signed-off-by: Eric Bénard Acked-by: Wolfram Sang Signed-off-by: Chris Ball diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 28e63ef..9b82910 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -116,6 +116,10 @@ static int esdhc_pltfm_init(struct sdhci_host *host, struct sdhci_pltfm_data *pd if (cpu_is_mx35() || cpu_is_mx51()) host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL; + /* Fix errata ENGcm07207 which is present on i.MX25 and i.MX35 */ + if (cpu_is_mx25() || cpu_is_mx35()) + host->quirks |= SDHCI_QUIRK_NO_MULTIBLOCK; + return 0; } @@ -137,10 +141,8 @@ static struct sdhci_ops sdhci_esdhc_ops = { }; struct sdhci_pltfm_data sdhci_esdhc_imx_pdata = { - .quirks = ESDHC_DEFAULT_QUIRKS | SDHCI_QUIRK_NO_MULTIBLOCK - | SDHCI_QUIRK_BROKEN_ADMA, + .quirks = ESDHC_DEFAULT_QUIRKS | SDHCI_QUIRK_BROKEN_ADMA, /* ADMA has issues. Might be fixable */ - /* NO_MULTIBLOCK might be MX35 only (Errata: ENGcm07207) */ .ops = &sdhci_esdhc_ops, .init = esdhc_pltfm_init, .exit = esdhc_pltfm_exit, -- cgit v0.10.2 From cd045cb42a266882ac24bc21a3a8d03683c72954 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 4 Nov 2010 11:05:05 -0700 Subject: ceph: fix rdcache_gen usage and invalidate We used to use rdcache_gen to indicate whether we "might" have cached pages. Now we just look at the mapping to determine that. However, some old behavior remains from that transition. First, rdcache_gen == 0 no longer means we have no pages. That can happen at any time (presumably when we carry FILE_CACHE). We should not reset it to zero, and we should not check that it is zero. That means that the only purpose for rdcache_revoking is to resolve races between new issues of FILE_CACHE and an async invalidate. If they are equal, we should invalidate. On success, we decrement rdcache_revoking, so that it is no longer equal to rdcache_gen. Similarly, if we succeed in doing a sync invalidate, set revoking = gen - 1. 
(This is a small optimization to avoid doing unnecessary invalidate work and does not affect correctness.) Signed-off-by: Sage Weil diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 04b207b..60d27bc 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1430,8 +1430,8 @@ static int try_nonblocking_invalidate(struct inode *inode) invalidating_gen == ci->i_rdcache_gen) { /* success. */ dout("try_nonblocking_invalidate %p success\n", inode); - ci->i_rdcache_gen = 0; - ci->i_rdcache_revoking = 0; + /* save any racing async invalidate some trouble */ + ci->i_rdcache_revoking = ci->i_rdcache_gen - 1; return 0; } dout("try_nonblocking_invalidate %p failed\n", inode); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 0a49ffd..5a9f907 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1394,11 +1394,8 @@ static void ceph_invalidate_work(struct work_struct *work) spin_lock(&inode->i_lock); dout("invalidate_pages %p gen %d revoking %d\n", inode, ci->i_rdcache_gen, ci->i_rdcache_revoking); - if (ci->i_rdcache_gen == 0 || - ci->i_rdcache_revoking != ci->i_rdcache_gen) { - BUG_ON(ci->i_rdcache_revoking > ci->i_rdcache_gen); + if (ci->i_rdcache_revoking != ci->i_rdcache_gen) { /* nevermind! 
*/ - ci->i_rdcache_revoking = 0; spin_unlock(&inode->i_lock); goto out; } @@ -1408,15 +1405,16 @@ static void ceph_invalidate_work(struct work_struct *work) ceph_invalidate_nondirty_pages(inode->i_mapping); spin_lock(&inode->i_lock); - if (orig_gen == ci->i_rdcache_gen) { + if (orig_gen == ci->i_rdcache_gen && + orig_gen == ci->i_rdcache_revoking) { dout("invalidate_pages %p gen %d successful\n", inode, ci->i_rdcache_gen); - ci->i_rdcache_gen = 0; - ci->i_rdcache_revoking = 0; + ci->i_rdcache_revoking--; check = 1; } else { - dout("invalidate_pages %p gen %d raced, gen now %d\n", - inode, orig_gen, ci->i_rdcache_gen); + dout("invalidate_pages %p gen %d raced, now %d revoking %d\n", + inode, orig_gen, ci->i_rdcache_gen, + ci->i_rdcache_revoking); } spin_unlock(&inode->i_lock); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 1886294..7f01728 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -293,9 +293,7 @@ struct ceph_inode_info { int i_rd_ref, i_rdcache_ref, i_wr_ref; int i_wrbuffer_ref, i_wrbuffer_ref_head; u32 i_shared_gen; /* increment each time we get FILE_SHARED */ - u32 i_rdcache_gen; /* we increment this each time we get - FILE_CACHE. If it's non-zero, we - _may_ have cached pages. */ + u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */ u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */ struct list_head i_unsafe_writes; /* uncommitted sync writes */ -- cgit v0.10.2 From cb4276cca4695670916a82e359f2e3776f0a9138 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 8 Nov 2010 07:28:52 -0800 Subject: ceph: fix uid/gid on resent mds requests MDS requests can be rebuilt and resent in non-process context, but were filling in uid/gid from current_fsuid/gid. Put that information in the request struct on request setup. This fixes incorrect (and root) uid/gid getting set for requests that are forwarded between MDSs, usually due to metadata migrations. 
Signed-off-by: Sage Weil diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 3142b15..d22fbbe 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -529,6 +529,9 @@ static void __register_request(struct ceph_mds_client *mdsc, ceph_mdsc_get_request(req); __insert_request(mdsc, req); + req->r_uid = current_fsuid(); + req->r_gid = current_fsgid(); + if (dir) { struct ceph_inode_info *ci = ceph_inode(dir); @@ -1588,8 +1591,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch); head->op = cpu_to_le32(req->r_op); - head->caller_uid = cpu_to_le32(current_fsuid()); - head->caller_gid = cpu_to_le32(current_fsgid()); + head->caller_uid = cpu_to_le32(req->r_uid); + head->caller_gid = cpu_to_le32(req->r_gid); head->args = req->r_args; ceph_encode_filepath(&p, end, ino1, path1); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index d66d63c..9341fd4 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -170,6 +170,8 @@ struct ceph_mds_request { union ceph_mds_request_args r_args; int r_fmode; /* file mode, if expecting cap */ + uid_t r_uid; + gid_t r_gid; /* for choosing which mds to send this request to */ int r_direct_mode; -- cgit v0.10.2 From 1ebd0061ededeb8b495360a772d0b885dd3e036e Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 8 Nov 2010 13:24:58 +0800 Subject: ASoC: Return proper error if snd_soc_register_dais fails in psc_i2s_of_probe Signed-off-by: Axel Lin Acked-by: Liam Girdwood Signed-off-by: Mark Brown diff --git a/sound/soc/fsl/mpc5200_psc_i2s.c b/sound/soc/fsl/mpc5200_psc_i2s.c index 74ffed4..9018fa5 100644 --- a/sound/soc/fsl/mpc5200_psc_i2s.c +++ b/sound/soc/fsl/mpc5200_psc_i2s.c @@ -160,7 +160,7 @@ static int __devinit psc_i2s_of_probe(struct platform_device *op, rc = snd_soc_register_dais(&op->dev, psc_i2s_dai, ARRAY_SIZE(psc_i2s_dai)); if (rc != 0) { pr_err("Failed to register DAI\n"); - return 0; + return rc; } psc_dma = 
dev_get_drvdata(&op->dev); -- cgit v0.10.2 From b0fc7b840926654a3a6eaf0f41f3a4da33441d3d Mon Sep 17 00:00:00 2001 From: Marek Belisko Date: Mon, 8 Nov 2010 13:14:51 +0100 Subject: ASoC: s3c24xx: Fix compilation problem for mini2440 When building with mini2440_defconfig, compilation ends with undefined references to DMA functions. The selection of S3C2410_DMA was missing when compiling ASoC audio for S3C24xx CPUs. Tested on mini2440 board. Signed-off-by: Marek Belisko Acked-by: Liam Girdwood Signed-off-by: Mark Brown diff --git a/sound/soc/s3c24xx/Kconfig b/sound/soc/s3c24xx/Kconfig index 8a6b53c..d85bf8a 100644 --- a/sound/soc/s3c24xx/Kconfig +++ b/sound/soc/s3c24xx/Kconfig @@ -2,6 +2,7 @@ config SND_S3C24XX_SOC tristate "SoC Audio for the Samsung S3CXXXX chips" depends on ARCH_S3C2410 || ARCH_S3C64XX || ARCH_S5PC100 || ARCH_S5PV210 select S3C64XX_DMA if ARCH_S3C64XX + select S3C2410_DMA if ARCH_S3C2410 help Say Y or M if you want to add support for codecs attached to the S3C24XX AC97 or I2S interfaces. You will also need to -- cgit v0.10.2 From 8bd59e0188c04f6540f00e13f633f22e4804ce06 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 8 Nov 2010 09:23:12 -0800 Subject: ceph: fix version check on racing inode updates We may get updates on the same inode from multiple MDSs; generally we only pay attention if the update is newer than what we already have. The exception is when an MDS sends unstable information, in which case we always update. The old > check got this wrong when our version was odd (e.g. 3) and the reply version was even (e.g. 2): the older stale (v2) info would be applied. Fixed and clarified the comment. Signed-off-by: Sage Weil diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 5a9f907..425c5b1 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -567,12 +567,17 @@ static int fill_inode(struct inode *inode, /* * provided version will be odd if inode value is projected, - * even if stable. 
skip the update if we have a newer info - * (e.g., due to inode info racing form multiple MDSs), or if - * we are getting projected (unstable) inode info. + * even if stable. skip the update if we have newer stable + * info (ours>=theirs, e.g. due to racing mds replies), unless + * we are getting projected (unstable) info (in which case the + * version is odd, and we want ours>theirs). + * us them + * 2 2 skip + * 3 2 skip + * 3 3 update */ if (le64_to_cpu(info->version) > 0 && - (ci->i_version & ~1) > le64_to_cpu(info->version)) + (ci->i_version & ~1) >= le64_to_cpu(info->version)) goto no_change; issued = __ceph_caps_issued(ci, &implemented); -- cgit v0.10.2 From d8672d64b88cdb7aa8139fb6d218f40b8cbf60af Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 8 Nov 2010 09:24:34 -0800 Subject: ceph: fix update of ctime from MDS The client can have a newer ctime than the MDS due to AUTH_EXCL and XATTR_EXCL caps as well; update the check in ceph_fill_file_time appropriately. This fixes cases where ctime/mtime goes backward under the right sequence of local updates (e.g. chmod) and mds replies (e.g. subsequent stat that goes to the MDS). 
Signed-off-by: Sage Weil diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 425c5b1..7bc0fbd 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -471,7 +471,9 @@ void ceph_fill_file_time(struct inode *inode, int issued, if (issued & (CEPH_CAP_FILE_EXCL| CEPH_CAP_FILE_WR| - CEPH_CAP_FILE_BUFFER)) { + CEPH_CAP_FILE_BUFFER| + CEPH_CAP_AUTH_EXCL| + CEPH_CAP_XATTR_EXCL)) { if (timespec_compare(ctime, &inode->i_ctime) > 0) { dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n", inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, @@ -511,7 +513,7 @@ void ceph_fill_file_time(struct inode *inode, int issued, warn = 1; } } else { - /* we have no write caps; whatever the MDS says is true */ + /* we have no write|excl caps; whatever the MDS says is true */ if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) { inode->i_ctime = *ctime; inode->i_mtime = *mtime; -- cgit v0.10.2 From 35ac6f081f26e1b6b3482b9c8dfccebe7817c691 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Tue, 9 Nov 2010 13:57:29 +0000 Subject: mmc: sdhci: Fix crash on boot with C0 stepping Moorestown platforms SDHC2 is newly added in C0 stepping of Langwell. Without the Moorestown specific quirk, the default pci_probe will be called and crash the kernel. This patch unblocks the crash problem on C0 by using the same probing function as HC1, which limits the number of slots to one. Signed-off-by: Jacob Pan Signed-off-by: Alan Cox Signed-off-by: Chris Ball diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c index d196e77..3d9c246 100644 --- a/drivers/mmc/host/sdhci-pci.c +++ b/drivers/mmc/host/sdhci-pci.c @@ -149,11 +149,11 @@ static const struct sdhci_pci_fixes sdhci_cafe = { * ADMA operation is disabled for Moorestown platform due to * hardware bugs. */ -static int mrst_hc1_probe(struct sdhci_pci_chip *chip) +static int mrst_hc_probe(struct sdhci_pci_chip *chip) { /* - * slots number is fixed here for MRST as SDIO3 is never used and has - * hardware bugs. 
+ * slots number is fixed here for MRST as SDIO3/5 are never used and + * have hardware bugs. */ chip->num_slots = 1; return 0; @@ -163,9 +163,9 @@ static const struct sdhci_pci_fixes sdhci_intel_mrst_hc0 = { .quirks = SDHCI_QUIRK_BROKEN_ADMA | SDHCI_QUIRK_NO_HISPD_BIT, }; -static const struct sdhci_pci_fixes sdhci_intel_mrst_hc1 = { +static const struct sdhci_pci_fixes sdhci_intel_mrst_hc1_hc2 = { .quirks = SDHCI_QUIRK_BROKEN_ADMA | SDHCI_QUIRK_NO_HISPD_BIT, - .probe = mrst_hc1_probe, + .probe = mrst_hc_probe, }; static const struct sdhci_pci_fixes sdhci_intel_mfd_sd = { @@ -538,7 +538,15 @@ static const struct pci_device_id pci_ids[] __devinitdata = { .device = PCI_DEVICE_ID_INTEL_MRST_SD1, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, - .driver_data = (kernel_ulong_t)&sdhci_intel_mrst_hc1, + .driver_data = (kernel_ulong_t)&sdhci_intel_mrst_hc1_hc2, + }, + + { + .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_MRST_SD2, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = (kernel_ulong_t)&sdhci_intel_mrst_hc1_hc2, }, { diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index c6bcfe9..d369b53 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2441,6 +2441,7 @@ #define PCI_DEVICE_ID_INTEL_MFD_SDIO2 0x0822 #define PCI_DEVICE_ID_INTEL_MFD_EMMC0 0x0823 #define PCI_DEVICE_ID_INTEL_MFD_EMMC1 0x0824 +#define PCI_DEVICE_ID_INTEL_MRST_SD2 0x084F #define PCI_DEVICE_ID_INTEL_I960 0x0960 #define PCI_DEVICE_ID_INTEL_I960RM 0x0962 #define PCI_DEVICE_ID_INTEL_8257X_SOL 0x1062 -- cgit v0.10.2 From e658e9fe65306346e827676a121eca3534ad75ff Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 9 Nov 2010 09:12:42 -0800 Subject: driver core: the development tree has switched to git So change the MAINTAINERS file to show where the tree now is at. 
Signed-off-by: Greg Kroah-Hartman diff --git a/MAINTAINERS b/MAINTAINERS index 0094224..283d5f5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2051,7 +2051,7 @@ F: Documentation/blockdev/drbd/ DRIVER CORE, KOBJECTS, DEBUGFS AND SYSFS M: Greg Kroah-Hartman -T: quilt kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core-2.6.git S: Supported F: Documentation/kobject.txt F: drivers/base/ -- cgit v0.10.2 From e98b6fed84d0f0155d7b398e0dfeac74c792f2d0 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 9 Nov 2010 12:24:53 -0800 Subject: ceph: fix comment, remove extraneous args The offset/length arguments aren't used. Signed-off-by: Sage Weil diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 87ee944..603fd00 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -376,21 +376,19 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, dout("sync_read on file %p %llu~%u %s\n", file, off, len, (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); - if (file->f_flags & O_DIRECT) { - pages = ceph_get_direct_page_vector(data, num_pages, off, len); - - /* - * flush any page cache pages in this range. this - * will make concurrent normal and O_DIRECT io slow, - * but it will at least behave sensibly when they are - * in sequence. - */ - } else { + if (file->f_flags & O_DIRECT) + pages = ceph_get_direct_page_vector(data, num_pages); + else pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); - } if (IS_ERR(pages)) return PTR_ERR(pages); + /* + * flush any page cache pages in this range. this + * will make concurrent normal and sync io slow, + * but it will at least behave sensibly when they are + * in sequence. 
+ */ ret = filemap_write_and_wait(inode->i_mapping); if (ret < 0) goto done; diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index f22b2e9..9e76d35 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -227,8 +227,7 @@ extern int ceph_open_session(struct ceph_client *client); extern void ceph_release_page_vector(struct page **pages, int num_pages); extern struct page **ceph_get_direct_page_vector(const char __user *data, - int num_pages, - loff_t off, size_t len); + int num_pages); extern void ceph_put_page_vector(struct page **pages, int num_pages); extern void ceph_release_page_vector(struct page **pages, int num_pages); extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index 54caf06..ac34fee 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -13,8 +13,7 @@ * build a vector of user pages */ struct page **ceph_get_direct_page_vector(const char __user *data, - int num_pages, - loff_t off, size_t len) + int num_pages) { struct page **pages; int rc; -- cgit v0.10.2 From b7495fc2ff941db6a118a93ab8d61149e3f4cef8 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 9 Nov 2010 12:43:12 -0800 Subject: ceph: make page alignment explicit in osd interface We used to infer alignment of IOs within a page based on the file offset, which assumed they matched. This broke with direct IO that was not aligned to pages (e.g., 512-byte aligned IO). We were also trusting the alignment specified in the OSD reply, which could have been adjusted by the server. Explicitly specify the page alignment when setting up OSD IO requests. 
Signed-off-by: Sage Weil diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 51bcc5c..4aa85776 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -204,7 +204,7 @@ static int readpage_nounlock(struct file *filp, struct page *page) err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, page->index << PAGE_CACHE_SHIFT, &len, ci->i_truncate_seq, ci->i_truncate_size, - &page, 1); + &page, 1, 0); if (err == -ENOENT) err = 0; if (err < 0) { @@ -287,7 +287,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, rc = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, offset, &len, ci->i_truncate_seq, ci->i_truncate_size, - pages, nr_pages); + pages, nr_pages, 0); if (rc == -ENOENT) rc = 0; if (rc < 0) @@ -782,7 +782,7 @@ get_more_pages: snapc, do_sync, ci->i_truncate_seq, ci->i_truncate_size, - &inode->i_mtime, true, 1); + &inode->i_mtime, true, 1, 0); max_pages = req->r_num_pages; alloc_page_vec(fsc, req); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 603fd00..8d79b89 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -282,11 +282,12 @@ int ceph_release(struct inode *inode, struct file *file) static int striped_read(struct inode *inode, u64 off, u64 len, struct page **pages, int num_pages, - int *checkeof) + int *checkeof, bool align_to_pages) { struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); u64 pos, this_len; + int io_align, page_align; int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ int left, pages_left; int read; @@ -302,14 +303,19 @@ static int striped_read(struct inode *inode, page_pos = pages; pages_left = num_pages; read = 0; + io_align = off & ~PAGE_MASK; more: + if (align_to_pages) + page_align = (pos - io_align) & ~PAGE_MASK; + else + page_align = pos & ~PAGE_MASK; this_len = left; ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), &ci->i_layout, pos, &this_len, ci->i_truncate_seq, ci->i_truncate_size, - page_pos, 
pages_left); + page_pos, pages_left, page_align); hit_stripe = this_len < left; was_short = ret >= 0 && ret < this_len; if (ret == -ENOENT) @@ -393,7 +399,8 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, if (ret < 0) goto done; - ret = striped_read(inode, off, len, pages, num_pages, checkeof); + ret = striped_read(inode, off, len, pages, num_pages, checkeof, + file->f_flags & O_DIRECT); if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) ret = ceph_copy_page_vector_to_user(pages, data, off, ret); @@ -448,6 +455,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, int flags; int do_sync = 0; int check_caps = 0; + int page_align, io_align; int ret; struct timespec mtime = CURRENT_TIME; @@ -462,6 +470,8 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, else pos = *offset; + io_align = pos & ~PAGE_MASK; + ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left); if (ret < 0) return ret; @@ -486,20 +496,26 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, */ more: len = left; + if (file->f_flags & O_DIRECT) + /* write from beginning of first page, regardless of + io alignment */ + page_align = (pos - io_align) & ~PAGE_MASK; + else + page_align = pos & ~PAGE_MASK; req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, ceph_vino(inode), pos, &len, CEPH_OSD_OP_WRITE, flags, ci->i_snap_realm->cached_context, do_sync, ci->i_truncate_seq, ci->i_truncate_size, - &mtime, false, 2); + &mtime, false, 2, page_align); if (!req) return -ENOMEM; num_pages = calc_pages_for(pos, len); if (file->f_flags & O_DIRECT) { - pages = ceph_get_direct_page_vector(data, num_pages, pos, len); + pages = ceph_get_direct_page_vector(data, num_pages); if (IS_ERR(pages)) { ret = PTR_ERR(pages); goto out; diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 7bc0fbd..8153ee5 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1752,7 +1752,7 @@ int ceph_do_getattr(struct inode 
*inode, int mask) return 0; } - dout("do_getattr inode %p mask %s\n", inode, ceph_cap_string(mask)); + dout("do_getattr inode %p mask %s mode 0%o\n", inode, ceph_cap_string(mask), inode->i_mode); if (ceph_caps_issued_mask(ceph_inode(inode), mask, 1)) return 0; diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 6c91fb0..a1af296 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -79,6 +79,7 @@ struct ceph_osd_request { struct ceph_file_layout r_file_layout; struct ceph_snap_context *r_snapc; /* snap context for writes */ unsigned r_num_pages; /* size of page array (follows) */ + unsigned r_page_alignment; /* io offset in first page */ struct page **r_pages; /* pages for data payload */ int r_pages_from_pool; int r_own_pages; /* if true, i own page list */ @@ -194,7 +195,8 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, int do_sync, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, - bool use_mempool, int num_reply); + bool use_mempool, int num_reply, + int page_align); static inline void ceph_osdc_get_request(struct ceph_osd_request *req) { @@ -218,7 +220,8 @@ extern int ceph_osdc_readpages(struct ceph_osd_client *osdc, struct ceph_file_layout *layout, u64 off, u64 *plen, u32 truncate_seq, u64 truncate_size, - struct page **pages, int nr_pages); + struct page **pages, int nr_pages, + int page_align); extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 7939199..6c09623 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -71,6 +71,7 @@ void ceph_calc_raw_layout(struct ceph_osd_client *osdc, op->extent.length = objlen; } req->r_num_pages = calc_pages_for(off, *plen); + req->r_page_alignment = off & ~PAGE_MASK; if (op->op == CEPH_OSD_OP_WRITE) op->payload_len = *plen; @@ -419,7 +420,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct 
ceph_osd_client *osdc, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, - bool use_mempool, int num_reply) + bool use_mempool, int num_reply, + int page_align) { struct ceph_osd_req_op ops[3]; struct ceph_osd_request *req; @@ -447,6 +449,10 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, calc_layout(osdc, vino, layout, off, plen, req, ops); req->r_file_layout = *layout; /* keep a copy */ + /* in case it differs from natural alignment that calc_layout + filled in for us */ + req->r_page_alignment = page_align; + ceph_osdc_build_request(req, off, plen, ops, snapc, mtime, @@ -1489,7 +1495,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, struct ceph_vino vino, struct ceph_file_layout *layout, u64 off, u64 *plen, u32 truncate_seq, u64 truncate_size, - struct page **pages, int num_pages) + struct page **pages, int num_pages, int page_align) { struct ceph_osd_request *req; int rc = 0; @@ -1499,15 +1505,15 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, req = ceph_osdc_new_request(osdc, layout, vino, off, plen, CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL, 0, truncate_seq, truncate_size, NULL, - false, 1); + false, 1, page_align); if (!req) return -ENOMEM; /* it may be a short read due to an object boundary */ req->r_pages = pages; - dout("readpages final extent is %llu~%llu (%d pages)\n", - off, *plen, req->r_num_pages); + dout("readpages final extent is %llu~%llu (%d pages align %d)\n", + off, *plen, req->r_num_pages, page_align); rc = ceph_osdc_start_request(osdc, req, false); if (!rc) @@ -1533,6 +1539,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, { struct ceph_osd_request *req; int rc = 0; + int page_align = off & ~PAGE_MASK; BUG_ON(vino.snap != CEPH_NOSNAP); req = ceph_osdc_new_request(osdc, layout, vino, off, &len, @@ -1541,7 +1548,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, CEPH_OSD_FLAG_WRITE, snapc, do_sync, truncate_seq, 
truncate_size, mtime, - nofail, 1); + nofail, 1, page_align); if (!req) return -ENOMEM; @@ -1638,8 +1645,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, m = ceph_msg_get(req->r_reply); if (data_len > 0) { - unsigned data_off = le16_to_cpu(hdr->data_off); - int want = calc_pages_for(data_off & ~PAGE_MASK, data_len); + int want = calc_pages_for(req->r_page_alignment, data_len); if (unlikely(req->r_num_pages < want)) { pr_warning("tid %lld reply %d > expected %d pages\n", -- cgit v0.10.2 From c5c6b19d4b8f5431fca05f28ae9e141045022149 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 9 Nov 2010 12:40:00 -0800 Subject: ceph: explicitly specify page alignment in network messages The alignment used for reading data into or out of pages used to be taken from the data_off field in the message header. This only worked as long as the page alignment matched the object offset, breaking direct io to non-page aligned offsets. Instead, explicitly specify the page alignment next to the page vector in the ceph_msg struct, and use that instead of the message header (which probably shouldn't be trusted). The alloc_msg callback is responsible for filling in this field properly when it sets up the page vector. Signed-off-by: Sage Weil diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 5956d62..a108b42 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -82,6 +82,7 @@ struct ceph_msg { struct ceph_buffer *middle; struct page **pages; /* data payload. NOT OWNER. 
*/ unsigned nr_pages; /* size of page array */ + unsigned page_alignment; /* io offset in first page */ struct ceph_pagelist *pagelist; /* instead of pages */ struct list_head list_head; struct kref kref; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index d379abf..1c7a2ec 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -540,8 +540,7 @@ static void prepare_write_message(struct ceph_connection *con) /* initialize page iterator */ con->out_msg_pos.page = 0; if (m->pages) - con->out_msg_pos.page_pos = - le16_to_cpu(m->hdr.data_off) & ~PAGE_MASK; + con->out_msg_pos.page_pos = m->page_alignment; else con->out_msg_pos.page_pos = 0; con->out_msg_pos.data_pos = 0; @@ -1491,7 +1490,7 @@ static int read_partial_message(struct ceph_connection *con) struct ceph_msg *m = con->in_msg; int ret; int to, left; - unsigned front_len, middle_len, data_len, data_off; + unsigned front_len, middle_len, data_len; int datacrc = con->msgr->nocrc; int skip; u64 seq; @@ -1527,7 +1526,6 @@ static int read_partial_message(struct ceph_connection *con) data_len = le32_to_cpu(con->in_hdr.data_len); if (data_len > CEPH_MSG_MAX_DATA_LEN) return -EIO; - data_off = le16_to_cpu(con->in_hdr.data_off); /* verify seq# */ seq = le64_to_cpu(con->in_hdr.seq); @@ -1575,7 +1573,7 @@ static int read_partial_message(struct ceph_connection *con) con->in_msg_pos.page = 0; if (m->pages) - con->in_msg_pos.page_pos = data_off & ~PAGE_MASK; + con->in_msg_pos.page_pos = m->page_alignment; else con->in_msg_pos.page_pos = 0; con->in_msg_pos.data_pos = 0; @@ -2300,6 +2298,7 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags) /* data */ m->nr_pages = 0; + m->page_alignment = 0; m->pages = NULL; m->pagelist = NULL; m->bio = NULL; @@ -2369,6 +2368,7 @@ static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, type, front_len); return NULL; } + msg->page_alignment = le16_to_cpu(hdr->data_off); } memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); diff --git 
a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 6c09623..3e20a12 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -391,6 +391,8 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, req->r_request->hdr.data_len = cpu_to_le32(data_len); } + req->r_request->page_alignment = req->r_page_alignment; + BUG_ON(p > msg->front.iov_base + msg->front.iov_len); msg_size = p - msg->front.iov_base; msg->front.iov_len = msg_size; @@ -1657,6 +1659,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, } m->pages = req->r_pages; m->nr_pages = req->r_num_pages; + m->page_alignment = req->r_page_alignment; #ifdef CONFIG_BLOCK m->bio = req->r_bio; #endif -- cgit v0.10.2 From 69669455b049c0f1f04bb306625c5d4db6838b11 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Fri, 5 Nov 2010 18:51:34 +0100 Subject: drm/i915: Fix I2C adapter registration Fix many small bugs in I2C adapter registration: * Properly reject unsupported GPIO pin. * Fix improper use of I2C_NAME_SIZE (which is the size of i2c_client.name, not i2c_adapter.name.) * Prefix adapter names with "i915" so that the user knows what the I2C channel is connected to. * Fix swapped characters in the string used to name the GPIO-based adapter. * Add missing comma in gmbus name table. 
Signed-off-by: Jean Delvare Signed-off-by: Chris Wilson diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index 2be4f72..3dba086 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -160,7 +160,7 @@ intel_gpio_create(struct drm_i915_private *dev_priv, u32 pin) }; struct intel_gpio *gpio; - if (pin < 1 || pin > 7) + if (pin >= ARRAY_SIZE(map_pin_to_reg) || !map_pin_to_reg[pin]) return NULL; gpio = kzalloc(sizeof(struct intel_gpio), GFP_KERNEL); @@ -172,7 +172,8 @@ intel_gpio_create(struct drm_i915_private *dev_priv, u32 pin) gpio->reg += PCH_GPIOA - GPIOA; gpio->dev_priv = dev_priv; - snprintf(gpio->adapter.name, I2C_NAME_SIZE, "GPIO%c", "?BACDEF?"[pin]); + snprintf(gpio->adapter.name, sizeof(gpio->adapter.name), + "i915 GPIO%c", "?BACDE?F"[pin]); gpio->adapter.owner = THIS_MODULE; gpio->adapter.algo_data = &gpio->algo; gpio->adapter.dev.parent = &dev_priv->dev->pdev->dev; @@ -349,7 +350,7 @@ int intel_setup_gmbus(struct drm_device *dev) "panel", "dpc", "dpb", - "reserved" + "reserved", "dpd", }; struct drm_i915_private *dev_priv = dev->dev_private; @@ -366,8 +367,8 @@ int intel_setup_gmbus(struct drm_device *dev) bus->adapter.owner = THIS_MODULE; bus->adapter.class = I2C_CLASS_DDC; snprintf(bus->adapter.name, - I2C_NAME_SIZE, - "gmbus %s", + sizeof(bus->adapter.name), + "i915 gmbus %s", names[i]); bus->adapter.dev.parent = &dev->pdev->dev; -- cgit v0.10.2 From 25591b07033663e09f5e60355fc5ec4d4aa53e63 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 10 Nov 2010 10:05:51 +0100 Subject: [S390] fix get_user_pages_fast The check for the _PAGE_RO bit in get_user_pages_fast for write==1 is the wrong way around. It must not be set for the fast path. 
Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 38e641c..45b405c 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -20,18 +20,17 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { - unsigned long mask, result; + unsigned long mask; pte_t *ptep, pte; struct page *page; - result = write ? 0 : _PAGE_RO; - mask = result | _PAGE_INVALID | _PAGE_SPECIAL; + mask = (write ? _PAGE_RO : 0) | _PAGE_INVALID | _PAGE_SPECIAL; ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr); do { pte = *ptep; barrier(); - if ((pte_val(pte) & mask) != result) + if ((pte_val(pte) & mask) != 0) return 0; VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); -- cgit v0.10.2 From 3b210e7652a0ac638b1a267b6a181c8f742d8462 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 10 Nov 2010 10:05:52 +0100 Subject: [S390] tape: add medium state notifications Add uevent notifications for tape cartridge load and tape cartridge unload events. 
Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/char/tape_core.c b/drivers/s390/char/tape_core.c index 6c40867..b3a3e8e 100644 --- a/drivers/s390/char/tape_core.c +++ b/drivers/s390/char/tape_core.c @@ -209,29 +209,79 @@ tape_state_set(struct tape_device *device, enum tape_state newstate) wake_up(&device->state_change_wq); } +struct tape_med_state_work_data { + struct tape_device *device; + enum tape_medium_state state; + struct work_struct work; +}; + +static void +tape_med_state_work_handler(struct work_struct *work) +{ + static char env_state_loaded[] = "MEDIUM_STATE=LOADED"; + static char env_state_unloaded[] = "MEDIUM_STATE=UNLOADED"; + struct tape_med_state_work_data *p = + container_of(work, struct tape_med_state_work_data, work); + struct tape_device *device = p->device; + char *envp[] = { NULL, NULL }; + + switch (p->state) { + case MS_UNLOADED: + pr_info("%s: The tape cartridge has been successfully " + "unloaded\n", dev_name(&device->cdev->dev)); + envp[0] = env_state_unloaded; + kobject_uevent_env(&device->cdev->dev.kobj, KOBJ_CHANGE, envp); + break; + case MS_LOADED: + pr_info("%s: A tape cartridge has been mounted\n", + dev_name(&device->cdev->dev)); + envp[0] = env_state_loaded; + kobject_uevent_env(&device->cdev->dev.kobj, KOBJ_CHANGE, envp); + break; + default: + break; + } + tape_put_device(device); + kfree(p); +} + +static void +tape_med_state_work(struct tape_device *device, enum tape_medium_state state) +{ + struct tape_med_state_work_data *p; + + p = kzalloc(sizeof(*p), GFP_ATOMIC); + if (p) { + INIT_WORK(&p->work, tape_med_state_work_handler); + p->device = tape_get_device(device); + p->state = state; + schedule_work(&p->work); + } +} + void tape_med_state_set(struct tape_device *device, enum tape_medium_state newstate) { - if (device->medium_state == newstate) + enum tape_medium_state oldstate; + + oldstate = device->medium_state; + if (oldstate == newstate) return; + device->medium_state = newstate; switch(newstate){ case 
MS_UNLOADED: device->tape_generic_status |= GMT_DR_OPEN(~0); - if (device->medium_state == MS_LOADED) - pr_info("%s: The tape cartridge has been successfully " - "unloaded\n", dev_name(&device->cdev->dev)); + if (oldstate == MS_LOADED) + tape_med_state_work(device, MS_UNLOADED); break; case MS_LOADED: device->tape_generic_status &= ~GMT_DR_OPEN(~0); - if (device->medium_state == MS_UNLOADED) - pr_info("%s: A tape cartridge has been mounted\n", - dev_name(&device->cdev->dev)); + if (oldstate == MS_UNLOADED) + tape_med_state_work(device, MS_LOADED); break; default: - // print nothing break; } - device->medium_state = newstate; wake_up(&device->state_change_wq); } -- cgit v0.10.2 From 16d2ce271c6b8b3527ed1461d03b5f373d53f78f Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 10 Nov 2010 10:05:53 +0100 Subject: [S390] cio: fix incorrect ccw_device_init_count If device recognition is interrupted by a subchannel event indicating that the device is gone, ccw_device_init_count is not correctly decreased. Fix this by reporting the corresponding event to the device recognition callback via the state machine. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 2ff8a22..e8391b89 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -1455,7 +1455,16 @@ static int io_subchannel_sch_event(struct subchannel *sch, int process) break; case IO_SCH_UNREG_ATTACH: case IO_SCH_UNREG: - if (cdev) + if (!cdev) + break; + if (cdev->private->state == DEV_STATE_SENSE_ID) { + /* + * Note: delayed work triggered by this event + * and repeated calls to sch_event are synchronized + * by the above check for work_pending(cdev). 
+ */ + dev_fsm_event(cdev, DEV_EVENT_NOTOPER); + } else ccw_device_set_notoper(cdev); break; case IO_SCH_NOP: -- cgit v0.10.2 From ca768b663131ca644689fcadc9ca092dcc96a758 Mon Sep 17 00:00:00 2001 From: Stefan Weinhuber Date: Wed, 10 Nov 2010 10:05:54 +0100 Subject: [S390] vmlogrdr: purge after recording is switched off If automatic purge is enabled for a vmlogrdr device, old records are purged before an IUCV recording service is switched on or off. If z/VM generates a large number of records between purging and switching the recording service off, these records remain queued, and may have a negative performance impact on the z/VM system. To avoid this problem, we need to purge the records after recording is switched off. Signed-off-by: Stefan Weinhuber Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c index 9f66142..1cc726b 100644 --- a/drivers/s390/char/vmlogrdr.c +++ b/drivers/s390/char/vmlogrdr.c @@ -249,27 +249,25 @@ static int vmlogrdr_recording(struct vmlogrdr_priv_t * logptr, char cp_command[80]; char cp_response[160]; char *onoff, *qid_string; + int rc; - memset(cp_command, 0x00, sizeof(cp_command)); - memset(cp_response, 0x00, sizeof(cp_response)); - - onoff = ((action == 1) ? "ON" : "OFF"); + onoff = ((action == 1) ? "ON" : "OFF"); qid_string = ((recording_class_AB == 1) ? " QID * " : ""); - /* + /* * The recording commands needs to be called with option QID * for guests that have previlege classes A or B. * Purging has to be done as separate step, because recording * can't be switched on as long as records are on the queue. * Doing both at the same time doesn't work. 
*/ - - if (purge) { + if (purge && (action == 1)) { + memset(cp_command, 0x00, sizeof(cp_command)); + memset(cp_response, 0x00, sizeof(cp_response)); snprintf(cp_command, sizeof(cp_command), "RECORDING %s PURGE %s", logptr->recording_name, qid_string); - cpcmd(cp_command, cp_response, sizeof(cp_response), NULL); } @@ -279,19 +277,33 @@ static int vmlogrdr_recording(struct vmlogrdr_priv_t * logptr, logptr->recording_name, onoff, qid_string); - cpcmd(cp_command, cp_response, sizeof(cp_response), NULL); /* The recording command will usually answer with 'Command complete' * on success, but when the specific service was never connected * before then there might be an additional informational message * 'HCPCRC8072I Recording entry not found' before the - * 'Command complete'. So I use strstr rather then the strncmp. + * 'Command complete'. So I use strstr rather then the strncmp. */ if (strstr(cp_response,"Command complete")) - return 0; + rc = 0; else - return -EIO; + rc = -EIO; + /* + * If we turn recording off, we have to purge any remaining records + * afterwards, as a large number of queued records may impact z/VM + * performance. + */ + if (purge && (action == 0)) { + memset(cp_command, 0x00, sizeof(cp_command)); + memset(cp_response, 0x00, sizeof(cp_response)); + snprintf(cp_command, sizeof(cp_command), + "RECORDING %s PURGE %s", + logptr->recording_name, + qid_string); + cpcmd(cp_command, cp_response, sizeof(cp_response), NULL); + } + return rc; } -- cgit v0.10.2 From ec6743bb06510c7b629603ce35713d6ae9273579 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Wed, 10 Nov 2010 10:05:55 +0100 Subject: [S390] mm: add devmem_is_allowed() for STRICT_DEVMEM checking Provide the devmem_is_allowed() routine to restrict access to kernel memory from userspace. Set the CONFIG_STRICT_DEVMEM config option to switch on checking. 
Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index 45e0c61..05221b1 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -6,6 +6,18 @@ config TRACE_IRQFLAGS_SUPPORT source "lib/Kconfig.debug" +config STRICT_DEVMEM + def_bool y + prompt "Filter access to /dev/mem" + ---help--- + This option restricts access to /dev/mem. If this option is + disabled, you allow userspace access to all memory, including + kernel and userspace memory. Accidental memory access is likely + to be disastrous. + Memory access is required for experts who want to debug the kernel. + + If you are unsure, say Y. + config DEBUG_STRICT_USER_COPY_CHECKS bool "Strict user copy size checks" ---help--- diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index a8729ea..3c987e9 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -130,6 +130,11 @@ struct page; void arch_free_page(struct page *page, int order); void arch_alloc_page(struct page *page, int order); +static inline int devmem_is_allowed(unsigned long pfn) +{ + return 0; +} + #define HAVE_ARCH_FREE_PAGE #define HAVE_ARCH_ALLOC_PAGE -- cgit v0.10.2 From becf91f18750cf1c60828aa2ee63a36b05c2e4d0 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 10 Nov 2010 10:05:56 +0100 Subject: [S390] ftrace: build without frame pointers on s390 s390 doesn't need FRAME_POINTERS in order to have a working function tracer. We don't need frame pointers in order to get stack traces since we always have valid backchains by using the -mkernel-backchain gcc option.
Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index e04b8bc..ea37e2f 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -126,7 +126,7 @@ if FTRACE config FUNCTION_TRACER bool "Kernel Function Tracer" depends on HAVE_FUNCTION_TRACER - select FRAME_POINTER if (!ARM_UNWIND) + select FRAME_POINTER if !ARM_UNWIND && !S390 select KALLSYMS select GENERIC_TRACER select CONTEXT_SWITCH_TRACER -- cgit v0.10.2 From adb45839817392102e659c19e5c19aa39530021f Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 10 Nov 2010 10:05:57 +0100 Subject: [S390] kprobes: disable interrupts throughout Execute the kprobe exception and fault handler with interrupts disabled. To disable the interrupts only while a single step is in progress is not good enough, a kprobe from interrupt context while another kprobe is handled can confuse the internal house keeping. Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index d60fc43..70cf73b 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -30,6 +30,7 @@ #include #include #include +#include DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); @@ -212,7 +213,7 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) /* Set the PER control regs, turns on single step for this address */ __ctl_load(kprobe_per_regs, 9, 11); regs->psw.mask |= PSW_MASK_PER; - regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK); + regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT); } static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) @@ -239,7 +240,7 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, __get_cpu_var(current_kprobe) = p; /* Save the interrupt and per flags */ kcb->kprobe_saved_imask = regs->psw.mask & - (PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT 
| PSW_MASK_MCHECK); + (PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT); /* Save the control regs that govern PER */ __ctl_store(kcb->kprobe_saved_ctl, 9, 11); } @@ -316,8 +317,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) return 1; ss_probe: - if (regs->psw.mask & (PSW_MASK_PER | PSW_MASK_IO)) - local_irq_disable(); prepare_singlestep(p, regs); kcb->kprobe_status = KPROBE_HIT_SS; return 1; @@ -465,8 +464,6 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs) goto out; } reset_current_kprobe(); - if (regs->psw.mask & (PSW_MASK_PER | PSW_MASK_IO)) - local_irq_enable(); out: preempt_enable_no_resched(); @@ -482,7 +479,7 @@ out: return 1; } -int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) +static int __kprobes kprobe_trap_handler(struct pt_regs *regs, int trapnr) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -508,8 +505,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) restore_previous_kprobe(kcb); else { reset_current_kprobe(); - if (regs->psw.mask & (PSW_MASK_PER | PSW_MASK_IO)) - local_irq_enable(); } preempt_enable_no_resched(); break; @@ -553,6 +548,18 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) return 0; } +int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) +{ + int ret; + + if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT)) + local_irq_disable(); + ret = kprobe_trap_handler(regs, trapnr); + if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT)) + local_irq_restore(regs->psw.mask & ~PSW_MASK_PER); + return ret; +} + /* * Wrapper routine to for handling exceptions. 
*/ @@ -560,8 +567,12 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) { struct die_args *args = (struct die_args *)data; + struct pt_regs *regs = args->regs; int ret = NOTIFY_DONE; + if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT)) + local_irq_disable(); + switch (val) { case DIE_BPT: if (kprobe_handler(args->regs)) @@ -572,16 +583,17 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, ret = NOTIFY_STOP; break; case DIE_TRAP: - /* kprobe_running() needs smp_processor_id() */ - preempt_disable(); - if (kprobe_running() && - kprobe_fault_handler(args->regs, args->trapnr)) + if (!preemptible() && kprobe_running() && + kprobe_trap_handler(args->regs, args->trapnr)) ret = NOTIFY_STOP; - preempt_enable(); break; default: break; } + + if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT)) + local_irq_restore(regs->psw.mask & ~PSW_MASK_PER); + return ret; } @@ -595,6 +607,7 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) /* setup return addr to the jprobe handler routine */ regs->psw.addr = (unsigned long)(jp->entry) | PSW_ADDR_AMODE; + regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT); /* r14 is the function return address */ kcb->jprobe_saved_r14 = (unsigned long)regs->gprs[14]; -- cgit v0.10.2 From 89480801a17a3069f45169d40b828c8e511aa005 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 10 Nov 2010 10:05:58 +0100 Subject: [S390] kprobes: Fix the return address of multiple kretprobes Analog to git commit 737480a0d525dae13306296da08029dff545bc72 fix the return address of subsequent kretprobes when multiple kretprobes are set on the same function. 
Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 70cf73b..2564793 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -349,6 +349,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, struct hlist_node *node, *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; + kprobe_opcode_t *correct_ret_addr = NULL; INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); @@ -371,10 +372,32 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, /* another task is sharing our hash bucket */ continue; - if (ri->rp && ri->rp->handler) - ri->rp->handler(ri, regs); + orig_ret_address = (unsigned long)ri->ret_addr; + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_assert(ri, orig_ret_address, trampoline_address); + + correct_ret_addr = ri->ret_addr; + hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; orig_ret_address = (unsigned long)ri->ret_addr; + + if (ri->rp && ri->rp->handler) { + ri->ret_addr = correct_ret_addr; + ri->rp->handler(ri, regs); + } + recycle_rp_inst(ri, &empty_rp); if (orig_ret_address != trampoline_address) { @@ -386,7 +409,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, break; } } - kretprobe_assert(ri, orig_ret_address, trampoline_address); + regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE; reset_current_kprobe(); -- cgit v0.10.2 From f6cd24777513fcc673d432cc29ef59881d3e4df1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 4 Nov 2010 11:13:48 +0100 Subject: irq: Better struct irqaction layout We currently use kmalloc-96 slab for struct irqaction allocations on 64bit arches. 
This is unfortunate because of possible false sharing and two cache line accesses. Move 'name' and 'dir' fields to the end of the structure, and force a suitable alignment. Hot path fields now use one cache line on x86_64. Signed-off-by: Eric Dumazet Reviewed-by: Andi Kleen Cc: Peter Zijlstra LKML-Reference: <1288865628.2659.69.camel@edumazet-laptop> Signed-off-by: Thomas Gleixner diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 79d0c4f..55e0d42 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -114,15 +114,15 @@ typedef irqreturn_t (*irq_handler_t)(int, void *); struct irqaction { irq_handler_t handler; unsigned long flags; - const char *name; void *dev_id; struct irqaction *next; int irq; - struct proc_dir_entry *dir; irq_handler_t thread_fn; struct task_struct *thread; unsigned long thread_flags; -}; + const char *name; + struct proc_dir_entry *dir; +} ____cacheline_internodealigned_in_smp; extern irqreturn_t no_action(int cpl, void *dev_id); -- cgit v0.10.2 From 3cf9b85b474e656a0856b88290c7a289ac5ea247 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 5 Nov 2010 16:12:38 -0700 Subject: locking, lockdep: Convert sprintf_symbol to %pS Signed-off-by: Joe Perches Cc: Peter Zijlstra Cc: Jiri Kosina LKML-Reference: <1288998760-11775-6-git-send-email-joe@perches.com> Signed-off-by: Ingo Molnar diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index 59b76c8..1969d2f 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c @@ -494,7 +494,6 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data) namelen += 2; for (i = 0; i < LOCKSTAT_POINTS; i++) { - char sym[KSYM_SYMBOL_LEN]; char ip[32]; if (class->contention_point[i] == 0) @@ -503,15 +502,13 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data) if (!i) seq_line(m, '-', 40-namelen, namelen); - sprint_symbol(sym, class->contention_point[i]); snprintf(ip, sizeof(ip), "[<%p>]", (void *)class->contention_point[i]); -
seq_printf(m, "%40s %14lu %29s %s\n", name, - stats->contention_point[i], - ip, sym); + seq_printf(m, "%40s %14lu %29s %pS\n", + name, stats->contention_point[i], + ip, (void *)class->contention_point[i]); } for (i = 0; i < LOCKSTAT_POINTS; i++) { - char sym[KSYM_SYMBOL_LEN]; char ip[32]; if (class->contending_point[i] == 0) @@ -520,12 +517,11 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data) if (!i) seq_line(m, '-', 40-namelen, namelen); - sprint_symbol(sym, class->contending_point[i]); snprintf(ip, sizeof(ip), "[<%p>]", (void *)class->contending_point[i]); - seq_printf(m, "%40s %14lu %29s %s\n", name, - stats->contending_point[i], - ip, sym); + seq_printf(m, "%40s %14lu %29s %pS\n", + name, stats->contending_point[i], + ip, (void *)class->contending_point[i]); } if (i) { seq_puts(m, "\n"); -- cgit v0.10.2 From 5e4f083f78d03e9f8d2e327daccde16976f9bb00 Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Sun, 24 Oct 2010 11:50:53 +0800 Subject: hrtimer: Remove stale comment on curr_timer curr_timer doesn't reside in struct hrtimer_cpu_base anymore.
Signed-off-by: Yong Zhang LKML-Reference: <1287892253-2587-1-git-send-email-yong.zhang0@gmail.com> Signed-off-by: Thomas Gleixner diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index fd0c1b8..dd9954b 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -158,7 +158,6 @@ struct hrtimer_clock_base { * @lock: lock protecting the base and associated clock bases * and timers * @clock_base: array of clock bases for this cpu - * @curr_timer: the timer which is executing a callback right now * @expires_next: absolute time of the next event which was scheduled * via clock_set_next_event() * @hres_active: State of high resolution mode -- cgit v0.10.2 From c0deae8c9587419ab13874b74425ce2eb2e18508 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 3 Nov 2010 18:52:56 +0200 Subject: posix-cpu-timers: Rcu_read_lock/unlock protect find_task_by_vpid call Commit 4221a9918e38b7494cee341dda7b7b4bb8c04bde "Add RCU check for find_task_by_vpid()" introduced rcu_lockdep_assert to find_task_by_pid_ns. Add rcu_read_lock/rcu_read_unlock to call find_task_by_vpid. Tetsuo Handa wrote: | Quoting from one of posts in that thread | http://kerneltrap.org/mailarchive/linux-kernel/2010/2/8/4536388 | || Usually tasklist gives enough protection, but if copy_process() fails || it calls free_pid() lockless and does call_rcu(delayed_put_pid(). || This means, without rcu lock find_pid_ns() can't scan the hash table || safely. Thomas Gleixner wrote: | We can remove the tasklist_lock while at it. rcu_read_lock is enough. Patch also replaces thread_group_leader with has_group_leader_pid in accordance with a comment by Oleg Nesterov: | ... thread_group_leader() check is not reliable without | tasklist. If we race with de_thread() find_task_by_vpid() can find | the new leader before it updates its ->group_leader.
| | perhaps it makes sense to change posix_cpu_timer_create() to use | has_group_leader_pid() instead, just to make this code not look racy | and avoid adding new problems. Signed-off-by: Sergey Senozhatsky Cc: Peter Zijlstra Cc: Stanislaw Gruszka Reviewed-by: Oleg Nesterov LKML-Reference: <20101103165256.GD30053@swordfish.minsk.epam.com> Signed-off-by: Thomas Gleixner diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 6842eeb..05bb717 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -37,13 +37,13 @@ static int check_clock(const clockid_t which_clock) if (pid == 0) return 0; - read_lock(&tasklist_lock); + rcu_read_lock(); p = find_task_by_vpid(pid); if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ? - same_thread_group(p, current) : thread_group_leader(p))) { + same_thread_group(p, current) : has_group_leader_pid(p))) { error = -EINVAL; } - read_unlock(&tasklist_lock); + rcu_read_unlock(); return error; } @@ -390,7 +390,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer) INIT_LIST_HEAD(&new_timer->it.cpu.entry); - read_lock(&tasklist_lock); + rcu_read_lock(); if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) { if (pid == 0) { p = current; @@ -404,7 +404,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer) p = current->group_leader; } else { p = find_task_by_vpid(pid); - if (p && !thread_group_leader(p)) + if (p && !has_group_leader_pid(p)) p = NULL; } } @@ -414,7 +414,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer) } else { ret = -EINVAL; } - read_unlock(&tasklist_lock); + rcu_read_unlock(); return ret; } -- cgit v0.10.2 From 4c115e951d80aff126468adaec7a6c7854f61ab8 Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Thu, 4 Nov 2010 15:00:00 -0400 Subject: futex: Address compiler warnings in exit_robust_list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 1dcc41bb (futex: Change 3rd arg of fetch_robust_entry() to unsigned int*) some gcc versions 
decided to emit the following warning: kernel/futex.c: In function ‘exit_robust_list’: kernel/futex.c:2492: warning: ‘next_pi’ may be used uninitialized in this function The commit did not introduce the warning as gcc should have warned before that commit as well. It's just gcc being silly. The code path really can't result in next_pi being uninitialized (or should not), but let's keep the build clean. Annotate next_pi as an uninitialized_var. [ tglx: Addressed the same issue in futex_compat.c and massaged the changelog ] Signed-off-by: Darren Hart Tested-by: Matt Fleming Tested-by: Uwe Kleine-König Cc: Peter Zijlstra Cc: Eric Dumazet Cc: John Kacur Cc: Ingo Molnar LKML-Reference: <1288897200-13008-1-git-send-email-dvhart@linux.intel.com> Signed-off-by: Thomas Gleixner diff --git a/kernel/futex.c b/kernel/futex.c index 6c683b3..40a8777 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2489,7 +2489,8 @@ void exit_robust_list(struct task_struct *curr) { struct robust_list_head __user *head = curr->robust_list; struct robust_list __user *entry, *next_entry, *pending; - unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip; + unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; + unsigned int uninitialized_var(next_pi); unsigned long futex_offset; int rc; diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index 06da4df..a7934ac 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c @@ -49,7 +49,8 @@ void compat_exit_robust_list(struct task_struct *curr) { struct compat_robust_list_head __user *head = curr->compat_robust_list; struct robust_list __user *entry, *next_entry, *pending; - unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip; + unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; + unsigned int uninitialized_var(next_pi); compat_uptr_t uentry, next_uentry, upending; compat_long_t futex_offset; int rc; -- cgit v0.10.2 From 1ea6be212eea5ce1e8fabadacb0c639ad87b2f00 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Mon, 1 Nov 2010 22:44:34 +0100
Subject: x86, microcode, AMD: Replace vmalloc+memset with vzalloc We don't have to do memset() ourselves after vmalloc() when we have vzalloc(), so change that in arch/x86/kernel/microcode_amd.c::get_next_ucode(). Signed-off-by: Jesper Juhl Signed-off-by: Borislav Petkov diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index e1af7c0..383d4f8 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -183,16 +183,17 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) return NULL; } - mc = vmalloc(UCODE_MAX_SIZE); - if (mc) { - memset(mc, 0, UCODE_MAX_SIZE); - if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, - total_size)) { - vfree(mc); - mc = NULL; - } else - *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR; + mc = vzalloc(UCODE_MAX_SIZE); + if (!mc) + return NULL; + + if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size)) { + vfree(mc); + mc = NULL; + } else { + *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR; } + return mc; } -- cgit v0.10.2 From c7657ac0c3e4d4ab569296911164b7a2b0ff871a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 1 Nov 2010 23:36:53 +0100 Subject: x86, microcode, AMD: Cleanup code a bit get_ucode_data is a memcpy() wrapper which always returns 0. Move it into the header and make it an inline. Remove all code checking its return value and turn it into a void. There should be no functionality change resulting from this patch. 
Signed-off-by: Borislav Petkov diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index ef51b50..2421507 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -48,6 +48,12 @@ static inline struct microcode_ops * __init init_intel_microcode(void) #ifdef CONFIG_MICROCODE_AMD extern struct microcode_ops * __init init_amd_microcode(void); + +static inline void get_ucode_data(void *to, const u8 *from, size_t n) +{ + memcpy(to, from, n); +} + #else static inline struct microcode_ops * __init init_amd_microcode(void) { diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 383d4f8..1583133 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -155,12 +155,6 @@ static int apply_microcode_amd(int cpu) return 0; } -static int get_ucode_data(void *to, const u8 *from, size_t n) -{ - memcpy(to, from, n); - return 0; -} - static void * get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) { @@ -168,8 +162,7 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; void *mc; - if (get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR)) - return NULL; + get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR); if (section_hdr[0] != UCODE_UCODE_TYPE) { pr_err("error: invalid type field in container file section header\n"); @@ -187,12 +180,8 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) if (!mc) return NULL; - if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size)) { - vfree(mc); - mc = NULL; - } else { - *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR; - } + get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size); + *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR; return mc; } @@ -203,8 +192,7 @@ static int install_equiv_cpu_table(const u8 *buf) unsigned int *buf_pos = (unsigned int *)container_hdr; 
unsigned long size; - if (get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE)) - return 0; + get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE); size = buf_pos[2]; @@ -220,10 +208,7 @@ static int install_equiv_cpu_table(const u8 *buf) } buf += UCODE_CONTAINER_HEADER_SIZE; - if (get_ucode_data(equiv_cpu_table, buf, size)) { - vfree(equiv_cpu_table); - return 0; - } + get_ucode_data(equiv_cpu_table, buf, size); return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */ } -- cgit v0.10.2 From ae791a2d2e382adc69990a144a7f1a6c4bc24f1e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 10 Nov 2010 13:30:36 +0100 Subject: futex: Cleanup stale fshared flag interfaces The fast GUP changes stopped using the fshared flag in put_futex_key(), but we kept the interface the same. Clean up all stale users. This patch is split out from Darren Hart's combo patch which also combines various flags. This way the changes are clearly separated. Signed-off-by: Thomas Gleixner Cc: Darren Hart LKML-Reference: <1289250609-16304-1-git-send-email-dvhart@linux.intel.com> diff --git a/kernel/futex.c b/kernel/futex.c index 40a8777..38cf606 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -283,8 +283,7 @@ again: return 0; } -static inline -void put_futex_key(int fshared, union futex_key *key) +static inline void put_futex_key(union futex_key *key) { drop_futex_key_refs(key); } @@ -907,7 +906,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset) } spin_unlock(&hb->lock); - put_futex_key(fshared, &key); + put_futex_key(&key); out: return ret; } @@ -965,8 +964,8 @@ retry_private: if (!fshared) goto retry_private; - put_futex_key(fshared, &key2); - put_futex_key(fshared, &key1); + put_futex_key(&key2); + put_futex_key(&key1); goto retry; } @@ -996,9 +995,9 @@ retry_private: double_unlock_hb(hb1, hb2); out_put_keys: - put_futex_key(fshared, &key2); + put_futex_key(&key2); out_put_key1: - put_futex_key(fshared, &key1); +
put_futex_key(&key1); out: return ret; } @@ -1219,8 +1218,8 @@ retry_private: if (!fshared) goto retry_private; - put_futex_key(fshared, &key2); - put_futex_key(fshared, &key1); + put_futex_key(&key2); + put_futex_key(&key1); goto retry; } if (curval != *cmpval) { @@ -1260,8 +1259,8 @@ retry_private: break; case -EFAULT: double_unlock_hb(hb1, hb2); - put_futex_key(fshared, &key2); - put_futex_key(fshared, &key1); + put_futex_key(&key2); + put_futex_key(&key1); ret = fault_in_user_writeable(uaddr2); if (!ret) goto retry; @@ -1269,8 +1268,8 @@ retry_private: case -EAGAIN: /* The owner was exiting, try again. */ double_unlock_hb(hb1, hb2); - put_futex_key(fshared, &key2); - put_futex_key(fshared, &key1); + put_futex_key(&key2); + put_futex_key(&key1); cond_resched(); goto retry; default: @@ -1352,9 +1351,9 @@ out_unlock: drop_futex_key_refs(&key1); out_put_keys: - put_futex_key(fshared, &key2); + put_futex_key(&key2); out_put_key1: - put_futex_key(fshared, &key1); + put_futex_key(&key1); out: if (pi_state != NULL) free_pi_state(pi_state); @@ -1494,7 +1493,7 @@ static void unqueue_me_pi(struct futex_q *q) * private futexes. 
*/ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, - struct task_struct *newowner, int fshared) + struct task_struct *newowner) { u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; struct futex_pi_state *pi_state = q->pi_state; @@ -1600,7 +1599,6 @@ static long futex_wait_restart(struct restart_block *restart); /** * fixup_owner() - Post lock pi_state and corner case management * @uaddr: user address of the futex - * @fshared: whether the futex is shared (1) or not (0) * @q: futex_q (contains pi_state and access to the rt_mutex) * @locked: if the attempt to take the rt_mutex succeeded (1) or not (0) * @@ -1613,8 +1611,7 @@ static long futex_wait_restart(struct restart_block *restart); * 0 - success, lock not taken * <0 - on error (-EFAULT) */ -static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q, - int locked) +static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) { struct task_struct *owner; int ret = 0; @@ -1625,7 +1622,7 @@ static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q, * did a lock-steal - fix up the PI-state in that case: */ if (q->pi_state->owner != current) - ret = fixup_pi_state_owner(uaddr, q, current, fshared); + ret = fixup_pi_state_owner(uaddr, q, current); goto out; } @@ -1652,7 +1649,7 @@ static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q, * lock. Fix the state up. */ owner = rt_mutex_owner(&q->pi_state->pi_mutex); - ret = fixup_pi_state_owner(uaddr, q, owner, fshared); + ret = fixup_pi_state_owner(uaddr, q, owner); goto out; } @@ -1772,7 +1769,7 @@ retry_private: if (!fshared) goto retry_private; - put_futex_key(fshared, &q->key); + put_futex_key(&q->key); goto retry; } @@ -1783,7 +1780,7 @@ retry_private: out: if (ret) - put_futex_key(fshared, &q->key); + put_futex_key(&q->key); return ret; } @@ -1941,7 +1938,7 @@ retry_private: * exit to complete. 
*/ queue_unlock(&q, hb); - put_futex_key(fshared, &q.key); + put_futex_key(&q.key); cond_resched(); goto retry; default: @@ -1971,7 +1968,7 @@ retry_private: * Fixup the pi_state owner and possibly acquire the lock if we * haven't already. */ - res = fixup_owner(uaddr, fshared, &q, !ret); + res = fixup_owner(uaddr, &q, !ret); /* * If fixup_owner() returned an error, proprogate that. If it acquired * the lock, clear our -ETIMEDOUT or -EINTR. @@ -1995,7 +1992,7 @@ out_unlock_put_key: queue_unlock(&q, hb); out_put_key: - put_futex_key(fshared, &q.key); + put_futex_key(&q.key); out: if (to) destroy_hrtimer_on_stack(&to->timer); @@ -2011,7 +2008,7 @@ uaddr_faulted: if (!fshared) goto retry_private; - put_futex_key(fshared, &q.key); + put_futex_key(&q.key); goto retry; } @@ -2093,14 +2090,14 @@ retry: out_unlock: spin_unlock(&hb->lock); - put_futex_key(fshared, &key); + put_futex_key(&key); out: return ret; pi_faulted: spin_unlock(&hb->lock); - put_futex_key(fshared, &key); + put_futex_key(&key); ret = fault_in_user_writeable(uaddr); if (!ret) @@ -2273,8 +2270,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, */ if (q.pi_state && (q.pi_state->owner != current)) { spin_lock(q.lock_ptr); - ret = fixup_pi_state_owner(uaddr2, &q, current, - fshared); + ret = fixup_pi_state_owner(uaddr2, &q, current); spin_unlock(q.lock_ptr); } } else { @@ -2293,7 +2289,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, * Fixup the pi_state owner and possibly acquire the lock if we * haven't already. */ - res = fixup_owner(uaddr2, fshared, &q, !ret); + res = fixup_owner(uaddr2, &q, !ret); /* * If fixup_owner() returned an error, proprogate that. If it * acquired the lock, clear -ETIMEDOUT or -EINTR. 
@@ -2324,9 +2320,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, } out_put_keys: - put_futex_key(fshared, &q.key); + put_futex_key(&q.key); out_key2: - put_futex_key(fshared, &key2); + put_futex_key(&key2); out: if (to) { -- cgit v0.10.2 From b41277dc7a18ee332d9e8078e978bacdf6e76157 Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Mon, 8 Nov 2010 13:10:09 -0800 Subject: futex: Replace fshared and clockrt with combined flags In the early days we passed the mmap sem around. That became the "int fshared" with the fast gup improvements. Then we added "int clockrt" in places. This patch unifies these options as "flags". [ tglx: Split out the stale fshared cleanup ] Signed-off-by: Darren Hart Cc: Peter Zijlstra Cc: Eric Dumazet Cc: John Kacur Cc: Ingo Molnar LKML-Reference: <1289250609-16304-1-git-send-email-dvhart@linux.intel.com> Signed-off-by: Thomas Gleixner diff --git a/kernel/futex.c b/kernel/futex.c index 38cf606..87ad287 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -69,6 +69,14 @@ int __read_mostly futex_cmpxchg_enabled; #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8) /* + * Futex flags used to encode options to functions and preserve them across + * restarts. + */ +#define FLAGS_SHARED 0x01 +#define FLAGS_CLOCKRT 0x02 +#define FLAGS_HAS_TIMEOUT 0x04 + +/* * Priority Inheritance state: */ struct futex_pi_state { @@ -869,7 +877,8 @@ double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2) /* * Wake up waiters matching bitset queued on this futex (uaddr). 
*/ -static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset) +static int +futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) { struct futex_hash_bucket *hb; struct futex_q *this, *next; @@ -880,7 +889,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset) if (!bitset) return -EINVAL; - ret = get_futex_key(uaddr, fshared, &key); + ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key); if (unlikely(ret != 0)) goto out; @@ -916,7 +925,7 @@ out: * to this virtual address: */ static int -futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, +futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, int nr_wake, int nr_wake2, int op) { union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; @@ -926,10 +935,10 @@ futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, int ret, op_ret; retry: - ret = get_futex_key(uaddr1, fshared, &key1); + ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1); if (unlikely(ret != 0)) goto out; - ret = get_futex_key(uaddr2, fshared, &key2); + ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2); if (unlikely(ret != 0)) goto out_put_key1; @@ -961,7 +970,7 @@ retry_private: if (ret) goto out_put_keys; - if (!fshared) + if (!(flags & FLAGS_SHARED)) goto retry_private; put_futex_key(&key2); @@ -1132,13 +1141,13 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, /** * futex_requeue() - Requeue waiters from uaddr1 to uaddr2 * @uaddr1: source futex user address - * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED + * @flags: futex flags (FLAGS_SHARED, etc.) 
* @uaddr2: target futex user address * @nr_wake: number of waiters to wake (must be 1 for requeue_pi) * @nr_requeue: number of waiters to requeue (0-INT_MAX) * @cmpval: @uaddr1 expected value (or %NULL) * @requeue_pi: if we are attempting to requeue from a non-pi futex to a - * pi futex (pi to pi requeue is not supported) + * pi futex (pi to pi requeue is not supported) * * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire * uaddr2 atomically on behalf of the top waiter. @@ -1147,9 +1156,9 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, * >=0 - on success, the number of tasks requeued or woken * <0 - on error */ -static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, - int nr_wake, int nr_requeue, u32 *cmpval, - int requeue_pi) +static int futex_requeue(u32 __user *uaddr1, unsigned int flags, + u32 __user *uaddr2, int nr_wake, int nr_requeue, + u32 *cmpval, int requeue_pi) { union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; int drop_count = 0, task_count = 0, ret; @@ -1190,10 +1199,10 @@ retry: pi_state = NULL; } - ret = get_futex_key(uaddr1, fshared, &key1); + ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1); if (unlikely(ret != 0)) goto out; - ret = get_futex_key(uaddr2, fshared, &key2); + ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2); if (unlikely(ret != 0)) goto out_put_key1; @@ -1215,7 +1224,7 @@ retry_private: if (ret) goto out_put_keys; - if (!fshared) + if (!(flags & FLAGS_SHARED)) goto retry_private; put_futex_key(&key2); @@ -1586,14 +1595,6 @@ handle_fault: goto retry; } -/* - * In case we must use restart_block to restart a futex_wait, - * we encode in the 'flags' shared capability - */ -#define FLAGS_SHARED 0x01 -#define FLAGS_CLOCKRT 0x02 -#define FLAGS_HAS_TIMEOUT 0x04 - static long futex_wait_restart(struct restart_block *restart); /** @@ -1712,7 +1713,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, * futex_wait_setup() 
- Prepare to wait on a futex * @uaddr: the futex userspace address * @val: the expected value - * @fshared: whether the futex is shared (1) or not (0) + * @flags: futex flags (FLAGS_SHARED, etc.) * @q: the associated futex_q * @hb: storage for hash_bucket pointer to be returned to caller * @@ -1725,7 +1726,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, * 0 - uaddr contains val and hb has been locked * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlcoked */ -static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared, +static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, struct futex_q *q, struct futex_hash_bucket **hb) { u32 uval; @@ -1750,7 +1751,7 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared, */ retry: q->key = FUTEX_KEY_INIT; - ret = get_futex_key(uaddr, fshared, &q->key); + ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key); if (unlikely(ret != 0)) return ret; @@ -1766,7 +1767,7 @@ retry_private: if (ret) goto out; - if (!fshared) + if (!(flags & FLAGS_SHARED)) goto retry_private; put_futex_key(&q->key); @@ -1784,8 +1785,8 @@ out: return ret; } -static int futex_wait(u32 __user *uaddr, int fshared, - u32 val, ktime_t *abs_time, u32 bitset, int clockrt) +static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, + ktime_t *abs_time, u32 bitset) { struct hrtimer_sleeper timeout, *to = NULL; struct restart_block *restart; @@ -1804,8 +1805,9 @@ static int futex_wait(u32 __user *uaddr, int fshared, if (abs_time) { to = &timeout; - hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME : - CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ? + CLOCK_REALTIME : CLOCK_MONOTONIC, + HRTIMER_MODE_ABS); hrtimer_init_sleeper(to, current); hrtimer_set_expires_range_ns(&to->timer, *abs_time, current->timer_slack_ns); @@ -1816,7 +1818,7 @@ retry: * Prepare to wait on uaddr. 
On success, holds hb lock and increments * q.key refs. */ - ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); + ret = futex_wait_setup(uaddr, val, flags, &q, &hb); if (ret) goto out; @@ -1849,12 +1851,7 @@ retry: restart->futex.val = val; restart->futex.time = abs_time->tv64; restart->futex.bitset = bitset; - restart->futex.flags = FLAGS_HAS_TIMEOUT; - - if (fshared) - restart->futex.flags |= FLAGS_SHARED; - if (clockrt) - restart->futex.flags |= FLAGS_CLOCKRT; + restart->futex.flags = flags; ret = -ERESTART_RESTARTBLOCK; @@ -1870,7 +1867,6 @@ out: static long futex_wait_restart(struct restart_block *restart) { u32 __user *uaddr = restart->futex.uaddr; - int fshared = 0; ktime_t t, *tp = NULL; if (restart->futex.flags & FLAGS_HAS_TIMEOUT) { @@ -1878,11 +1874,9 @@ static long futex_wait_restart(struct restart_block *restart) tp = &t; } restart->fn = do_no_restart_syscall; - if (restart->futex.flags & FLAGS_SHARED) - fshared = 1; - return (long)futex_wait(uaddr, fshared, restart->futex.val, tp, - restart->futex.bitset, - restart->futex.flags & FLAGS_CLOCKRT); + + return (long)futex_wait(uaddr, restart->futex.flags, + restart->futex.val, tp, restart->futex.bitset); } @@ -1892,8 +1886,8 @@ static long futex_wait_restart(struct restart_block *restart) * if there are waiters then it will block, it does PI, etc. (Due to * races the kernel might see a 0 value of the futex too.) 
*/ -static int futex_lock_pi(u32 __user *uaddr, int fshared, - int detect, ktime_t *time, int trylock) +static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect, + ktime_t *time, int trylock) { struct hrtimer_sleeper timeout, *to = NULL; struct futex_hash_bucket *hb; @@ -1916,7 +1910,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, q.requeue_pi_key = NULL; retry: q.key = FUTEX_KEY_INIT; - ret = get_futex_key(uaddr, fshared, &q.key); + ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key); if (unlikely(ret != 0)) goto out; @@ -2005,7 +1999,7 @@ uaddr_faulted: if (ret) goto out_put_key; - if (!fshared) + if (!(flags & FLAGS_SHARED)) goto retry_private; put_futex_key(&q.key); @@ -2017,7 +2011,7 @@ uaddr_faulted: * This is the in-kernel slowpath: we look up the PI state (if any), * and do the rt-mutex unlock. */ -static int futex_unlock_pi(u32 __user *uaddr, int fshared) +static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) { struct futex_hash_bucket *hb; struct futex_q *this, *next; @@ -2035,7 +2029,7 @@ retry: if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current)) return -EPERM; - ret = get_futex_key(uaddr, fshared, &key); + ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key); if (unlikely(ret != 0)) goto out; @@ -2157,7 +2151,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, /** * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2 * @uaddr: the futex we initially wait on (non-pi) - * @fshared: whether the futexes are shared (1) or not (0). They must be + * @flags: futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.), they must be * the same type, no requeueing from private to shared, etc. 
* @val: the expected value of uaddr * @abs_time: absolute timeout @@ -2195,9 +2189,9 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, * 0 - On success * <0 - On error */ -static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, +static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset, - int clockrt, u32 __user *uaddr2) + u32 __user *uaddr2) { struct hrtimer_sleeper timeout, *to = NULL; struct rt_mutex_waiter rt_waiter; @@ -2212,8 +2206,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, if (abs_time) { to = &timeout; - hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME : - CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ? + CLOCK_REALTIME : CLOCK_MONOTONIC, + HRTIMER_MODE_ABS); hrtimer_init_sleeper(to, current); hrtimer_set_expires_range_ns(&to->timer, *abs_time, current->timer_slack_ns); @@ -2227,7 +2222,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, rt_waiter.task = NULL; key2 = FUTEX_KEY_INIT; - ret = get_futex_key(uaddr2, fshared, &key2); + ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2); if (unlikely(ret != 0)) goto out; @@ -2240,7 +2235,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, * Prepare to wait on uaddr. On success, increments q.key (key1) ref * count. 
*/ - ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); + ret = futex_wait_setup(uaddr, val, flags, &q, &hb); if (ret) goto out_key2; @@ -2547,58 +2542,57 @@ void exit_robust_list(struct task_struct *curr) long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3) { - int clockrt, ret = -ENOSYS; - int cmd = op & FUTEX_CMD_MASK; - int fshared = 0; + int ret = -ENOSYS, cmd = op & FUTEX_CMD_MASK; + unsigned int flags = 0; if (!(op & FUTEX_PRIVATE_FLAG)) - fshared = 1; + flags |= FLAGS_SHARED; - clockrt = op & FUTEX_CLOCK_REALTIME; - if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI) - return -ENOSYS; + if (op & FUTEX_CLOCK_REALTIME) { + flags |= FLAGS_CLOCKRT; + if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI) + return -ENOSYS; + } switch (cmd) { case FUTEX_WAIT: val3 = FUTEX_BITSET_MATCH_ANY; case FUTEX_WAIT_BITSET: - ret = futex_wait(uaddr, fshared, val, timeout, val3, clockrt); + ret = futex_wait(uaddr, flags, val, timeout, val3); break; case FUTEX_WAKE: val3 = FUTEX_BITSET_MATCH_ANY; case FUTEX_WAKE_BITSET: - ret = futex_wake(uaddr, fshared, val, val3); + ret = futex_wake(uaddr, flags, val, val3); break; case FUTEX_REQUEUE: - ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0); + ret = futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0); break; case FUTEX_CMP_REQUEUE: - ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, - 0); + ret = futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0); break; case FUTEX_WAKE_OP: - ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3); + ret = futex_wake_op(uaddr, flags, uaddr2, val, val2, val3); break; case FUTEX_LOCK_PI: if (futex_cmpxchg_enabled) - ret = futex_lock_pi(uaddr, fshared, val, timeout, 0); + ret = futex_lock_pi(uaddr, flags, val, timeout, 0); break; case FUTEX_UNLOCK_PI: if (futex_cmpxchg_enabled) - ret = futex_unlock_pi(uaddr, fshared); + ret = futex_unlock_pi(uaddr, flags); break; case 
FUTEX_TRYLOCK_PI: if (futex_cmpxchg_enabled) - ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1); + ret = futex_lock_pi(uaddr, flags, 0, timeout, 1); break; case FUTEX_WAIT_REQUEUE_PI: val3 = FUTEX_BITSET_MATCH_ANY; - ret = futex_wait_requeue_pi(uaddr, fshared, val, timeout, val3, - clockrt, uaddr2); + ret = futex_wait_requeue_pi(uaddr, flags, val, timeout, val3, + uaddr2); break; case FUTEX_CMP_REQUEUE_PI: - ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, - 1); + ret = futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1); break; default: ret = -ENOSYS; -- cgit v0.10.2 From 5bdb05f91b27b9361c4f348a4e05999f597df72e Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Mon, 8 Nov 2010 13:40:28 -0800 Subject: futex: Add futex_q static initializer The futex_q struct has grown considerably over the last couple years. I believe it now merits a static initializer to avoid uninitialized data errors (having spent more time than I care to admit debugging an uninitialized q.bitset in an experimental new op code). With the key initializer built in, several of the FUTEX_KEY_INIT calls can be removed. V2: use a static variable instead of an init macro. use a C99 initializer and don't rely on variable ordering in the struct. V3: make futex_q_init const Signed-off-by: Darren Hart Cc: Peter Zijlstra Cc: Eric Dumazet Cc: John Kacur Cc: Ingo Molnar LKML-Reference: <1289252428-18383-1-git-send-email-dvhart@linux.intel.com> Signed-off-by: Thomas Gleixner diff --git a/kernel/futex.c b/kernel/futex.c index 87ad287..3019b92 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -131,6 +131,12 @@ struct futex_q { u32 bitset; }; +static const struct futex_q futex_q_init = { + /* list gets initialized in queue_me()*/ + .key = FUTEX_KEY_INIT, + .bitset = FUTEX_BITSET_MATCH_ANY +}; + /* * Hash buckets are shared by all the futex_keys that hash to the same * location. 
Each key may have multiple futex_q structures, one for each task @@ -1750,7 +1756,6 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, * rare, but normal. */ retry: - q->key = FUTEX_KEY_INIT; ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key); if (unlikely(ret != 0)) return ret; @@ -1791,16 +1796,12 @@ static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, struct hrtimer_sleeper timeout, *to = NULL; struct restart_block *restart; struct futex_hash_bucket *hb; - struct futex_q q; + struct futex_q q = futex_q_init; int ret; if (!bitset) return -EINVAL; - - q.pi_state = NULL; q.bitset = bitset; - q.rt_waiter = NULL; - q.requeue_pi_key = NULL; if (abs_time) { to = &timeout; @@ -1891,7 +1892,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect, { struct hrtimer_sleeper timeout, *to = NULL; struct futex_hash_bucket *hb; - struct futex_q q; + struct futex_q q = futex_q_init; int res, ret; if (refill_pi_state_cache()) @@ -1905,11 +1906,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect, hrtimer_set_expires(&to->timer, *time); } - q.pi_state = NULL; - q.rt_waiter = NULL; - q.requeue_pi_key = NULL; retry: - q.key = FUTEX_KEY_INIT; ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key); if (unlikely(ret != 0)) goto out; @@ -2197,8 +2194,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, struct rt_mutex_waiter rt_waiter; struct rt_mutex *pi_mutex = NULL; struct futex_hash_bucket *hb; - union futex_key key2; - struct futex_q q; + union futex_key key2 = FUTEX_KEY_INIT; + struct futex_q q = futex_q_init; int res, ret; if (!bitset) @@ -2221,12 +2218,10 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, debug_rt_mutex_init_waiter(&rt_waiter); rt_waiter.task = NULL; - key2 = FUTEX_KEY_INIT; ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2); if (unlikely(ret != 0)) goto out; - q.pi_state = NULL; q.bitset = bitset; 
q.rt_waiter = &rt_waiter; q.requeue_pi_key = &key2; -- cgit v0.10.2 From ebe2e91e000c59aed0300d81815f451c85e0bda6 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 10 Nov 2010 10:19:10 -0500 Subject: cifs: fix potential use-after-free in cifs_oplock_break_put cfile may very well be freed after the cifsFileInfo_put. Make sure we have a valid pointer to the superblock for cifs_sb_deactive. Signed-off-by: Jeff Layton Signed-off-by: Steve French diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 06c3e83..b857ce5 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2271,8 +2271,10 @@ void cifs_oplock_break_get(struct cifsFileInfo *cfile) void cifs_oplock_break_put(struct cifsFileInfo *cfile) { + struct super_block *sb = cfile->dentry->d_sb; + cifsFileInfo_put(cfile); - cifs_sb_deactive(cfile->dentry->d_sb); + cifs_sb_deactive(sb); } const struct address_space_operations cifs_addr_ops = { -- cgit v0.10.2 From c28a9926f28e8c7c52603db58754a78008768ca1 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 9 Nov 2010 12:00:11 +0000 Subject: ASoC: Remove broken WM8350 direction constants The WM8350 driver was using some custom constants to interpret the direction of the MCLK signal which had the opposite values to those used as standard by the ASoC core, causing confusion in machine drivers such as the 1133-EV1 board. 
Reported-by: Tommy Zhu Signed-off-by: Mark Brown Acked-by: Liam Girdwood diff --git a/include/linux/mfd/wm8350/audio.h b/include/linux/mfd/wm8350/audio.h index a95141e..bd581c6 100644 --- a/include/linux/mfd/wm8350/audio.h +++ b/include/linux/mfd/wm8350/audio.h @@ -522,9 +522,6 @@ #define WM8350_MCLK_SEL_PLL_32K 3 #define WM8350_MCLK_SEL_MCLK 5 -#define WM8350_MCLK_DIR_OUT 0 -#define WM8350_MCLK_DIR_IN 1 - /* clock divider id's */ #define WM8350_ADC_CLKDIV 0 #define WM8350_DAC_CLKDIV 1 diff --git a/sound/soc/codecs/wm8350.c b/sound/soc/codecs/wm8350.c index f4f1fba..4f3e919 100644 --- a/sound/soc/codecs/wm8350.c +++ b/sound/soc/codecs/wm8350.c @@ -831,7 +831,7 @@ static int wm8350_set_dai_sysclk(struct snd_soc_dai *codec_dai, } /* MCLK direction */ - if (dir == WM8350_MCLK_DIR_OUT) + if (dir == SND_SOC_CLOCK_OUT) wm8350_set_bits(wm8350, WM8350_CLOCK_CONTROL_2, WM8350_MCLK_DIR); else -- cgit v0.10.2 From 0049317edb76d17bfac736b658523c15935391a3 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 9 Nov 2010 14:38:58 +0000 Subject: ASoC: Ensure sane WM835x AIF configuration by default Ensure that whatever ran before us leaves the WM835x with a sane default audio interface configuration as we do not override the companding, loopback or tristate settings and do not reset the chip at startup (as it is a PMIC). 
Reported-by: Keiji Mitsuhisa Signed-off-by: Mark Brown Acked-by: Liam Girdwood diff --git a/sound/soc/codecs/wm8350.c b/sound/soc/codecs/wm8350.c index 4f3e919..7611add 100644 --- a/sound/soc/codecs/wm8350.c +++ b/sound/soc/codecs/wm8350.c @@ -1586,6 +1586,13 @@ static int wm8350_codec_probe(struct snd_soc_codec *codec) wm8350_set_bits(wm8350, WM8350_ROUT2_VOLUME, WM8350_OUT2_VU | WM8350_OUT2R_MUTE); + /* Make sure AIF tristating is disabled by default */ + wm8350_clear_bits(wm8350, WM8350_AI_FORMATING, WM8350_AIF_TRI); + + /* Make sure we've got a sane companding setup too */ + wm8350_clear_bits(wm8350, WM8350_ADC_DAC_COMP, + WM8350_DAC_COMP | WM8350_LOOPBACK); + /* Make sure jack detect is disabled to start off with */ wm8350_clear_bits(wm8350, WM8350_JACK_DETECT, WM8350_JDL_ENA | WM8350_JDR_ENA); -- cgit v0.10.2 From aae6d3ddd8b90f5b2c8d79a2b914d1706d124193 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 17 Sep 2010 15:02:32 -0700 Subject: sched: Use group weight, idle cpu metrics to fix imbalances during idle Currently we consider a sched domain to be well balanced when the imbalance is less than the domain's imbalance_pct. As the number of cores and threads are increasing, current values of imbalance_pct (for example 25% for a NUMA domain) are not enough to detect imbalances like: a) On a WSM-EP system (two sockets, each having 6 cores and 12 logical threads), 24 cpu-hogging tasks get scheduled as 13 on one socket and 11 on another socket. Leading to an idle HT cpu. b) On a hypothetical 2 socket NHM-EX system (each socket having 8 cores and 16 logical threads), 16 cpu-hogging tasks can get scheduled as 9 on one socket and 7 on another socket. Leaving one core in a socket idle whereas in another socket we have a core having both its HT siblings busy.
While this issue can be fixed by decreasing the domain's imbalance_pct (by making it a function of number of logical cpus in the domain), it can potentially cause more task migrations across sched groups in an overloaded case. Fix this by using imbalance_pct only during newly_idle and busy load balancing. And during idle load balancing, check if there is an imbalance in number of idle cpu's across the busiest and this sched_group or if the busiest group has more tasks than its weight that the idle cpu in this_group can pull. Reported-by: Nikhil Rao Signed-off-by: Suresh Siddha Signed-off-by: Peter Zijlstra LKML-Reference: <1284760952.2676.11.camel@sbsiddha-MOBL3.sc.intel.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/sched.h b/include/linux/sched.h index d0036e5..2c79e92 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -862,6 +862,7 @@ struct sched_group { * single CPU. */ unsigned int cpu_power, cpu_power_orig; + unsigned int group_weight; /* * The CPUs this group covers. 
diff --git a/kernel/sched.c b/kernel/sched.c index aa14a56..36a0880 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6960,6 +6960,8 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) if (cpu != group_first_cpu(sd->groups)) return; + sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups)); + child = sd->child; sd->groups->cpu_power = 0; diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index f4f6a83..034c4f4 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -2035,13 +2035,16 @@ struct sd_lb_stats { unsigned long this_load_per_task; unsigned long this_nr_running; unsigned long this_has_capacity; + unsigned int this_idle_cpus; /* Statistics of the busiest group */ + unsigned int busiest_idle_cpus; unsigned long max_load; unsigned long busiest_load_per_task; unsigned long busiest_nr_running; unsigned long busiest_group_capacity; unsigned long busiest_has_capacity; + unsigned int busiest_group_weight; int group_imb; /* Is there imbalance in this sd */ #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) @@ -2063,6 +2066,8 @@ struct sg_lb_stats { unsigned long sum_nr_running; /* Nr tasks running in the group */ unsigned long sum_weighted_load; /* Weighted load of group's tasks */ unsigned long group_capacity; + unsigned long idle_cpus; + unsigned long group_weight; int group_imb; /* Is there an imbalance in the group ? */ int group_has_capacity; /* Is there extra capacity in the group? 
*/ }; @@ -2431,7 +2436,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, sgs->group_load += load; sgs->sum_nr_running += rq->nr_running; sgs->sum_weighted_load += weighted_cpuload(i); - + if (idle_cpu(i)) + sgs->idle_cpus++; } /* @@ -2469,6 +2475,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE); if (!sgs->group_capacity) sgs->group_capacity = fix_small_capacity(sd, group); + sgs->group_weight = group->group_weight; if (sgs->group_capacity > sgs->sum_nr_running) sgs->group_has_capacity = 1; @@ -2576,13 +2583,16 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu, sds->this_nr_running = sgs.sum_nr_running; sds->this_load_per_task = sgs.sum_weighted_load; sds->this_has_capacity = sgs.group_has_capacity; + sds->this_idle_cpus = sgs.idle_cpus; } else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) { sds->max_load = sgs.avg_load; sds->busiest = sg; sds->busiest_nr_running = sgs.sum_nr_running; + sds->busiest_idle_cpus = sgs.idle_cpus; sds->busiest_group_capacity = sgs.group_capacity; sds->busiest_load_per_task = sgs.sum_weighted_load; sds->busiest_has_capacity = sgs.group_has_capacity; + sds->busiest_group_weight = sgs.group_weight; sds->group_imb = sgs.group_imb; } @@ -2860,8 +2870,26 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, if (sds.this_load >= sds.avg_load) goto out_balanced; - if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load) - goto out_balanced; + /* + * In the CPU_NEWLY_IDLE, use imbalance_pct to be conservative. + * And to check for busy balance use !idle_cpu instead of + * CPU_NOT_IDLE. This is because HT siblings will use CPU_NOT_IDLE + * even when they are idle. + */ + if (idle == CPU_NEWLY_IDLE || !idle_cpu(this_cpu)) { + if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load) + goto out_balanced; + } else { + /* + * This cpu is idle. 
If the busiest group load doesn't + * have more tasks than the number of available cpu's and + * there is no imbalance between this and busiest group + * wrt to idle cpu's, it is balanced. + */ + if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) && + sds.busiest_nr_running <= sds.busiest_group_weight) + goto out_balanced; + } force_balance: /* Looks like there is an imbalance. Compute it */ -- cgit v0.10.2 From 2d46709082c062cae7cce1a15f8cd4cd81b92d88 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 9 Nov 2010 14:36:52 +0100 Subject: sched: Fix runnable condition for stoptask Heiko reported that the TASK_RUNNING check is not sufficient for CONFIG_PREEMPT=y since we can get preempted with !TASK_RUNNING. He suggested adding a ->se.on_rq test to the existing TASK_RUNNING one, however TASK_RUNNING will always have ->se.on_rq, so we might as well reduce that to a single test. [ stop tasks should never get preempted, but it's good to handle this case correctly should this ever happen ] Reported-by: Heiko Carstens Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c index 45bddc0..755483b 100644 --- a/kernel/sched_stoptask.c +++ b/kernel/sched_stoptask.c @@ -26,7 +26,7 @@ static struct task_struct *pick_next_task_stop(struct rq *rq) { struct task_struct *stop = rq->stop; - if (stop && stop->state == TASK_RUNNING) + if (stop && stop->se.on_rq) return stop; return NULL; -- cgit v0.10.2 From c5cbac69422a9bffe7c7fd9a115130e272b547f5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 10 Nov 2010 10:35:51 -0800 Subject: x86, cpu: Rename verify_cpu_64.S to verify_cpu.S The code is 32bit already, and can be used in 32bit routines. Signed-off-by: Kees Cook LKML-Reference: <1289414154-7829-2-git-send-email-kees.cook@canonical.com> Acked-by: Pekka Enberg Acked-by: Alan Cox Signed-off-by: H.
Peter Anvin diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 52f85a1..35af09d 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -182,7 +182,7 @@ no_longmode: hlt jmp 1b -#include "../../kernel/verify_cpu_64.S" +#include "../../kernel/verify_cpu.S" /* * Be careful here startup_64 needs to be at a predictable diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S index 3af2dff..075d130 100644 --- a/arch/x86/kernel/trampoline_64.S +++ b/arch/x86/kernel/trampoline_64.S @@ -127,7 +127,7 @@ startup_64: no_longmode: hlt jmp no_longmode -#include "verify_cpu_64.S" +#include "verify_cpu.S" # Careful these need to be in the same 64K segment as the above; tidt: diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S new file mode 100644 index 0000000..56a8c2a --- /dev/null +++ b/arch/x86/kernel/verify_cpu.S @@ -0,0 +1,106 @@ +/* + * + * verify_cpu.S - Code for cpu long mode and SSE verification. This + * code has been borrowed from boot/setup.S and was introduced by + * Andi Kleen. + * + * Copyright (c) 2007 Andi Kleen (ak@suse.de) + * Copyright (c) 2007 Eric Biederman (ebiederm@xmission.com) + * Copyright (c) 2007 Vivek Goyal (vgoyal@in.ibm.com) + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + * + * This is a common code for verification whether CPU supports + * long mode and SSE or not. It is not called directly instead this + * file is included at various places and compiled in that context. + * Following are the current usage. + * + * This file is included by both 16bit and 32bit code. 
+ * + * arch/x86_64/boot/setup.S : Boot cpu verification (16bit) + * arch/x86_64/boot/compressed/head.S: Boot cpu verification (32bit) + * arch/x86_64/kernel/trampoline.S: secondary processor verfication (16bit) + * arch/x86_64/kernel/acpi/wakeup.S:Verfication at resume (16bit) + * + * verify_cpu, returns the status of cpu check in register %eax. + * 0: Success 1: Failure + * + * The caller needs to check for the error code and take the action + * appropriately. Either display a message or halt. + */ + +#include +#include + +verify_cpu: + pushfl # Save caller passed flags + pushl $0 # Kill any dangerous flags + popfl + + pushfl # standard way to check for cpuid + popl %eax + movl %eax,%ebx + xorl $0x200000,%eax + pushl %eax + popfl + pushfl + popl %eax + cmpl %eax,%ebx + jz verify_cpu_no_longmode # cpu has no cpuid + + movl $0x0,%eax # See if cpuid 1 is implemented + cpuid + cmpl $0x1,%eax + jb verify_cpu_no_longmode # no cpuid 1 + + xor %di,%di + cmpl $0x68747541,%ebx # AuthenticAMD + jnz verify_cpu_noamd + cmpl $0x69746e65,%edx + jnz verify_cpu_noamd + cmpl $0x444d4163,%ecx + jnz verify_cpu_noamd + mov $1,%di # cpu is from AMD + +verify_cpu_noamd: + movl $0x1,%eax # Does the cpu have what it takes + cpuid + andl $REQUIRED_MASK0,%edx + xorl $REQUIRED_MASK0,%edx + jnz verify_cpu_no_longmode + + movl $0x80000000,%eax # See if extended cpuid is implemented + cpuid + cmpl $0x80000001,%eax + jb verify_cpu_no_longmode # no extended cpuid + + movl $0x80000001,%eax # Does the cpu have what it takes + cpuid + andl $REQUIRED_MASK1,%edx + xorl $REQUIRED_MASK1,%edx + jnz verify_cpu_no_longmode + +verify_cpu_sse_test: + movl $1,%eax + cpuid + andl $SSE_MASK,%edx + cmpl $SSE_MASK,%edx + je verify_cpu_sse_ok + test %di,%di + jz verify_cpu_no_longmode # only try to force SSE on AMD + movl $MSR_K7_HWCR,%ecx + rdmsr + btr $15,%eax # enable SSE + wrmsr + xor %di,%di # don't loop + jmp verify_cpu_sse_test # try again + +verify_cpu_no_longmode: + popfl # Restore caller passed flags + 
movl $1,%eax + ret +verify_cpu_sse_ok: + popfl # Restore caller passed flags + xorl %eax, %eax + ret diff --git a/arch/x86/kernel/verify_cpu_64.S b/arch/x86/kernel/verify_cpu_64.S deleted file mode 100644 index 56a8c2a..0000000 --- a/arch/x86/kernel/verify_cpu_64.S +++ /dev/null @@ -1,106 +0,0 @@ -/* - * - * verify_cpu.S - Code for cpu long mode and SSE verification. This - * code has been borrowed from boot/setup.S and was introduced by - * Andi Kleen. - * - * Copyright (c) 2007 Andi Kleen (ak@suse.de) - * Copyright (c) 2007 Eric Biederman (ebiederm@xmission.com) - * Copyright (c) 2007 Vivek Goyal (vgoyal@in.ibm.com) - * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. - * - * This is a common code for verification whether CPU supports - * long mode and SSE or not. It is not called directly instead this - * file is included at various places and compiled in that context. - * Following are the current usage. - * - * This file is included by both 16bit and 32bit code. - * - * arch/x86_64/boot/setup.S : Boot cpu verification (16bit) - * arch/x86_64/boot/compressed/head.S: Boot cpu verification (32bit) - * arch/x86_64/kernel/trampoline.S: secondary processor verfication (16bit) - * arch/x86_64/kernel/acpi/wakeup.S:Verfication at resume (16bit) - * - * verify_cpu, returns the status of cpu check in register %eax. - * 0: Success 1: Failure - * - * The caller needs to check for the error code and take the action - * appropriately. Either display a message or halt. 
- */ - -#include -#include - -verify_cpu: - pushfl # Save caller passed flags - pushl $0 # Kill any dangerous flags - popfl - - pushfl # standard way to check for cpuid - popl %eax - movl %eax,%ebx - xorl $0x200000,%eax - pushl %eax - popfl - pushfl - popl %eax - cmpl %eax,%ebx - jz verify_cpu_no_longmode # cpu has no cpuid - - movl $0x0,%eax # See if cpuid 1 is implemented - cpuid - cmpl $0x1,%eax - jb verify_cpu_no_longmode # no cpuid 1 - - xor %di,%di - cmpl $0x68747541,%ebx # AuthenticAMD - jnz verify_cpu_noamd - cmpl $0x69746e65,%edx - jnz verify_cpu_noamd - cmpl $0x444d4163,%ecx - jnz verify_cpu_noamd - mov $1,%di # cpu is from AMD - -verify_cpu_noamd: - movl $0x1,%eax # Does the cpu have what it takes - cpuid - andl $REQUIRED_MASK0,%edx - xorl $REQUIRED_MASK0,%edx - jnz verify_cpu_no_longmode - - movl $0x80000000,%eax # See if extended cpuid is implemented - cpuid - cmpl $0x80000001,%eax - jb verify_cpu_no_longmode # no extended cpuid - - movl $0x80000001,%eax # Does the cpu have what it takes - cpuid - andl $REQUIRED_MASK1,%edx - xorl $REQUIRED_MASK1,%edx - jnz verify_cpu_no_longmode - -verify_cpu_sse_test: - movl $1,%eax - cpuid - andl $SSE_MASK,%edx - cmpl $SSE_MASK,%edx - je verify_cpu_sse_ok - test %di,%di - jz verify_cpu_no_longmode # only try to force SSE on AMD - movl $MSR_K7_HWCR,%ecx - rdmsr - btr $15,%eax # enable SSE - wrmsr - xor %di,%di # don't loop - jmp verify_cpu_sse_test # try again - -verify_cpu_no_longmode: - popfl # Restore caller passed flags - movl $1,%eax - ret -verify_cpu_sse_ok: - popfl # Restore caller passed flags - xorl %eax, %eax - ret -- cgit v0.10.2 From ae84739c27b6b3725993202fe02ff35ab86468e1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 10 Nov 2010 10:35:52 -0800 Subject: x86, cpu: Clear XD_DISABLED flag on Intel to regain NX Intel CPUs have an additional MSR bit to indicate if the BIOS was configured to disable the NX cpu feature. 
This bit was traditionally used for operating systems that did not understand how to handle the NX bit. Since Linux understands this, this BIOS flag should be ignored by default. In a review[1] of reported hardware being used by Ubuntu bug reporters, almost 10% of systems had an incorrectly configured BIOS, leaving their systems unable to use the NX features of their CPU. This change will clear the MSR_IA32_MISC_ENABLE_XD_DISABLE bit so that NX cannot be inappropriately controlled by the BIOS on Intel CPUs. If, under very strange hardware configurations, NX actually needs to be disabled, "noexec=off" can be used to restore the prior behavior. [1] http://www.outflux.net/blog/archives/2010/02/18/data-mining-for-nx-bit/ Signed-off-by: Kees Cook LKML-Reference: <1289414154-7829-3-git-send-email-kees.cook@canonical.com> Acked-by: Pekka Enberg Acked-by: Alan Cox Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S index 56a8c2a..ccb4136 100644 --- a/arch/x86/kernel/verify_cpu.S +++ b/arch/x86/kernel/verify_cpu.S @@ -7,6 +7,7 @@ * Copyright (c) 2007 Andi Kleen (ak@suse.de) * Copyright (c) 2007 Eric Biederman (ebiederm@xmission.com) * Copyright (c) 2007 Vivek Goyal (vgoyal@in.ibm.com) + * Copyright (c) 2010 Kees Cook (kees.cook@canonical.com) * * This source code is licensed under the GNU General Public License, * Version 2. See the file COPYING for more details. @@ -14,18 +15,16 @@ * This is a common code for verification whether CPU supports * long mode and SSE or not. It is not called directly instead this * file is included at various places and compiled in that context. - * Following are the current usage. + * This file is expected to run in 32bit code. Currently: * - * This file is included by both 16bit and 32bit code. 
+ * arch/x86_64/boot/compressed/head_64.S: Boot cpu verification + * arch/x86_64/kernel/trampoline_64.S: secondary processor verfication * - * arch/x86_64/boot/setup.S : Boot cpu verification (16bit) - * arch/x86_64/boot/compressed/head.S: Boot cpu verification (32bit) - * arch/x86_64/kernel/trampoline.S: secondary processor verfication (16bit) - * arch/x86_64/kernel/acpi/wakeup.S:Verfication at resume (16bit) - * - * verify_cpu, returns the status of cpu check in register %eax. + * verify_cpu, returns the status of longmode and SSE in register %eax. * 0: Success 1: Failure * + * On Intel, the XD_DISABLE flag will be cleared as a side-effect. + * * The caller needs to check for the error code and take the action * appropriately. Either display a message or halt. */ @@ -62,8 +61,41 @@ verify_cpu: cmpl $0x444d4163,%ecx jnz verify_cpu_noamd mov $1,%di # cpu is from AMD + jmp verify_cpu_check verify_cpu_noamd: + cmpl $0x756e6547,%ebx # GenuineIntel? + jnz verify_cpu_check + cmpl $0x49656e69,%edx + jnz verify_cpu_check + cmpl $0x6c65746e,%ecx + jnz verify_cpu_check + + # only call IA32_MISC_ENABLE when: + # family > 6 || (family == 6 && model >= 0xd) + movl $0x1, %eax # check CPU family and model + cpuid + movl %eax, %ecx + + andl $0x0ff00f00, %eax # mask family and extended family + shrl $8, %eax + cmpl $6, %eax + ja verify_cpu_clear_xd # family > 6, ok + jb verify_cpu_check # family < 6, skip + + andl $0x000f00f0, %ecx # mask model and extended model + shrl $4, %ecx + cmpl $0xd, %ecx + jb verify_cpu_check # family == 6, model < 0xd, skip + +verify_cpu_clear_xd: + movl $MSR_IA32_MISC_ENABLE, %ecx + rdmsr + btrl $2, %edx # clear MSR_IA32_MISC_ENABLE_XD_DISABLE + jnc verify_cpu_check # only write MSR if bit was changed + wrmsr + +verify_cpu_check: movl $0x1,%eax # Does the cpu have what it takes cpuid andl $REQUIRED_MASK0,%edx -- cgit v0.10.2 From ebba638ae723d8a8fc2f7abce5ec18b688b791d7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 10 Nov 2010 10:35:53 -0800 
Subject: x86, cpu: Call verify_cpu during 32bit CPU startup The XD_DISABLE-clearing side-effect needs to happen for both 32bit and 64bit, but the 32bit init routines were not calling verify_cpu() yet. This adds that call to gain the side-effect. The longmode/SSE tests being performed in verify_cpu() need to happen very early for 64bit but not for 32bit. Instead of including it in two places for 32bit, we can just include it once in arch/x86/kernel/head_32.S. Signed-off-by: Kees Cook LKML-Reference: <1289414154-7829-4-git-send-email-kees.cook@canonical.com> Acked-by: Pekka Enberg Acked-by: Alan Cox Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index bcece91..fdaea52 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -314,6 +314,10 @@ ENTRY(startup_32_smp) subl $0x80000001, %eax cmpl $(0x8000ffff-0x80000001), %eax ja 6f + + /* Clear bogus XD_DISABLE bits */ + call verify_cpu + mov $0x80000001, %eax cpuid /* Execute Disable bit supported? */ @@ -609,6 +613,8 @@ ignore_int: #endif iret +#include "verify_cpu.S" + __REFDATA .align 4 ENTRY(initial_code) diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S index ccb4136..5644b4b 100644 --- a/arch/x86/kernel/verify_cpu.S +++ b/arch/x86/kernel/verify_cpu.S @@ -19,6 +19,7 @@ * * arch/x86_64/boot/compressed/head_64.S: Boot cpu verification * arch/x86_64/kernel/trampoline_64.S: secondary processor verfication + * arch/x86_64/kernel/head_32.S: processor startup * * verify_cpu, returns the status of longmode and SSE in register %eax. * 0: Success 1: Failure -- cgit v0.10.2 From 6036f373ea03687d355634fa70fb04baa95ab75e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 10 Nov 2010 10:35:54 -0800 Subject: x86, cpu: Only CPU features determine NX capabilities Fix the NX feature boot warning when NX is missing to correctly reflect that BIOSes cannot disable NX now. 
Signed-off-by: Kees Cook LKML-Reference: <1289414154-7829-5-git-send-email-kees.cook@canonical.com> Acked-by: Pekka Enberg Acked-by: Alan Cox Signed-off-by: H. Peter Anvin diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c index a3250aa..410531d 100644 --- a/arch/x86/mm/setup_nx.c +++ b/arch/x86/mm/setup_nx.c @@ -41,7 +41,7 @@ void __init x86_report_nx(void) { if (!cpu_has_nx) { printk(KERN_NOTICE "Notice: NX (Execute Disable) protection " - "missing in CPU or disabled in BIOS!\n"); + "missing in CPU!\n"); } else { #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) if (disable_nx) { -- cgit v0.10.2 From d08a5ace18dce9b18d8eb56bb6c0feef082b1b33 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 11 Nov 2010 01:50:53 +0100 Subject: PM: Allow devices to be removed during late suspend and early resume Holding dpm_list_mtx across late suspend and early resume of devices is problematic for the PCMCIA subsystem and doesn't allow device objects to be removed by late suspend and early resume driver callbacks. This appears to be overly restrictive, as drivers are generally allowed to remove device objects in other phases of suspend and resume. Therefore rework dpm_{suspend|resume}_noirq() so that they don't have to hold dpm_list_mtx all the time. Signed-off-by: Rafael J. 
Wysocki diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 31b5266..ead3e79 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -475,20 +475,33 @@ End: */ void dpm_resume_noirq(pm_message_t state) { - struct device *dev; + struct list_head list; ktime_t starttime = ktime_get(); + INIT_LIST_HEAD(&list); mutex_lock(&dpm_list_mtx); transition_started = false; - list_for_each_entry(dev, &dpm_list, power.entry) + while (!list_empty(&dpm_list)) { + struct device *dev = to_device(dpm_list.next); + + get_device(dev); if (dev->power.status > DPM_OFF) { int error; dev->power.status = DPM_OFF; + mutex_unlock(&dpm_list_mtx); + error = device_resume_noirq(dev, state); + + mutex_lock(&dpm_list_mtx); if (error) pm_dev_err(dev, state, " early", error); } + if (!list_empty(&dev->power.entry)) + list_move_tail(&dev->power.entry, &list); + put_device(dev); + } + list_splice(&list, &dpm_list); mutex_unlock(&dpm_list_mtx); dpm_show_time(starttime, state, "early"); resume_device_irqs(); @@ -789,20 +802,33 @@ End: */ int dpm_suspend_noirq(pm_message_t state) { - struct device *dev; + struct list_head list; ktime_t starttime = ktime_get(); int error = 0; + INIT_LIST_HEAD(&list); suspend_device_irqs(); mutex_lock(&dpm_list_mtx); - list_for_each_entry_reverse(dev, &dpm_list, power.entry) { + while (!list_empty(&dpm_list)) { + struct device *dev = to_device(dpm_list.prev); + + get_device(dev); + mutex_unlock(&dpm_list_mtx); + error = device_suspend_noirq(dev, state); + + mutex_lock(&dpm_list_mtx); if (error) { pm_dev_err(dev, state, " late", error); + put_device(dev); break; } dev->power.status = DPM_OFF_IRQ; + if (!list_empty(&dev->power.entry)) + list_move(&dev->power.entry, &list); + put_device(dev); } + list_splice_tail(&list, &dpm_list); mutex_unlock(&dpm_list_mtx); if (error) dpm_resume_noirq(resume_event(state)); -- cgit v0.10.2 From 43e60861fe9d39740cf5b355f58fecedf0d8e9ba Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 11 Nov 2010 
01:51:26 +0100 Subject: PM / OPP: Hide OPP configuration when SoCs do not provide an implementation Since the OPP API is only useful with an appropriate SoC-specific implementation there is no point in offering the ability to enable the API on general systems. Provide an ARCH_HAS_OPP Kconfig symbol which masks out the option unless selected by an implementation. Signed-off-by: Mark Brown Acked-by: Nishanth Menon Acked-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki diff --git a/Documentation/power/opp.txt b/Documentation/power/opp.txt index 44d87ad..cd44558 100644 --- a/Documentation/power/opp.txt +++ b/Documentation/power/opp.txt @@ -37,6 +37,9 @@ Typical usage of the OPP library is as follows: SoC framework -> modifies on required cases certain OPPs -> OPP layer -> queries to search/retrieve information -> +Architectures that provide a SoC framework for OPP should select ARCH_HAS_OPP +to make the OPP layer available. + OPP layer expects each domain to be represented by a unique device pointer. SoC framework registers a set of initial OPPs per device with the OPP layer. This list is expected to be an optimally small number typically around 5 per device. diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 29bff61..a5aff3e 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -246,9 +246,13 @@ config PM_OPS depends on PM_SLEEP || PM_RUNTIME default y +config ARCH_HAS_OPP + bool + config PM_OPP bool "Operating Performance Point (OPP) Layer library" depends on PM + depends on ARCH_HAS_OPP ---help--- SOCs have a standard set of tuples consisting of frequency and voltage pairs that the device will support per voltage domain.
This -- cgit v0.10.2 From f724bd240adef304e222590826cb0c17d6168b68 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 4 Nov 2010 20:08:12 -0700 Subject: sound/oss/dev_table.c: Use vzalloc Signed-off-by: Joe Perches Signed-off-by: Takashi Iwai diff --git a/sound/oss/dev_table.c b/sound/oss/dev_table.c index 727bdb9..d8cf3e5 100644 --- a/sound/oss/dev_table.c +++ b/sound/oss/dev_table.c @@ -71,7 +71,7 @@ int sound_install_audiodrv(int vers, char *name, struct audio_driver *driver, if (sound_nblocks >= MAX_MEM_BLOCKS) sound_nblocks = MAX_MEM_BLOCKS - 1; - op = (struct audio_operations *) (sound_mem_blocks[sound_nblocks] = vmalloc(sizeof(struct audio_operations))); + op = (struct audio_operations *) (sound_mem_blocks[sound_nblocks] = vzalloc(sizeof(struct audio_operations))); sound_nblocks++; if (sound_nblocks >= MAX_MEM_BLOCKS) sound_nblocks = MAX_MEM_BLOCKS - 1; @@ -81,7 +81,6 @@ int sound_install_audiodrv(int vers, char *name, struct audio_driver *driver, sound_unload_audiodev(num); return -(ENOMEM); } - memset((char *) op, 0, sizeof(struct audio_operations)); init_waitqueue_head(&op->in_sleeper); init_waitqueue_head(&op->out_sleeper); init_waitqueue_head(&op->poll_sleeper); @@ -128,7 +127,7 @@ int sound_install_mixer(int vers, char *name, struct mixer_operations *driver, /* FIXME: This leaks a mixer_operations struct every time its called until you unload sound! 
*/ - op = (struct mixer_operations *) (sound_mem_blocks[sound_nblocks] = vmalloc(sizeof(struct mixer_operations))); + op = (struct mixer_operations *) (sound_mem_blocks[sound_nblocks] = vzalloc(sizeof(struct mixer_operations))); sound_nblocks++; if (sound_nblocks >= MAX_MEM_BLOCKS) sound_nblocks = MAX_MEM_BLOCKS - 1; @@ -137,7 +136,6 @@ int sound_install_mixer(int vers, char *name, struct mixer_operations *driver, printk(KERN_ERR "Sound: Can't allocate mixer driver for (%s)\n", name); return -ENOMEM; } - memset((char *) op, 0, sizeof(struct mixer_operations)); memcpy((char *) op, (char *) driver, driver_size); strlcpy(op->name, name, sizeof(op->name)); -- cgit v0.10.2 From b17cd8d69a75f921d9d444cc3ac9b5b1d0b66ca0 Mon Sep 17 00:00:00 2001 From: Brandon Philips Date: Sun, 7 Nov 2010 01:28:24 -0500 Subject: driver core: prune docs about device_interface drivers/base/intf.c was removed before the beginning of (git) time but its Documentation stuck around. Remove it. Signed-off-by: Brandon Philips Signed-off-by: Greg Kroah-Hartman diff --git a/Documentation/driver-model/interface.txt b/Documentation/driver-model/interface.txt deleted file mode 100644 index c66912bf..0000000 --- a/Documentation/driver-model/interface.txt +++ /dev/null @@ -1,129 +0,0 @@ - -Device Interfaces - -Introduction -~~~~~~~~~~~~ - -Device interfaces are the logical interfaces of device classes that correlate -directly to userspace interfaces, like device nodes. - -Each device class may have multiple interfaces through which you can -access the same device. An input device may support the mouse interface, -the 'evdev' interface, and the touchscreen interface. A SCSI disk would -support the disk interface, the SCSI generic interface, and possibly a raw -device interface. - -Device interfaces are registered with the class they belong to. As devices -are added to the class, they are added to each interface registered with -the class. 
The interface is responsible for determining whether the device -supports the interface or not. - - -Programming Interface -~~~~~~~~~~~~~~~~~~~~~ - -struct device_interface { - char * name; - rwlock_t lock; - u32 devnum; - struct device_class * devclass; - - struct list_head node; - struct driver_dir_entry dir; - - int (*add_device)(struct device *); - int (*add_device)(struct intf_data *); -}; - -int interface_register(struct device_interface *); -void interface_unregister(struct device_interface *); - - -An interface must specify the device class it belongs to. It is added -to that class's list of interfaces on registration. - - -Interfaces can be added to a device class at any time. Whenever it is -added, each device in the class is passed to the interface's -add_device callback. When an interface is removed, each device is -removed from the interface. - - -Devices -~~~~~~~ -Once a device is added to a device class, it is added to each -interface that is registered with the device class. The class -is expected to place a class-specific data structure in -struct device::class_data. The interface can use that (along with -other fields of struct device) to determine whether or not the driver -and/or device support that particular interface. - - -Data -~~~~ - -struct intf_data { - struct list_head node; - struct device_interface * intf; - struct device * dev; - u32 intf_num; -}; - -int interface_add_data(struct interface_data *); - -The interface is responsible for allocating and initializing a struct -intf_data and calling interface_add_data() to add it to the device's list -of interfaces it belongs to. This list will be iterated over when the device -is removed from the class (instead of all possible interfaces for a class). -This structure should probably be embedded in whatever per-device data -structure the interface is allocating anyway. - -Devices are enumerated within the interface. 
This happens in interface_add_data() -and the enumerated value is stored in the struct intf_data for that device. - -sysfs -~~~~~ -Each interface is given a directory in the directory of the device -class it belongs to: - -Interfaces get a directory in the class's directory as well: - - class/ - `-- input - |-- devices - |-- drivers - |-- mouse - `-- evdev - -When a device is added to the interface, a symlink is created that points -to the device's directory in the physical hierarchy: - - class/ - `-- input - |-- devices - | `-- 1 -> ../../../root/pci0/00:1f.0/usb_bus/00:1f.2-1:0/ - |-- drivers - | `-- usb:usb_mouse -> ../../../bus/drivers/usb_mouse/ - |-- mouse - | `-- 1 -> ../../../root/pci0/00:1f.0/usb_bus/00:1f.2-1:0/ - `-- evdev - `-- 1 -> ../../../root/pci0/00:1f.0/usb_bus/00:1f.2-1:0/ - - -Future Plans -~~~~~~~~~~~~ -A device interface is correlated directly with a userspace interface -for a device, specifically a device node. For instance, a SCSI disk -exposes at least two interfaces to userspace: the standard SCSI disk -interface and the SCSI generic interface. It might also export a raw -device interface. - -Many interfaces have a major number associated with them and each -device gets a minor number. Or, multiple interfaces might share one -major number, and each will receive a range of minor numbers (like in -the case of input devices). - -These major and minor numbers could be stored in the interface -structure. Major and minor allocations could happen when the interface -is registered with the class, or via a helper function. - diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 4823af6..5f09323 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -10,11 +10,6 @@ * * CPUs are exported via sysfs in the class/cpu/devices/ * directory. - * - * Per-cpu interfaces can be implemented using a struct device_interface. 
- * See the following for how to do this: - * - drivers/base/intf.c - * - Documentation/driver-model/interface.txt */ #ifndef _LINUX_CPU_H_ #define _LINUX_CPU_H_ diff --git a/include/linux/node.h b/include/linux/node.h index 06292da..1466945 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -10,11 +10,6 @@ * * Nodes are exported via driverfs in the class/node/devices/ * directory. - * - * Per-node interfaces can be implemented using a struct device_interface. - * See the following for how to do this: - * - drivers/base/intf.c - * - Documentation/driver-model/interface.txt */ #ifndef _LINUX_NODE_H_ #define _LINUX_NODE_H_ -- cgit v0.10.2 From 318af55ddd38bdaaa2b57f5c3bd394f3ce3a2610 Mon Sep 17 00:00:00 2001 From: "Hans J. Koch" Date: Sat, 30 Oct 2010 00:36:47 +0200 Subject: uio: Change mail address of Hans J. Koch My old mail address doesn't exist anymore. This changes all occurrences to my new address. Signed-off-by: Hans J. Koch Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index a858d2b..51fe179 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -3,7 +3,7 @@ * * Copyright(C) 2005, Benedikt Spranger * Copyright(C) 2005, Thomas Gleixner - * Copyright(C) 2006, Hans J. Koch + * Copyright(C) 2006, Hans J. Koch * Copyright(C) 2006, Greg Kroah-Hartman * * Userspace IO diff --git a/drivers/uio/uio_cif.c b/drivers/uio/uio_cif.c index a8ea2f1..a84a451 100644 --- a/drivers/uio/uio_cif.c +++ b/drivers/uio/uio_cif.c @@ -1,7 +1,7 @@ /* * UIO Hilscher CIF card driver * - * (C) 2007 Hans J. Koch + * (C) 2007 Hans J. Koch * Original code (C) 2005 Benedikt Spranger * * Licensed under GPL version 2 only. diff --git a/drivers/uio/uio_netx.c b/drivers/uio/uio_netx.c index 5a18e9f..5ffdb48 100644 --- a/drivers/uio/uio_netx.c +++ b/drivers/uio/uio_netx.c @@ -2,7 +2,7 @@ * UIO driver for Hilscher NetX based fieldbus cards (cifX, comX). * See http://www.hilscher.com for details. * - * (C) 2007 Hans J. Koch + * (C) 2007 Hans J. 
Koch * (C) 2008 Manuel Traut * * Licensed under GPL version 2 only. diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h index d6188e5..665517c 100644 --- a/include/linux/uio_driver.h +++ b/include/linux/uio_driver.h @@ -3,7 +3,7 @@ * * Copyright(C) 2005, Benedikt Spranger * Copyright(C) 2005, Thomas Gleixner - * Copyright(C) 2006, Hans J. Koch + * Copyright(C) 2006, Hans J. Koch * Copyright(C) 2006, Greg Kroah-Hartman * * Userspace IO driver. -- cgit v0.10.2 From ea7dd251251a8d4694e9929104209dcc06220630 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Tue, 9 Nov 2010 00:11:03 +0100 Subject: sound/oss: Remove unnecessary casts of void ptr The [vk][cmz]alloc(_node) family of functions return void pointers which it's completely unnecessary/pointless to cast to other pointer types since that happens implicitly. This patch removes such casts from sound/oss/ Signed-off-by: Jesper Juhl Signed-off-by: Takashi Iwai diff --git a/sound/oss/midibuf.c b/sound/oss/midibuf.c index 782b3b8..ceedb1e 100644 --- a/sound/oss/midibuf.c +++ b/sound/oss/midibuf.c @@ -178,7 +178,7 @@ int MIDIbuf_open(int dev, struct file *file) return err; parms[dev].prech_timeout = MAX_SCHEDULE_TIMEOUT; - midi_in_buf[dev] = (struct midi_buf *) vmalloc(sizeof(struct midi_buf)); + midi_in_buf[dev] = vmalloc(sizeof(struct midi_buf)); if (midi_in_buf[dev] == NULL) { @@ -188,7 +188,7 @@ int MIDIbuf_open(int dev, struct file *file) } midi_in_buf[dev]->len = midi_in_buf[dev]->head = midi_in_buf[dev]->tail = 0; - midi_out_buf[dev] = (struct midi_buf *) vmalloc(sizeof(struct midi_buf)); + midi_out_buf[dev] = vmalloc(sizeof(struct midi_buf)); if (midi_out_buf[dev] == NULL) { diff --git a/sound/oss/pss.c b/sound/oss/pss.c index e19dd5d..9b800ce 100644 --- a/sound/oss/pss.c +++ b/sound/oss/pss.c @@ -859,7 +859,7 @@ static int pss_coproc_ioctl(void *dev_info, unsigned int cmd, void __user *arg, return 0; case SNDCTL_COPR_LOAD: - buf = (copr_buffer *) vmalloc(sizeof(copr_buffer)); + buf = 
vmalloc(sizeof(copr_buffer)); if (buf == NULL) return -ENOSPC; if (copy_from_user(buf, arg, sizeof(copr_buffer))) { @@ -871,7 +871,7 @@ static int pss_coproc_ioctl(void *dev_info, unsigned int cmd, void __user *arg, return err; case SNDCTL_COPR_SENDMSG: - mbuf = (copr_msg *)vmalloc(sizeof(copr_msg)); + mbuf = vmalloc(sizeof(copr_msg)); if (mbuf == NULL) return -ENOSPC; if (copy_from_user(mbuf, arg, sizeof(copr_msg))) { @@ -895,7 +895,7 @@ static int pss_coproc_ioctl(void *dev_info, unsigned int cmd, void __user *arg, case SNDCTL_COPR_RCVMSG: err = 0; - mbuf = (copr_msg *)vmalloc(sizeof(copr_msg)); + mbuf = vmalloc(sizeof(copr_msg)); if (mbuf == NULL) return -ENOSPC; data = (unsigned short *)mbuf->data; diff --git a/sound/oss/sequencer.c b/sound/oss/sequencer.c index e85789e..5ea1098 100644 --- a/sound/oss/sequencer.c +++ b/sound/oss/sequencer.c @@ -1646,13 +1646,13 @@ void sequencer_init(void) { if (sequencer_ok) return; - queue = (unsigned char *)vmalloc(SEQ_MAX_QUEUE * EV_SZ); + queue = vmalloc(SEQ_MAX_QUEUE * EV_SZ); if (queue == NULL) { printk(KERN_ERR "sequencer: Can't allocate memory for sequencer output queue\n"); return; } - iqueue = (unsigned char *)vmalloc(SEQ_MAX_QUEUE * IEV_SZ); + iqueue = vmalloc(SEQ_MAX_QUEUE * IEV_SZ); if (iqueue == NULL) { printk(KERN_ERR "sequencer: Can't allocate memory for sequencer input queue\n"); -- cgit v0.10.2 From 89feca1a16b05651d9c500e5572c0d6882873396 Mon Sep 17 00:00:00 2001 From: David Henningsson Date: Wed, 13 Oct 2010 15:48:24 +0200 Subject: ALSA: HDA: Enable digital mic on IDT 92HD87B BugLink: http://launchpad.net/bugs/673075 According to the datasheet of 92HD87B, there is a digital mic at nid 0x11, so enable it in order to be able to use the mic. 
Cc: stable@kernel.org Signed-off-by: David Henningsson Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 93fa59c..cfd73af 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -389,6 +389,11 @@ static hda_nid_t stac92hd83xxx_dmic_nids[STAC92HD83XXX_NUM_DMICS + 1] = { 0x11, 0x20, 0 }; +#define STAC92HD87B_NUM_DMICS 1 +static hda_nid_t stac92hd87b_dmic_nids[STAC92HD87B_NUM_DMICS + 1] = { + 0x11, 0 +}; + #define STAC92HD83XXX_NUM_CAPS 2 static unsigned long stac92hd83xxx_capvols[] = { HDA_COMPOSE_AMP_VAL(0x17, 3, 0, HDA_OUTPUT), @@ -5452,12 +5457,17 @@ again: stac92hd83xxx_brd_tbl[spec->board_config]); switch (codec->vendor_id) { + case 0x111d76d1: + case 0x111d76d9: + spec->dmic_nids = stac92hd87b_dmic_nids; + spec->num_dmics = stac92xx_connected_ports(codec, + stac92hd87b_dmic_nids, + STAC92HD87B_NUM_DMICS); + /* Fall through */ case 0x111d7666: case 0x111d7667: case 0x111d7668: case 0x111d7669: - case 0x111d76d1: - case 0x111d76d9: spec->num_pins = ARRAY_SIZE(stac92hd88xxx_pin_nids); spec->pin_nids = stac92hd88xxx_pin_nids; spec->mono_nid = 0; -- cgit v0.10.2 From e9161512017f11050ef2b826cbb10be1673554c6 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 9 Nov 2010 18:29:08 +0100 Subject: ALSA: sound/mixart: avoid redefining {readl,write}_{le,be} accessors If the platform already provides a definition for these accessors do not redefine them. The warning was caught on MIPS. 
Signed-off-by: Florian Fainelli Signed-off-by: Takashi Iwai diff --git a/sound/pci/mixart/mixart_hwdep.h b/sound/pci/mixart/mixart_hwdep.h index a46f508..812e288 100644 --- a/sound/pci/mixart/mixart_hwdep.h +++ b/sound/pci/mixart/mixart_hwdep.h @@ -25,11 +25,21 @@ #include +#ifndef readl_be #define readl_be(x) be32_to_cpu(__raw_readl(x)) +#endif + +#ifndef writel_be #define writel_be(data,addr) __raw_writel(cpu_to_be32(data),addr) +#endif +#ifndef readl_le #define readl_le(x) le32_to_cpu(__raw_readl(x)) +#endif + +#ifndef writel_le #define writel_le(data,addr) __raw_writel(cpu_to_le32(data),addr) +#endif #define MIXART_MEM(mgr,x) ((mgr)->mem[0].virt + (x)) #define MIXART_REG(mgr,x) ((mgr)->mem[1].virt + (x)) -- cgit v0.10.2 From fa2b30af84e84129b8d4cf955890ad167cc20cf0 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 9 Nov 2010 23:00:41 +0100 Subject: ALSA: sound/pci/ctxfi/ctpcm.c: Remove potential for use after free In each function, the value apcm is stored in the private_data field of runtime. At the same time the function ct_atc_pcm_free_substream is stored in the private_free field of the same structure. ct_atc_pcm_free_substream dereferences and ultimately frees the value in the private_data field. But each function can exit in an error case with apcm having been freed, in which case a subsequent call to the private_free function would perform a dereference after free. On the other hand, if the private_free field is not initialized, it is NULL, and not invoked (see snd_pcm_detach_substream in sound/core/pcm.c). To avoid the introduction of a dangling pointer, the initializations of the private_data and private_free fields are moved to the end of the function, past any possible free of apcm. This is safe because the previous calls to snd_pcm_hw_constraint_integer and snd_pcm_hw_constraint_minmax, which take runtime as an argument, do not refer to either of these fields. 
In each function, there is one error case where apcm needs to be freed, and a call to kfree is added. The semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression e,e1,e2,e3; identifier f,free1,free2; expression a; @@ *e->f = a ... when != e->f = e1 when any if (...) { ... when != free1(...,e,...) when != e->f = e2 * kfree(a) ... when != free2(...,e,...) when != e->f = e3 } // Signed-off-by: Julia Lawall Signed-off-by: Takashi Iwai diff --git a/sound/pci/ctxfi/ctpcm.c b/sound/pci/ctxfi/ctpcm.c index 85ab43e..457d211 100644 --- a/sound/pci/ctxfi/ctpcm.c +++ b/sound/pci/ctxfi/ctpcm.c @@ -129,8 +129,6 @@ static int ct_pcm_playback_open(struct snd_pcm_substream *substream) apcm->substream = substream; apcm->interrupt = ct_atc_pcm_interrupt; - runtime->private_data = apcm; - runtime->private_free = ct_atc_pcm_free_substream; if (IEC958 == substream->pcm->device) { runtime->hw = ct_spdif_passthru_playback_hw; atc->spdif_out_passthru(atc, 1); @@ -155,8 +153,12 @@ static int ct_pcm_playback_open(struct snd_pcm_substream *substream) } apcm->timer = ct_timer_instance_new(atc->timer, apcm); - if (!apcm->timer) + if (!apcm->timer) { + kfree(apcm); return -ENOMEM; + } + runtime->private_data = apcm; + runtime->private_free = ct_atc_pcm_free_substream; return 0; } @@ -278,8 +280,6 @@ static int ct_pcm_capture_open(struct snd_pcm_substream *substream) apcm->started = 0; apcm->substream = substream; apcm->interrupt = ct_atc_pcm_interrupt; - runtime->private_data = apcm; - runtime->private_free = ct_atc_pcm_free_substream; runtime->hw = ct_pcm_capture_hw; runtime->hw.rate_max = atc->rsr * atc->msr; @@ -298,8 +298,12 @@ static int ct_pcm_capture_open(struct snd_pcm_substream *substream) } apcm->timer = ct_timer_instance_new(atc->timer, apcm); - if (!apcm->timer) + if (!apcm->timer) { + kfree(apcm); return -ENOMEM; + } + runtime->private_data = apcm; + runtime->private_free = ct_atc_pcm_free_substream; return 0; } -- cgit v0.10.2 From 
e2e9566230e0c93d89948cbc799a191d35383d09 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Wed, 10 Nov 2010 15:55:05 +0100 Subject: ALSA: AT73C213: Rectify misleading comment. The Atmel SSC can divide by even numbers, not only powers of two. Signed-off-by: Peter Rosin Signed-off-by: Takashi Iwai diff --git a/sound/spi/at73c213.c b/sound/spi/at73c213.c index 1bc56b2..337a002 100644 --- a/sound/spi/at73c213.c +++ b/sound/spi/at73c213.c @@ -155,7 +155,7 @@ static int snd_at73c213_set_bitrate(struct snd_at73c213 *chip) if (max_tries < 1) max_tries = 1; - /* ssc_div must be a power of 2. */ + /* ssc_div must be even. */ ssc_div = (ssc_div + 1) & ~1UL; if ((ssc_rate / (ssc_div * 2 * 16)) < BITRATE_MIN) { -- cgit v0.10.2 From 13b9b6e746d753d43270a78dd39694912646b5d9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 10 Nov 2010 22:19:24 -0500 Subject: tracing: Fix module use of trace_bprintk() On use of trace_printk() there's a macro that determines if the format is static or a variable. If it is static, it defaults to __trace_bprintk() otherwise it uses __trace_printk(). A while ago, Lai Jiangshan added __trace_bprintk(). In that patch, we discussed a way to allow modules to use it. The difference between __trace_bprintk() and __trace_printk() is that for faster processing, just the format and args are stored in the trace instead of running it through a sprintf function. In order to do this, the format used by the __trace_bprintk() had to be persistent. See commit 1ba28e02a18cbdbea123836f6c98efb09cbf59ec The problem comes with trace_bprintk() where the module is unloaded. The pointer left in the buffer is still pointing to the format. To solve this issue, the formats in the module were copied into kernel core. If the same format was used, they would use the same copy (to prevent memory leak). This all worked well until we tried to merge everything. 
At the time this was written, Lai Jiangshan, Frederic Weisbecker, Ingo Molnar and myself were all touching the same code. When this was merged, we lost the part of it that was in module.c. This kept out the copying of the formats and unloading the module could cause bad pointers left in the ring buffer. This patch adds back (with updates required for current kernel) the module code that sets up the necessary pointers. Cc: Lai Jiangshan Cc: Rusty Russell Signed-off-by: Steven Rostedt diff --git a/kernel/module.c b/kernel/module.c index 437a74a..d190664 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2326,6 +2326,18 @@ static void find_module_sections(struct module *mod, struct load_info *info) kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) * mod->num_trace_events, GFP_KERNEL); #endif +#ifdef CONFIG_TRACING + mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt", + sizeof(*mod->trace_bprintk_fmt_start), + &mod->num_trace_bprintk_fmt); + /* + * This section contains pointers to allocated objects in the trace + * code and not scanning it leads to false positives. + */ + kmemleak_scan_area(mod->trace_bprintk_fmt_start, + sizeof(*mod->trace_bprintk_fmt_start) * + mod->num_trace_bprintk_fmt, GFP_KERNEL); +#endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD /* sechdrs[0].sh_size is always zero */ mod->ftrace_callsites = section_objs(info, "__mcount_loc", -- cgit v0.10.2 From b5908548537ccd3ada258ca5348df7ffc93e5a06 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 10 Nov 2010 22:29:49 -0500 Subject: tracing: Force arch_local_irq_* notrace for paravirt When running ktest.pl randconfig tests, I would sometimes trigger a lockdep annotation bug (possible reason: unannotated irqs-on). This triggering happened right after function tracer self test was executed. After doing a config bisect I found that this was caused with having function tracer, paravirt guest, prove locking, and rcu torture all enabled. 
The rcu torture just enhanced the likelihood of triggering the bug. Prove locking was needed, since it was the thing that was bugging. Function tracer would trace and disable interrupts in all sorts of funny places. paravirt guest would turn arch_local_irq_* into functions that would be traced. Besides the fact that tracing arch_local_irq_* is just a bad idea, this is what is happening. The bug happened simply in the local_irq_restore() code: if (raw_irqs_disabled_flags(flags)) { \ raw_local_irq_restore(flags); \ trace_hardirqs_off(); \ } else { \ trace_hardirqs_on(); \ raw_local_irq_restore(flags); \ } \ The raw_local_irq_restore() was defined as arch_local_irq_restore(). Now imagine, we are about to enable interrupts. We go into the else case and call trace_hardirqs_on() which tells lockdep that we are enabling interrupts, so it sets the current->hardirqs_enabled = 1. Then we call raw_local_irq_restore() which calls arch_local_irq_restore() which gets traced! Now in the function tracer we disable interrupts with local_irq_save(). This is fine, but flags is stored that we have interrupts disabled. When the function tracer calls local_irq_restore() it does it, but this time with flags set as disabled, so we go into the if () path. This keeps interrupts disabled and calls trace_hardirqs_off() which sets current->hardirqs_enabled = 0. When the tracer is finished and proceeds with the original code, we enable interrupts but leave current->hardirqs_enabled as 0. Which now breaks lockdep's internal processing. 
Cc: Thomas Gleixner Signed-off-by: Steven Rostedt diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 18e3b8a..ef99758 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -824,27 +824,27 @@ static __always_inline void arch_spin_unlock(struct arch_spinlock *lock) #define __PV_IS_CALLEE_SAVE(func) \ ((struct paravirt_callee_save) { func }) -static inline unsigned long arch_local_save_flags(void) +static inline notrace unsigned long arch_local_save_flags(void) { return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl); } -static inline void arch_local_irq_restore(unsigned long f) +static inline notrace void arch_local_irq_restore(unsigned long f) { PVOP_VCALLEE1(pv_irq_ops.restore_fl, f); } -static inline void arch_local_irq_disable(void) +static inline notrace void arch_local_irq_disable(void) { PVOP_VCALLEE0(pv_irq_ops.irq_disable); } -static inline void arch_local_irq_enable(void) +static inline notrace void arch_local_irq_enable(void) { PVOP_VCALLEE0(pv_irq_ops.irq_enable); } -static inline unsigned long arch_local_irq_save(void) +static inline notrace unsigned long arch_local_irq_save(void) { unsigned long f; -- cgit v0.10.2 From a7851ce73b9fdef53f251420e6883cf4f3766534 Mon Sep 17 00:00:00 2001 From: Oskar Schirmer Date: Wed, 10 Nov 2010 21:06:13 +0000 Subject: cifs: fix another memleak, in cifs_root_iget cifs_root_iget allocates full_path through cifs_build_path_to_root, but fails to kfree it upon cifs_get_inode_info* failure. Make all failure exit paths traverse clean up handling at the end of the function. 
Signed-off-by: Oskar Schirmer Reviewed-by: Jesper Juhl Cc: stable@kernel.org Signed-off-by: Steve French diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index ef3a55b..ff7d299 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -881,8 +881,10 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino) rc = cifs_get_inode_info(&inode, full_path, NULL, sb, xid, NULL); - if (!inode) - return ERR_PTR(rc); + if (!inode) { + inode = ERR_PTR(rc); + goto out; + } #ifdef CONFIG_CIFS_FSCACHE /* populate tcon->resource_id */ @@ -898,13 +900,11 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino) inode->i_uid = cifs_sb->mnt_uid; inode->i_gid = cifs_sb->mnt_gid; } else if (rc) { - kfree(full_path); - _FreeXid(xid); iget_failed(inode); - return ERR_PTR(rc); + inode = ERR_PTR(rc); } - +out: kfree(full_path); /* can not call macro FreeXid here since in a void func * TODO: This is no longer true -- cgit v0.10.2 From 987b21d7d91d0335e164a41a312174851c58f333 Mon Sep 17 00:00:00 2001 From: Shirish Pargaonkar Date: Wed, 10 Nov 2010 07:50:35 -0600 Subject: cifs: Percolate error up to the caller during get/set acls [try #4] Modify get/set_cifs_acl* calls to return error code and percolate the error code up to the caller. 
Signed-off-by: Shirish Pargaonkar Reviewed-by: Jeff Layton Signed-off-by: Steve French diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index c9b4792..91992ab 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -560,7 +560,7 @@ static struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) - return NULL; + return ERR_CAST(tlink); xid = GetXid(); rc = CIFSSMBGetCIFSACL(xid, tlink_tcon(tlink), fid, &pntsd, pacllen); @@ -568,7 +568,9 @@ static struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, cifs_put_tlink(tlink); - cFYI(1, "GetCIFSACL rc = %d ACL len %d", rc, *pacllen); + cFYI(1, "%s: rc = %d ACL len %d", __func__, rc, *pacllen); + if (rc) + return ERR_PTR(rc); return pntsd; } @@ -583,7 +585,7 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) - return NULL; + return ERR_CAST(tlink); tcon = tlink_tcon(tlink); xid = GetXid(); @@ -591,18 +593,17 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, rc = CIFSSMBOpen(xid, tcon, path, FILE_OPEN, READ_CONTROL, 0, &fid, &oplock, NULL, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - if (rc) { - cERROR(1, "Unable to open file to get ACL"); - goto out; + if (!rc) { + rc = CIFSSMBGetCIFSACL(xid, tcon, fid, &pntsd, pacllen); + CIFSSMBClose(xid, tcon, fid); } - rc = CIFSSMBGetCIFSACL(xid, tcon, fid, &pntsd, pacllen); - cFYI(1, "GetCIFSACL rc = %d ACL len %d", rc, *pacllen); - - CIFSSMBClose(xid, tcon, fid); - out: cifs_put_tlink(tlink); FreeXid(xid); + + cFYI(1, "%s: rc = %d ACL len %d", __func__, rc, *pacllen); + if (rc) + return ERR_PTR(rc); return pntsd; } @@ -695,7 +696,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, } /* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */ -void +int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, 
struct cifs_fattr *fattr, struct inode *inode, const char *path, const __u16 *pfid) { @@ -711,13 +712,17 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, pntsd = get_cifs_acl(cifs_sb, inode, path, &acllen); /* if we can retrieve the ACL, now parse Access Control Entries, ACEs */ - if (pntsd) + if (IS_ERR(pntsd)) { + rc = PTR_ERR(pntsd); + cERROR(1, "%s: error %d getting sec desc", __func__, rc); + } else { rc = parse_sec_desc(pntsd, acllen, fattr); - if (rc) - cFYI(1, "parse sec desc failed rc = %d", rc); + kfree(pntsd); + if (rc) + cERROR(1, "parse sec desc failed rc = %d", rc); + } - kfree(pntsd); - return; + return rc; } /* Convert mode bits to an ACL so we can update the ACL on the server */ @@ -736,7 +741,10 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode) /* Add three ACEs for owner, group, everyone getting rid of other ACEs as chmod disables ACEs and set the security descriptor */ - if (pntsd) { + if (IS_ERR(pntsd)) { + rc = PTR_ERR(pntsd); + cERROR(1, "%s: error %d getting sec desc", __func__, rc); + } else { /* allocate memory for the smb header, set security descriptor request security descriptor parameters, and secuirty descriptor itself */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 7ed69b6b..6ed59af 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -130,7 +130,7 @@ extern int cifs_get_file_info_unix(struct file *filp); extern int cifs_get_inode_info_unix(struct inode **pinode, const unsigned char *search_path, struct super_block *sb, int xid); -extern void cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, +extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, struct inode *inode, const char *path, const __u16 *pfid); extern int mode_to_acl(struct inode *inode, const char *path, __u64); -- cgit v0.10.2 From 1e5a74059f9059d330744eac84873b1b99657008 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sun, 31 Oct 2010 12:37:04 +0100 Subject: sched: 
Fix cross-sched-class wakeup preemption Instead of dealing with sched classes inside each check_preempt_curr() implementation, pull out this logic into the generic wakeup preemption path. This fixes a hang in KVM (and others) where we are waiting for the stop machine thread to run ... Reported-by: Markus Trippelsdorf Tested-by: Marcelo Tosatti Tested-by: Sergey Senozhatsky Signed-off-by: Peter Zijlstra LKML-Reference: <1288891946.2039.31.camel@laptop> Signed-off-by: Ingo Molnar diff --git a/kernel/sched.c b/kernel/sched.c index 36a0880..dc91a4d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -560,18 +560,8 @@ struct rq { static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -static inline -void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) -{ - rq->curr->sched_class->check_preempt_curr(rq, p, flags); - /* - * A queue event has occurred, and we're going to schedule. In - * this case, we can save a useless back to back clock update. - */ - if (test_tsk_need_resched(p)) - rq->skip_clock_update = 1; -} +static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags); static inline int cpu_of(struct rq *rq) { @@ -2118,6 +2108,31 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p, p->sched_class->prio_changed(rq, p, oldprio, running); } +static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) +{ + const struct sched_class *class; + + if (p->sched_class == rq->curr->sched_class) { + rq->curr->sched_class->check_preempt_curr(rq, p, flags); + } else { + for_each_class(class) { + if (class == rq->curr->sched_class) + break; + if (class == p->sched_class) { + resched_task(rq->curr); + break; + } + } + } + + /* + * A queue event has occurred, and we're going to schedule. In + * this case, we can save a useless back to back clock update. 
+ */ + if (test_tsk_need_resched(rq->curr)) + rq->skip_clock_update = 1; +} + #ifdef CONFIG_SMP /* * Is this task likely cache-hot: diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 034c4f4..52ab113 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1654,12 +1654,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ struct cfs_rq *cfs_rq = task_cfs_rq(curr); int scale = cfs_rq->nr_running >= sched_nr_latency; - if (unlikely(rt_prio(p->prio))) - goto preempt; - - if (unlikely(p->sched_class != &fair_sched_class)) - return; - if (unlikely(se == pse)) return; diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c index 755483b..2bf6b47 100644 --- a/kernel/sched_stoptask.c +++ b/kernel/sched_stoptask.c @@ -19,7 +19,7 @@ select_task_rq_stop(struct rq *rq, struct task_struct *p, static void check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags) { - resched_task(rq->curr); /* we preempt everything */ + /* we're never preempted */ } static struct task_struct *pick_next_task_stop(struct rq *rq) -- cgit v0.10.2 From bbde7814cbc54d6b564d3f65b4b0e82eddef30a6 Mon Sep 17 00:00:00 2001 From: Ryan Mallon Date: Thu, 11 Nov 2010 09:02:30 +1300 Subject: Fix Atmel soc audio boards Kconfig dependency Add Kconfig dependency on AT91_PROGRAMMABLE_CLOCKS for the Atmel SoC audio SAM9G20-EK and PlayPaq boards. Fixes link errors on missing clk_set_parent and clk_set_rate when building without AT91_PROGRAMMABLE_CLOCKS. 
Signed-off-by: Ryan Mallon Acked-by: Geoffrey Wossum Acked-by: Liam Girdwood Signed-off-by: Mark Brown diff --git a/sound/soc/atmel/Kconfig b/sound/soc/atmel/Kconfig index e720d5e..bee3c94 100644 --- a/sound/soc/atmel/Kconfig +++ b/sound/soc/atmel/Kconfig @@ -16,7 +16,8 @@ config SND_ATMEL_SOC_SSC config SND_AT91_SOC_SAM9G20_WM8731 tristate "SoC Audio support for WM8731-based At91sam9g20 evaluation board" - depends on ATMEL_SSC && ARCH_AT91SAM9G20 && SND_ATMEL_SOC + depends on ATMEL_SSC && ARCH_AT91SAM9G20 && SND_ATMEL_SOC && \ + AT91_PROGRAMMABLE_CLOCKS select SND_ATMEL_SOC_SSC select SND_SOC_WM8731 help @@ -25,7 +26,7 @@ config SND_AT91_SOC_SAM9G20_WM8731 config SND_AT32_SOC_PLAYPAQ tristate "SoC Audio support for PlayPaq with WM8510" - depends on SND_ATMEL_SOC && BOARD_PLAYPAQ + depends on SND_ATMEL_SOC && BOARD_PLAYPAQ && AT91_PROGRAMMABLE_CLOCKS select SND_ATMEL_SOC_SSC select SND_SOC_WM8510 help -- cgit v0.10.2 From 241b652f1995de138106afd2f2e4eda9f8a3c240 Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Fri, 5 Nov 2010 09:59:01 -0400 Subject: xhci: Remove excessive printks with shared IRQs. If the xHCI host controller shares an interrupt line with another device, the xHCI driver needs to check if the interrupt was generated by its hardware. Unfortunately, the user will see a ton of "Spurious interrupt." lines if the other hardware interrupts often. Lawrence found his dmesg output cluttered with this output when the xHCI host shared an interrupt with his i915 hardware. Remove the warning, as sharing an interrupt is a normal thing. This should be applied to the 2.6.36 stable tree. 
Signed-off-by: Sarah Sharp Reported-by: Lawrence Rust Cc: stable@kernel.org diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 9f3115e..df558f6 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2104,7 +2104,6 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) if (!(status & STS_EINT)) { spin_unlock(&xhci->lock); - xhci_warn(xhci, "Spurious interrupt.\n"); return IRQ_NONE; } xhci_dbg(xhci, "op reg status = %08x\n", status); -- cgit v0.10.2 From 74bb844af8ec6a4c360b1fc7feb139801c0cacfd Mon Sep 17 00:00:00 2001 From: Andiry Xu Date: Wed, 27 Oct 2010 16:44:52 +0800 Subject: xHCI: release spinlock when setup interrupt Jiri Slaby reports spinlock is held while calling kmalloc(GFP_KERNEL) and request_irq() in xhci_resume(). Release the spinlock when setup interrupt. Reported-by: Jiri Slaby Signed-off-by: Andiry Xu Signed-off-by: Sarah Sharp diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 5d7d4e9..7c8d70f 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -714,6 +714,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) return retval; } + spin_unlock_irq(&xhci->lock); /* Re-setup MSI-X */ if (hcd->irq) free_irq(hcd->irq, hcd); @@ -736,6 +737,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) hcd->irq = pdev->irq; } + spin_lock_irq(&xhci->lock); /* step 4: set Run/Stop bit */ command = xhci_readl(xhci, &xhci->op_regs->command); command |= CMD_RUN; -- cgit v0.10.2 From dc07c91b9b4067022210e68d914a6890a4d70622 Mon Sep 17 00:00:00 2001 From: Andiry Xu Date: Thu, 11 Nov 2010 17:43:57 +0800 Subject: xHCI: fix wMaxPacketSize mask USB2.0 spec 9.6.6 says: For all endpoints, bit 10..0 specify the maximum packet size(in bytes). So the wMaxPacketSize mask should be 0x7ff rather than 0x3ff. This patch should be queued for the stable tree. 
The bug in xhci_endpoint_init() was present as far back as 2.6.31, and the bug in xhci_get_max_esit_payload() was present when the function was introduced in 2.6.34. Reported-by: Sander Eikelenboom Signed-off-by: Andiry Xu Signed-off-by: Sarah Sharp Cc: stable@kernel.org diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 2027706..d178761 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1045,7 +1045,7 @@ static inline u32 xhci_get_max_esit_payload(struct xhci_hcd *xhci, if (udev->speed == USB_SPEED_SUPER) return ep->ss_ep_comp.wBytesPerInterval; - max_packet = ep->desc.wMaxPacketSize & 0x3ff; + max_packet = GET_MAX_PACKET(ep->desc.wMaxPacketSize); max_burst = (ep->desc.wMaxPacketSize & 0x1800) >> 11; /* A 0 in max burst means 1 transfer per ESIT */ return max_packet * (max_burst + 1); @@ -1135,7 +1135,7 @@ int xhci_endpoint_init(struct xhci_hcd *xhci, /* Fall through */ case USB_SPEED_FULL: case USB_SPEED_LOW: - max_packet = ep->desc.wMaxPacketSize & 0x3ff; + max_packet = GET_MAX_PACKET(ep->desc.wMaxPacketSize); ep_ctx->ep_info2 |= MAX_PACKET(max_packet); break; default: diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 93d3bf4..85e6564 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -621,6 +621,11 @@ struct xhci_ep_ctx { #define MAX_PACKET_MASK (0xffff << 16) #define MAX_PACKET_DECODED(p) (((p) >> 16) & 0xffff) +/* Get max packet size from ep desc. Bit 10..0 specify the max packet size. + * USB2.0 spec 9.6.6. 
+ */ +#define GET_MAX_PACKET(p) ((p) & 0x7ff) + /* tx_info bitmasks */ #define AVG_TRB_LENGTH_FOR_EP(p) ((p) & 0xffff) #define MAX_ESIT_PAYLOAD_FOR_EP(p) (((p) & 0xffff) << 16) -- cgit v0.10.2 From e060e7af98182494b764d002eba7fa022fe91bdf Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 11 Nov 2010 12:37:43 -0800 Subject: xen: set vma flag VM_PFNMAP in the privcmd mmap file_op Set VM_PFNMAP in the privcmd mmap file_op, rather than later in xen_remap_domain_mfn_range when it is too late because vma_wants_writenotify has already been called and vm_page_prot has already been modified. Signed-off-by: Stefano Stabellini Signed-off-by: Jeremy Fitzhardinge diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index f08ea04..792de434 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2299,7 +2299,8 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); - vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; + BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) == + (VM_PFNMAP | VM_RESERVED | VM_IO))); rmd.mfn = mfn; rmd.prot = prot; diff --git a/drivers/xen/xenfs/privcmd.c b/drivers/xen/xenfs/privcmd.c index 2eb04c8..88474d4 100644 --- a/drivers/xen/xenfs/privcmd.c +++ b/drivers/xen/xenfs/privcmd.c @@ -380,8 +380,9 @@ static int privcmd_mmap(struct file *file, struct vm_area_struct *vma) if (xen_feature(XENFEAT_auto_translated_physmap)) return -ENOSYS; - /* DONTCOPY is essential for Xen as copy_page_range is broken. 
*/ - vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY; + /* DONTCOPY is essential for Xen because copy_page_range doesn't know + * how to recreate these mappings */ + vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP; vma->vm_ops = &privcmd_vm_ops; vma->vm_private_data = NULL; -- cgit v0.10.2 From a1629c3b24f26ec1b0f534874af674a6b4c1540b Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 11 Nov 2010 15:24:06 -0800 Subject: ceph: fix dangling pointer Clear fi->last_name when it's freed. The only caller is rewinddir() (or equivalent lseek). Signed-off-by: Sage Weil diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index e0a2dc6..1e11ed7 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -414,6 +414,7 @@ static void reset_readdir(struct ceph_file_info *fi) fi->last_readdir = NULL; } kfree(fi->last_name); + fi->last_name = NULL; fi->next_offset = 2; /* compensate for . and .. */ if (fi->dentry) { dput(fi->dentry); -- cgit v0.10.2 From d9bcbf343ec63e1104b5276195888ee06b4d086f Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Thu, 11 Nov 2010 17:32:25 +0100 Subject: mmc: fix rmmod race for hosts using card-detection polling MMC hosts that poll for card detection by defining the MMC_CAP_NEEDS_POLL flag have a race on rmmod, where the delayed work is cancelled without waiting for completed polling. To prevent this a _sync version of the work cancellation has to be used. 
Signed-off-by: Guennadi Liakhovetski Cc: Signed-off-by: Chris Ball diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 8f86d70..31ae07a 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -1559,7 +1559,7 @@ void mmc_stop_host(struct mmc_host *host) if (host->caps & MMC_CAP_DISABLE) cancel_delayed_work(&host->disable); - cancel_delayed_work(&host->detect); + cancel_delayed_work_sync(&host->detect); mmc_flush_scheduled_work(); /* clear pm flags now and let card drivers set them as needed */ -- cgit v0.10.2 From 7b88dadc13e0004947de52df128dbd5b0754ed0a Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 11 Nov 2010 16:48:59 -0800 Subject: ceph: fix frag offset for non-leftmost frags We start at offset 2 for the leftmost frag, and 0 for subsequent frags. When we reach the end (rightmost), we go back to 2. This fixes readdir on fragmented (large) directories. Signed-off-by: Sage Weil diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 1e11ed7..5f67728 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -336,7 +336,10 @@ more: if (req->r_reply_info.dir_end) { kfree(fi->last_name); fi->last_name = NULL; - fi->next_offset = 2; + if (ceph_frag_is_rightmost(frag)) + fi->next_offset = 2; + else + fi->next_offset = 0; } else { rinfo = &req->r_reply_info; err = note_last_dentry(fi, -- cgit v0.10.2 From 6c0aca288e726405b01dacb12cac556454d34b2a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 11 Nov 2010 21:18:43 +0100 Subject: x86: Ignore trap bits on single step exceptions When a single step exception fires, the trap bits, used to signal hardware breakpoints, are in a random state. These trap bits might be set if another exception will follow, like a breakpoint in the next instruction, or a watchpoint in the previous one. Or there can be any junk there. So if we handle these trap bits during the single step exception, we are going to handle an exception twice, or we are going to handle junk. Just ignore them in this case. 
This fixes https://bugzilla.kernel.org/show_bug.cgi?id=21332 Reported-by: Michael Stefaniuc Signed-off-by: Frederic Weisbecker Cc: Rafael J. Wysocki Cc: Maciej Rutecki Cc: Alexandre Julliard Cc: Jason Wessel Cc: All since 2.6.33.x diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index ff15c9d..42c5942 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -433,6 +433,10 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) dr6_p = (unsigned long *)ERR_PTR(args->err); dr6 = *dr6_p; + /* If it's a single step, TRAP bits are random */ + if (dr6 & DR_STEP) + return NOTIFY_DONE; + /* Do an early return if no trap bits are set in DR6 */ if ((dr6 & DR_TRAP_BITS) == 0) return NOTIFY_DONE; -- cgit v0.10.2 From 3c502e7a0255d82621ff25d60cc816624830497e Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 4 Nov 2010 17:33:01 -0500 Subject: perf,hw_breakpoint: Initialize hardware api earlier When using early debugging, the kernel does not initialize the hw_breakpoint API early enough and causes the late initialization of the kernel debugger to fail. The boot arguments are: earlyprintk=vga ekgdboc=kbd kgdbwait Then simply type "go" at the kdb prompt and boot. The kernel will later emit the message: kgdb: Could not allocate hwbreakpoints And at that point the kernel debugger will cease to work correctly. The solution is to initialize the hw_breakpoint at the same time that all the other perf call backs are initialized instead of using a core_initcall() initialization which happens well after the kernel debugger can make use of hardware breakpoints. 
Signed-off-by: Jason Wessel CC: Frederic Weisbecker CC: Ingo Molnar CC: Peter Zijlstra LKML-Reference: <4CD3396D.1090308@windriver.com> Signed-off-by: Frederic Weisbecker diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index a2d6ea4..d1e55fe 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -33,6 +33,8 @@ enum bp_type_idx { #ifdef CONFIG_HAVE_HW_BREAKPOINT +extern int __init init_hw_breakpoint(void); + static inline void hw_breakpoint_init(struct perf_event_attr *attr) { memset(attr, 0, sizeof(*attr)); @@ -108,6 +110,8 @@ static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) #else /* !CONFIG_HAVE_HW_BREAKPOINT */ +static inline int __init init_hw_breakpoint(void) { return 0; } + static inline struct perf_event * register_user_hw_breakpoint(struct perf_event_attr *attr, perf_overflow_handler_t triggered, diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 2c9120f..e532582 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -620,7 +620,7 @@ static struct pmu perf_breakpoint = { .read = hw_breakpoint_pmu_read, }; -static int __init init_hw_breakpoint(void) +int __init init_hw_breakpoint(void) { unsigned int **task_bp_pinned; int cpu, err_cpu; @@ -655,6 +655,5 @@ static int __init init_hw_breakpoint(void) return -ENOMEM; } -core_initcall(init_hw_breakpoint); diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 517d827..05b7d8c 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -31,6 +31,7 @@ #include #include #include +#include #include @@ -6295,6 +6296,8 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) void __init perf_event_init(void) { + int ret; + perf_event_init_all_cpus(); init_srcu_struct(&pmus_srcu); perf_pmu_register(&perf_swevent); @@ -6302,4 +6305,7 @@ void __init perf_event_init(void) perf_pmu_register(&perf_task_clock); perf_tp_register(); perf_cpu_notifier(perf_cpu_notify); + + ret = 
init_hw_breakpoint(); + WARN(ret, "hw_breakpoint initialization failed with: %d", ret); } -- cgit v0.10.2 From 7e77506a5918d82cafa2ffa783ab57c23f9e9817 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Thu, 30 Sep 2010 12:37:26 +0100 Subject: xen: implement XENMEM_machphys_mapping This hypercall allows Xen to specify a non-default location for the machine to physical mapping. This capability is used when running a 32 bit domain 0 on a 64 bit hypervisor to shrink the hypervisor hole to exactly the size required. [ Impact: add Xen hypercall definitions ] Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Stefano Stabellini diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index e8506c1..1c10c88 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -61,9 +61,9 @@ DEFINE_GUEST_HANDLE(void); #define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) #endif -#ifndef machine_to_phys_mapping -#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) -#endif +#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) +#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) +#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>__MACH2PHYS_SHIFT) /* Maximum number of virtual CPUs in multi-processor guests. */ #define MAX_VIRT_CPUS 32 diff --git a/arch/x86/include/asm/xen/interface_32.h b/arch/x86/include/asm/xen/interface_32.h index 42a7e00..8413688 100644 --- a/arch/x86/include/asm/xen/interface_32.h +++ b/arch/x86/include/asm/xen/interface_32.h @@ -32,6 +32,11 @@ /* And the trap vector is... */ #define TRAP_INSTR "int $0x82" +#define __MACH2PHYS_VIRT_START 0xF5800000 +#define __MACH2PHYS_VIRT_END 0xF6800000 + +#define __MACH2PHYS_SHIFT 2 + /* * Virtual addresses beyond this are not modifiable by guest OSes. The * machine->physical mapping table starts at this address, read-only. 
diff --git a/arch/x86/include/asm/xen/interface_64.h b/arch/x86/include/asm/xen/interface_64.h index 100d266..839a481 100644 --- a/arch/x86/include/asm/xen/interface_64.h +++ b/arch/x86/include/asm/xen/interface_64.h @@ -39,18 +39,7 @@ #define __HYPERVISOR_VIRT_END 0xFFFF880000000000 #define __MACH2PHYS_VIRT_START 0xFFFF800000000000 #define __MACH2PHYS_VIRT_END 0xFFFF804000000000 - -#ifndef HYPERVISOR_VIRT_START -#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) -#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END) -#endif - -#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) -#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) -#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3) -#ifndef machine_to_phys_mapping -#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) -#endif +#define __MACH2PHYS_SHIFT 3 /* * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base) diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index dd8c141..8760cc6 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -35,6 +36,8 @@ typedef struct xpaddr { #define MAX_DOMAIN_PAGES \ ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE)) +extern unsigned long *machine_to_phys_mapping; +extern unsigned int machine_to_phys_order; extern unsigned long get_phys_to_machine(unsigned long pfn); extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); @@ -69,10 +72,8 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) if (xen_feature(XENFEAT_auto_translated_physmap)) return mfn; -#if 0 if (unlikely((mfn >> machine_to_phys_order) != 0)) - return max_mapnr; -#endif + return ~0; pfn = 0; /* diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 235c0f4..bd35549 100644 --- 
a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -75,6 +75,11 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); enum xen_domain_type xen_domain_type = XEN_NATIVE; EXPORT_SYMBOL_GPL(xen_domain_type); +unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START; +EXPORT_SYMBOL(machine_to_phys_mapping); +unsigned int machine_to_phys_order; +EXPORT_SYMBOL(machine_to_phys_order); + struct start_info *xen_start_info; EXPORT_SYMBOL_GPL(xen_start_info); @@ -1097,6 +1102,8 @@ asmlinkage void __init xen_start_kernel(void) xen_domain_type = XEN_PV_DOMAIN; + xen_setup_machphys_mapping(); + /* Install Xen paravirt ops */ pv_info = xen_info; pv_init_ops = xen_init_ops; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 21ed8d7..bd2713a 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2034,6 +2034,20 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) set_page_prot(pmd, PAGE_KERNEL_RO); } +void __init xen_setup_machphys_mapping(void) +{ + struct xen_machphys_mapping mapping; + unsigned long machine_to_phys_nr_ents; + + if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) { + machine_to_phys_mapping = (unsigned long *)mapping.v_start; + machine_to_phys_nr_ents = mapping.max_mfn + 1; + } else { + machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES; + } + machine_to_phys_order = fls(machine_to_phys_nr_ents - 1); +} + #ifdef CONFIG_X86_64 static void convert_pfn_mfn(void *v) { diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h index d7a6c13..eac3ce1 100644 --- a/include/xen/interface/memory.h +++ b/include/xen/interface/memory.h @@ -141,6 +141,19 @@ struct xen_machphys_mfn_list { DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list); /* + * Returns the location in virtual address space of the machine_to_phys + * mapping table. Architectures which do not have a m2p table, or which do not + * map it by default into guest address space, do not implement this command. 
+ * arg == addr of xen_machphys_mapping_t. + */ +#define XENMEM_machphys_mapping 12 +struct xen_machphys_mapping { + unsigned long v_start, v_end; /* Start and end virtual addresses. */ + unsigned long max_mfn; /* Maximum MFN that can be looked up. */ +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mapping_t); + +/* * Sets the GPFN at which a particular page appears in the specified guest's * pseudophysical address space. * arg == addr of xen_add_to_physmap_t. -- cgit v0.10.2 From 91e86e560d0b3ce4c5fc64fd2bbb99f856a30a4e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 10 Nov 2010 12:56:12 +0100 Subject: tracing: Fix recursive user stack trace The user stack trace can fault when examining the trace. Which would call the do_page_fault handler, which would trace again, which would do the user stack trace, which would fault and call do_page_fault again ... Thus this is causing a recursive bug. We need to have a recursion detector here. [ Resubmitted by Jiri Olsa ] [ Eric Dumazet recommended using __this_cpu_* instead of __get_cpu_* ] Cc: Eric Dumazet Signed-off-by: Jiri Olsa LKML-Reference: <1289390172-9730-3-git-send-email-jolsa@redhat.com> Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 82d9b81..ee6a733 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1284,6 +1284,8 @@ void trace_dump_stack(void) __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count()); } +static DEFINE_PER_CPU(int, user_stack_count); + void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) { @@ -1302,6 +1304,18 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) if (unlikely(in_nmi())) return; + /* + * prevent recursion, since the user stack tracing may + * trigger other kernel events. 
+ */ + preempt_disable(); + if (__this_cpu_read(user_stack_count)) + goto out; + + __this_cpu_inc(user_stack_count); + + + event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, sizeof(*entry), flags, pc); if (!event) @@ -1319,6 +1333,11 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) save_stack_trace_user(&trace); if (!filter_check_discard(call, entry, buffer, event)) ring_buffer_unlock_commit(buffer, event); + + __this_cpu_dec(user_stack_count); + + out: + preempt_enable(); } #ifdef UNUSED -- cgit v0.10.2 From 59c55ba1fba61ae13fbfff094a674e73407201f1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 12 Nov 2010 06:30:29 -0500 Subject: cifs: don't take extra tlink reference in initiate_cifs_search It's possible for initiate_cifs_search to be called on a filp that already has private_data attached. If this happens, we'll end up calling cifs_sb_tlink, taking an extra reference to the tlink and attaching that to the cifsFileInfo. This leads to refcount leaks that manifest as a "stuck" cifsd at umount time. Fix this by only looking up the tlink for the cifsFile on the filp's first pass through this function. When called on a filp that already has cifsFileInfo associated with it, just use the tlink reference that it already owns. 
This patch fixes samba.org bug 7792: https://bugzilla.samba.org/show_bug.cgi?id=7792 Signed-off-by: Jeff Layton Reviewed-and-Tested-by: Suresh Jayaraman Signed-off-by: Steve French diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index ef7bb7b..32d300e 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -226,26 +226,29 @@ static int initiate_cifs_search(const int xid, struct file *file) char *full_path = NULL; struct cifsFileInfo *cifsFile; struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); - struct tcon_link *tlink; + struct tcon_link *tlink = NULL; struct cifsTconInfo *pTcon; - tlink = cifs_sb_tlink(cifs_sb); - if (IS_ERR(tlink)) - return PTR_ERR(tlink); - pTcon = tlink_tcon(tlink); - - if (file->private_data == NULL) - file->private_data = - kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); if (file->private_data == NULL) { - rc = -ENOMEM; - goto error_exit; + tlink = cifs_sb_tlink(cifs_sb); + if (IS_ERR(tlink)) + return PTR_ERR(tlink); + + cifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); + if (cifsFile == NULL) { + rc = -ENOMEM; + goto error_exit; + } + file->private_data = cifsFile; + cifsFile->tlink = cifs_get_tlink(tlink); + pTcon = tlink_tcon(tlink); + } else { + cifsFile = file->private_data; + pTcon = tlink_tcon(cifsFile->tlink); } - cifsFile = file->private_data; cifsFile->invalidHandle = true; cifsFile->srch_inf.endOfSearch = false; - cifsFile->tlink = cifs_get_tlink(tlink); full_path = build_path_from_dentry(file->f_path.dentry); if (full_path == NULL) { -- cgit v0.10.2 From 85345517fe6d4de27b0d6ca19fef9d28ac947c4a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 13 Nov 2010 09:49:11 +0000 Subject: drm/i915: Retire any pending operations on the old scanout when switching An old and oft reported bug, is that of the GPU hanging on a MI_WAIT_FOR_EVENT following a mode switch. 
The cause is that the GPU is waiting on a scanline counter on an inactive pipe, and so waits for a very long time until eventually the user reboots his machine. We can prevent this either by moving the WAIT into the kernel and thereby incurring considerable cost on every swapbuffers, or by waiting for the GPU to retire the last batch that accesses the framebuffer before installing a new one. As mode switches are much rarer than swap buffers, this looks like an easy choice. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=28964 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=29252 Signed-off-by: Chris Wilson Cc: stable@kernel.org diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 90414ae..409826d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1045,6 +1045,8 @@ void i915_gem_clflush_object(struct drm_gem_object *obj); int i915_gem_object_set_domain(struct drm_gem_object *obj, uint32_t read_domains, uint32_t write_domain); +int i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj, + bool interruptible); int i915_gem_init_ringbuffer(struct drm_device *dev); void i915_gem_cleanup_ringbuffer(struct drm_device *dev); int i915_gem_do_init(struct drm_device *dev, unsigned long start, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index eba9b16..951e3d4 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2907,6 +2907,20 @@ i915_gem_object_set_to_display_plane(struct drm_gem_object *obj, return 0; } +int +i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj, + bool interruptible) +{ + if (!obj->active) + return 0; + + if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) + i915_gem_flush_ring(obj->base.dev, NULL, obj->ring, + 0, obj->base.write_domain); + + return i915_gem_object_wait_rendering(&obj->base, interruptible); +} + /** * Moves a single object to the CPU read, and possibly write domain. 
* diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 48d8fd6..bee24b1 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1611,6 +1611,18 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, wait_event(dev_priv->pending_flip_queue, atomic_read(&obj_priv->pending_flip) == 0); + + /* Big Hammer, we also need to ensure that any pending + * MI_WAIT_FOR_EVENT inside a user batch buffer on the + * current scanout is retired before unpinning the old + * framebuffer. + */ + ret = i915_gem_object_flush_gpu(obj_priv, false); + if (ret) { + i915_gem_object_unpin(to_intel_framebuffer(crtc->fb)->obj); + mutex_unlock(&dev->struct_mutex); + return ret; + } } ret = intel_pipe_set_base_atomic(crtc, crtc->fb, x, y, -- cgit v0.10.2 From 362d31297fafb150676f4d564ecc7f7f3e3b7fd4 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sun, 14 Nov 2010 03:34:30 +0000 Subject: [CIFS] fs/cifs/Kconfig: CIFS depends on CRYPTO_HMAC linux-2.6.37-rc1: I compiled a kernel with CIFS which subsequently failed with an error indicating it couldn't initialize crypto module "hmacmd5". CONFIG_CRYPTO_HMAC=y fixed the problem. This patch makes CIFS depend on CRYPTO_HMAC in kconfig. Signed-off-by: Jody Bruchon CC: Shirish Pargaonkar Signed-off-by: Steve French diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 0ed2139..3bdbfb0 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -4,6 +4,7 @@ config CIFS select NLS select CRYPTO select CRYPTO_MD5 + select CRYPTO_HMAC select CRYPTO_ARC4 help This is the client VFS module for the Common Internet File System -- cgit v0.10.2 From fba4312e223f1187efc8c083daed70e57fa9c9d3 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sun, 14 Nov 2010 20:24:35 -0500 Subject: drm/radeon/kms: fix and unify tiled buffer alignment checking for r6xx/7xx Tiled buffers have the same alignment requirements regardless of whether the surface is for db, cb, or textures. 
Previously, the calculations were inconsistent for each buffer type. - Unify the alignment calculations in a common function - Standardize the alignment units (pixels for pitch/height/depth, bytes for base) - properly check the buffer base alignments Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index 37cc2aa..0f90fc3 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -50,6 +50,7 @@ struct r600_cs_track { u32 nsamples; u32 cb_color_base_last[8]; struct radeon_bo *cb_color_bo[8]; + u64 cb_color_bo_mc[8]; u32 cb_color_bo_offset[8]; struct radeon_bo *cb_color_frag_bo[8]; struct radeon_bo *cb_color_tile_bo[8]; @@ -67,6 +68,7 @@ struct r600_cs_track { u32 db_depth_size; u32 db_offset; struct radeon_bo *db_bo; + u64 db_bo_mc; }; static inline int r600_bpe_from_format(u32 *bpe, u32 format) @@ -140,6 +142,68 @@ static inline int r600_bpe_from_format(u32 *bpe, u32 format) return 0; } +struct array_mode_checker { + int array_mode; + u32 group_size; + u32 nbanks; + u32 npipes; + u32 nsamples; + u32 bpe; +}; + +/* returns alignment in pixels for pitch/height/depth and bytes for base */ +static inline int r600_get_array_mode_alignment(struct array_mode_checker *values, + u32 *pitch_align, + u32 *height_align, + u32 *depth_align, + u64 *base_align) +{ + u32 tile_width = 8; + u32 tile_height = 8; + u32 macro_tile_width = values->nbanks; + u32 macro_tile_height = values->npipes; + u32 tile_bytes = tile_width * tile_height * values->bpe * values->nsamples; + u32 macro_tile_bytes = macro_tile_width * macro_tile_height * tile_bytes; + + switch (values->array_mode) { + case ARRAY_LINEAR_GENERAL: + /* technically tile_width/_height for pitch/height */ + *pitch_align = 1; /* tile_width */ + *height_align = 1; /* tile_height */ + *depth_align = 1; + *base_align = 1; + break; + case ARRAY_LINEAR_ALIGNED: + *pitch_align = max((u32)64, (u32)(values->group_size / 
values->bpe)); + *height_align = tile_height; + *depth_align = 1; + *base_align = values->group_size; + break; + case ARRAY_1D_TILED_THIN1: + *pitch_align = max((u32)tile_width, + (u32)(values->group_size / + (tile_height * values->bpe * values->nsamples))); + *height_align = tile_height; + *depth_align = 1; + *base_align = values->group_size; + break; + case ARRAY_2D_TILED_THIN1: + *pitch_align = max((u32)macro_tile_width, + (u32)(((values->group_size / tile_height) / + (values->bpe * values->nsamples)) * + values->nbanks)) * tile_width; + *height_align = macro_tile_height * tile_height; + *depth_align = 1; + *base_align = max(macro_tile_bytes, + (*pitch_align) * values->bpe * (*height_align) * values->nsamples); + break; + default: + return -EINVAL; + } + + return 0; +} + static void r600_cs_track_init(struct r600_cs_track *track) { int i; @@ -153,10 +217,12 @@ static void r600_cs_track_init(struct r600_cs_track *track) track->cb_color_info[i] = 0; track->cb_color_bo[i] = NULL; track->cb_color_bo_offset[i] = 0xFFFFFFFF; + track->cb_color_bo_mc[i] = 0xFFFFFFFF; } track->cb_target_mask = 0xFFFFFFFF; track->cb_shader_mask = 0xFFFFFFFF; track->db_bo = NULL; + track->db_bo_mc = 0xFFFFFFFF; /* assume the biggest format and that htile is enabled */ track->db_depth_info = 7 | (1 << 25); track->db_depth_view = 0xFFFFC000; @@ -168,7 +234,10 @@ static void r600_cs_track_init(struct r600_cs_track *track) static inline int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) { struct r600_cs_track *track = p->track; - u32 bpe = 0, pitch, slice_tile_max, size, tmp, height, pitch_align; + u32 bpe = 0, slice_tile_max, size, tmp; + u32 height, height_align, pitch, pitch_align, depth_align; + u64 base_offset, base_align; + struct array_mode_checker array_check; volatile u32 *ib = p->ib->ptr; unsigned array_mode; @@ -183,60 +252,40 @@ static inline int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) i, track->cb_color_info[i]); return -EINVAL; } - /* pitch is 
the number of 8x8 tiles per row */ - pitch = G_028060_PITCH_TILE_MAX(track->cb_color_size[i]) + 1; + /* pitch in pixels */ + pitch = (G_028060_PITCH_TILE_MAX(track->cb_color_size[i]) + 1) * 8; slice_tile_max = G_028060_SLICE_TILE_MAX(track->cb_color_size[i]) + 1; slice_tile_max *= 64; - height = slice_tile_max / (pitch * 8); + height = slice_tile_max / pitch; if (height > 8192) height = 8192; array_mode = G_0280A0_ARRAY_MODE(track->cb_color_info[i]); + + base_offset = track->cb_color_bo_mc[i] + track->cb_color_bo_offset[i]; + array_check.array_mode = array_mode; + array_check.group_size = track->group_size; + array_check.nbanks = track->nbanks; + array_check.npipes = track->npipes; + array_check.nsamples = track->nsamples; + array_check.bpe = bpe; + if (r600_get_array_mode_alignment(&array_check, + &pitch_align, &height_align, &depth_align, &base_align)) { + dev_warn(p->dev, "%s invalid tiling %d for %d (0x%08X)\n", __func__, + G_0280A0_ARRAY_MODE(track->cb_color_info[i]), i, + track->cb_color_info[i]); + return -EINVAL; + } switch (array_mode) { case V_0280A0_ARRAY_LINEAR_GENERAL: - /* technically height & 0x7 */ break; case V_0280A0_ARRAY_LINEAR_ALIGNED: - pitch_align = max((u32)64, (u32)(track->group_size / bpe)) / 8; - if (!IS_ALIGNED(pitch, pitch_align)) { - dev_warn(p->dev, "%s:%d cb pitch (%d) invalid\n", - __func__, __LINE__, pitch); - return -EINVAL; - } - if (!IS_ALIGNED(height, 8)) { - dev_warn(p->dev, "%s:%d cb height (%d) invalid\n", - __func__, __LINE__, height); - return -EINVAL; - } break; case V_0280A0_ARRAY_1D_TILED_THIN1: - pitch_align = max((u32)8, (u32)(track->group_size / (8 * bpe * track->nsamples))) / 8; - if (!IS_ALIGNED(pitch, pitch_align)) { - dev_warn(p->dev, "%s:%d cb pitch (%d) invalid\n", - __func__, __LINE__, pitch); - return -EINVAL; - } /* avoid breaking userspace */ if (height > 7) height &= ~0x7; - if (!IS_ALIGNED(height, 8)) { - dev_warn(p->dev, "%s:%d cb height (%d) invalid\n", - __func__, __LINE__, height); - return -EINVAL; - 
} break; case V_0280A0_ARRAY_2D_TILED_THIN1: - pitch_align = max((u32)track->nbanks, - (u32)(((track->group_size / 8) / (bpe * track->nsamples)) * track->nbanks)) / 8; - if (!IS_ALIGNED(pitch, pitch_align)) { - dev_warn(p->dev, "%s:%d cb pitch (%d) invalid\n", - __func__, __LINE__, pitch); - return -EINVAL; - } - if (!IS_ALIGNED((height / 8), track->npipes)) { - dev_warn(p->dev, "%s:%d cb height (%d) invalid\n", - __func__, __LINE__, height); - return -EINVAL; - } break; default: dev_warn(p->dev, "%s invalid tiling %d for %d (0x%08X)\n", __func__, @@ -244,8 +293,24 @@ static inline int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) track->cb_color_info[i]); return -EINVAL; } + + if (!IS_ALIGNED(pitch, pitch_align)) { + dev_warn(p->dev, "%s:%d cb pitch (%d) invalid\n", + __func__, __LINE__, pitch); + return -EINVAL; + } + if (!IS_ALIGNED(height, height_align)) { + dev_warn(p->dev, "%s:%d cb height (%d) invalid\n", + __func__, __LINE__, height); + return -EINVAL; + } + if (!IS_ALIGNED(base_offset, base_align)) { + dev_warn(p->dev, "%s offset[%d] 0x%llx not aligned\n", __func__, i, base_offset); + return -EINVAL; + } + /* check offset */ - tmp = height * pitch * 8 * bpe; + tmp = height * pitch * bpe; if ((tmp + track->cb_color_bo_offset[i]) > radeon_bo_size(track->cb_color_bo[i])) { if (array_mode == V_0280A0_ARRAY_LINEAR_GENERAL) { /* the initial DDX does bad things with the CB size occasionally */ @@ -260,15 +325,11 @@ static inline int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) return -EINVAL; } } - if (!IS_ALIGNED(track->cb_color_bo_offset[i], track->group_size)) { - dev_warn(p->dev, "%s offset[%d] %d not aligned\n", __func__, i, track->cb_color_bo_offset[i]); - return -EINVAL; - } /* limit max tile */ - tmp = (height * pitch * 8) >> 6; + tmp = (height * pitch) >> 6; if (tmp < slice_tile_max) slice_tile_max = tmp; - tmp = S_028060_PITCH_TILE_MAX(pitch - 1) | + tmp = S_028060_PITCH_TILE_MAX((pitch / 8) - 1) | 
S_028060_SLICE_TILE_MAX(slice_tile_max - 1); ib[track->cb_color_size_idx[i]] = tmp; return 0; @@ -310,7 +371,12 @@ static int r600_cs_track_check(struct radeon_cs_parser *p) /* Check depth buffer */ if (G_028800_STENCIL_ENABLE(track->db_depth_control) || G_028800_Z_ENABLE(track->db_depth_control)) { - u32 nviews, bpe, ntiles, pitch, pitch_align, height, size, slice_tile_max; + u32 nviews, bpe, ntiles, size, slice_tile_max; + u32 height, height_align, pitch, pitch_align, depth_align; + u64 base_offset, base_align; + struct array_mode_checker array_check; + int array_mode; + if (track->db_bo == NULL) { dev_warn(p->dev, "z/stencil with no depth buffer\n"); return -EINVAL; @@ -353,41 +419,34 @@ static int r600_cs_track_check(struct radeon_cs_parser *p) ib[track->db_depth_size_idx] = S_028000_SLICE_TILE_MAX(tmp - 1) | (track->db_depth_size & 0x3FF); } else { size = radeon_bo_size(track->db_bo); - pitch = G_028000_PITCH_TILE_MAX(track->db_depth_size) + 1; + /* pitch in pixels */ + pitch = (G_028000_PITCH_TILE_MAX(track->db_depth_size) + 1) * 8; slice_tile_max = G_028000_SLICE_TILE_MAX(track->db_depth_size) + 1; slice_tile_max *= 64; - height = slice_tile_max / (pitch * 8); + height = slice_tile_max / pitch; if (height > 8192) height = 8192; - switch (G_028010_ARRAY_MODE(track->db_depth_info)) { + base_offset = track->db_bo_mc + track->db_offset; + array_mode = G_028010_ARRAY_MODE(track->db_depth_info); + array_check.array_mode = array_mode; + array_check.group_size = track->group_size; + array_check.nbanks = track->nbanks; + array_check.npipes = track->npipes; + array_check.nsamples = track->nsamples; + array_check.bpe = bpe; + if (r600_get_array_mode_alignment(&array_check, + &pitch_align, &height_align, &depth_align, &base_align)) { + dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__, + G_028010_ARRAY_MODE(track->db_depth_info), + track->db_depth_info); + return -EINVAL; + } + switch (array_mode) { case V_028010_ARRAY_1D_TILED_THIN1: - pitch_align = 
(max((u32)8, (u32)(track->group_size / (8 * bpe))) / 8); - if (!IS_ALIGNED(pitch, pitch_align)) { - dev_warn(p->dev, "%s:%d db pitch (%d) invalid\n", - __func__, __LINE__, pitch); - return -EINVAL; - } /* don't break userspace */ height &= ~0x7; - if (!IS_ALIGNED(height, 8)) { - dev_warn(p->dev, "%s:%d db height (%d) invalid\n", - __func__, __LINE__, height); - return -EINVAL; - } break; case V_028010_ARRAY_2D_TILED_THIN1: - pitch_align = max((u32)track->nbanks, - (u32)(((track->group_size / 8) / bpe) * track->nbanks)) / 8; - if (!IS_ALIGNED(pitch, pitch_align)) { - dev_warn(p->dev, "%s:%d db pitch (%d) invalid\n", - __func__, __LINE__, pitch); - return -EINVAL; - } - if (!IS_ALIGNED((height / 8), track->npipes)) { - dev_warn(p->dev, "%s:%d db height (%d) invalid\n", - __func__, __LINE__, height); - return -EINVAL; - } break; default: dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__, @@ -395,15 +454,27 @@ static int r600_cs_track_check(struct radeon_cs_parser *p) track->db_depth_info); return -EINVAL; } - if (!IS_ALIGNED(track->db_offset, track->group_size)) { - dev_warn(p->dev, "%s offset[%d] %d not aligned\n", __func__, i, track->db_offset); + + if (!IS_ALIGNED(pitch, pitch_align)) { + dev_warn(p->dev, "%s:%d db pitch (%d) invalid\n", + __func__, __LINE__, pitch); + return -EINVAL; + } + if (!IS_ALIGNED(height, height_align)) { + dev_warn(p->dev, "%s:%d db height (%d) invalid\n", + __func__, __LINE__, height); return -EINVAL; } + if (!IS_ALIGNED(base_offset, base_align)) { + dev_warn(p->dev, "%s offset[%d] 0x%llx not aligned\n", __func__, i, base_offset); + return -EINVAL; + } + ntiles = G_028000_SLICE_TILE_MAX(track->db_depth_size) + 1; nviews = G_028004_SLICE_MAX(track->db_depth_view) + 1; tmp = ntiles * bpe * 64 * nviews; if ((tmp + track->db_offset) > radeon_bo_size(track->db_bo)) { - dev_warn(p->dev, "z/stencil buffer too small (0x%08X %d %d %d -> %d have %ld)\n", + dev_warn(p->dev, "z/stencil buffer too small (0x%08X %d %d %d -> %u have %lu)\n", 
track->db_depth_size, ntiles, nviews, bpe, tmp + track->db_offset, radeon_bo_size(track->db_bo)); return -EINVAL; @@ -954,6 +1025,7 @@ static inline int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); track->cb_color_base_last[tmp] = ib[idx]; track->cb_color_bo[tmp] = reloc->robj; + track->cb_color_bo_mc[tmp] = reloc->lobj.gpu_offset; break; case DB_DEPTH_BASE: r = r600_cs_packet_next_reloc(p, &reloc); @@ -965,6 +1037,7 @@ static inline int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx track->db_offset = radeon_get_ib_value(p, idx) << 8; ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); track->db_bo = reloc->robj; + track->db_bo_mc = reloc->lobj.gpu_offset; break; case DB_HTILE_DATA_BASE: case SQ_PGM_START_FS: @@ -1086,16 +1159,25 @@ static void r600_texture_size(unsigned nfaces, unsigned blevel, unsigned nlevels static inline int r600_check_texture_resource(struct radeon_cs_parser *p, u32 idx, struct radeon_bo *texture, struct radeon_bo *mipmap, + u64 base_offset, + u64 mip_offset, u32 tiling_flags) { struct r600_cs_track *track = p->track; u32 nfaces, nlevels, blevel, w0, h0, d0, bpe = 0; - u32 word0, word1, l0_size, mipmap_size, pitch, pitch_align; + u32 word0, word1, l0_size, mipmap_size; + u32 height_align, pitch, pitch_align, depth_align; + u64 base_align; + struct array_mode_checker array_check; /* on legacy kernel we don't perform advanced check */ if (p->rdev == NULL) return 0; + /* convert to bytes */ + base_offset <<= 8; + mip_offset <<= 8; + word0 = radeon_get_ib_value(p, idx + 0); if (tiling_flags & RADEON_TILING_MACRO) word0 |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1); @@ -1128,46 +1210,38 @@ static inline int r600_check_texture_resource(struct radeon_cs_parser *p, u32 i return -EINVAL; } - pitch = G_038000_PITCH(word0) + 1; - switch (G_038000_TILE_MODE(word0)) { - case V_038000_ARRAY_LINEAR_GENERAL: - pitch_align = 1; - /* XXX check 
height align */ - break; - case V_038000_ARRAY_LINEAR_ALIGNED: - pitch_align = max((u32)64, (u32)(track->group_size / bpe)) / 8; - if (!IS_ALIGNED(pitch, pitch_align)) { - dev_warn(p->dev, "%s:%d tex pitch (%d) invalid\n", - __func__, __LINE__, pitch); - return -EINVAL; - } - /* XXX check height align */ - break; - case V_038000_ARRAY_1D_TILED_THIN1: - pitch_align = max((u32)8, (u32)(track->group_size / (8 * bpe))) / 8; - if (!IS_ALIGNED(pitch, pitch_align)) { - dev_warn(p->dev, "%s:%d tex pitch (%d) invalid\n", - __func__, __LINE__, pitch); - return -EINVAL; - } - /* XXX check height align */ - break; - case V_038000_ARRAY_2D_TILED_THIN1: - pitch_align = max((u32)track->nbanks, - (u32)(((track->group_size / 8) / bpe) * track->nbanks)) / 8; - if (!IS_ALIGNED(pitch, pitch_align)) { - dev_warn(p->dev, "%s:%d tex pitch (%d) invalid\n", - __func__, __LINE__, pitch); - return -EINVAL; - } - /* XXX check height align */ - break; - default: - dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__, - G_038000_TILE_MODE(word0), word0); + /* pitch in texels */ + pitch = (G_038000_PITCH(word0) + 1) * 8; + array_check.array_mode = G_038000_TILE_MODE(word0); + array_check.group_size = track->group_size; + array_check.nbanks = track->nbanks; + array_check.npipes = track->npipes; + array_check.nsamples = 1; + array_check.bpe = bpe; + if (r600_get_array_mode_alignment(&array_check, + &pitch_align, &height_align, &depth_align, &base_align)) { + dev_warn(p->dev, "%s:%d tex array mode (%d) invalid\n", + __func__, __LINE__, G_038000_TILE_MODE(word0)); + return -EINVAL; + } + + /* XXX check height as well... 
*/ + + if (!IS_ALIGNED(pitch, pitch_align)) { + dev_warn(p->dev, "%s:%d tex pitch (%d) invalid\n", + __func__, __LINE__, pitch); + return -EINVAL; + } + if (!IS_ALIGNED(base_offset, base_align)) { + dev_warn(p->dev, "%s:%d tex base offset (0x%llx) invalid\n", + __func__, __LINE__, base_offset); + return -EINVAL; + } + if (!IS_ALIGNED(mip_offset, base_align)) { + dev_warn(p->dev, "%s:%d tex mip offset (0x%llx) invalid\n", + __func__, __LINE__, mip_offset); return -EINVAL; } - /* XXX check offset align */ word0 = radeon_get_ib_value(p, idx + 4); word1 = radeon_get_ib_value(p, idx + 5); @@ -1402,7 +1476,10 @@ static int r600_packet3_check(struct radeon_cs_parser *p, mip_offset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); mipmap = reloc->robj; r = r600_check_texture_resource(p, idx+(i*7)+1, - texture, mipmap, reloc->lobj.tiling_flags); + texture, mipmap, + base_offset + radeon_get_ib_value(p, idx+1+(i*7)+2), + mip_offset + radeon_get_ib_value(p, idx+1+(i*7)+3), + reloc->lobj.tiling_flags); if (r) return r; ib[idx+1+(i*7)+2] += base_offset; diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h index 966a793..bff4dc4 100644 --- a/drivers/gpu/drm/radeon/r600d.h +++ b/drivers/gpu/drm/radeon/r600d.h @@ -51,6 +51,12 @@ #define PTE_READABLE (1 << 5) #define PTE_WRITEABLE (1 << 6) +/* tiling bits */ +#define ARRAY_LINEAR_GENERAL 0x00000000 +#define ARRAY_LINEAR_ALIGNED 0x00000001 +#define ARRAY_1D_TILED_THIN1 0x00000002 +#define ARRAY_2D_TILED_THIN1 0x00000004 + /* Registers */ #define ARB_POP 0x2418 #define ENABLE_TC128 (1 << 30) -- cgit v0.10.2 From a41c73e04673b47730df682446f0d52f95e32a5b Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Sun, 14 Nov 2010 23:08:27 +0300 Subject: drm: radeon: fix error value sign enable_vblank implementations should use negative result to indicate error. radeon_enable_vblank() returns EINVAL in this case. Change this to -EINVAL. 
Signed-off-by: Vasiliy Kulikov Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/radeon/radeon_irq.c b/drivers/gpu/drm/radeon/radeon_irq.c index 2f349a3..465746b 100644 --- a/drivers/gpu/drm/radeon/radeon_irq.c +++ b/drivers/gpu/drm/radeon/radeon_irq.c @@ -76,7 +76,7 @@ int radeon_enable_vblank(struct drm_device *dev, int crtc) default: DRM_ERROR("tried to enable vblank on non-existent crtc %d\n", crtc); - return EINVAL; + return -EINVAL; } } else { switch (crtc) { @@ -89,7 +89,7 @@ int radeon_enable_vblank(struct drm_device *dev, int crtc) default: DRM_ERROR("tried to enable vblank on non-existent crtc %d\n", crtc); - return EINVAL; + return -EINVAL; } } -- cgit v0.10.2 From 1b346af2f88a0e19e718a843d39d34e41e5527e1 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Mon, 16 Aug 2010 08:03:04 +0200 Subject: ARM mx3fb: check for DMA engine type We have two dma engines in MX3 systems: The IPU and the SDMA engine. We have to check if we got a channel from the correct engine before proceeding with a channel. 
Signed-off-by: Sascha Hauer Cc: Guennadi Liakhovetski Reviewed-by: Guennadi Liakhovetski diff --git a/drivers/video/mx3fb.c b/drivers/video/mx3fb.c index 7cfc170..ca0f6be 100644 --- a/drivers/video/mx3fb.c +++ b/drivers/video/mx3fb.c @@ -27,6 +27,7 @@ #include #include +#include #include #include #include @@ -1420,6 +1421,9 @@ static bool chan_filter(struct dma_chan *chan, void *arg) struct device *dev; struct mx3fb_platform_data *mx3fb_pdata; + if (!imx_dma_is_ipu(chan)) + return false; + if (!rq) return false; -- cgit v0.10.2 From 415d34195b3c0c26544034d37b8766dfffd36bcf Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Thu, 4 Nov 2010 15:43:11 +0100 Subject: ARM i.MX: sdma is merged, so remove #ifdef SDMA_IS_MERGED Signed-off-by: Sascha Hauer diff --git a/arch/arm/plat-mxc/devices/platform-imx-dma.c b/arch/arm/plat-mxc/devices/platform-imx-dma.c index 02d9890..3a705c7 100644 --- a/arch/arm/plat-mxc/devices/platform-imx-dma.c +++ b/arch/arm/plat-mxc/devices/platform-imx-dma.c @@ -12,15 +12,7 @@ #include #include -#ifdef SDMA_IS_MERGED #include -#else -struct sdma_platform_data { - int sdma_version; - char *cpu_name; - int to_version; -}; -#endif struct imx_imx_sdma_data { resource_size_t iobase; -- cgit v0.10.2 From 8cad8fa1d716b16aa22d5c670ead2b952e1e59b9 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Mon, 9 Aug 2010 09:45:09 +0200 Subject: ARM i.MX pcm037 eet: compile fixes The pcm037 eet extension currently does not compile if SPI_IMX is enabled. Fix it. 
Signed-off-by: Sascha Hauer diff --git a/arch/arm/mach-mx3/mach-pcm037_eet.c b/arch/arm/mach-mx3/mach-pcm037_eet.c index 3392812..fda5654 100644 --- a/arch/arm/mach-mx3/mach-pcm037_eet.c +++ b/arch/arm/mach-mx3/mach-pcm037_eet.c @@ -14,6 +14,7 @@ #include #include +#include #include @@ -59,14 +60,12 @@ static struct spi_board_info pcm037_spi_dev[] = { }; /* Platform Data for MXC CSPI */ -#if defined(CONFIG_SPI_IMX) || defined(CONFIG_SPI_IMX_MODULE) static int pcm037_spi1_cs[] = {MXC_SPI_CS(1), IOMUX_TO_GPIO(MX31_PIN_KEY_COL7)}; static const struct spi_imx_master pcm037_spi1_pdata __initconst = { .chipselect = pcm037_spi1_cs, .num_chipselect = ARRAY_SIZE(pcm037_spi1_cs), }; -#endif /* GPIO-keys input device */ static struct gpio_keys_button pcm037_gpio_keys[] = { -- cgit v0.10.2 From 3530b417f457627432cff1dfd8db659042d66695 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Thu, 4 Nov 2010 23:08:59 +0100 Subject: ARM i.MX spi: fix compilation for i.MX21 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Sascha Hauer Acked-by: Uwe Kleine-König diff --git a/arch/arm/plat-mxc/devices/platform-spi_imx.c b/arch/arm/plat-mxc/devices/platform-spi_imx.c index e48340e..17f724c 100644 --- a/arch/arm/plat-mxc/devices/platform-spi_imx.c +++ b/arch/arm/plat-mxc/devices/platform-spi_imx.c @@ -27,6 +27,7 @@ const struct imx_spi_imx_data imx21_cspi_data[] __initconst = { imx_spi_imx_data_entry(MX21, CSPI, "imx21-cspi", _id, _hwid, SZ_4K) imx21_cspi_data_entry(0, 1), imx21_cspi_data_entry(1, 2), +}; #endif #ifdef CONFIG_ARCH_MX25 -- cgit v0.10.2 From 6f5ae900957b73f5d18c70ad69662ca604ff77e1 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Thu, 4 Nov 2010 21:27:05 +0100 Subject: ARM i.MX27 eukrea: Fix compilation Currently compilation breaks for the eukrea mbimx27 baseboard when CONFIG_SPI_IMX is selected and CONFIG_TOUCHSCREEN_ADS7846 is not selected. Fix this by removing the ifdefs altogether. 
Signed-off-by: Sascha Hauer Cc: Eric Benard diff --git a/arch/arm/mach-imx/eukrea_mbimx27-baseboard.c b/arch/arm/mach-imx/eukrea_mbimx27-baseboard.c index 026263c..7e1e9dc 100644 --- a/arch/arm/mach-imx/eukrea_mbimx27-baseboard.c +++ b/arch/arm/mach-imx/eukrea_mbimx27-baseboard.c @@ -250,9 +250,6 @@ static const struct imxuart_platform_data uart_pdata __initconst = { .flags = IMXUART_HAVE_RTSCTS, }; -#if defined(CONFIG_TOUCHSCREEN_ADS7846) \ - || defined(CONFIG_TOUCHSCREEN_ADS7846_MODULE) - #define ADS7846_PENDOWN (GPIO_PORTD | 25) static void ads7846_dev_init(void) @@ -273,9 +270,7 @@ static struct ads7846_platform_data ads7846_config __initdata = { .get_pendown_state = ads7846_get_pendown_state, .keep_vref_on = 1, }; -#endif -#if defined(CONFIG_SPI_IMX) || defined(CONFIG_SPI_IMX_MODULE) static struct spi_board_info eukrea_mbimx27_spi_board_info[] __initdata = { [0] = { .modalias = "ads7846", @@ -294,7 +289,6 @@ static const struct spi_imx_master eukrea_mbimx27_spi0_data __initconst = { .chipselect = eukrea_mbimx27_spi_cs, .num_chipselect = ARRAY_SIZE(eukrea_mbimx27_spi_cs), }; -#endif static struct i2c_board_info eukrea_mbimx27_i2c_devices[] = { { -- cgit v0.10.2 From 46e3f3075931493f65e9561ef57bcc23fe077a13 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Tue, 9 Nov 2010 08:47:54 +0200 Subject: mx25: fix spi device registration typo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 35bab0589b9a71533b37280eefa430c21dc102fe (ARM: imx: change the way spi-imx devices are registered) contained a typo in mx25, leading to link time failure. 
Signed-off-by: Baruch Siach Signed-off-by: Sascha Hauer Acked-by: Uwe Kleine-König diff --git a/arch/arm/mach-mx25/devices-imx25.h b/arch/arm/mach-mx25/devices-imx25.h index 93afa10..d94d282 100644 --- a/arch/arm/mach-mx25/devices-imx25.h +++ b/arch/arm/mach-mx25/devices-imx25.h @@ -42,9 +42,9 @@ extern const struct imx_mxc_nand_data imx25_mxc_nand_data __initconst; #define imx25_add_mxc_nand(pdata) \ imx_add_mxc_nand(&imx25_mxc_nand_data, pdata) -extern const struct imx_spi_imx_data imx25_spi_imx_data[] __initconst; +extern const struct imx_spi_imx_data imx25_cspi_data[] __initconst; #define imx25_add_spi_imx(id, pdata) \ - imx_add_spi_imx(&imx25_spi_imx_data[id], pdata) + imx_add_spi_imx(&imx25_cspi_data[id], pdata) #define imx25_add_spi_imx0(pdata) imx25_add_spi_imx(0, pdata) #define imx25_add_spi_imx1(pdata) imx25_add_spi_imx(1, pdata) #define imx25_add_spi_imx2(pdata) imx25_add_spi_imx(2, pdata) -- cgit v0.10.2 From d69b78ba1deaaa95ffa8dac5a9ca819ce454d52e Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Mon, 15 Nov 2010 10:20:52 +0100 Subject: ioprio: grab rcu_read_lock in sys_ioprio_{set,get}() Using: - CONFIG_LOCKUP_DETECTOR=y - CONFIG_PREEMPT=y - CONFIG_LOCKDEP=y - CONFIG_PROVE_LOCKING=y - CONFIG_PROVE_RCU=y found a missing rcu lock during boot on a 512 MiB x86_64 ubuntu vm: =================================================== [ INFO: suspicious rcu_dereference_check() usage. ] --------------------------------------------------- kernel/pid.c:419 invoked rcu_dereference_check() without protection! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 1 lock held by ureadahead/1355: #0: (tasklist_lock){.+.+..}, at: [] sys_ioprio_set+0x7f/0x29e stack backtrace: Pid: 1355, comm: ureadahead Not tainted 2.6.37-dbg-DEV #1 Call Trace: [] lockdep_rcu_dereference+0xaa/0xb3 [] find_task_by_pid_ns+0x44/0x5d [] find_task_by_vpid+0x22/0x24 [] sys_ioprio_set+0xb4/0x29e [] ? trace_hardirqs_off_thunk+0x3a/0x3c [] sysenter_dispatch+0x7/0x2c [] ? 
trace_hardirqs_on_thunk+0x3a/0x3f The fix is to: a) grab rcu lock in sys_ioprio_{set,get}() and b) avoid grabbing tasklist_lock. Discussion in: http://marc.info/?l=linux-kernel&m=128951324702889 Signed-off-by: Greg Thelen Acked-by: Paul E. McKenney Reviewed-by: Oleg Nesterov Modified by Jens to remove the now redundant inner rcu lock and unlock since they are now protected by the outer lock. Signed-off-by: Jens Axboe diff --git a/fs/ioprio.c b/fs/ioprio.c index 2f7d05c..7da2a06 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -103,22 +103,15 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio) } ret = -ESRCH; - /* - * We want IOPRIO_WHO_PGRP/IOPRIO_WHO_USER to be "atomic", - * so we can't use rcu_read_lock(). See re-copy of ->ioprio - * in copy_process(). - */ - read_lock(&tasklist_lock); + rcu_read_lock(); switch (which) { case IOPRIO_WHO_PROCESS: - rcu_read_lock(); if (!who) p = current; else p = find_task_by_vpid(who); if (p) ret = set_task_ioprio(p, ioprio); - rcu_read_unlock(); break; case IOPRIO_WHO_PGRP: if (!who) @@ -141,12 +134,7 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio) break; do_each_thread(g, p) { - int match; - - rcu_read_lock(); - match = __task_cred(p)->uid == who; - rcu_read_unlock(); - if (!match) + if (__task_cred(p)->uid != who) continue; ret = set_task_ioprio(p, ioprio); if (ret) @@ -160,7 +148,7 @@ free_uid: ret = -EINVAL; } - read_unlock(&tasklist_lock); + rcu_read_unlock(); return ret; } @@ -204,17 +192,15 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who) int ret = -ESRCH; int tmpio; - read_lock(&tasklist_lock); + rcu_read_lock(); switch (which) { case IOPRIO_WHO_PROCESS: - rcu_read_lock(); if (!who) p = current; else p = find_task_by_vpid(who); if (p) ret = get_task_ioprio(p); - rcu_read_unlock(); break; case IOPRIO_WHO_PGRP: if (!who) @@ -241,12 +227,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who) break; do_each_thread(g, p) { - int match; - - rcu_read_lock(); - match = __task_cred(p)->uid == 
user->uid; - rcu_read_unlock(); - if (!match) + if (__task_cred(p)->uid != user->uid) continue; tmpio = get_task_ioprio(p); if (tmpio < 0) @@ -264,6 +245,6 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who) ret = -EINVAL; } - read_unlock(&tasklist_lock); + rcu_read_unlock(); return ret; } -- cgit v0.10.2 From 9a1683d1dd14d6ed35d2884c6b79ff12fc6bef39 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 15 Nov 2010 18:14:43 +0900 Subject: sh: clkfwk: Kill off unused clk_set_rate_ex(). With the refactoring of the SH7722 clock framework some time ago this abstraction has become unnecessary. Kill it off before anyone else gets the bright idea to start using it. Signed-off-by: Paul Mundt diff --git a/Documentation/DocBook/sh.tmpl b/Documentation/DocBook/sh.tmpl index d858d92..4a38f60 100644 --- a/Documentation/DocBook/sh.tmpl +++ b/Documentation/DocBook/sh.tmpl @@ -79,10 +79,6 @@ - - Clock Framework Extensions -!Iinclude/linux/sh_clk.h - Machine Specific Interfaces diff --git a/Documentation/sh/clk.txt b/Documentation/sh/clk.txt deleted file mode 100644 index 114b595..0000000 --- a/Documentation/sh/clk.txt +++ /dev/null @@ -1,32 +0,0 @@ -Clock framework on SuperH architecture - -The framework on SH extends existing API by the function clk_set_rate_ex, -which prototype is as follows: - - clk_set_rate_ex (struct clk *clk, unsigned long rate, int algo_id) - -The algo_id parameter is used to specify algorithm used to recalculate clocks, -adjanced to clock, specified as first argument. It is assumed that algo_id==0 -means no changes to adjanced clock - -Internally, the clk_set_rate_ex forwards request to clk->ops->set_rate method, -if it is present in ops structure. The method should set the clock rate and adjust -all needed clocks according to the passed algo_id. -Exact values for algo_id are machine-dependent. 
For the sh7722, the following -values are defined: - - NO_CHANGE = 0, - IUS_N1_N1, /* I:U = N:1, U:Sh = N:1 */ - IUS_322, /* I:U:Sh = 3:2:2 */ - IUS_522, /* I:U:Sh = 5:2:2 */ - IUS_N11, /* I:U:Sh = N:1:1 */ - SB_N1, /* Sh:B = N:1 */ - SB3_N1, /* Sh:B3 = N:1 */ - SB3_32, /* Sh:B3 = 3:2 */ - SB3_43, /* Sh:B3 = 4:3 */ - SB3_54, /* Sh:B3 = 5:4 */ - BP_N1, /* B:P = N:1 */ - IP_N1 /* I:P = N:1 */ - -Each of these constants means relation between clocks that can be set via the FRQCR -register diff --git a/drivers/sh/clk/core.c b/drivers/sh/clk/core.c index cb12a8e..69be6bb 100644 --- a/drivers/sh/clk/core.c +++ b/drivers/sh/clk/core.c @@ -455,19 +455,13 @@ EXPORT_SYMBOL_GPL(clk_get_rate); int clk_set_rate(struct clk *clk, unsigned long rate) { - return clk_set_rate_ex(clk, rate, 0); -} -EXPORT_SYMBOL_GPL(clk_set_rate); - -int clk_set_rate_ex(struct clk *clk, unsigned long rate, int algo_id) -{ int ret = -EOPNOTSUPP; unsigned long flags; spin_lock_irqsave(&clock_lock, flags); if (likely(clk->ops && clk->ops->set_rate)) { - ret = clk->ops->set_rate(clk, rate, algo_id); + ret = clk->ops->set_rate(clk, rate, 0); if (ret != 0) goto out_unlock; } else { @@ -485,7 +479,7 @@ out_unlock: return ret; } -EXPORT_SYMBOL_GPL(clk_set_rate_ex); +EXPORT_SYMBOL_GPL(clk_set_rate); int clk_set_parent(struct clk *clk, struct clk *parent) { @@ -654,7 +648,7 @@ static int clks_sysdev_suspend(struct sys_device *dev, pm_message_t state) clkp->parent); if (likely(clkp->ops->set_rate)) clkp->ops->set_rate(clkp, - rate, NO_CHANGE); + rate, 0); else if (likely(clkp->ops->recalc)) clkp->rate = clkp->ops->recalc(clkp); } diff --git a/include/linux/sh_clk.h b/include/linux/sh_clk.h index cea0c38..30885d9 100644 --- a/include/linux/sh_clk.h +++ b/include/linux/sh_clk.h @@ -67,36 +67,6 @@ int clk_register(struct clk *); void clk_unregister(struct clk *); void clk_enable_init_clocks(void); -/** - * clk_set_rate_ex - set the clock rate for a clock source, with additional parameter - * @clk: clock source - * 
@rate: desired clock rate in Hz - * @algo_id: algorithm id to be passed down to ops->set_rate - * - * Returns success (0) or negative errno. - */ -int clk_set_rate_ex(struct clk *clk, unsigned long rate, int algo_id); - -enum clk_sh_algo_id { - NO_CHANGE = 0, - - IUS_N1_N1, - IUS_322, - IUS_522, - IUS_N11, - - SB_N1, - - SB3_N1, - SB3_32, - SB3_43, - SB3_54, - - BP_N1, - - IP_N1, -}; - struct clk_div_mult_table { unsigned int *divisors; unsigned int nr_divisors; -- cgit v0.10.2 From 35a96c739fd7624b8edff990a74b86b5a85342da Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 15 Nov 2010 18:18:32 +0900 Subject: sh: clkfwk: Kill off now unused algo_id in set_rate op. Now that clk_set_rate_ex() is gone, there is also no way to get at rate setting algo id, which is now also completely unused. Kill it off before new clock ops start using it. Signed-off-by: Paul Mundt diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index 7db31e6..b25ce90 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -220,8 +220,7 @@ static void pllc2_disable(struct clk *clk) __raw_writel(__raw_readl(PLLC2CR) & ~0x80000000, PLLC2CR); } -static int pllc2_set_rate(struct clk *clk, - unsigned long rate, int algo_id) +static int pllc2_set_rate(struct clk *clk, unsigned long rate) { unsigned long value; int idx; @@ -463,8 +462,7 @@ static int fsidiv_enable(struct clk *clk) return 0; } -static int fsidiv_set_rate(struct clk *clk, - unsigned long rate, int algo_id) +static int fsidiv_set_rate(struct clk *clk, unsigned long rate) { int idx; diff --git a/arch/sh/kernel/cpu/sh4/clock-sh4-202.c b/arch/sh/kernel/cpu/sh4/clock-sh4-202.c index 4eabc68c..b601fa3 100644 --- a/arch/sh/kernel/cpu/sh4/clock-sh4-202.c +++ b/arch/sh/kernel/cpu/sh4/clock-sh4-202.c @@ -110,7 +110,7 @@ static int shoc_clk_verify_rate(struct clk *clk, unsigned long rate) return 0; } -static int shoc_clk_set_rate(struct clk *clk, unsigned long rate, int 
algo_id) +static int shoc_clk_set_rate(struct clk *clk, unsigned long rate) { unsigned long frqcr3; unsigned int tmp; diff --git a/drivers/sh/clk/core.c b/drivers/sh/clk/core.c index 69be6bb..87743e7 100644 --- a/drivers/sh/clk/core.c +++ b/drivers/sh/clk/core.c @@ -461,7 +461,7 @@ int clk_set_rate(struct clk *clk, unsigned long rate) spin_lock_irqsave(&clock_lock, flags); if (likely(clk->ops && clk->ops->set_rate)) { - ret = clk->ops->set_rate(clk, rate, 0); + ret = clk->ops->set_rate(clk, rate); if (ret != 0) goto out_unlock; } else { @@ -647,8 +647,7 @@ static int clks_sysdev_suspend(struct sys_device *dev, pm_message_t state) clkp->ops->set_parent(clkp, clkp->parent); if (likely(clkp->ops->set_rate)) - clkp->ops->set_rate(clkp, - rate, 0); + clkp->ops->set_rate(clkp, rate); else if (likely(clkp->ops->recalc)) clkp->rate = clkp->ops->recalc(clkp); } diff --git a/drivers/sh/clk/cpg.c b/drivers/sh/clk/cpg.c index 3aea5f0..359e9a3 100644 --- a/drivers/sh/clk/cpg.c +++ b/drivers/sh/clk/cpg.c @@ -110,8 +110,7 @@ static int sh_clk_div6_set_parent(struct clk *clk, struct clk *parent) return 0; } -static int sh_clk_div6_set_rate(struct clk *clk, - unsigned long rate, int algo_id) +static int sh_clk_div6_set_rate(struct clk *clk, unsigned long rate) { unsigned long value; int idx; @@ -253,7 +252,7 @@ static int sh_clk_div4_set_parent(struct clk *clk, struct clk *parent) return 0; } -static int sh_clk_div4_set_rate(struct clk *clk, unsigned long rate, int algo_id) +static int sh_clk_div4_set_rate(struct clk *clk, unsigned long rate) { struct clk_div4_table *d4t = clk->priv; unsigned long value; diff --git a/include/linux/sh_clk.h b/include/linux/sh_clk.h index 30885d9..038475a 100644 --- a/include/linux/sh_clk.h +++ b/include/linux/sh_clk.h @@ -23,7 +23,7 @@ struct clk_ops { int (*enable)(struct clk *clk); void (*disable)(struct clk *clk); unsigned long (*recalc)(struct clk *clk); - int (*set_rate)(struct clk *clk, unsigned long rate, int algo_id); + int 
(*set_rate)(struct clk *clk, unsigned long rate); int (*set_parent)(struct clk *clk, struct clk *parent); long (*round_rate)(struct clk *clk, unsigned long rate); }; -- cgit v0.10.2 From fcb7193096969ca9e5b9219b9a99ddf299d4054f Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 15 Nov 2010 00:23:42 -0800 Subject: Input: sysrq - pass along lone Alt + SysRq When user presses and releases Alt + SysRq without pressing any of the hot keys re-inject the combination and pass it on to userspace instead of suppressing it - maybe he or she wanted to take print screen instead of invoking SysRq handler. Also pass along release events for keys that have been pressed before SysRq mode has been invoked so that keys do not appear to be "stuck". Acked-by: Jason Wessel Tested-by: Jason Wessel Signed-off-by: Dmitry Torokhov diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index eaa5d3e..c556ed9 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -554,7 +554,7 @@ EXPORT_SYMBOL(handle_sysrq); #ifdef CONFIG_INPUT /* Simple translation table for the SysRq keys */ -static const unsigned char sysrq_xlate[KEY_MAX + 1] = +static const unsigned char sysrq_xlate[KEY_CNT] = "\000\0331234567890-=\177\t" /* 0x00 - 0x0f */ "qwertyuiop[]\r\000as" /* 0x10 - 0x1f */ "dfghjkl;'`\000\\zxcv" /* 0x20 - 0x2f */ @@ -563,53 +563,129 @@ static const unsigned char sysrq_xlate[KEY_MAX + 1] = "230\177\000\000\213\214\000\000\000\000\000\000\000\000\000\000" /* 0x50 - 0x5f */ "\r\000/"; /* 0x60 - 0x6f */ -static bool sysrq_down; -static int sysrq_alt_use; -static int sysrq_alt; -static DEFINE_SPINLOCK(sysrq_event_lock); +struct sysrq_state { + struct input_handle handle; + struct work_struct reinject_work; + unsigned long key_down[BITS_TO_LONGS(KEY_CNT)]; + unsigned int alt; + unsigned int alt_use; + bool active; + bool need_reinject; +}; + +static void sysrq_reinject_alt_sysrq(struct work_struct *work) +{ + struct sysrq_state *sysrq = + container_of(work, struct sysrq_state, 
reinject_work); + struct input_handle *handle = &sysrq->handle; + unsigned int alt_code = sysrq->alt_use; + + if (sysrq->need_reinject) { + /* Simulate press and release of Alt + SysRq */ + input_inject_event(handle, EV_KEY, alt_code, 1); + input_inject_event(handle, EV_KEY, KEY_SYSRQ, 1); + input_inject_event(handle, EV_SYN, SYN_REPORT, 1); + + input_inject_event(handle, EV_KEY, KEY_SYSRQ, 0); + input_inject_event(handle, EV_KEY, alt_code, 0); + input_inject_event(handle, EV_SYN, SYN_REPORT, 1); + } +} -static bool sysrq_filter(struct input_handle *handle, unsigned int type, - unsigned int code, int value) +static bool sysrq_filter(struct input_handle *handle, + unsigned int type, unsigned int code, int value) { + struct sysrq_state *sysrq = handle->private; + bool was_active = sysrq->active; bool suppress; - /* We are called with interrupts disabled, just take the lock */ - spin_lock(&sysrq_event_lock); + switch (type) { - if (type != EV_KEY) - goto out; + case EV_SYN: + suppress = false; + break; - switch (code) { + case EV_KEY: + switch (code) { - case KEY_LEFTALT: - case KEY_RIGHTALT: - if (value) - sysrq_alt = code; - else { - if (sysrq_down && code == sysrq_alt_use) - sysrq_down = false; + case KEY_LEFTALT: + case KEY_RIGHTALT: + if (!value) { + /* One of ALTs is being released */ + if (sysrq->active && code == sysrq->alt_use) + sysrq->active = false; - sysrq_alt = 0; + sysrq->alt = KEY_RESERVED; + + } else if (value != 2) { + sysrq->alt = code; + sysrq->need_reinject = false; + } + break; + + case KEY_SYSRQ: + if (value == 1 && sysrq->alt != KEY_RESERVED) { + sysrq->active = true; + sysrq->alt_use = sysrq->alt; + /* + * If nothing else will be pressed we'll need + * to * re-inject Alt-SysRq keysroke. + */ + sysrq->need_reinject = true; + } + + /* + * Pretend that sysrq was never pressed at all. This + * is needed to properly handle KGDB which will try + * to release all keys after exiting debugger. 
If we + * do not clear key bit it KGDB will end up sending + * release events for Alt and SysRq, potentially + * triggering print screen function. + */ + if (sysrq->active) + clear_bit(KEY_SYSRQ, handle->dev->key); + + break; + + default: + if (sysrq->active && value && value != 2) { + sysrq->need_reinject = false; + __handle_sysrq(sysrq_xlate[code], true); + } + break; } - break; - case KEY_SYSRQ: - if (value == 1 && sysrq_alt) { - sysrq_down = true; - sysrq_alt_use = sysrq_alt; + suppress = sysrq->active; + + if (!sysrq->active) { + /* + * If we are not suppressing key presses keep track of + * keyboard state so we can release keys that have been + * pressed before entering SysRq mode. + */ + if (value) + set_bit(code, sysrq->key_down); + else + clear_bit(code, sysrq->key_down); + + if (was_active) + schedule_work(&sysrq->reinject_work); + + } else if (value == 0 && + test_and_clear_bit(code, sysrq->key_down)) { + /* + * Pass on release events for keys that was pressed before + * entering SysRq mode. 
+ */ + suppress = false; } break; default: - if (sysrq_down && value && value != 2) - __handle_sysrq(sysrq_xlate[code], true); + suppress = sysrq->active; break; } -out: - suppress = sysrq_down; - spin_unlock(&sysrq_event_lock); - return suppress; } @@ -617,28 +693,28 @@ static int sysrq_connect(struct input_handler *handler, struct input_dev *dev, const struct input_device_id *id) { - struct input_handle *handle; + struct sysrq_state *sysrq; int error; - sysrq_down = false; - sysrq_alt = 0; - - handle = kzalloc(sizeof(struct input_handle), GFP_KERNEL); - if (!handle) + sysrq = kzalloc(sizeof(struct sysrq_state), GFP_KERNEL); + if (!sysrq) return -ENOMEM; - handle->dev = dev; - handle->handler = handler; - handle->name = "sysrq"; + INIT_WORK(&sysrq->reinject_work, sysrq_reinject_alt_sysrq); + + sysrq->handle.dev = dev; + sysrq->handle.handler = handler; + sysrq->handle.name = "sysrq"; + sysrq->handle.private = sysrq; - error = input_register_handle(handle); + error = input_register_handle(&sysrq->handle); if (error) { pr_err("Failed to register input sysrq handler, error %d\n", error); goto err_free; } - error = input_open_device(handle); + error = input_open_device(&sysrq->handle); if (error) { pr_err("Failed to open input device, error %d\n", error); goto err_unregister; @@ -647,17 +723,20 @@ static int sysrq_connect(struct input_handler *handler, return 0; err_unregister: - input_unregister_handle(handle); + input_unregister_handle(&sysrq->handle); err_free: - kfree(handle); + kfree(sysrq); return error; } static void sysrq_disconnect(struct input_handle *handle) { + struct sysrq_state *sysrq = handle->private; + input_close_device(handle); + cancel_work_sync(&sysrq->reinject_work); input_unregister_handle(handle); - kfree(handle); + kfree(sysrq); } /* -- cgit v0.10.2 From 549015c36baadc6e67861bba6e927259e34c4d59 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 15 Nov 2010 18:48:25 +0900 Subject: sh: clkfwk: Disable init clk op for non-legacy clocks. 
Presently it's only legacy users that are using this clock op, guard it with an ifdef to ensure that no new users start using it. Signed-off-by: Paul Mundt diff --git a/include/linux/sh_clk.h b/include/linux/sh_clk.h index 038475a..9a52f72 100644 --- a/include/linux/sh_clk.h +++ b/include/linux/sh_clk.h @@ -19,7 +19,9 @@ struct clk_mapping { }; struct clk_ops { +#ifdef CONFIG_SH_CLK_CPG_LEGACY void (*init)(struct clk *clk); +#endif int (*enable)(struct clk *clk); void (*disable)(struct clk *clk); unsigned long (*recalc)(struct clk *clk); -- cgit v0.10.2 From ccb3b84fa0fb6fb7b46b461881fd60440f579696 Mon Sep 17 00:00:00 2001 From: Vasily Khoruzhick Date: Sat, 13 Nov 2010 14:53:41 +0200 Subject: ASoC: RX1950: Fix hw_params function Unfortunately, I misunderstood the datasheet: on s3c244x-iis, when MPLLin is selected as the source for the master clock, the prescaler has no effect. Remove the divisor calculation for the 44100 rate; remove the 88200 rate altogether, as rx1950 can't do it. Signed-off-by: Vasily Khoruzhick Acked-by: Liam Girdwood Signed-off-by: Mark Brown diff --git a/sound/soc/s3c24xx/rx1950_uda1380.c b/sound/soc/s3c24xx/rx1950_uda1380.c index ffd5cf2..468cc11 100644 --- a/sound/soc/s3c24xx/rx1950_uda1380.c +++ b/sound/soc/s3c24xx/rx1950_uda1380.c @@ -50,7 +50,6 @@ static unsigned int rates[] = { 16000, 44100, 48000, - 88200, }; static struct snd_pcm_hw_constraint_list hw_rates = { @@ -130,7 +129,6 @@ static const struct snd_soc_dapm_route audio_map[] = { }; static struct platform_device *s3c24xx_snd_device; -static struct clk *xtal; static int rx1950_startup(struct snd_pcm_substream *substream) { @@ -179,10 +177,8 @@ static int rx1950_hw_params(struct snd_pcm_substream *substream, case 44100: case 88200: clk_source = S3C24XX_CLKSRC_MPLL; - fs_mode = S3C2410_IISMOD_256FS; - div = clk_get_rate(xtal) / (256 * rate); - if (clk_get_rate(xtal) % (256 * rate) > (128 * rate)) - div++; + fs_mode = S3C2410_IISMOD_384FS; + div = 1; break; default: printk(KERN_ERR "%s: rate %d is not supported\n", 
@@ -210,7 +206,7 @@ static int rx1950_hw_params(struct snd_pcm_substream *substream, /* set MCLK division for sample rate */ ret = snd_soc_dai_set_clkdiv(cpu_dai, S3C24XX_DIV_MCLK, - S3C2410_IISMOD_384FS); + fs_mode); if (ret < 0) return ret; @@ -295,17 +291,8 @@ static int __init rx1950_init(void) goto err_plat_add; } - xtal = clk_get(&s3c24xx_snd_device->dev, "xtal"); - - if (IS_ERR(xtal)) { - ret = PTR_ERR(xtal); - platform_device_unregister(s3c24xx_snd_device); - goto err_clk; - } - return 0; -err_clk: err_plat_add: err_plat_alloc: err_gpio_conf: @@ -320,7 +307,6 @@ static void __exit rx1950_exit(void) platform_device_unregister(s3c24xx_snd_device); snd_soc_jack_free_gpios(&hp_jack, ARRAY_SIZE(hp_jack_gpios), hp_jack_gpios); - clk_put(xtal); gpio_free(S3C2410_GPA(1)); } -- cgit v0.10.2 From bcbb243396b82b0369465e9a547b7d5278cd26ad Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 12 Nov 2010 15:14:55 +0000 Subject: ASoC: Fix dapm_seq_compare() for multi-component Ensure that we keep all widget powerups in DAPM sequence by making the CODEC the last thing we compare on rather than the first thing. Also fix the fact that we're currently comparing the widget pointers rather than the CODEC pointers when we do the subtraction, which means we don't get stable results. 
Signed-off-by: Mark Brown diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 7d85c64..75ed649 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -683,12 +683,12 @@ static int dapm_seq_compare(struct snd_soc_dapm_widget *a, struct snd_soc_dapm_widget *b, int sort[]) { - if (a->codec != b->codec) - return (unsigned long)a - (unsigned long)b; if (sort[a->id] != sort[b->id]) return sort[a->id] - sort[b->id]; if (a->reg != b->reg) return a->reg - b->reg; + if (a->codec != b->codec) + return (unsigned long)a->codec - (unsigned long)b->codec; return 0; } -- cgit v0.10.2 From 24f3f6b5eff92608a62449e33bfac0eed1447d02 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 15 Nov 2010 09:18:49 -0500 Subject: arch/tile: fix rwlock so would-be write lockers don't block new readers This avoids a deadlock in the IGMP code where one core gets a read lock, another core starts trying to get a write lock (thus blocking new readers), and then the first core tries to recursively re-acquire the read lock. We still try to preserve some degree of balance by giving priority to additional write lockers that come along while the lock is held for write, so they can all complete quickly and return the lock to the readers. Signed-off-by: Chris Metcalf diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c index 485e24d..5cd1c40 100644 --- a/arch/tile/lib/spinlock_32.c +++ b/arch/tile/lib/spinlock_32.c @@ -167,23 +167,30 @@ void arch_write_lock_slow(arch_rwlock_t *rwlock, u32 val) * when we compare them. */ u32 my_ticket_; + u32 iterations = 0; - /* Take out the next ticket; this will also stop would-be readers. */ - if (val & 1) - val = get_rwlock(rwlock); - rwlock->lock = __insn_addb(val, 1 << WR_NEXT_SHIFT); + /* + * Wait until there are no readers, then bump up the next + * field and capture the ticket value. 
+ */ + for (;;) { + if (!(val & 1)) { + if ((val >> RD_COUNT_SHIFT) == 0) + break; + rwlock->lock = val; + } + delay_backoff(iterations++); + val = __insn_tns((int *)&rwlock->lock); + } - /* Extract my ticket value from the original word. */ + /* Take out the next ticket and extract my ticket value. */ + rwlock->lock = __insn_addb(val, 1 << WR_NEXT_SHIFT); my_ticket_ = val >> WR_NEXT_SHIFT; - /* - * Wait until the "current" field matches our ticket, and - * there are no remaining readers. - */ + /* Wait until the "current" field matches our ticket. */ for (;;) { u32 curr_ = val >> WR_CURR_SHIFT; - u32 readers = val >> RD_COUNT_SHIFT; - u32 delta = ((my_ticket_ - curr_) & WR_MASK) + !!readers; + u32 delta = ((my_ticket_ - curr_) & WR_MASK); if (likely(delta == 0)) break; -- cgit v0.10.2 From c2f6805d470af369a7337801deeecea800dbfe1c Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Mon, 15 Nov 2010 19:32:42 +0100 Subject: blk-throttle: Fix calculation of max number of WRITES to be dispatched o Currently we try to dispatch more READS and less WRITES (75%, 25%) in one dispatch round. ummy pointed out that there is a bug in max_nr_writes calculation. This patch fixes it. 
Reported-by: ummy y Signed-off-by: Vivek Goyal Signed-off-by: Jens Axboe diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 56ad453..004be80 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -645,7 +645,7 @@ static int throtl_dispatch_tg(struct throtl_data *td, struct throtl_grp *tg, { unsigned int nr_reads = 0, nr_writes = 0; unsigned int max_nr_reads = throtl_grp_quantum*3/4; - unsigned int max_nr_writes = throtl_grp_quantum - nr_reads; + unsigned int max_nr_writes = throtl_grp_quantum - max_nr_reads; struct bio *bio; /* Try to dispatch 75% READS and 25% WRITES */ -- cgit v0.10.2 From 3e9bb2a071614f1d185740f31ac503ecba11d783 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Mon, 15 Nov 2010 19:32:43 +0100 Subject: block: fix amiga and atari floppy driver compile warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Geert, my crosstool don't produce warning below. I guess this has to do something with compiler version. - Geert noticed following warning during compilation. drivers/block/amiflop.c:1344: warning: ‘rq’ may be used uninitialized in this function drivers/block/ataflop.c:1402: warning: ‘rq’ may be used uninitialized in this function - Initialize rq to NULL to fix the warning. If we can't find a suitable request to dispatch, this function should return NULL instead of a possibly garbage pointer. - Cross compile tested only. Don't have hardware to test it. 
Reported-by: Geert Uytterhoeven Signed-off-by: Vivek Goyal Signed-off-by: Jens Axboe diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index a1725e6..7888501 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1341,7 +1341,7 @@ static struct request *set_next_request(void) { struct request_queue *q; int cnt = FD_MAX_UNITS; - struct request *rq; + struct request *rq = NULL; /* Find next queue we can dispatch from */ fdc_queue = fdc_queue + 1; diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 4e4cc6c..605a67e 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -1399,7 +1399,7 @@ static struct request *set_next_request(void) { struct request_queue *q; int old_pos = fdc_queue; - struct request *rq; + struct request *rq = NULL; do { q = unit[fdc_queue].disk->queue; -- cgit v0.10.2 From 3b42a96dc7870c53d20b419185737d3b8f7a7b74 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Mon, 15 Nov 2010 06:01:59 +0000 Subject: net: rtnetlink.h -- only include linux/netdevice.h when used by the kernel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit below added a new helper dev_ingress_queue to cleanly obtain the ingress queue pointer. This necessitated including 'linux/netdevice.h': commit 24824a09e35402b8d58dcc5be803a5ad3937bdba Author: Eric Dumazet Date: Sat Oct 2 06:11:55 2010 +0000 net: dynamic ingress_queue allocation However this include triggers issues for applications in userspace which use the rtnetlink interfaces. 
Commonly this requires they include 'net/if.h' and 'linux/rtnetlink.h' leading to a compiler error as below: In file included from /usr/include/linux/netdevice.h:28:0, from /usr/include/linux/rtnetlink.h:9, from t.c:2: /usr/include/linux/if.h:135:8: error: redefinition of ‘struct ifmap’ /usr/include/net/if.h:112:8: note: originally defined here /usr/include/linux/if.h:169:8: error: redefinition of ‘struct ifreq’ /usr/include/net/if.h:127:8: note: originally defined here /usr/include/linux/if.h:218:8: error: redefinition of ‘struct ifconf’ /usr/include/net/if.h:177:8: note: originally defined here The new helper is only defined for the kernel and protected by __KERNEL__ therefore we can simply pull the include down into the same protected section. Signed-off-by: Andy Whitcroft Signed-off-by: David S. Miller diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index d42f2744..bbad657 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -6,7 +6,6 @@ #include #include #include -#include /* rtnetlink families. Values up to 127 are reserved for real address * families, values above 128 may be used arbitrarily. 
@@ -606,6 +605,7 @@ struct tcamsg { #ifdef __KERNEL__ #include +#include static __inline__ int rtattr_strcmp(const struct rtattr *rta, const char *str) { -- cgit v0.10.2 From 62370e2b9376ea7b76e0423de28ccb322c17e2da Mon Sep 17 00:00:00 2001 From: Arnd Hannemann Date: Thu, 11 Nov 2010 11:44:32 -0600 Subject: b43legacy: Fix compile on ARM architecture When b43legacy is compiled on the arm platform, the following errors are seen: CC [M] drivers/net/wireless/b43legacy/xmit.o In file included from include/net/dst.h:11, from drivers/net/wireless/b43legacy/xmit.c:31: include/net/dst_ops.h:28: error: expected ':', ',', ';', '}' or '__attribute__' before '____cacheline_aligned_in_smp' include/net/dst_ops.h: In function 'dst_entries_get_fast': include/net/dst_ops.h:33: error: 'struct dst_ops' has no member named 'pcpuc_entries' include/net/dst_ops.h: In function 'dst_entries_get_slow': include/net/dst_ops.h:41: error: 'struct dst_ops' has no member named 'pcpuc_entries' include/net/dst_ops.h: In function 'dst_entries_add': include/net/dst_ops.h:49: error: 'struct dst_ops' has no member named 'pcpuc_entries' include/net/dst_ops.h: In function 'dst_entries_init': include/net/dst_ops.h:55: error: 'struct dst_ops' has no member named 'pcpuc_entries' include/net/dst_ops.h: In function 'dst_entries_destroy': include/net/dst_ops.h:60: error: 'struct dst_ops' has no member named 'pcpuc_entries' make[4]: *** [drivers/net/wireless/b43legacy/xmit.o] Error 1 make[3]: *** [drivers/net/wireless/b43legacy] Error 2 make[2]: *** [drivers/net/wireless] Error 2 make[1]: *** [drivers/net] Error 2 make: *** [drivers] Error 2 The cause is a missing include of <linux/cache.h>, which is present for i386 and x86_64 architectures, but not for arm. Signed-off-by: Arnd Hannemann Signed-off-by: Larry Finger Cc: Stable Signed-off-by: John W. 
Linville diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 1fa5306..51665b3 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -2,6 +2,7 @@ #define _NET_DST_OPS_H #include #include +#include struct dst_entry; struct kmem_cachep; -- cgit v0.10.2 From 309075cf08ed92a7d2c0e22b7653c5daabbd7ad1 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Fri, 12 Nov 2010 08:53:56 +0200 Subject: cfg80211: fix WIPHY_FLAG_IBSS_RSN bit WIPHY_FLAG_IBSS_RSN is BIT(7) as is WIPHY_FLAG_CONTROL_PORT_PROTOCOL. Change to BIT(8). Signed-off-by: Jussi Kivilinna Acked-by: Johannes Berg Signed-off-by: John W. Linville diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2a7936d..97b8b7c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1355,7 +1355,7 @@ enum wiphy_flags { WIPHY_FLAG_4ADDR_AP = BIT(5), WIPHY_FLAG_4ADDR_STATION = BIT(6), WIPHY_FLAG_CONTROL_PORT_PROTOCOL = BIT(7), - WIPHY_FLAG_IBSS_RSN = BIT(7), + WIPHY_FLAG_IBSS_RSN = BIT(8), }; struct mac_address { -- cgit v0.10.2 From dfa31fef5dd3d204c4cdae7369f3542bd1f7e84a Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Mon, 15 Nov 2010 15:11:26 +0100 Subject: carl9170: fix usb anchor wait timeout usb_wait_anchor_empty_timeout's @timeout wants milliseconds and not jiffies. Signed-off-by: Christian Lamparter Signed-off-by: John W. 
Linville diff --git a/drivers/net/wireless/ath/carl9170/usb.c b/drivers/net/wireless/ath/carl9170/usb.c index 3317039..7504ed1 100644 --- a/drivers/net/wireless/ath/carl9170/usb.c +++ b/drivers/net/wireless/ath/carl9170/usb.c @@ -553,12 +553,12 @@ static int carl9170_usb_flush(struct ar9170 *ar) usb_free_urb(urb); } - ret = usb_wait_anchor_empty_timeout(&ar->tx_cmd, HZ); + ret = usb_wait_anchor_empty_timeout(&ar->tx_cmd, 1000); if (ret == 0) err = -ETIMEDOUT; /* lets wait a while until the tx - queues are dried out */ - ret = usb_wait_anchor_empty_timeout(&ar->tx_anch, HZ); + ret = usb_wait_anchor_empty_timeout(&ar->tx_anch, 1000); if (ret == 0) err = -ETIMEDOUT; -- cgit v0.10.2 From 898213200cbadc570ef4248a6d90430c4a9c2908 Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Fri, 12 Nov 2010 11:59:31 -0800 Subject: xhci: Fix command ring replay after resume. Andiry's xHCI bus suspend patch introduced the possibility of a host controller replaying old commands on the command ring, if the host successfully restores the registers after a resume. After a resume from suspend, the xHCI driver must restore the registers, including the command ring pointer. I had suggested that Andiry set the command ring pointer to the current command ring dequeue pointer, so that the driver wouldn't have to zero the command ring. Unfortunately, setting the command ring pointer to the current dequeue pointer won't work because the register assumes the pointer is 64-byte aligned, and TRBs on the command ring are 16-byte aligned. The lower six bits will always be masked off, leading to the written pointer being up to 3 TRBs behind the intended pointer. Here's a log excerpt. 
On init, the xHCI driver places a vendor-specific command on the command ring: [ 215.750958] xhci_hcd 0000:01:00.0: Vendor specific event TRB type = 48 [ 215.750960] xhci_hcd 0000:01:00.0: NEC firmware version 30.25 [ 215.750962] xhci_hcd 0000:01:00.0: Command ring deq = 0x3781e010 (DMA) When we resume, the command ring dequeue pointer to be written should have been 0x3781e010. Instead, it's 0x3781e000: [ 235.557846] xhci_hcd 0000:01:00.0: // Setting command ring address to 0x3781e001 [ 235.557848] xhci_hcd 0000:01:00.0: `MEM_WRITE_DWORD(3'b000, 64'hffffc900100bc038, 64'h3781e001, 4'hf); [ 235.557850] xhci_hcd 0000:01:00.0: `MEM_WRITE_DWORD(3'b000, 32'hffffc900100bc020, 32'h204, 4'hf); [ 235.557866] usb usb9: root hub lost power or was reset (I can't see the results of this bug because the xHCI restore always fails on this box, and the xHCI driver re-allocates everything.) The fix is to zero the command ring and put the software and hardware enqueue and dequeue pointer back to the beginning of the ring. We do this before the system suspends, to be paranoid and prevent the BIOS from starting the host without clearing the command ring pointer, which might cause the host to muck with stale memory. (The pointer isn't required to be in the suspend power well, but it could be.) The command ring pointer is set again after the host resumes. 
Signed-off-by: Sarah Sharp Tested-by: Andiry Xu diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 7c8d70f..06fca08 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -577,6 +577,65 @@ static void xhci_restore_registers(struct xhci_hcd *xhci) xhci_write_64(xhci, xhci->s3.erst_base, &xhci->ir_set->erst_base); } +static void xhci_set_cmd_ring_deq(struct xhci_hcd *xhci) +{ + u64 val_64; + + /* step 2: initialize command ring buffer */ + val_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring); + val_64 = (val_64 & (u64) CMD_RING_RSVD_BITS) | + (xhci_trb_virt_to_dma(xhci->cmd_ring->deq_seg, + xhci->cmd_ring->dequeue) & + (u64) ~CMD_RING_RSVD_BITS) | + xhci->cmd_ring->cycle_state; + xhci_dbg(xhci, "// Setting command ring address to 0x%llx\n", + (long unsigned long) val_64); + xhci_write_64(xhci, val_64, &xhci->op_regs->cmd_ring); +} + +/* + * The whole command ring must be cleared to zero when we suspend the host. + * + * The host doesn't save the command ring pointer in the suspend well, so we + * need to re-program it on resume. Unfortunately, the pointer must be 64-byte + * aligned, because of the reserved bits in the command ring dequeue pointer + * register. Therefore, we can't just set the dequeue pointer back in the + * middle of the ring (TRBs are 16-byte aligned). + */ +static void xhci_clear_command_ring(struct xhci_hcd *xhci) +{ + struct xhci_ring *ring; + struct xhci_segment *seg; + + ring = xhci->cmd_ring; + seg = ring->deq_seg; + do { + memset(seg->trbs, 0, SEGMENT_SIZE); + seg = seg->next; + } while (seg != ring->deq_seg); + + /* Reset the software enqueue and dequeue pointers */ + ring->deq_seg = ring->first_seg; + ring->dequeue = ring->first_seg->trbs; + ring->enq_seg = ring->deq_seg; + ring->enqueue = ring->dequeue; + + /* + * Ring is now zeroed, so the HW should look for change of ownership + * when the cycle bit is set to 1. + */ + ring->cycle_state = 1; + + /* + * Reset the hardware dequeue pointer. 
+ * Yes, this will need to be re-written after resume, but we're paranoid + * and want to make sure the hardware doesn't access bogus memory + * because, say, the BIOS or an SMI started the host without changing + * the command ring pointers. + */ + xhci_set_cmd_ring_deq(xhci); +} + /* * Stop HC (not bus-specific) * @@ -604,6 +663,7 @@ int xhci_suspend(struct xhci_hcd *xhci) spin_unlock_irq(&xhci->lock); return -ETIMEDOUT; } + xhci_clear_command_ring(xhci); /* step 3: save registers */ xhci_save_registers(xhci); @@ -635,7 +695,6 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) u32 command, temp = 0; struct usb_hcd *hcd = xhci_to_hcd(xhci); struct pci_dev *pdev = to_pci_dev(hcd->self.controller); - u64 val_64; int old_state, retval; old_state = hcd->state; @@ -648,15 +707,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) /* step 1: restore register */ xhci_restore_registers(xhci); /* step 2: initialize command ring buffer */ - val_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring); - val_64 = (val_64 & (u64) CMD_RING_RSVD_BITS) | - (xhci_trb_virt_to_dma(xhci->cmd_ring->deq_seg, - xhci->cmd_ring->dequeue) & - (u64) ~CMD_RING_RSVD_BITS) | - xhci->cmd_ring->cycle_state; - xhci_dbg(xhci, "// Setting command ring address to 0x%llx\n", - (long unsigned long) val_64); - xhci_write_64(xhci, val_64, &xhci->op_regs->cmd_ring); + xhci_set_cmd_ring_deq(xhci); /* step 3: restore state and start state*/ /* step 3: set CRS flag */ command = xhci_readl(xhci, &xhci->op_regs->command); -- cgit v0.10.2 From 00fafcda1773245a5292f953321ec3f0668c8c28 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Mon, 15 Nov 2010 22:45:22 +0100 Subject: PM / PM QoS: Fix reversed min and max pm_qos_get_value had min and max reversed, causing all pm_qos requests to have no effect. Signed-off-by: Colin Cross Acked-by: mark Signed-off-by: Rafael J. 
Wysocki Cc: stable@kernel.org diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c index c7a8f45..aeaa7f8 100644 --- a/kernel/pm_qos_params.c +++ b/kernel/pm_qos_params.c @@ -121,10 +121,10 @@ static inline int pm_qos_get_value(struct pm_qos_object *o) switch (o->type) { case PM_QOS_MIN: - return plist_last(&o->requests)->prio; + return plist_first(&o->requests)->prio; case PM_QOS_MAX: - return plist_first(&o->requests)->prio; + return plist_last(&o->requests)->prio; default: /* runtime check for not using enum */ -- cgit v0.10.2 From e502ac5e1eca99d7dc3f12b2a6780ccbca674858 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 15 Nov 2010 11:11:45 -0800 Subject: USB: atm: ueagle-atm: fix up some permissions on the sysfs files Some of the sysfs files had the incorrect permissions. Some didn't make sense at all (writable for a file that you could not write to?) Reported-by: Linus Torvalds Cc: Matthieu Castet Cc: Stanislaw Gruszka Cc: Damien Bergamini Cc: stable Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c index ea071a5..44447f5 100644 --- a/drivers/usb/atm/ueagle-atm.c +++ b/drivers/usb/atm/ueagle-atm.c @@ -2301,7 +2301,7 @@ out: return ret; } -static DEVICE_ATTR(stat_status, S_IWUGO | S_IRUGO, read_status, reboot); +static DEVICE_ATTR(stat_status, S_IWUSR | S_IRUGO, read_status, reboot); static ssize_t read_human_status(struct device *dev, struct device_attribute *attr, char *buf) @@ -2364,8 +2364,7 @@ out: return ret; } -static DEVICE_ATTR(stat_human_status, S_IWUGO | S_IRUGO, - read_human_status, NULL); +static DEVICE_ATTR(stat_human_status, S_IRUGO, read_human_status, NULL); static ssize_t read_delin(struct device *dev, struct device_attribute *attr, char *buf) @@ -2397,7 +2396,7 @@ out: return ret; } -static DEVICE_ATTR(stat_delin, S_IWUGO | S_IRUGO, read_delin, NULL); +static DEVICE_ATTR(stat_delin, S_IRUGO, read_delin, NULL); #define UEA_ATTR(name, reset) \ \ -- cgit v0.10.2 From 
723b991a62d94f74c9f19abd3da6e937288eb969 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 15 Nov 2010 11:15:11 -0800 Subject: USB: ehci: fix debugfs 'lpm' permissions The permissions for the lpm debugfs file is incorrect, this fixes it. Reported-by: Linus Torvalds Cc: Alek Du Cc: Jacob Pan Cc: David Brownell Cc: Alan Stern Cc: stable Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/host/ehci-dbg.c b/drivers/usb/host/ehci-dbg.c index 86afdc7..6e25996 100644 --- a/drivers/usb/host/ehci-dbg.c +++ b/drivers/usb/host/ehci-dbg.c @@ -1067,7 +1067,7 @@ static inline void create_debug_files (struct ehci_hcd *ehci) &debug_registers_fops)) goto file_error; - if (!debugfs_create_file("lpm", S_IRUGO|S_IWUGO, ehci->debug_dir, bus, + if (!debugfs_create_file("lpm", S_IRUGO|S_IWUSR, ehci->debug_dir, bus, &debug_lpm_fops)) goto file_error; -- cgit v0.10.2 From d9624e75f6ad94d8a0718c1fafa89186d271a78c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 15 Nov 2010 11:17:52 -0800 Subject: USB: storage: sierra_ms: fix sysfs file attribute A non-writable sysfs file shouldn't have writable attributes. Reported-by: Linus Torvalds Cc: Kevin Lloyd Cc: Matthew Dharm Cc: stable Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/storage/sierra_ms.c b/drivers/usb/storage/sierra_ms.c index 57fc2f5..ceba512 100644 --- a/drivers/usb/storage/sierra_ms.c +++ b/drivers/usb/storage/sierra_ms.c @@ -121,7 +121,7 @@ static ssize_t show_truinst(struct device *dev, struct device_attribute *attr, } return result; } -static DEVICE_ATTR(truinst, S_IWUGO | S_IRUGO, show_truinst, NULL); +static DEVICE_ATTR(truinst, S_IRUGO, show_truinst, NULL); int sierra_ms_init(struct us_data *us) { -- cgit v0.10.2 From c990600d340641150f7270470a64bd99a5c0b225 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 15 Nov 2010 11:32:38 -0800 Subject: USB: misc: cypress_cy7c63: fix up some sysfs attribute permissions They should not be writable by any user. 
Reported-by: Linus Torvalds Cc: Oliver Bock Cc: stable Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/misc/cypress_cy7c63.c b/drivers/usb/misc/cypress_cy7c63.c index 2f43c57..9251773 100644 --- a/drivers/usb/misc/cypress_cy7c63.c +++ b/drivers/usb/misc/cypress_cy7c63.c @@ -196,11 +196,9 @@ static ssize_t get_port1_handler(struct device *dev, return read_port(dev, attr, buf, 1, CYPRESS_READ_PORT_ID1); } -static DEVICE_ATTR(port0, S_IWUGO | S_IRUGO, - get_port0_handler, set_port0_handler); +static DEVICE_ATTR(port0, S_IRUGO | S_IWUSR, get_port0_handler, set_port0_handler); -static DEVICE_ATTR(port1, S_IWUGO | S_IRUGO, - get_port1_handler, set_port1_handler); +static DEVICE_ATTR(port1, S_IRUGO | S_IWUSR, get_port1_handler, set_port1_handler); static int cypress_probe(struct usb_interface *interface, -- cgit v0.10.2 From d489a4b3926bad571d404ca6508f6744b9602776 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 15 Nov 2010 11:34:26 -0800 Subject: USB: misc: trancevibrator: fix up a sysfs attribute permission It should not be writable by any user. Reported-by: Linus Torvalds Cc: Sam Hocevar Cc: stable Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/misc/trancevibrator.c b/drivers/usb/misc/trancevibrator.c index d77aba4..f63776a 100644 --- a/drivers/usb/misc/trancevibrator.c +++ b/drivers/usb/misc/trancevibrator.c @@ -86,7 +86,7 @@ static ssize_t set_speed(struct device *dev, struct device_attribute *attr, return count; } -static DEVICE_ATTR(speed, S_IWUGO | S_IRUGO, show_speed, set_speed); +static DEVICE_ATTR(speed, S_IRUGO | S_IWUSR, show_speed, set_speed); static int tv_probe(struct usb_interface *interface, const struct usb_device_id *id) -- cgit v0.10.2 From 48f115470e68d443436b76b22dad63ffbffd6b97 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 15 Nov 2010 11:35:49 -0800 Subject: USB: misc: usbled: fix up some sysfs attribute permissions They should not be writable by any user. 
Reported-by: Linus Torvalds Cc: stable Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/misc/usbled.c b/drivers/usb/misc/usbled.c index 63da2c3..c96f51d 100644 --- a/drivers/usb/misc/usbled.c +++ b/drivers/usb/misc/usbled.c @@ -94,7 +94,7 @@ static ssize_t set_##value(struct device *dev, struct device_attribute *attr, co change_color(led); \ return count; \ } \ -static DEVICE_ATTR(value, S_IWUGO | S_IRUGO, show_##value, set_##value); +static DEVICE_ATTR(value, S_IRUGO | S_IWUSR, show_##value, set_##value); show_set(blue); show_set(red); show_set(green); -- cgit v0.10.2 From e24d7ace4e822debcb78386bf279c9aba4d7fbd1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 15 Nov 2010 11:36:44 -0800 Subject: USB: misc: usbsevseg: fix up some sysfs attribute permissions They should not be writable by any user. Reported-by: Linus Torvalds Cc: Harrison Metzger Cc: stable Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/misc/usbsevseg.c b/drivers/usb/misc/usbsevseg.c index de8ef94..417b8f2 100644 --- a/drivers/usb/misc/usbsevseg.c +++ b/drivers/usb/misc/usbsevseg.c @@ -192,7 +192,7 @@ static ssize_t set_attr_##name(struct device *dev, \ \ return count; \ } \ -static DEVICE_ATTR(name, S_IWUGO | S_IRUGO, show_attr_##name, set_attr_##name); +static DEVICE_ATTR(name, S_IRUGO | S_IWUSR, show_attr_##name, set_attr_##name); static ssize_t show_attr_text(struct device *dev, struct device_attribute *attr, char *buf) @@ -223,7 +223,7 @@ static ssize_t set_attr_text(struct device *dev, return count; } -static DEVICE_ATTR(text, S_IWUGO | S_IRUGO, show_attr_text, set_attr_text); +static DEVICE_ATTR(text, S_IRUGO | S_IWUSR, show_attr_text, set_attr_text); static ssize_t show_attr_decimals(struct device *dev, struct device_attribute *attr, char *buf) @@ -272,8 +272,7 @@ static ssize_t set_attr_decimals(struct device *dev, return count; } -static DEVICE_ATTR(decimals, S_IWUGO | S_IRUGO, - show_attr_decimals, set_attr_decimals); +static DEVICE_ATTR(decimals, 
S_IRUGO | S_IWUSR, show_attr_decimals, set_attr_decimals); static ssize_t show_attr_textmode(struct device *dev, struct device_attribute *attr, char *buf) @@ -319,8 +318,7 @@ static ssize_t set_attr_textmode(struct device *dev, return -EINVAL; } -static DEVICE_ATTR(textmode, S_IWUGO | S_IRUGO, - show_attr_textmode, set_attr_textmode); +static DEVICE_ATTR(textmode, S_IRUGO | S_IWUSR, show_attr_textmode, set_attr_textmode); MYDEV_ATTR_SIMPLE_UNSIGNED(powered, update_display_powered); -- cgit v0.10.2 From 3d965875144b905d71dfb4d291c665c0794222c4 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 15 Nov 2010 11:37:55 -0800 Subject: USB: OTG: langwell_otg: fix up some sysfs attribute permissions They should not be writable by any user. Reported-by: Linus Torvalds Cc: Hao Wu Cc: Alan Cox Cc: Alek Du Cc: stable Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/otg/langwell_otg.c b/drivers/usb/otg/langwell_otg.c index bdc3ea6..9fea482 100644 --- a/drivers/usb/otg/langwell_otg.c +++ b/drivers/usb/otg/langwell_otg.c @@ -1896,7 +1896,7 @@ set_a_bus_req(struct device *dev, struct device_attribute *attr, } return count; } -static DEVICE_ATTR(a_bus_req, S_IRUGO | S_IWUGO, get_a_bus_req, set_a_bus_req); +static DEVICE_ATTR(a_bus_req, S_IRUGO | S_IWUSR, get_a_bus_req, set_a_bus_req); static ssize_t get_a_bus_drop(struct device *dev, struct device_attribute *attr, char *buf) @@ -1942,8 +1942,7 @@ set_a_bus_drop(struct device *dev, struct device_attribute *attr, } return count; } -static DEVICE_ATTR(a_bus_drop, S_IRUGO | S_IWUGO, - get_a_bus_drop, set_a_bus_drop); +static DEVICE_ATTR(a_bus_drop, S_IRUGO | S_IWUSR, get_a_bus_drop, set_a_bus_drop); static ssize_t get_b_bus_req(struct device *dev, struct device_attribute *attr, char *buf) @@ -1988,7 +1987,7 @@ set_b_bus_req(struct device *dev, struct device_attribute *attr, } return count; } -static DEVICE_ATTR(b_bus_req, S_IRUGO | S_IWUGO, get_b_bus_req, set_b_bus_req); +static DEVICE_ATTR(b_bus_req, S_IRUGO | 
S_IWUSR, get_b_bus_req, set_b_bus_req); static ssize_t set_a_clr_err(struct device *dev, struct device_attribute *attr, @@ -2012,7 +2011,7 @@ set_a_clr_err(struct device *dev, struct device_attribute *attr, } return count; } -static DEVICE_ATTR(a_clr_err, S_IWUGO, NULL, set_a_clr_err); +static DEVICE_ATTR(a_clr_err, S_IWUSR, NULL, set_a_clr_err); static struct attribute *inputs_attrs[] = { &dev_attr_a_bus_req.attr, -- cgit v0.10.2 From cc267ec5dfa29eba34cbf4eae3e5db9ca499c179 Mon Sep 17 00:00:00 2001 From: Arnd Hannemann Date: Mon, 15 Nov 2010 21:43:22 +0000 Subject: fbdev: sh_mobile_lcdcfb: fix bug in reconfig() The function sh_mobile_fb_reconfig() contained a bug, which caused the line_length to be set wrongly, if a mode with a different X-resolution than the default one was chosen. This caused 1080p24 mode to not work on AP4EVB. Additionally the notifier chain was also called with the wrong mode. This patch fixes this, by using the X-resolution of the new mode instead of the old one to calculate line length and hands over the correct mode to the notifier chain. Signed-off-by: Arnd Hannemann Signed-off-by: Paul Mundt diff --git a/drivers/video/sh_mobile_lcdcfb.c b/drivers/video/sh_mobile_lcdcfb.c index 9b13647..b02d97a 100644 --- a/drivers/video/sh_mobile_lcdcfb.c +++ b/drivers/video/sh_mobile_lcdcfb.c @@ -860,7 +860,7 @@ static void sh_mobile_fb_reconfig(struct fb_info *info) /* Couldn't reconfigure, hopefully, can continue as before */ return; - info->fix.line_length = mode2.xres * (ch->cfg.bpp / 8); + info->fix.line_length = mode1.xres * (ch->cfg.bpp / 8); /* * fb_set_var() calls the notifier change internally, only if @@ -868,7 +868,7 @@ static void sh_mobile_fb_reconfig(struct fb_info *info) * user event, we have to call the chain ourselves. 
*/ event.info = info; - event.data = &mode2; + event.data = &mode1; fb_notifier_call_chain(evnt, &event); } -- cgit v0.10.2 From e3a4d1d2de7251d4a00b04f50f6b3d2a1fc0fe5f Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Mon, 15 Nov 2010 05:03:13 -0500 Subject: fbdev: da8xx: punt duplicated FBIO_WAITFORVSYNC define This is already defined by linux/fb.h now, so punt the duplicate definition from the driver header. Signed-off-by: Mike Frysinger Signed-off-by: Paul Mundt diff --git a/include/video/da8xx-fb.h b/include/video/da8xx-fb.h index 6316cda..89d43b3 100644 --- a/include/video/da8xx-fb.h +++ b/include/video/da8xx-fb.h @@ -99,7 +99,6 @@ struct lcd_sync_arg { #define FBIPUT_COLOR _IOW('F', 6, int) #define FBIPUT_HSYNC _IOW('F', 9, int) #define FBIPUT_VSYNC _IOW('F', 10, int) -#define FBIO_WAITFORVSYNC _IOW('F', 0x20, u_int32_t) #endif /* ifndef DA8XX_FB_H */ -- cgit v0.10.2 From 8e35f8e7c61c88f9a979a4e6f7f4ffd4c158a88a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 2 Nov 2010 09:11:55 -0400 Subject: NLM: Fix a regression in lockd Nick Bowler reports: There are no unusual messages on the client... but I just logged into the server and I see lots of messages of the following form: nfsd: request from insecure port (192.168.8.199:35766)! nfsd: request from insecure port (192.168.8.199:35766)! nfsd: request from insecure port (192.168.8.199:35766)! nfsd: request from insecure port (192.168.8.199:35766)! nfsd: request from insecure port (192.168.8.199:35766)! Bisected to commit 9247685088398cf21bcb513bd2832b4cd42516c4 (SUNRPC: Properly initialize sock_xprt.srcaddr in all cases) Apparently, removing the 'transport->srcaddr.ss_family = family' from xs_create_sock() triggers this due to nlmclnt_lookup_host() incorrectly initialising the srcaddr family to AF_UNSPEC. 
Reported-by: Nick Bowler Signed-off-by: Trond Myklebust diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 25e21e4..ed0c59f 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -124,7 +124,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni) continue; if (host->h_server != ni->server) continue; - if (ni->server && + if (ni->server && ni->src_len != 0 && !rpc_cmp_addr(nlm_srcaddr(host), ni->src_sap)) continue; @@ -167,6 +167,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni) host->h_addrlen = ni->salen; rpc_set_port(nlm_addr(host), 0); memcpy(nlm_srcaddr(host), ni->src_sap, ni->src_len); + host->h_srcaddrlen = ni->src_len; host->h_version = ni->version; host->h_proto = ni->protocol; host->h_rpcclnt = NULL; @@ -238,9 +239,6 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, const char *hostname, int noresvport) { - const struct sockaddr source = { - .sa_family = AF_UNSPEC, - }; struct nlm_lookup_host_info ni = { .server = 0, .sap = sap, @@ -249,8 +247,6 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, .version = version, .hostname = hostname, .hostname_len = strlen(hostname), - .src_sap = &source, - .src_len = sizeof(source), .noresvport = noresvport, }; @@ -357,7 +353,6 @@ nlm_bind_host(struct nlm_host *host) .protocol = host->h_proto, .address = nlm_addr(host), .addrsize = host->h_addrlen, - .saddress = nlm_srcaddr(host), .timeout = &timeparms, .servername = host->h_name, .program = &nlm_program, @@ -376,6 +371,8 @@ nlm_bind_host(struct nlm_host *host) args.flags |= RPC_CLNT_CREATE_HARDRTRY; if (host->h_noresvport) args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; + if (host->h_srcaddrlen) + args.saddress = nlm_srcaddr(host); clnt = rpc_create(&args); if (!IS_ERR(clnt)) diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index a34dea4..2dee05e 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -43,6 +43,7 @@ struct nlm_host { struct 
sockaddr_storage h_addr; /* peer address */ size_t h_addrlen; struct sockaddr_storage h_srcaddr; /* our address (optional) */ + size_t h_srcaddrlen; struct rpc_clnt *h_rpcclnt; /* RPC client to talk to peer */ char *h_name; /* remote hostname */ u32 h_version; /* interface version */ -- cgit v0.10.2 From 1e657bd51f313d87fbbb22d1edf625dba87ef353 Mon Sep 17 00:00:00 2001 From: Paulius Zaleckas Date: Sun, 31 Oct 2010 18:21:05 +0200 Subject: Regression: fix mounting NFS when NFSv3 support is not compiled Trying to mount NFS (root partition in my case) fails if CONFIG_NFS_V3 is not selected. nfs_validate_mount_data() returns EPROTONOSUPPORT, because of this check: #ifndef CONFIG_NFS_V3 if (args->version == 3) goto out_v3_not_compiled; #endif /* !CONFIG_NFS_V3 */ and args->version was always initialized to 3. It was working in 2.6.36 Signed-off-by: Paulius Zaleckas Signed-off-by: Trond Myklebust diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 0a42e8f..9587506 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -67,6 +67,12 @@ #define NFSDBG_FACILITY NFSDBG_VFS +#ifdef CONFIG_NFS_V3 +#define NFS_DEFAULT_VERSION 3 +#else +#define NFS_DEFAULT_VERSION 2 +#endif + enum { /* Mount options that take no arguments */ Opt_soft, Opt_hard, @@ -2277,7 +2283,7 @@ static int nfs_get_sb(struct file_system_type *fs_type, }; int error = -ENOMEM; - data = nfs_alloc_parsed_mount_data(3); + data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION); mntfh = nfs_alloc_fhandle(); if (data == NULL || mntfh == NULL) goto out_free_fh; -- cgit v0.10.2 From 23ebbd9acf5756b6eb783df84403e3ab668a6bce Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 3 Nov 2010 10:24:16 -0400 Subject: Revert "NFSv4: Fall back to ordinary lookup if nfs4_atomic_open() returns EISDIR" This reverts commit 80e60639f1b7c121a7fea53920c5a4b94009361a. This change requires further fixes to ensure that the open doesn't succeed if the lookup later results in a regular file being created. 
Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 07ac384..635ff65 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1345,12 +1345,12 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry res = NULL; goto out; /* This turned out not to be a regular file */ - case -EISDIR: case -ENOTDIR: goto no_open; case -ELOOP: if (!(nd->intent.open.flags & O_NOFOLLOW)) goto no_open; + /* case -EISDIR: */ /* case -EINVAL: */ default: res = ERR_CAST(inode); -- cgit v0.10.2 From 8cd51a0ccd1beda4482507769887c0be9d70f8c1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 15 Nov 2010 20:26:22 -0500 Subject: NFS: Fix a couple of regressions in readdir. Fix up the issue that array->eof_index needs to be able to be set even if array->size == 0. Ensure that we catch all important memory allocation error conditions and/or kmap() failures. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 635ff65..c6ce8af 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -194,9 +194,13 @@ typedef struct { static struct nfs_cache_array *nfs_readdir_get_array(struct page *page) { + void *ptr; if (page == NULL) return ERR_PTR(-EIO); - return (struct nfs_cache_array *)kmap(page); + ptr = kmap(page); + if (ptr == NULL) + return ERR_PTR(-ENOMEM); + return ptr; } static @@ -213,6 +217,9 @@ int nfs_readdir_clear_array(struct page *page, gfp_t mask) { struct nfs_cache_array *array = nfs_readdir_get_array(page); int i; + + if (IS_ERR(array)) + return PTR_ERR(array); for (i = 0; i < array->size; i++) kfree(array->array[i].string.name); nfs_readdir_release_array(page); @@ -244,7 +251,7 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page) if (IS_ERR(array)) return PTR_ERR(array); - ret = -EIO; + ret = -ENOSPC; if (array->size >= MAX_READDIR_ARRAY) goto out; @@ -255,9 +262,9 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page) if (ret) goto out; array->last_cookie = entry->cookie; + 
array->size++; if (entry->eof == 1) array->eof_index = array->size; - array->size++; out: nfs_readdir_release_array(page); return ret; @@ -272,7 +279,7 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri if (diff < 0) goto out_eof; if (diff >= array->size) { - if (array->eof_index > 0) + if (array->eof_index >= 0) goto out_eof; desc->current_index += array->size; return -EAGAIN; @@ -281,8 +288,6 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri index = (unsigned int)diff; *desc->dir_cookie = array->array[index].cookie; desc->cache_entry_index = index; - if (index == array->eof_index) - desc->eof = 1; return 0; out_eof: desc->eof = 1; @@ -296,17 +301,17 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des int status = -EAGAIN; for (i = 0; i < array->size; i++) { - if (i == array->eof_index) { - desc->eof = 1; - status = -EBADCOOKIE; - } if (array->array[i].cookie == *desc->dir_cookie) { desc->cache_entry_index = i; status = 0; - break; + goto out; } } - + if (i == array->eof_index) { + desc->eof = 1; + status = -EBADCOOKIE; + } +out: return status; } @@ -449,7 +454,7 @@ out: /* Perform conversion from xdr to cache array */ static -void nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, +int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, void *xdr_page, struct page *page, unsigned int buflen) { struct xdr_stream stream; @@ -471,21 +476,29 @@ void nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *e do { status = xdr_decode(desc, entry, &stream); - if (status != 0) + if (status != 0) { + if (status == -EAGAIN) + status = 0; break; + } - if (nfs_readdir_add_to_array(entry, page) == -1) - break; if (desc->plus == 1) nfs_prime_dcache(desc->file->f_path.dentry, entry); + + status = nfs_readdir_add_to_array(entry, page); + if (status != 0) + break; } while (!entry->eof); if (status == 
-EBADCOOKIE && entry->eof) { array = nfs_readdir_get_array(page); - array->eof_index = array->size - 1; - status = 0; - nfs_readdir_release_array(page); + if (!IS_ERR(array)) { + array->eof_index = array->size; + status = 0; + nfs_readdir_release_array(page); + } } + return status; } static @@ -537,7 +550,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct nfs_entry entry; struct file *file = desc->file; struct nfs_cache_array *array; - int status = 0; + int status = -ENOMEM; unsigned int array_size = ARRAY_SIZE(pages); entry.prev_cookie = 0; @@ -549,6 +562,10 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, goto out; array = nfs_readdir_get_array(page); + if (IS_ERR(array)) { + status = PTR_ERR(array); + goto out; + } memset(array, 0, sizeof(struct nfs_cache_array)); array->eof_index = -1; @@ -560,8 +577,13 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, if (status < 0) break; - nfs_readdir_page_filler(desc, &entry, pages_ptr, page, array_size * PAGE_SIZE); - } while (array->eof_index < 0 && array->size < MAX_READDIR_ARRAY); + status = nfs_readdir_page_filler(desc, &entry, pages_ptr, page, array_size * PAGE_SIZE); + if (status < 0) { + if (status == -ENOSPC) + status = 0; + break; + } + } while (array->eof_index < 0); nfs_readdir_free_large_page(pages_ptr, pages, array_size); out_release_array: @@ -582,8 +604,10 @@ static int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page) { struct inode *inode = desc->file->f_path.dentry->d_inode; + int ret; - if (nfs_readdir_xdr_to_array(desc, page, inode) < 0) + ret = nfs_readdir_xdr_to_array(desc, page, inode); + if (ret < 0) goto error; SetPageUptodate(page); @@ -595,7 +619,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page) return 0; error: unlock_page(page); - return -EIO; + return ret; } static @@ -608,12 +632,8 @@ void cache_page_release(nfs_readdir_descriptor_t 
*desc) static struct page *get_cache_page(nfs_readdir_descriptor_t *desc) { - struct page *page; - page = read_cache_page(desc->file->f_path.dentry->d_inode->i_mapping, + return read_cache_page(desc->file->f_path.dentry->d_inode->i_mapping, desc->page_index, (filler_t *)nfs_readdir_filler, desc); - if (IS_ERR(page)) - desc->eof = 1; - return page; } /* @@ -639,8 +659,10 @@ int find_cache_page(nfs_readdir_descriptor_t *desc) static inline int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) { - int res = -EAGAIN; + int res; + if (desc->page_index == 0) + desc->current_index = 0; while (1) { res = find_cache_page(desc); if (res != -EAGAIN) @@ -670,6 +692,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, struct dentry *dentry = NULL; array = nfs_readdir_get_array(desc->page); + if (IS_ERR(array)) + return PTR_ERR(array); for (i = desc->cache_entry_index; i < array->size; i++) { d_type = DT_UNKNOWN; @@ -685,11 +709,9 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, *desc->dir_cookie = array->array[i+1].cookie; else *desc->dir_cookie = array->last_cookie; - if (i == array->eof_index) { - desc->eof = 1; - break; - } } + if (i == array->eof_index) + desc->eof = 1; nfs_readdir_release_array(desc->page); cache_page_release(desc); -- cgit v0.10.2 From ac39612824e1fad8baf82c2841e42b2142af3445 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 15 Nov 2010 20:26:22 -0500 Subject: NFS: readdir shouldn't read beyond the reply returned by the server Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index c6ce8af..c9196c9 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -573,11 +573,13 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, if (!pages_ptr) goto out_release_array; do { + unsigned int pglen; status = nfs_readdir_xdr_filler(pages, desc, &entry, file, inode); if (status < 0) break; - status = nfs_readdir_page_filler(desc, &entry, pages_ptr, page, array_size * 
PAGE_SIZE); + pglen = status; + status = nfs_readdir_page_filler(desc, &entry, pages_ptr, page, pglen); if (status < 0) { if (status == -ENOSPC) status = 0; diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index e6bf457..2563f76 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -423,7 +423,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy) struct page **page; size_t hdrlen; unsigned int pglen, recvd; - int status, nr = 0; + int status; if ((status = ntohl(*p++))) return nfs_stat_to_errno(status); @@ -443,7 +443,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy) if (pglen > recvd) pglen = recvd; page = rcvbuf->pages; - return nr; + return pglen; } static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index d9a5e83..748dc91 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -555,7 +555,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res struct page **page; size_t hdrlen; u32 recvd, pglen; - int status, nr = 0; + int status; status = ntohl(*p++); /* Decode post_op_attrs */ @@ -586,7 +586,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res pglen = recvd; page = rcvbuf->pages; - return nr; + return pglen; } __be32 * diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0f24cdf..6a653ff 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2852,8 +2852,10 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); res.pgbase = args.pgbase; status = nfs4_call_sync(NFS_SERVER(dir), &msg, &args, &res, 0); - if (status == 0) + if (status >= 0) { memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); + status += args.pgbase; + } nfs_invalidate_atime(dir); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index f313c4c..b7a204f 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ 
-4518,7 +4518,7 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n xdr_read_pages(xdr, pglen); - return 0; + return pglen; } static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) -- cgit v0.10.2 From 6f07d31e46639e4b1b23de6ee88c9e079a7bf32d Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 15 Nov 2010 13:33:25 -0800 Subject: Input: aiptek - tighten up permissions on sysfs attributes Sysfs attributes affecting device behavior should not be, by default, world-writeable. If distributions want to allow console users access these attributes they need to employ udev and friends to adjust permissions as needed. Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/tablet/aiptek.c b/drivers/input/tablet/aiptek.c index 57b25b8..0a619c5 100644 --- a/drivers/input/tablet/aiptek.c +++ b/drivers/input/tablet/aiptek.c @@ -1097,7 +1097,7 @@ store_tabletPointerMode(struct device *dev, struct device_attribute *attr, const } static DEVICE_ATTR(pointer_mode, - S_IRUGO | S_IWUGO, + S_IRUGO | S_IWUSR, show_tabletPointerMode, store_tabletPointerMode); /*********************************************************************** @@ -1134,7 +1134,7 @@ store_tabletCoordinateMode(struct device *dev, struct device_attribute *attr, co } static DEVICE_ATTR(coordinate_mode, - S_IRUGO | S_IWUGO, + S_IRUGO | S_IWUSR, show_tabletCoordinateMode, store_tabletCoordinateMode); /*********************************************************************** @@ -1176,7 +1176,7 @@ store_tabletToolMode(struct device *dev, struct device_attribute *attr, const ch } static DEVICE_ATTR(tool_mode, - S_IRUGO | S_IWUGO, + S_IRUGO | S_IWUSR, show_tabletToolMode, store_tabletToolMode); /*********************************************************************** @@ -1219,7 +1219,7 @@ store_tabletXtilt(struct device *dev, struct device_attribute *attr, const char } static DEVICE_ATTR(xtilt, - S_IRUGO | S_IWUGO, show_tabletXtilt, store_tabletXtilt); + S_IRUGO | S_IWUSR, 
show_tabletXtilt, store_tabletXtilt); /*********************************************************************** * support routines for the 'ytilt' file. Note that this file @@ -1261,7 +1261,7 @@ store_tabletYtilt(struct device *dev, struct device_attribute *attr, const char } static DEVICE_ATTR(ytilt, - S_IRUGO | S_IWUGO, show_tabletYtilt, store_tabletYtilt); + S_IRUGO | S_IWUSR, show_tabletYtilt, store_tabletYtilt); /*********************************************************************** * support routines for the 'jitter' file. Note that this file @@ -1288,7 +1288,7 @@ store_tabletJitterDelay(struct device *dev, struct device_attribute *attr, const } static DEVICE_ATTR(jitter, - S_IRUGO | S_IWUGO, + S_IRUGO | S_IWUSR, show_tabletJitterDelay, store_tabletJitterDelay); /*********************************************************************** @@ -1317,7 +1317,7 @@ store_tabletProgrammableDelay(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR(delay, - S_IRUGO | S_IWUGO, + S_IRUGO | S_IWUSR, show_tabletProgrammableDelay, store_tabletProgrammableDelay); /*********************************************************************** @@ -1406,7 +1406,7 @@ store_tabletStylusUpper(struct device *dev, struct device_attribute *attr, const } static DEVICE_ATTR(stylus_upper, - S_IRUGO | S_IWUGO, + S_IRUGO | S_IWUSR, show_tabletStylusUpper, store_tabletStylusUpper); /*********************************************************************** @@ -1437,7 +1437,7 @@ store_tabletStylusLower(struct device *dev, struct device_attribute *attr, const } static DEVICE_ATTR(stylus_lower, - S_IRUGO | S_IWUGO, + S_IRUGO | S_IWUSR, show_tabletStylusLower, store_tabletStylusLower); /*********************************************************************** @@ -1475,7 +1475,7 @@ store_tabletMouseLeft(struct device *dev, struct device_attribute *attr, const c } static DEVICE_ATTR(mouse_left, - S_IRUGO | S_IWUGO, + S_IRUGO | S_IWUSR, show_tabletMouseLeft, store_tabletMouseLeft); 
/*********************************************************************** @@ -1505,7 +1505,7 @@ store_tabletMouseMiddle(struct device *dev, struct device_attribute *attr, const } static DEVICE_ATTR(mouse_middle, - S_IRUGO | S_IWUGO, + S_IRUGO | S_IWUSR, show_tabletMouseMiddle, store_tabletMouseMiddle); /*********************************************************************** @@ -1535,7 +1535,7 @@ store_tabletMouseRight(struct device *dev, struct device_attribute *attr, const } static DEVICE_ATTR(mouse_right, - S_IRUGO | S_IWUGO, + S_IRUGO | S_IWUSR, show_tabletMouseRight, store_tabletMouseRight); /*********************************************************************** @@ -1567,7 +1567,7 @@ store_tabletWheel(struct device *dev, struct device_attribute *attr, const char } static DEVICE_ATTR(wheel, - S_IRUGO | S_IWUGO, show_tabletWheel, store_tabletWheel); + S_IRUGO | S_IWUSR, show_tabletWheel, store_tabletWheel); /*********************************************************************** * support routines for the 'execute' file. Note that this file @@ -1600,7 +1600,7 @@ store_tabletExecute(struct device *dev, struct device_attribute *attr, const cha } static DEVICE_ATTR(execute, - S_IRUGO | S_IWUGO, show_tabletExecute, store_tabletExecute); + S_IRUGO | S_IWUSR, show_tabletExecute, store_tabletExecute); /*********************************************************************** * support routines for the 'odm_code' file. Note that this file -- cgit v0.10.2 From df6e61d4ca268dc8706db38222fde9f04701566c Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 15 Nov 2010 21:17:27 -0800 Subject: kernel/sysctl.c: Fix build failure with !CONFIG_PRINTK Sigh... 
Signed-off-by: Joe Perches Acked-by: Eric Paris Signed-off-by: Linus Torvalds diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b65bf63..5abfa15 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -702,7 +702,6 @@ static struct ctl_table kern_table[] = { .extra1 = &zero, .extra2 = &ten_thousand, }, -#endif { .procname = "dmesg_restrict", .data = &dmesg_restrict, @@ -712,6 +711,7 @@ static struct ctl_table kern_table[] = { .extra1 = &zero, .extra2 = &one, }, +#endif { .procname = "ngroups_max", .data = &ngroups_max, -- cgit v0.10.2 From 5685b971362651ee3d99ff3cc512c3bbd049d34d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 12 Nov 2010 09:23:04 -0500 Subject: nfs: trivial: remove unused nfs_wait_event macro Nothing uses this macro anymore. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index bba2668..c66fdb7 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -593,12 +593,6 @@ nfs_fileid_to_ino_t(u64 fileid) return ino; } -#define nfs_wait_event(clnt, wq, condition) \ -({ \ - int __retval = wait_event_killable(wq, condition); \ - __retval; \ -}) - #define NFS_JUKEBOX_RETRY_TIME (5 * HZ) #endif /* __KERNEL__ */ -- cgit v0.10.2 From 94f58df8e545657f0b2d16eca1ac7a4ec39ed6be Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Sun, 7 Nov 2010 22:11:34 +0100 Subject: SUNRPC: Simplify rpc_alloc_iostats by removing pointless local variable Hi, We can simplify net/sunrpc/stats.c::rpc_alloc_iostats() a bit by getting rid of the unneeded local variable 'new'. Please CC me on replies. 
Signed-off-by: Jesper Juhl Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index f71a731..80df89d 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -115,9 +115,7 @@ EXPORT_SYMBOL_GPL(svc_seq_show); */ struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt) { - struct rpc_iostats *new; - new = kcalloc(clnt->cl_maxproc, sizeof(struct rpc_iostats), GFP_KERNEL); - return new; + return kcalloc(clnt->cl_maxproc, sizeof(struct rpc_iostats), GFP_KERNEL); } EXPORT_SYMBOL_GPL(rpc_alloc_iostats); -- cgit v0.10.2 From 04e4bd1c67f941d81bff78a3b6b94194f081b7df Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 11 Nov 2010 12:53:47 +0000 Subject: nfs: Ignore kmemleak false positive in nfs_readdir_make_qstr Strings allocated via kmemdup() in nfs_readdir_make_qstr() are referenced from the nfs_cache_array which is stored in a page cache page. Kmemleak does not scan such pages and it reports several false positives. This patch annotates the string->name pointer so that kmemleak does not consider it a real leak. Signed-off-by: Catalin Marinas Cc: Bryan Schumaker Cc: Trond Myklebust Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index c9196c9..662df2a 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "delegation.h" #include "iostat.h" @@ -238,6 +239,11 @@ int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int le string->name = kmemdup(name, len, GFP_KERNEL); if (string->name == NULL) return -ENOMEM; + /* + * Avoid a kmemleak false positive. The pointer to the name is stored + * in a page cache page which kmemleak does not scan. + */ + kmemleak_not_leak(string->name); string->hash = full_name_hash(name, len); return 0; } -- cgit v0.10.2 From 8c05cd08a7504b855c265263e84af61aabafa329 Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Tue, 16 Nov 2010 09:13:41 -0800 Subject: PCI: fix offset check for sysfs mmapped files I just loaded 2.6.37-rc2 on my machines, and I noticed that X no longer starts. Running an strace of the X server shows that it's doing this: open("/sys/bus/pci/devices/0000:07:00.0/resource0", O_RDWR) = 10 mmap(NULL, 16777216, PROT_READ|PROT_WRITE, MAP_SHARED, 10, 0) = -1 EINVAL (Invalid argument) This code seems to be asking for a shared read/write mapping of 16MB worth of BAR0 starting at file offset 0, and letting the kernel assign a starting address. Unfortunately, this -EINVAL causes X not to start. Looking into dmesg, there's a complaint like so: process "Xorg" tried to map 0x01000000 bytes at page 0x00000000 on 0000:07:00.0 BAR 0 (start 0x 96000000, size 0x 1000000) ...with the following code in pci_mmap_fits: pci_start = (mmap_api == PCI_MMAP_SYSFS) ? pci_resource_start(pdev, resno) >> PAGE_SHIFT : 0; if (start >= pci_start && start < pci_start + size && start + nr <= pci_start + size) It looks like the logic here is set up such that when the mmap call comes via sysfs, the check in pci_mmap_fits wants vma->vm_pgoff to be between the resource's start and end address, and the end of the vma to be no farther than the end. However, the sysfs PCI resource files always start at offset zero, which means that this test always fails for programs that mmap the sysfs files. Given the comment in the original commit 3b519e4ea618b6943a82931630872907f9ac2c2b, I _think_ the old procfs files require that the file offset be equal to the resource's base address when mmapping. I think what we want here is for pci_start to be 0 when mmap_api == PCI_MMAP_PROCFS. The following patch makes that change, after which the Matrox and Mach64 X drivers work again. Acked-by: Martin Wilck Signed-off-by: Darrick J. 
Wong Signed-off-by: Jesse Barnes diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 95712a3..63d5042 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -715,7 +715,7 @@ int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vma, nr = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; start = vma->vm_pgoff; size = ((pci_resource_len(pdev, resno) - 1) >> PAGE_SHIFT) + 1; - pci_start = (mmap_api == PCI_MMAP_SYSFS) ? + pci_start = (mmap_api == PCI_MMAP_PROCFS) ? pci_resource_start(pdev, resno) >> PAGE_SHIFT : 0; if (start >= pci_start && start < pci_start + size && start + nr <= pci_start + size) -- cgit v0.10.2 From 4c62ab9c538bc09c38093fa079e6902ea4d42b98 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 16 Nov 2010 09:50:47 -0800 Subject: irda: irttp: allow zero byte packets Sending zero byte packets is not neccessarily an error (AF_INET accepts it, too), so just apply a shortcut. This was discovered because of a non-working software with WINE. See http://bugs.winehq.org/show_bug.cgi?id=19397#c86 http://thread.gmane.org/gmane.linux.irda.general/1643 for very detailed debugging information and a testcase. Kudos to Wolfgang for those! Reported-by: Wolfgang Schwotzer Signed-off-by: Wolfram Sang Tested-by: Mike Evans Signed-off-by: David S. 
Miller diff --git a/net/irda/irttp.c b/net/irda/irttp.c index 285761e..6cfaeaf 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -550,16 +550,23 @@ EXPORT_SYMBOL(irttp_close_tsap); */ int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb) { + int ret = -1; + IRDA_ASSERT(self != NULL, return -1;); IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;); IRDA_ASSERT(skb != NULL, return -1;); IRDA_DEBUG(4, "%s()\n", __func__); + /* Take shortcut on zero byte packets */ + if (skb->len == 0) { + ret = 0; + goto err; + } + /* Check that nothing bad happens */ - if ((skb->len == 0) || (!self->connected)) { - IRDA_DEBUG(1, "%s(), No data, or not connected\n", - __func__); + if (!self->connected) { + IRDA_DEBUG(1, "%s(), Not connected\n", __func__); goto err; } @@ -576,7 +583,7 @@ int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb) err: dev_kfree_skb(skb); - return -1; + return ret; } EXPORT_SYMBOL(irttp_udata_request); @@ -599,9 +606,15 @@ int irttp_data_request(struct tsap_cb *self, struct sk_buff *skb) IRDA_DEBUG(2, "%s() : queue len = %d\n", __func__, skb_queue_len(&self->tx_queue)); + /* Take shortcut on zero byte packets */ + if (skb->len == 0) { + ret = 0; + goto err; + } + /* Check that nothing bad happens */ - if ((skb->len == 0) || (!self->connected)) { - IRDA_WARNING("%s: No data, or not connected\n", __func__); + if (!self->connected) { + IRDA_WARNING("%s: Not connected\n", __func__); ret = -ENOTCONN; goto err; } -- cgit v0.10.2 From 133dc4c39c57eeef2577ca5b4ed24765b7a78ce2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 16 Nov 2010 18:45:39 +0100 Subject: perf: Rename 'perf trace' to 'perf script' Free the perf trace name space and rename the trace to 'script' which is a better match for the scripting engine. 
Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/tools/perf/Documentation/perf-script-perl.txt b/tools/perf/Documentation/perf-script-perl.txt new file mode 100644 index 0000000..5bb41e5 --- /dev/null +++ b/tools/perf/Documentation/perf-script-perl.txt @@ -0,0 +1,217 @@ +perf-script-perl(1) +================== + +NAME +---- +perf-script-perl - Process trace data with a Perl script + +SYNOPSIS +-------- +[verse] +'perf script' [-s [Perl]:script[.pl] ] + +DESCRIPTION +----------- + +This perf script option is used to process perf script data using perf's +built-in Perl interpreter. It reads and processes the input file and +displays the results of the trace analysis implemented in the given +Perl script, if any. + +STARTER SCRIPTS +--------------- + +You can avoid reading the rest of this document by running 'perf script +-g perl' in the same directory as an existing perf.data trace file. +That will generate a starter script containing a handler for each of +the event types in the trace file; it simply prints every available +field for each event in the trace file. + +You can also look at the existing scripts in +~/libexec/perf-core/scripts/perl for typical examples showing how to +do basic things like aggregate event data, print results, etc. Also, +the check-perf-script.pl script, while not interesting for its results, +attempts to exercise all of the main scripting features. + +EVENT HANDLERS +-------------- + +When perf script is invoked using a trace script, a user-defined +'handler function' is called for each event in the trace. If there's +no handler function defined for a given event type, the event is +ignored (or passed to a 'trace_unhandled' function, see below) and the +next event is processed. + +Most of the event's field values are passed as arguments to the +handler function; some of the less common ones aren't - those are +available as calls back into the perf executable (see below).
+ +As an example, the following perf record command can be used to record +all sched_wakeup events in the system: + + # perf record -a -e sched:sched_wakeup + +Traces meant to be processed using a script should be recorded with +the above option: -a to enable system-wide collection. + +The format file for the sched_wakeup event defines the following fields +(see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format): + +---- + format: + field:unsigned short common_type; + field:unsigned char common_flags; + field:unsigned char common_preempt_count; + field:int common_pid; + field:int common_lock_depth; + + field:char comm[TASK_COMM_LEN]; + field:pid_t pid; + field:int prio; + field:int success; + field:int target_cpu; +---- + +The handler function for this event would be defined as: + +---- +sub sched::sched_wakeup +{ + my ($event_name, $context, $common_cpu, $common_secs, + $common_nsecs, $common_pid, $common_comm, + $comm, $pid, $prio, $success, $target_cpu) = @_; +} +---- + +The handler function takes the form subsystem::event_name. + +The $common_* arguments in the handler's argument list are the set of +arguments passed to all event handlers; some of the fields correspond +to the common_* fields in the format file, but some are synthesized, +and some of the common_* fields aren't common enough to be passed +to every event as arguments but are available as library functions. + +Here's a brief description of each of the invariant event args: + + $event_name the name of the event as text + $context an opaque 'cookie' used in calls back into perf + $common_cpu the cpu the event occurred on + $common_secs the secs portion of the event timestamp + $common_nsecs the nsecs portion of the event timestamp + $common_pid the pid of the current task + $common_comm the name of the current process + +All of the remaining fields in the event's format file have +counterparts as handler function arguments of the same name, as can be +seen in the example above.
+ +The above provides the basics needed to directly access every field of +every event in a trace, which covers 90% of what you need to know to +write a useful trace script. The sections below cover the rest. + +SCRIPT LAYOUT +------------- + +Every perf script Perl script should start by setting up a Perl module +search path and 'use'ing a few support modules (see module +descriptions below): + +---- + use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/perf-script-Util/lib"; + use lib "./perf-script-Util/lib"; + use Perf::Trace::Core; + use Perf::Trace::Context; + use Perf::Trace::Util; +---- + +The rest of the script can contain handler functions and support +functions in any order. + +Aside from the event handler functions discussed above, every script +can implement a set of optional functions: + +*trace_begin*, if defined, is called before any event is processed and +gives scripts a chance to do setup tasks: + +---- + sub trace_begin + { + } +---- + +*trace_end*, if defined, is called after all events have been + processed and gives scripts a chance to do end-of-script tasks, such + as display results: + +---- +sub trace_end +{ +} +---- + +*trace_unhandled*, if defined, is called for any event that + doesn't have a handler explicitly defined for it. The standard set + of common arguments are passed into it: + +---- +sub trace_unhandled +{ + my ($event_name, $context, $common_cpu, $common_secs, + $common_nsecs, $common_pid, $common_comm) = @_; +} +---- + +The remaining sections provide descriptions of each of the available +built-in perf script Perl modules and their associated functions. + +AVAILABLE MODULES AND FUNCTIONS +------------------------------- + +The following sections describe the functions and variables available +via the various Perf::Trace::* Perl modules. To use the functions and +variables from the given module, add the corresponding 'use +Perf::Trace::XXX' line to your perf script script.
+ +Perf::Trace::Core Module +~~~~~~~~~~~~~~~~~~~~~~~~ + +These functions provide some essential functions to user scripts. + +The *flag_str* and *symbol_str* functions provide human-readable +strings for flag and symbolic fields. These correspond to the strings +and values parsed from the 'print fmt' fields of the event format +files: + + flag_str($event_name, $field_name, $field_value) - returns the string representation corresponding to $field_value for the flag field $field_name of event $event_name + symbol_str($event_name, $field_name, $field_value) - returns the string representation corresponding to $field_value for the symbolic field $field_name of event $event_name + +Perf::Trace::Context Module +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some of the 'common' fields in the event format file aren't all that +common, but need to be made accessible to user scripts nonetheless. + +Perf::Trace::Context defines a set of functions that can be used to +access this data in the context of the current event. Each of these +functions expects a $context variable, which is the same as the +$context variable passed into every event handler as the second +argument.
+ + common_pc($context) - returns common_preempt_count for the current event + common_flags($context) - returns common_flags for the current event + common_lock_depth($context) - returns common_lock_depth for the current event + +Perf::Trace::Util Module +~~~~~~~~~~~~~~~~~~~~~~~~ + +Various utility functions for use with perf script: + + nsecs($secs, $nsecs) - returns total nsecs given secs/nsecs pair + nsecs_secs($nsecs) - returns whole secs portion given nsecs + nsecs_nsecs($nsecs) - returns nsecs remainder given nsecs + nsecs_str($nsecs) - returns printable string in the form secs.nsecs + avg($total, $n) - returns average given a sum and a total number of values + +SEE ALSO +-------- +linkperf:perf-script[1] diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt new file mode 100644 index 0000000..36b3827 --- /dev/null +++ b/tools/perf/Documentation/perf-script-python.txt @@ -0,0 +1,623 @@ +perf-script-python(1) +==================== + +NAME +---- +perf-script-python - Process trace data with a Python script + +SYNOPSIS +-------- +[verse] +'perf script' [-s [Python]:script[.py] ] + +DESCRIPTION +----------- + +This perf script option is used to process perf script data using perf's +built-in Python interpreter. It reads and processes the input file and +displays the results of the trace analysis implemented in the given +Python script, if any. + +A QUICK EXAMPLE +--------------- + +This section shows the process, start to finish, of creating a working +Python script that aggregates and extracts useful information from a +raw perf script stream. You can avoid reading the rest of this +document if an example is enough for you; the rest of the document +provides more details on each step and lists the library functions +available to script writers.
+ +This example actually details the steps that were used to create the +'syscall-counts' script you see when you list the available perf script +scripts via 'perf script -l'. As such, this script also shows how to +integrate your script into the list of general-purpose 'perf script' +scripts listed by that command. + +The syscall-counts script is a simple script, but demonstrates all the +basic ideas necessary to create a useful script. Here's an example +of its output (syscall names are not yet supported, they will appear +as numbers): + +---- +syscall events: + +event count +---------------------------------------- ----------- +sys_write 455067 +sys_getdents 4072 +sys_close 3037 +sys_swapoff 1769 +sys_read 923 +sys_sched_setparam 826 +sys_open 331 +sys_newfstat 326 +sys_mmap 217 +sys_munmap 216 +sys_futex 141 +sys_select 102 +sys_poll 84 +sys_setitimer 12 +sys_writev 8 +15 8 +sys_lseek 7 +sys_rt_sigprocmask 6 +sys_wait4 3 +sys_ioctl 3 +sys_set_robust_list 1 +sys_exit 1 +56 1 +sys_access 1 +---- + +Basically our task is to keep a per-syscall tally that gets updated +every time a system call occurs in the system. Our script will do +that, but first we need to record the data that will be processed by +that script. Theoretically, there are a couple of ways we could do +that: + +- we could enable every event under the tracing/events/syscalls + directory, but this is over 600 syscalls, well beyond the number + allowable by perf. These individual syscall events will however be + useful if we want to later use the guidance we get from the + general-purpose scripts to drill down and get more detail about + individual syscalls of interest. + +- we can enable the sys_enter and/or sys_exit syscalls found under + tracing/events/raw_syscalls. These are called for all syscalls; the + 'id' field can be used to distinguish between individual syscall + numbers. 
+ +For this script, we only need to know that a syscall was entered; we +don't care how it exited, so we'll use 'perf record' to record only +the sys_enter events: + +---- +# perf record -a -e raw_syscalls:sys_enter + +^C[ perf record: Woken up 1 times to write data ] +[ perf record: Captured and wrote 56.545 MB perf.data (~2470503 samples) ] +---- + +The options basically say to collect data for every syscall event +system-wide and multiplex the per-cpu output into a single stream. +That single stream will be recorded in a file in the current directory +called perf.data. + +Once we have a perf.data file containing our data, we can use the -g +'perf script' option to generate a Python script that will contain a +callback handler for each event type found in the perf.data trace +stream (for more details, see the STARTER SCRIPTS section). + +---- +# perf script -g python +generated Python script: perf-script.py + +The output file created also in the current directory is named +perf-script.py. Here's the file in its entirety: + +# perf script event handlers, generated by perf script -g python +# Licensed under the terms of the GNU GPL License version 2 + +# The common_* event handler fields are the most useful fields common to +# all events. They don't necessarily correspond to the 'common_*' fields +# in the format files. Those fields not available as handler params can +# be retrieved using Python functions of the form common_*(context). +# See the perf-script-python Documentation for the list of available functions. 
+ +import os +import sys + +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/perf-script-Util/lib/Perf/Trace') + +from perf_trace_context import * +from Core import * + +def trace_begin(): + print "in trace_begin" + +def trace_end(): + print "in trace_end" + +def raw_syscalls__sys_enter(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + id, args): + print_header(event_name, common_cpu, common_secs, common_nsecs, + common_pid, common_comm) + + print "id=%d, args=%s\n" % \ + (id, args), + +def trace_unhandled(event_name, context, common_cpu, common_secs, common_nsecs, + common_pid, common_comm): + print_header(event_name, common_cpu, common_secs, common_nsecs, + common_pid, common_comm) + +def print_header(event_name, cpu, secs, nsecs, pid, comm): + print "%-20s %5u %05u.%09u %8u %-20s " % \ + (event_name, cpu, secs, nsecs, pid, comm), +---- + +At the top is a comment block followed by some import statements and a +path append which every perf script script should include. + +Following that are a couple generated functions, trace_begin() and +trace_end(), which are called at the beginning and the end of the +script respectively (for more details, see the SCRIPT_LAYOUT section +below). + +Following those are the 'event handler' functions generated one for +every event in the 'perf record' output. The handler functions take +the form subsystem__event_name, and contain named parameters, one for +each field in the event; in this case, there's only one event, +raw_syscalls__sys_enter(). (see the EVENT HANDLERS section below for +more info on event handlers). + +The final couple of functions are, like the begin and end functions, +generated for every script. The first, trace_unhandled(), is called +every time the script finds an event in the perf.data file that +doesn't correspond to any event handler in the script. 
This could +mean either that the record step recorded event types that it wasn't +really interested in, or the script was run against a trace file that +doesn't correspond to the script. + +The script generated by -g option simply prints a line for each +event found in the trace stream i.e. it basically just dumps the event +and its parameter values to stdout. The print_header() function is +simply a utility function used for that purpose. Let's rename the +script and run it to see the default output: + +---- +# mv perf-script.py syscall-counts.py +# perf script -s syscall-counts.py + +raw_syscalls__sys_enter 1 00840.847582083 7506 perf id=1, args= +raw_syscalls__sys_enter 1 00840.847595764 7506 perf id=1, args= +raw_syscalls__sys_enter 1 00840.847620860 7506 perf id=1, args= +raw_syscalls__sys_enter 1 00840.847710478 6533 npviewer.bin id=78, args= +raw_syscalls__sys_enter 1 00840.847719204 6533 npviewer.bin id=142, args= +raw_syscalls__sys_enter 1 00840.847755445 6533 npviewer.bin id=3, args= +raw_syscalls__sys_enter 1 00840.847775601 6533 npviewer.bin id=3, args= +raw_syscalls__sys_enter 1 00840.847781820 6533 npviewer.bin id=3, args= +. +. +. +---- + +Of course, for this script, we're not interested in printing every +trace event, but rather aggregating it in a useful way. So we'll get +rid of everything to do with printing as well as the trace_begin() and +trace_unhandled() functions, which we won't be using. That leaves us +with this minimalistic skeleton: + +---- +import os +import sys + +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/perf-script-Util/lib/Perf/Trace') + +from perf_trace_context import * +from Core import * + +def trace_end(): + print "in trace_end" + +def raw_syscalls__sys_enter(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + id, args): +---- + +In trace_end(), we'll simply print the results, but first we need to +generate some results to print. 
To do that we need to have our +sys_enter() handler do the necessary tallying until all events have +been counted. A hash table indexed by syscall id is a good way to +store that information; every time the sys_enter() handler is called, +we simply increment a count associated with that hash entry indexed by +that syscall id: + +---- + syscalls = autodict() + + try: + syscalls[id] += 1 + except TypeError: + syscalls[id] = 1 +---- + +The syscalls 'autodict' object is a special kind of Python dictionary +(implemented in Core.py) that implements Perl's 'autovivifying' hashes +in Python i.e. with autovivifying hashes, you can assign nested hash +values without having to go to the trouble of creating intermediate +levels if they don't exist e.g. syscalls[comm][pid][id] = 1 will create +the intermediate hash levels and finally assign the value 1 to the +hash entry for 'id' (because the value being assigned isn't a hash +object itself, the initial value is assigned in the TypeError +exception. Well, there may be a better way to do this in Python but +that's what works for now). + +Putting that code into the raw_syscalls__sys_enter() handler, we +effectively end up with a single-level dictionary keyed on syscall id +and having the counts we've tallied as values. + +The print_syscall_totals() function iterates over the entries in the +dictionary and displays a line for each entry containing the syscall +name (the dictionary keys contain the syscall ids, which are passed to +the Util function syscall_name(), which translates the raw syscall +numbers to the corresponding syscall name strings). The output is +displayed after all the events in the trace have been processed, by +calling the print_syscall_totals() function from the trace_end() +handler called at the end of script processing.
+ +The final script producing the output shown above is shown in its +entirety below (syscall_name() helper is not yet available, you can +only deal with id's for now): + +---- +import os +import sys + +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/perf-script-Util/lib/Perf/Trace') + +from perf_trace_context import * +from Core import * +from Util import * + +syscalls = autodict() + +def trace_end(): + print_syscall_totals() + +def raw_syscalls__sys_enter(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + id, args): + try: + syscalls[id] += 1 + except TypeError: + syscalls[id] = 1 + +def print_syscall_totals(): + if for_comm is not None: + print "\nsyscall events for %s:\n\n" % (for_comm), + else: + print "\nsyscall events:\n\n", + + print "%-40s %10s\n" % ("event", "count"), + print "%-40s %10s\n" % ("----------------------------------------", \ + "-----------"), + + for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \ + reverse = True): + print "%-40s %10d\n" % (syscall_name(id), val), +---- + +The script can be run just as before: + + # perf script -s syscall-counts.py + +So those are the essential steps in writing and running a script. The +process can be generalized to any tracepoint or set of tracepoints +you're interested in - basically find the tracepoint(s) you're +interested in by looking at the list of available events shown by +'perf list' and/or look in /sys/kernel/debug/tracing events for +detailed event and field info, record the corresponding trace data +using 'perf record', passing it the list of interesting events, +generate a skeleton script using 'perf script -g python' and modify the +code to aggregate and display it for your particular needs. + +After you've done that you may end up with a general-purpose script +that you want to keep around and have available for future use. 
By +writing a couple of very simple shell scripts and putting them in the +right place, you can have your script listed alongside the other +scripts listed by the 'perf script -l' command e.g.: + +---- +root@tropicana:~# perf script -l +List of available trace scripts: + workqueue-stats workqueue stats (ins/exe/create/destroy) + wakeup-latency system-wide min/max/avg wakeup latency + rw-by-file r/w activity for a program, by file + rw-by-pid system-wide r/w activity +---- + +A nice side effect of doing this is that you also then capture the +probably lengthy 'perf record' command needed to record the events for +the script. + +To have the script appear as a 'built-in' script, you write two simple +scripts, one for recording and one for 'reporting'. + +The 'record' script is a shell script with the same base name as your +script, but with -record appended. The shell script should be put +into the perf/scripts/python/bin directory in the kernel source tree. +In that script, you write the 'perf record' command-line needed for +your script: + +---- +# cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-record + +#!/bin/bash +perf record -a -e raw_syscalls:sys_enter +---- + +The 'report' script is also a shell script with the same base name as +your script, but with -report appended. It should also be located in +the perf/scripts/python/bin directory. In that script, you write the +'perf script -s' command-line needed for running your script: + +---- +# cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-report + +#!/bin/bash +# description: system-wide syscall counts +perf script -s ~/libexec/perf-core/scripts/python/syscall-counts.py +---- + +Note that the location of the Python script given in the shell script +is in the libexec/perf-core/scripts/python directory - this is where +the script will be copied by 'make install' when you install perf. 
+For the installation to install your script there, your script needs +to be located in the perf/scripts/python directory in the kernel +source tree: + +---- +# ls -al kernel-source/tools/perf/scripts/python + +root@tropicana:/home/trz/src/tip# ls -al tools/perf/scripts/python +total 32 +drwxr-xr-x 4 trz trz 4096 2010-01-26 22:30 . +drwxr-xr-x 4 trz trz 4096 2010-01-26 22:29 .. +drwxr-xr-x 2 trz trz 4096 2010-01-26 22:29 bin +-rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-script.py +drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 perf-script-Util +-rw-r--r-- 1 trz trz 1462 2010-01-26 22:30 syscall-counts.py +---- + +Once you've done that (don't forget to do a new 'make install', +otherwise your script won't show up at run-time), 'perf script -l' +should show a new entry for your script: + +---- +root@tropicana:~# perf script -l +List of available trace scripts: + workqueue-stats workqueue stats (ins/exe/create/destroy) + wakeup-latency system-wide min/max/avg wakeup latency + rw-by-file r/w activity for a program, by file + rw-by-pid system-wide r/w activity + syscall-counts system-wide syscall counts +---- + +You can now perform the record step via 'perf script record': + + # perf script record syscall-counts + +and display the output using 'perf script report': + + # perf script report syscall-counts + +STARTER SCRIPTS +--------------- + +You can quickly get started writing a script for a particular set of +trace data by generating a skeleton script using 'perf script -g +python' in the same directory as an existing perf.data trace file. +That will generate a starter script containing a handler for each of +the event types in the trace file; it simply prints every available +field for each event in the trace file. + +You can also look at the existing scripts in +~/libexec/perf-core/scripts/python for typical examples showing how to +do basic things like aggregate event data, print results, etc. 
Also, +the check-perf-script.py script, while not interesting for its results, +attempts to exercise all of the main scripting features. + +EVENT HANDLERS +-------------- + +When perf script is invoked using a trace script, a user-defined +'handler function' is called for each event in the trace. If there's +no handler function defined for a given event type, the event is +ignored (or passed to a 'trace_unhandled' function, see below) and the +next event is processed. + +Most of the event's field values are passed as arguments to the +handler function; some of the less common ones aren't - those are +available as calls back into the perf executable (see below). + +As an example, the following perf record command can be used to record +all sched_wakeup events in the system: + + # perf record -a -e sched:sched_wakeup + +Traces meant to be processed using a script should be recorded with +the above option: -a to enable system-wide collection. + +The format file for the sched_wakeup event defines the following fields +(see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format): + +---- + format: + field:unsigned short common_type; + field:unsigned char common_flags; + field:unsigned char common_preempt_count; + field:int common_pid; + field:int common_lock_depth; + + field:char comm[TASK_COMM_LEN]; + field:pid_t pid; + field:int prio; + field:int success; + field:int target_cpu; +---- + +The handler function for this event would be defined as: + +---- +def sched__sched_wakeup(event_name, context, common_cpu, common_secs, + common_nsecs, common_pid, common_comm, + comm, pid, prio, success, target_cpu): + pass +---- + +The handler function takes the form subsystem__event_name.
+ +The common_* arguments in the handler's argument list are the set of +arguments passed to all event handlers; some of the fields correspond +to the common_* fields in the format file, but some are synthesized, +and some of the common_* fields aren't common enough to be passed +to every event as arguments but are available as library functions. + +Here's a brief description of each of the invariant event args: + + event_name the name of the event as text + context an opaque 'cookie' used in calls back into perf + common_cpu the cpu the event occurred on + common_secs the secs portion of the event timestamp + common_nsecs the nsecs portion of the event timestamp + common_pid the pid of the current task + common_comm the name of the current process + +All of the remaining fields in the event's format file have +counterparts as handler function arguments of the same name, as can be +seen in the example above. + +The above provides the basics needed to directly access every field of +every event in a trace, which covers 90% of what you need to know to +write a useful trace script. The sections below cover the rest. + +SCRIPT LAYOUT +------------- + +Every perf script Python script should start by setting up a Python +module search path and 'import'ing a few support modules (see module +descriptions below): + +---- + import os + import sys + + sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/perf-script-Util/lib/Perf/Trace') + + from perf_trace_context import * + from Core import * +---- + +The rest of the script can contain handler functions and support +functions in any order.
+ +Aside from the event handler functions discussed above, every script +can implement a set of optional functions: + +*trace_begin*, if defined, is called before any event is processed and +gives scripts a chance to do setup tasks: + +---- +def trace_begin(): + pass +---- + +*trace_end*, if defined, is called after all events have been + processed and gives scripts a chance to do end-of-script tasks, such + as display results: + +---- +def trace_end(): + pass +---- + +*trace_unhandled*, if defined, is called for any event that + doesn't have a handler explicitly defined for it. The standard set + of common arguments are passed into it: + +---- +def trace_unhandled(event_name, context, common_cpu, common_secs, + common_nsecs, common_pid, common_comm): + pass +---- + +The remaining sections provide descriptions of each of the available +built-in perf script Python modules and their associated functions. + +AVAILABLE MODULES AND FUNCTIONS +------------------------------- + +The following sections describe the functions and variables available +via the various perf script Python modules. To use the functions and +variables from the given module, add the corresponding 'from XXXX +import' line to your perf script script. + +Core.py Module +~~~~~~~~~~~~~~ + +These functions provide some essential functions to user scripts. + +The *flag_str* and *symbol_str* functions provide human-readable +strings for flag and symbolic fields.
These correspond to the strings +and values parsed from the 'print fmt' fields of the event format +files: + + flag_str(event_name, field_name, field_value) - returns the string representation corresponding to field_value for the flag field field_name of event event_name + symbol_str(event_name, field_name, field_value) - returns the string representation corresponding to field_value for the symbolic field field_name of event event_name + +The *autodict* function returns a special kind of Python +dictionary that implements Perl's 'autovivifying' hashes in Python +i.e. with autovivifying hashes, you can assign nested hash values +without having to go to the trouble of creating intermediate levels if +they don't exist. + + autodict() - returns an autovivifying dictionary instance + + +perf_trace_context Module +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some of the 'common' fields in the event format file aren't all that +common, but need to be made accessible to user scripts nonetheless. + +perf_trace_context defines a set of functions that can be used to +access this data in the context of the current event. Each of these +functions expects a context variable, which is the same as the +context variable passed into every event handler as the second +argument.
+ + common_pc(context) - returns common_preempt_count for the current event + common_flags(context) - returns common_flags for the current event + common_lock_depth(context) - returns common_lock_depth for the current event + +Util.py Module +~~~~~~~~~~~~~~ + +Various utility functions for use with perf script: + + nsecs(secs, nsecs) - returns total nsecs given secs/nsecs pair + nsecs_secs(nsecs) - returns whole secs portion given nsecs + nsecs_nsecs(nsecs) - returns nsecs remainder given nsecs + nsecs_str(nsecs) - returns printable string in the form secs.nsecs + avg(total, n) - returns average given a sum and a total number of values + +SEE ALSO +-------- +linkperf:perf-script[1] diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt new file mode 100644 index 0000000..f442acc --- /dev/null +++ b/tools/perf/Documentation/perf-script.txt @@ -0,0 +1,111 @@ +perf-script(1) +============= + +NAME +---- +perf-script - Read perf.data (created by perf record) and display trace output + +SYNOPSIS +-------- +[verse] +'perf script' [] +'perf script' [] record