summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/asm-generic/cputime.h4
-rw-r--r--include/linux/alarmtimer.h51
-rw-r--r--include/linux/clockchips.h12
-rw-r--r--include/linux/dw_apb_timer.h2
-rw-r--r--include/linux/interrupt.h41
-rw-r--r--include/linux/irq.h18
-rw-r--r--include/linux/irq_work.h15
-rw-r--r--include/linux/irqdesc.h1
-rw-r--r--include/linux/jiffies.h2
-rw-r--r--include/linux/llist.h77
-rw-r--r--include/linux/lockdep.h2
-rw-r--r--include/linux/module.h12
-rw-r--r--include/linux/perf_event.h5
-rw-r--r--include/linux/posix-timers.h5
-rw-r--r--include/linux/rcupdate.h300
-rw-r--r--include/linux/rcutiny.h20
-rw-r--r--include/linux/rcutree.h2
-rw-r--r--include/linux/ring_buffer.h2
-rw-r--r--include/linux/sched.h11
-rw-r--r--include/linux/trace_clock.h1
-rw-r--r--include/linux/tracepoint.h25
-rw-r--r--include/linux/types.h10
-rw-r--r--include/scsi/osd_ore.h80
-rw-r--r--include/trace/events/rcu.h459
-rw-r--r--include/trace/events/sched.h9
-rw-r--r--include/trace/ftrace.h3
26 files changed, 924 insertions, 245 deletions
diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h
index 61e03dd..62ce682 100644
--- a/include/asm-generic/cputime.h
+++ b/include/asm-generic/cputime.h
@@ -38,8 +38,8 @@ typedef u64 cputime64_t;
/*
* Convert cputime to microseconds and back.
*/
-#define cputime_to_usecs(__ct) jiffies_to_usecs(__ct);
-#define usecs_to_cputime(__msecs) usecs_to_jiffies(__msecs);
+#define cputime_to_usecs(__ct) jiffies_to_usecs(__ct)
+#define usecs_to_cputime(__msecs) usecs_to_jiffies(__msecs)
/*
* Convert cputime to seconds and back.
diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h
index c5d6095..975009e 100644
--- a/include/linux/alarmtimer.h
+++ b/include/linux/alarmtimer.h
@@ -13,6 +13,16 @@ enum alarmtimer_type {
ALARM_NUMTYPE,
};
+enum alarmtimer_restart {
+ ALARMTIMER_NORESTART,
+ ALARMTIMER_RESTART,
+};
+
+
+#define ALARMTIMER_STATE_INACTIVE 0x00
+#define ALARMTIMER_STATE_ENQUEUED 0x01
+#define ALARMTIMER_STATE_CALLBACK 0x02
+
/**
* struct alarm - Alarm timer structure
* @node: timerqueue node for adding to the event list this value
@@ -25,16 +35,45 @@ enum alarmtimer_type {
*/
struct alarm {
struct timerqueue_node node;
- ktime_t period;
- void (*function)(struct alarm *);
+ enum alarmtimer_restart (*function)(struct alarm *, ktime_t now);
enum alarmtimer_type type;
- bool enabled;
+ int state;
void *data;
};
void alarm_init(struct alarm *alarm, enum alarmtimer_type type,
- void (*function)(struct alarm *));
-void alarm_start(struct alarm *alarm, ktime_t start, ktime_t period);
-void alarm_cancel(struct alarm *alarm);
+ enum alarmtimer_restart (*function)(struct alarm *, ktime_t));
+void alarm_start(struct alarm *alarm, ktime_t start);
+int alarm_try_to_cancel(struct alarm *alarm);
+int alarm_cancel(struct alarm *alarm);
+
+u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval);
+
+/*
+ * A alarmtimer is active, when it is enqueued into timerqueue or the
+ * callback function is running.
+ */
+static inline int alarmtimer_active(const struct alarm *timer)
+{
+ return timer->state != ALARMTIMER_STATE_INACTIVE;
+}
+
+/*
+ * Helper function to check, whether the timer is on one of the queues
+ */
+static inline int alarmtimer_is_queued(struct alarm *timer)
+{
+ return timer->state & ALARMTIMER_STATE_ENQUEUED;
+}
+
+/*
+ * Helper function to check, whether the timer is running the callback
+ * function
+ */
+static inline int alarmtimer_callback_running(struct alarm *timer)
+{
+ return timer->state & ALARMTIMER_STATE_CALLBACK;
+}
+
#endif
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index d6733e2..81e803e 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -45,20 +45,22 @@ enum clock_event_nofitiers {
*/
#define CLOCK_EVT_FEAT_PERIODIC 0x000001
#define CLOCK_EVT_FEAT_ONESHOT 0x000002
+#define CLOCK_EVT_FEAT_KTIME 0x000004
/*
* x86(64) specific misfeatures:
*
* - Clockevent source stops in C3 State and needs broadcast support.
* - Local APIC timer is used as a dummy device.
*/
-#define CLOCK_EVT_FEAT_C3STOP 0x000004
-#define CLOCK_EVT_FEAT_DUMMY 0x000008
+#define CLOCK_EVT_FEAT_C3STOP 0x000008
+#define CLOCK_EVT_FEAT_DUMMY 0x000010
/**
* struct clock_event_device - clock event device descriptor
* @event_handler: Assigned by the framework to be called by the low
* level handler of the event source
- * @set_next_event: set next event function
+ * @set_next_event: set next event function using a clocksource delta
+ * @set_next_ktime: set next event function using a direct ktime value
* @next_event: local storage for the next event in oneshot mode
* @max_delta_ns: maximum delta value in ns
* @min_delta_ns: minimum delta value in ns
@@ -81,6 +83,8 @@ struct clock_event_device {
void (*event_handler)(struct clock_event_device *);
int (*set_next_event)(unsigned long evt,
struct clock_event_device *);
+ int (*set_next_ktime)(ktime_t expires,
+ struct clock_event_device *);
ktime_t next_event;
u64 max_delta_ns;
u64 min_delta_ns;
@@ -140,7 +144,7 @@ extern void clockevents_set_mode(struct clock_event_device *dev,
enum clock_event_mode mode);
extern int clockevents_register_notifier(struct notifier_block *nb);
extern int clockevents_program_event(struct clock_event_device *dev,
- ktime_t expires, ktime_t now);
+ ktime_t expires, bool force);
extern void clockevents_handle_noop(struct clock_event_device *dev);
diff --git a/include/linux/dw_apb_timer.h b/include/linux/dw_apb_timer.h
index 49638ea..07261d5 100644
--- a/include/linux/dw_apb_timer.h
+++ b/include/linux/dw_apb_timer.h
@@ -46,7 +46,7 @@ struct dw_apb_clock_event_device *
dw_apb_clockevent_init(int cpu, const char *name, unsigned rating,
void __iomem *base, int irq, unsigned long freq);
struct dw_apb_clocksource *
-dw_apb_clocksource_init(unsigned rating, char *name, void __iomem *base,
+dw_apb_clocksource_init(unsigned rating, const char *name, void __iomem *base,
unsigned long freq);
void dw_apb_clocksource_register(struct dw_apb_clocksource *dw_cs);
void dw_apb_clocksource_start(struct dw_apb_clocksource *dw_cs);
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index a103732..a64b00e 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -59,6 +59,8 @@
* IRQF_NO_SUSPEND - Do not disable this IRQ during suspend
* IRQF_FORCE_RESUME - Force enable it on resume even if IRQF_NO_SUSPEND is set
* IRQF_NO_THREAD - Interrupt cannot be threaded
+ * IRQF_EARLY_RESUME - Resume IRQ early during syscore instead of at device
+ * resume time.
*/
#define IRQF_DISABLED 0x00000020
#define IRQF_SAMPLE_RANDOM 0x00000040
@@ -72,6 +74,7 @@
#define IRQF_NO_SUSPEND 0x00004000
#define IRQF_FORCE_RESUME 0x00008000
#define IRQF_NO_THREAD 0x00010000
+#define IRQF_EARLY_RESUME 0x00020000
#define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD)
@@ -95,6 +98,7 @@ typedef irqreturn_t (*irq_handler_t)(int, void *);
* @flags: flags (see IRQF_* above)
* @name: name of the device
* @dev_id: cookie to identify the device
+ * @percpu_dev_id: cookie to identify the device
* @next: pointer to the next irqaction for shared interrupts
* @irq: interrupt number
* @dir: pointer to the proc/irq/NN/name entry
@@ -104,17 +108,18 @@ typedef irqreturn_t (*irq_handler_t)(int, void *);
* @thread_mask: bitmask for keeping track of @thread activity
*/
struct irqaction {
- irq_handler_t handler;
- unsigned long flags;
- void *dev_id;
- struct irqaction *next;
- int irq;
- irq_handler_t thread_fn;
- struct task_struct *thread;
- unsigned long thread_flags;
- unsigned long thread_mask;
- const char *name;
- struct proc_dir_entry *dir;
+ irq_handler_t handler;
+ unsigned long flags;
+ void *dev_id;
+ void __percpu *percpu_dev_id;
+ struct irqaction *next;
+ int irq;
+ irq_handler_t thread_fn;
+ struct task_struct *thread;
+ unsigned long thread_flags;
+ unsigned long thread_mask;
+ const char *name;
+ struct proc_dir_entry *dir;
} ____cacheline_internodealigned_in_smp;
extern irqreturn_t no_action(int cpl, void *dev_id);
@@ -136,6 +141,10 @@ extern int __must_check
request_any_context_irq(unsigned int irq, irq_handler_t handler,
unsigned long flags, const char *name, void *dev_id);
+extern int __must_check
+request_percpu_irq(unsigned int irq, irq_handler_t handler,
+ const char *devname, void __percpu *percpu_dev_id);
+
extern void exit_irq_thread(void);
#else
@@ -164,10 +173,18 @@ request_any_context_irq(unsigned int irq, irq_handler_t handler,
return request_irq(irq, handler, flags, name, dev_id);
}
+static inline int __must_check
+request_percpu_irq(unsigned int irq, irq_handler_t handler,
+ const char *devname, void __percpu *percpu_dev_id)
+{
+ return request_irq(irq, handler, 0, devname, percpu_dev_id);
+}
+
static inline void exit_irq_thread(void) { }
#endif
extern void free_irq(unsigned int, void *);
+extern void free_percpu_irq(unsigned int, void __percpu *);
struct device;
@@ -207,7 +224,9 @@ extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id);
extern void disable_irq_nosync(unsigned int irq);
extern void disable_irq(unsigned int irq);
+extern void disable_percpu_irq(unsigned int irq);
extern void enable_irq(unsigned int irq);
+extern void enable_percpu_irq(unsigned int irq, unsigned int type);
/* The following three functions are for the core kernel use only. */
#ifdef CONFIG_GENERIC_HARDIRQS
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 5951730..59e49c8 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -66,6 +66,7 @@ typedef void (*irq_preflow_handler_t)(struct irq_data *data);
* IRQ_NO_BALANCING - Interrupt cannot be balanced (affinity set)
* IRQ_MOVE_PCNTXT - Interrupt can be migrated from process context
* IRQ_NESTED_TRHEAD - Interrupt nests into another thread
+ * IRQ_PER_CPU_DEVID - Dev_id is a per-cpu variable
*/
enum {
IRQ_TYPE_NONE = 0x00000000,
@@ -88,12 +89,13 @@ enum {
IRQ_MOVE_PCNTXT = (1 << 14),
IRQ_NESTED_THREAD = (1 << 15),
IRQ_NOTHREAD = (1 << 16),
+ IRQ_PER_CPU_DEVID = (1 << 17),
};
#define IRQF_MODIFY_MASK \
(IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \
IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \
- IRQ_PER_CPU | IRQ_NESTED_THREAD)
+ IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID)
#define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING)
@@ -336,12 +338,14 @@ struct irq_chip {
* IRQCHIP_MASK_ON_SUSPEND: Mask non wake irqs in the suspend path
* IRQCHIP_ONOFFLINE_ENABLED: Only call irq_on/off_line callbacks
* when irq enabled
+ * IRQCHIP_SKIP_SET_WAKE: Skip chip.irq_set_wake(), for this irq chip
*/
enum {
IRQCHIP_SET_TYPE_MASKED = (1 << 0),
IRQCHIP_EOI_IF_HANDLED = (1 << 1),
IRQCHIP_MASK_ON_SUSPEND = (1 << 2),
IRQCHIP_ONOFFLINE_ENABLED = (1 << 3),
+ IRQCHIP_SKIP_SET_WAKE = (1 << 4),
};
/* This include will go away once we isolated irq_desc usage to core code */
@@ -365,6 +369,8 @@ enum {
struct irqaction;
extern int setup_irq(unsigned int irq, struct irqaction *new);
extern void remove_irq(unsigned int irq, struct irqaction *act);
+extern int setup_percpu_irq(unsigned int irq, struct irqaction *new);
+extern void remove_percpu_irq(unsigned int irq, struct irqaction *act);
extern void irq_cpu_online(void);
extern void irq_cpu_offline(void);
@@ -392,6 +398,7 @@ extern void handle_edge_irq(unsigned int irq, struct irq_desc *desc);
extern void handle_edge_eoi_irq(unsigned int irq, struct irq_desc *desc);
extern void handle_simple_irq(unsigned int irq, struct irq_desc *desc);
extern void handle_percpu_irq(unsigned int irq, struct irq_desc *desc);
+extern void handle_percpu_devid_irq(unsigned int irq, struct irq_desc *desc);
extern void handle_bad_irq(unsigned int irq, struct irq_desc *desc);
extern void handle_nested_irq(unsigned int irq);
@@ -420,6 +427,8 @@ static inline void irq_set_chip_and_handler(unsigned int irq, struct irq_chip *c
irq_set_chip_and_handler_name(irq, chip, handle, NULL);
}
+extern int irq_set_percpu_devid(unsigned int irq);
+
extern void
__irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
const char *name);
@@ -481,6 +490,13 @@ static inline void irq_set_nested_thread(unsigned int irq, bool nest)
irq_clear_status_flags(irq, IRQ_NESTED_THREAD);
}
+static inline void irq_set_percpu_devid_flags(unsigned int irq)
+{
+ irq_set_status_flags(irq,
+ IRQ_NOAUTOEN | IRQ_PER_CPU | IRQ_NOTHREAD |
+ IRQ_NOPROBE | IRQ_PER_CPU_DEVID);
+}
+
/* Handle dynamic irq creation and destruction */
extern unsigned int create_irq_nr(unsigned int irq_want, int node);
extern int create_irq(void);
diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index 4fa09d4..6a9e8f5 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -1,20 +1,23 @@
#ifndef _LINUX_IRQ_WORK_H
#define _LINUX_IRQ_WORK_H
+#include <linux/llist.h>
+
struct irq_work {
- struct irq_work *next;
+ unsigned long flags;
+ struct llist_node llnode;
void (*func)(struct irq_work *);
};
static inline
-void init_irq_work(struct irq_work *entry, void (*func)(struct irq_work *))
+void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *))
{
- entry->next = NULL;
- entry->func = func;
+ work->flags = 0;
+ work->func = func;
}
-bool irq_work_queue(struct irq_work *entry);
+bool irq_work_queue(struct irq_work *work);
void irq_work_run(void);
-void irq_work_sync(struct irq_work *entry);
+void irq_work_sync(struct irq_work *work);
#endif /* _LINUX_IRQ_WORK_H */
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 150134a..6b69c2c 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -53,6 +53,7 @@ struct irq_desc {
unsigned long last_unhandled; /* Aging timer for unhandled count */
unsigned int irqs_unhandled;
raw_spinlock_t lock;
+ struct cpumask *percpu_enabled;
#ifdef CONFIG_SMP
const struct cpumask *affinity_hint;
struct irq_affinity_notify *affinity_notify;
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index f97672a..265e2c3 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -303,7 +303,7 @@ extern void jiffies_to_timespec(const unsigned long jiffies,
extern unsigned long timeval_to_jiffies(const struct timeval *value);
extern void jiffies_to_timeval(const unsigned long jiffies,
struct timeval *value);
-extern clock_t jiffies_to_clock_t(long x);
+extern clock_t jiffies_to_clock_t(unsigned long x);
extern unsigned long clock_t_to_jiffies(unsigned long x);
extern u64 jiffies_64_to_clock_t(u64 x);
extern u64 nsec_to_clock_t(u64 x);
diff --git a/include/linux/llist.h b/include/linux/llist.h
index aa0c8b5..7287734 100644
--- a/include/linux/llist.h
+++ b/include/linux/llist.h
@@ -35,10 +35,30 @@
*
* The basic atomic operation of this list is cmpxchg on long. On
* architectures that don't have NMI-safe cmpxchg implementation, the
- * list can NOT be used in NMI handler. So code uses the list in NMI
- * handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
+ * list can NOT be used in NMI handlers. So code that uses the list in
+ * an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
+ *
+ * Copyright 2010,2011 Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation;
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#include <linux/kernel.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+
struct llist_head {
struct llist_node *first;
};
@@ -113,14 +133,55 @@ static inline void init_llist_head(struct llist_head *list)
* test whether the list is empty without deleting something from the
* list.
*/
-static inline int llist_empty(const struct llist_head *head)
+static inline bool llist_empty(const struct llist_head *head)
{
return ACCESS_ONCE(head->first) == NULL;
}
-void llist_add(struct llist_node *new, struct llist_head *head);
-void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last,
- struct llist_head *head);
-struct llist_node *llist_del_first(struct llist_head *head);
-struct llist_node *llist_del_all(struct llist_head *head);
+static inline struct llist_node *llist_next(struct llist_node *node)
+{
+ return node->next;
+}
+
+/**
+ * llist_add - add a new entry
+ * @new: new entry to be added
+ * @head: the head for your lock-less list
+ *
+ * Return whether list is empty before adding.
+ */
+static inline bool llist_add(struct llist_node *new, struct llist_head *head)
+{
+ struct llist_node *entry, *old_entry;
+
+ entry = head->first;
+ for (;;) {
+ old_entry = entry;
+ new->next = entry;
+ entry = cmpxchg(&head->first, old_entry, new);
+ if (entry == old_entry)
+ break;
+ }
+
+ return old_entry == NULL;
+}
+
+/**
+ * llist_del_all - delete all entries from lock-less list
+ * @head: the head of lock-less list to delete all entries
+ *
+ * If list is empty, return NULL, otherwise, delete all entries and
+ * return the pointer to the first entry. The order of entries
+ * deleted is from the newest to the oldest added one.
+ */
+static inline struct llist_node *llist_del_all(struct llist_head *head)
+{
+ return xchg(&head->first, NULL);
+}
+
+extern bool llist_add_batch(struct llist_node *new_first,
+ struct llist_node *new_last,
+ struct llist_head *head);
+extern struct llist_node *llist_del_first(struct llist_head *head);
+
#endif /* LLIST_H */
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index ef820a3..b6a56e3 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -548,7 +548,7 @@ do { \
#endif
#ifdef CONFIG_PROVE_RCU
-extern void lockdep_rcu_dereference(const char *file, const int line);
+void lockdep_rcu_suspicious(const char *file, const int line, const char *s);
#endif
#endif /* __LINUX_LOCKDEP_H */
diff --git a/include/linux/module.h b/include/linux/module.h
index 1c30087..8639216 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -580,9 +580,6 @@ int unregister_module_notifier(struct notifier_block * nb);
extern void print_modules(void);
-extern void module_update_tracepoints(void);
-extern int module_get_iter_tracepoints(struct tracepoint_iter *iter);
-
#else /* !CONFIG_MODULES... */
#define EXPORT_SYMBOL(sym)
#define EXPORT_SYMBOL_GPL(sym)
@@ -698,15 +695,6 @@ static inline int unregister_module_notifier(struct notifier_block * nb)
static inline void print_modules(void)
{
}
-
-static inline void module_update_tracepoints(void)
-{
-}
-
-static inline int module_get_iter_tracepoints(struct tracepoint_iter *iter)
-{
- return 0;
-}
#endif /* CONFIG_MODULES */
#ifdef CONFIG_SYSFS
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index c816075..1e9ebe5 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -220,7 +220,10 @@ struct perf_event_attr {
mmap_data : 1, /* non-exec mmap data */
sample_id_all : 1, /* sample_type all events */
- __reserved_1 : 45;
+ exclude_host : 1, /* don't count in host */
+ exclude_guest : 1, /* don't count in guest */
+
+ __reserved_1 : 43;
union {
__u32 wakeup_events; /* wakeup every n events */
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 959c141..042058f 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -81,7 +81,10 @@ struct k_itimer {
unsigned long incr;
unsigned long expires;
} mmtimer;
- struct alarm alarmtimer;
+ struct {
+ struct alarm alarmtimer;
+ ktime_t interval;
+ } alarm;
struct rcu_head rcu;
} it;
};
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 8f4f881..2cf4226 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -33,6 +33,7 @@
#ifndef __LINUX_RCUPDATE_H
#define __LINUX_RCUPDATE_H
+#include <linux/types.h>
#include <linux/cache.h>
#include <linux/spinlock.h>
#include <linux/threads.h>
@@ -64,32 +65,74 @@ static inline void rcutorture_record_progress(unsigned long vernum)
#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b))
#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b))
+/* Exported common interfaces */
+
+#ifdef CONFIG_PREEMPT_RCU
+
/**
- * struct rcu_head - callback structure for use with RCU
- * @next: next update requests in a list
- * @func: actual update function to call after the grace period.
+ * call_rcu() - Queue an RCU callback for invocation after a grace period.
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all pre-existing RCU read-side
+ * critical sections have completed. However, the callback function
+ * might well execute concurrently with RCU read-side critical sections
+ * that started after call_rcu() was invoked. RCU read-side critical
+ * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
+ * and may be nested.
*/
-struct rcu_head {
- struct rcu_head *next;
- void (*func)(struct rcu_head *head);
-};
+extern void call_rcu(struct rcu_head *head,
+ void (*func)(struct rcu_head *head));
-/* Exported common interfaces */
+#else /* #ifdef CONFIG_PREEMPT_RCU */
+
+/* In classic RCU, call_rcu() is just call_rcu_sched(). */
+#define call_rcu call_rcu_sched
+
+#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
+
+/**
+ * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all currently executing RCU
+ * read-side critical sections have completed. call_rcu_bh() assumes
+ * that the read-side critical sections end on completion of a softirq
+ * handler. This means that read-side critical sections in process
+ * context must not be interrupted by softirqs. This interface is to be
+ * used when most of the read-side critical sections are in softirq context.
+ * RCU read-side critical sections are delimited by :
+ * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context.
+ * OR
+ * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
+ * These may be nested.
+ */
+extern void call_rcu_bh(struct rcu_head *head,
+ void (*func)(struct rcu_head *head));
+
+/**
+ * call_rcu_sched() - Queue an RCU for invocation after sched grace period.
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all currently executing RCU
+ * read-side critical sections have completed. call_rcu_sched() assumes
+ * that the read-side critical sections end on enabling of preemption
+ * or on voluntary preemption.
+ * RCU read-side critical sections are delimited by :
+ * - rcu_read_lock_sched() and rcu_read_unlock_sched(),
+ * OR
+ * anything that disables preemption.
+ * These may be nested.
+ */
extern void call_rcu_sched(struct rcu_head *head,
void (*func)(struct rcu_head *rcu));
-extern void synchronize_sched(void);
-extern void rcu_barrier_bh(void);
-extern void rcu_barrier_sched(void);
-
-static inline void __rcu_read_lock_bh(void)
-{
- local_bh_disable();
-}
-static inline void __rcu_read_unlock_bh(void)
-{
- local_bh_enable();
-}
+extern void synchronize_sched(void);
#ifdef CONFIG_PREEMPT_RCU
@@ -152,6 +195,15 @@ static inline void rcu_exit_nohz(void)
#endif /* #else #ifdef CONFIG_NO_HZ */
+/*
+ * Infrastructure to implement the synchronize_() primitives in
+ * TREE_RCU and rcu_barrier_() primitives in TINY_RCU.
+ */
+
+typedef void call_rcu_func_t(struct rcu_head *head,
+ void (*func)(struct rcu_head *head));
+void wait_rcu_gp(call_rcu_func_t crf);
+
#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
#include <linux/rcutree.h>
#elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)
@@ -297,19 +349,31 @@ extern int rcu_my_thread_group_empty(void);
/**
* rcu_lockdep_assert - emit lockdep splat if specified condition not met
* @c: condition to check
+ * @s: informative message
*/
-#define rcu_lockdep_assert(c) \
+#define rcu_lockdep_assert(c, s) \
do { \
static bool __warned; \
if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \
__warned = true; \
- lockdep_rcu_dereference(__FILE__, __LINE__); \
+ lockdep_rcu_suspicious(__FILE__, __LINE__, s); \
} \
} while (0)
+#define rcu_sleep_check() \
+ do { \
+ rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map), \
+ "Illegal context switch in RCU-bh" \
+ " read-side critical section"); \
+ rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map), \
+ "Illegal context switch in RCU-sched"\
+ " read-side critical section"); \
+ } while (0)
+
#else /* #ifdef CONFIG_PROVE_RCU */
-#define rcu_lockdep_assert(c) do { } while (0)
+#define rcu_lockdep_assert(c, s) do { } while (0)
+#define rcu_sleep_check() do { } while (0)
#endif /* #else #ifdef CONFIG_PROVE_RCU */
@@ -338,14 +402,16 @@ extern int rcu_my_thread_group_empty(void);
#define __rcu_dereference_check(p, c, space) \
({ \
typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \
- rcu_lockdep_assert(c); \
+ rcu_lockdep_assert(c, "suspicious rcu_dereference_check()" \
+ " usage"); \
rcu_dereference_sparse(p, space); \
smp_read_barrier_depends(); \
((typeof(*p) __force __kernel *)(_________p1)); \
})
#define __rcu_dereference_protected(p, c, space) \
({ \
- rcu_lockdep_assert(c); \
+ rcu_lockdep_assert(c, "suspicious rcu_dereference_protected()" \
+ " usage"); \
rcu_dereference_sparse(p, space); \
((typeof(*p) __force __kernel *)(p)); \
})
@@ -359,15 +425,15 @@ extern int rcu_my_thread_group_empty(void);
#define __rcu_dereference_index_check(p, c) \
({ \
typeof(p) _________p1 = ACCESS_ONCE(p); \
- rcu_lockdep_assert(c); \
+ rcu_lockdep_assert(c, \
+ "suspicious rcu_dereference_index_check()" \
+ " usage"); \
smp_read_barrier_depends(); \
(_________p1); \
})
#define __rcu_assign_pointer(p, v, space) \
({ \
- if (!__builtin_constant_p(v) || \
- ((v) != NULL)) \
- smp_wmb(); \
+ smp_wmb(); \
(p) = (typeof(*v) __force space *)(v); \
})
@@ -500,26 +566,6 @@ extern int rcu_my_thread_group_empty(void);
#define rcu_dereference_protected(p, c) \
__rcu_dereference_protected((p), (c), __rcu)
-/**
- * rcu_dereference_bh_protected() - fetch RCU-bh pointer when updates prevented
- * @p: The pointer to read, prior to dereferencing
- * @c: The conditions under which the dereference will take place
- *
- * This is the RCU-bh counterpart to rcu_dereference_protected().
- */
-#define rcu_dereference_bh_protected(p, c) \
- __rcu_dereference_protected((p), (c), __rcu)
-
-/**
- * rcu_dereference_sched_protected() - fetch RCU-sched pointer when updates prevented
- * @p: The pointer to read, prior to dereferencing
- * @c: The conditions under which the dereference will take place
- *
- * This is the RCU-sched counterpart to rcu_dereference_protected().
- */
-#define rcu_dereference_sched_protected(p, c) \
- __rcu_dereference_protected((p), (c), __rcu)
-
/**
* rcu_dereference() - fetch RCU-protected pointer for dereferencing
@@ -630,7 +676,7 @@ static inline void rcu_read_unlock(void)
*/
static inline void rcu_read_lock_bh(void)
{
- __rcu_read_lock_bh();
+ local_bh_disable();
__acquire(RCU_BH);
rcu_read_acquire_bh();
}
@@ -644,7 +690,7 @@ static inline void rcu_read_unlock_bh(void)
{
rcu_read_release_bh();
__release(RCU_BH);
- __rcu_read_unlock_bh();
+ local_bh_enable();
}
/**
@@ -698,11 +744,18 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
* any prior initialization. Returns the value assigned.
*
* Inserts memory barriers on architectures that require them
- * (pretty much all of them other than x86), and also prevents
- * the compiler from reordering the code that initializes the
- * structure after the pointer assignment. More importantly, this
- * call documents which pointers will be dereferenced by RCU read-side
- * code.
+ * (which is most of them), and also prevents the compiler from
+ * reordering the code that initializes the structure after the pointer
+ * assignment. More importantly, this call documents which pointers
+ * will be dereferenced by RCU read-side code.
+ *
+ * In some special cases, you may use RCU_INIT_POINTER() instead
+ * of rcu_assign_pointer(). RCU_INIT_POINTER() is a bit faster due
+ * to the fact that it does not constrain either the CPU or the compiler.
+ * That said, using RCU_INIT_POINTER() when you should have used
+ * rcu_assign_pointer() is a very bad thing that results in
+ * impossible-to-diagnose memory corruption. So please be careful.
+ * See the RCU_INIT_POINTER() comment header for details.
*/
#define rcu_assign_pointer(p, v) \
__rcu_assign_pointer((p), (v), __rcu)
@@ -710,105 +763,38 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
/**
* RCU_INIT_POINTER() - initialize an RCU protected pointer
*
- * Initialize an RCU-protected pointer in such a way to avoid RCU-lockdep
- * splats.
+ * Initialize an RCU-protected pointer in special cases where readers
+ * do not need ordering constraints on the CPU or the compiler. These
+ * special cases are:
+ *
+ * 1. This use of RCU_INIT_POINTER() is NULLing out the pointer -or-
+ * 2. The caller has taken whatever steps are required to prevent
+ * RCU readers from concurrently accessing this pointer -or-
+ * 3. The referenced data structure has already been exposed to
+ * readers either at compile time or via rcu_assign_pointer() -and-
+ * a. You have not made -any- reader-visible changes to
+ * this structure since then -or-
+ * b. It is OK for readers accessing this structure from its
+ * new location to see the old state of the structure. (For
+ * example, the changes were to statistical counters or to
+ * other state where exact synchronization is not required.)
+ *
+ * Failure to follow these rules governing use of RCU_INIT_POINTER() will
+ * result in impossible-to-diagnose memory corruption. As in the structures
+ * will look OK in crash dumps, but any concurrent RCU readers might
+ * see pre-initialized values of the referenced data structure. So
+ * please be very careful how you use RCU_INIT_POINTER()!!!
+ *
+ * If you are creating an RCU-protected linked structure that is accessed
+ * by a single external-to-structure RCU-protected pointer, then you may
+ * use RCU_INIT_POINTER() to initialize the internal RCU-protected
+ * pointers, but you must use rcu_assign_pointer() to initialize the
+ * external-to-structure pointer -after- you have completely initialized
+ * the reader-accessible portions of the linked structure.
*/
#define RCU_INIT_POINTER(p, v) \
p = (typeof(*v) __force __rcu *)(v)
-/* Infrastructure to implement the synchronize_() primitives. */
-
-struct rcu_synchronize {
- struct rcu_head head;
- struct completion completion;
-};
-
-extern void wakeme_after_rcu(struct rcu_head *head);
-
-#ifdef CONFIG_PREEMPT_RCU
-
-/**
- * call_rcu() - Queue an RCU callback for invocation after a grace period.
- * @head: structure to be used for queueing the RCU updates.
- * @func: actual callback function to be invoked after the grace period
- *
- * The callback function will be invoked some time after a full grace
- * period elapses, in other words after all pre-existing RCU read-side
- * critical sections have completed. However, the callback function
- * might well execute concurrently with RCU read-side critical sections
- * that started after call_rcu() was invoked. RCU read-side critical
- * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
- * and may be nested.
- */
-extern void call_rcu(struct rcu_head *head,
- void (*func)(struct rcu_head *head));
-
-#else /* #ifdef CONFIG_PREEMPT_RCU */
-
-/* In classic RCU, call_rcu() is just call_rcu_sched(). */
-#define call_rcu call_rcu_sched
-
-#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
-
-/**
- * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.
- * @head: structure to be used for queueing the RCU updates.
- * @func: actual callback function to be invoked after the grace period
- *
- * The callback function will be invoked some time after a full grace
- * period elapses, in other words after all currently executing RCU
- * read-side critical sections have completed. call_rcu_bh() assumes
- * that the read-side critical sections end on completion of a softirq
- * handler. This means that read-side critical sections in process
- * context must not be interrupted by softirqs. This interface is to be
- * used when most of the read-side critical sections are in softirq context.
- * RCU read-side critical sections are delimited by :
- * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context.
- * OR
- * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
- * These may be nested.
- */
-extern void call_rcu_bh(struct rcu_head *head,
- void (*func)(struct rcu_head *head));
-
-/*
- * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally
- * by call_rcu() and rcu callback execution, and are therefore not part of the
- * RCU API. Leaving in rcupdate.h because they are used by all RCU flavors.
- */
-
-#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
-# define STATE_RCU_HEAD_READY 0
-# define STATE_RCU_HEAD_QUEUED 1
-
-extern struct debug_obj_descr rcuhead_debug_descr;
-
-static inline void debug_rcu_head_queue(struct rcu_head *head)
-{
- WARN_ON_ONCE((unsigned long)head & 0x3);
- debug_object_activate(head, &rcuhead_debug_descr);
- debug_object_active_state(head, &rcuhead_debug_descr,
- STATE_RCU_HEAD_READY,
- STATE_RCU_HEAD_QUEUED);
-}
-
-static inline void debug_rcu_head_unqueue(struct rcu_head *head)
-{
- debug_object_active_state(head, &rcuhead_debug_descr,
- STATE_RCU_HEAD_QUEUED,
- STATE_RCU_HEAD_READY);
- debug_object_deactivate(head, &rcuhead_debug_descr);
-}
-#else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
-static inline void debug_rcu_head_queue(struct rcu_head *head)
-{
-}
-
-static inline void debug_rcu_head_unqueue(struct rcu_head *head)
-{
-}
-#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
-
static __always_inline bool __is_kfree_rcu_offset(unsigned long offset)
{
return offset < 4096;
@@ -827,18 +813,6 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset)
call_rcu(head, (rcu_callback)offset);
}
-extern void kfree(const void *);
-
-static inline void __rcu_reclaim(struct rcu_head *head)
-{
- unsigned long offset = (unsigned long)head->func;
-
- if (__is_kfree_rcu_offset(offset))
- kfree((void *)head - offset);
- else
- head->func(head);
-}
-
/**
* kfree_rcu() - kfree an object after a grace period.
* @ptr: pointer to kfree
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 52b3e02..00b7a5e 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -27,9 +27,23 @@
#include <linux/cache.h>
+#ifdef CONFIG_RCU_BOOST
static inline void rcu_init(void)
{
}
+#else /* #ifdef CONFIG_RCU_BOOST */
+void rcu_init(void);
+#endif /* #else #ifdef CONFIG_RCU_BOOST */
+
+static inline void rcu_barrier_bh(void)
+{
+ wait_rcu_gp(call_rcu_bh);
+}
+
+static inline void rcu_barrier_sched(void)
+{
+ wait_rcu_gp(call_rcu_sched);
+}
#ifdef CONFIG_TINY_RCU
@@ -45,9 +59,13 @@ static inline void rcu_barrier(void)
#else /* #ifdef CONFIG_TINY_RCU */
-void rcu_barrier(void);
void synchronize_rcu_expedited(void);
+static inline void rcu_barrier(void)
+{
+ wait_rcu_gp(call_rcu);
+}
+
#endif /* #else #ifdef CONFIG_TINY_RCU */
static inline void synchronize_rcu_bh(void)
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index e65d066..6745846 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -67,6 +67,8 @@ static inline void synchronize_rcu_bh_expedited(void)
}
extern void rcu_barrier(void);
+extern void rcu_barrier_bh(void);
+extern void rcu_barrier_sched(void);
extern unsigned long rcutorture_testseq;
extern unsigned long rcutorture_vernum;
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index b891de9..67be037 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -154,6 +154,8 @@ void ring_buffer_record_enable(struct ring_buffer *buffer);
void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu);
void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu);
+unsigned long ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu);
+unsigned long ring_buffer_bytes_cpu(struct ring_buffer *buffer, int cpu);
unsigned long ring_buffer_entries(struct ring_buffer *buffer);
unsigned long ring_buffer_overruns(struct ring_buffer *buffer);
unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1be699d..e8acce7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -90,6 +90,7 @@ struct sched_param {
#include <linux/task_io_accounting.h>
#include <linux/latencytop.h>
#include <linux/cred.h>
+#include <linux/llist.h>
#include <asm/processor.h>
@@ -270,7 +271,6 @@ extern void init_idle_bootup_task(struct task_struct *idle);
extern int runqueue_is_locked(int cpu);
-extern cpumask_var_t nohz_cpu_mask;
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
extern void select_nohz_load_balancer(int stop_tick);
extern int get_nohz_timer_target(void);
@@ -1225,7 +1225,7 @@ struct task_struct {
unsigned int ptrace;
#ifdef CONFIG_SMP
- struct task_struct *wake_entry;
+ struct llist_node wake_entry;
int on_cpu;
#endif
int on_rq;
@@ -1260,9 +1260,6 @@ struct task_struct {
#ifdef CONFIG_PREEMPT_RCU
int rcu_read_lock_nesting;
char rcu_read_unlock_special;
-#if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU)
- int rcu_boosted;
-#endif /* #if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU) */
struct list_head rcu_node_entry;
#endif /* #ifdef CONFIG_PREEMPT_RCU */
#ifdef CONFIG_TREE_PREEMPT_RCU
@@ -2039,6 +2036,10 @@ static inline void sched_autogroup_fork(struct signal_struct *sig) { }
static inline void sched_autogroup_exit(struct signal_struct *sig) { }
#endif
+#ifdef CONFIG_CFS_BANDWIDTH
+extern unsigned int sysctl_sched_cfs_bandwidth_slice;
+#endif
+
#ifdef CONFIG_RT_MUTEXES
extern int rt_mutex_getprio(struct task_struct *p);
extern void rt_mutex_setprio(struct task_struct *p, int prio);
diff --git a/include/linux/trace_clock.h b/include/linux/trace_clock.h
index 7a81303..4eb4902 100644
--- a/include/linux/trace_clock.h
+++ b/include/linux/trace_clock.h
@@ -15,5 +15,6 @@
extern u64 notrace trace_clock_local(void);
extern u64 notrace trace_clock(void);
extern u64 notrace trace_clock_global(void);
+extern u64 notrace trace_clock_counter(void);
#endif /* _LINUX_TRACE_CLOCK_H */
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index d530a44..df0a779 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -54,8 +54,18 @@ extern int tracepoint_probe_unregister_noupdate(const char *name, void *probe,
void *data);
extern void tracepoint_probe_update_all(void);
+#ifdef CONFIG_MODULES
+struct tp_module {
+ struct list_head list;
+ unsigned int num_tracepoints;
+ struct tracepoint * const *tracepoints_ptrs;
+};
+#endif /* CONFIG_MODULES */
+
struct tracepoint_iter {
- struct module *module;
+#ifdef CONFIG_MODULES
+ struct tp_module *module;
+#endif /* CONFIG_MODULES */
struct tracepoint * const *tracepoint;
};
@@ -63,8 +73,6 @@ extern void tracepoint_iter_start(struct tracepoint_iter *iter);
extern void tracepoint_iter_next(struct tracepoint_iter *iter);
extern void tracepoint_iter_stop(struct tracepoint_iter *iter);
extern void tracepoint_iter_reset(struct tracepoint_iter *iter);
-extern int tracepoint_get_iter_range(struct tracepoint * const **tracepoint,
- struct tracepoint * const *begin, struct tracepoint * const *end);
/*
* tracepoint_synchronize_unregister must be called between the last tracepoint
@@ -78,17 +86,6 @@ static inline void tracepoint_synchronize_unregister(void)
#define PARAMS(args...) args
-#ifdef CONFIG_TRACEPOINTS
-extern
-void tracepoint_update_probe_range(struct tracepoint * const *begin,
- struct tracepoint * const *end);
-#else
-static inline
-void tracepoint_update_probe_range(struct tracepoint * const *begin,
- struct tracepoint * const *end)
-{ }
-#endif /* CONFIG_TRACEPOINTS */
-
#endif /* _LINUX_TRACEPOINT_H */
/*
diff --git a/include/linux/types.h b/include/linux/types.h
index 176da8c..57a9723 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -238,6 +238,16 @@ struct ustat {
char f_fpack[6];
};
+/**
+ * struct rcu_head - callback structure for use with RCU
+ * @next: next update requests in a list
+ * @func: actual update function to call after the grace period.
+ */
+struct rcu_head {
+ struct rcu_head *next;
+ void (*func)(struct rcu_head *head);
+};
+
#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
#endif /* _LINUX_TYPES_H */
diff --git a/include/scsi/osd_ore.h b/include/scsi/osd_ore.h
index c5c5e00..f05fa82 100644
--- a/include/scsi/osd_ore.h
+++ b/include/scsi/osd_ore.h
@@ -34,15 +34,30 @@ struct ore_comp {
struct ore_layout {
/* Our way of looking at the data_map */
+ enum pnfs_osd_raid_algorithm4
+ raid_algorithm;
unsigned stripe_unit;
unsigned mirrors_p1;
unsigned group_width;
+ unsigned parity;
u64 group_depth;
unsigned group_count;
+
+ /* Cached often needed calculations filled in by
+ * ore_verify_layout
+ */
+ unsigned long max_io_length; /* Max length that should be passed to
+ * ore_get_rw_state
+ */
+};
+
+struct ore_dev {
+ struct osd_dev *od;
};
struct ore_components {
+ unsigned first_dev; /* First logical device no */
unsigned numdevs; /* Num of devices in array */
/* If @single_comp == EC_SINGLE_COMP, @comps points to a single
* component. else there are @numdevs components
@@ -51,20 +66,60 @@ struct ore_components {
EC_SINGLE_COMP = 0, EC_MULTPLE_COMPS = 0xffffffff
} single_comp;
struct ore_comp *comps;
- struct osd_dev **ods; /* osd_dev array */
+
+ /* Array of pointers to ore_dev-* . User will usually have these pointed
+ * too a bigger struct which contain an "ore_dev ored" member and use
+ * container_of(oc->ods[i], struct foo_dev, ored) to access the bigger
+ * structure.
+ */
+ struct ore_dev **ods;
+};
+
+/* ore_comp_dev Recievies a logical device index */
+static inline struct osd_dev *ore_comp_dev(
+ const struct ore_components *oc, unsigned i)
+{
+ BUG_ON((i < oc->first_dev) || (oc->first_dev + oc->numdevs <= i));
+ return oc->ods[i - oc->first_dev]->od;
+}
+
+static inline void ore_comp_set_dev(
+ struct ore_components *oc, unsigned i, struct osd_dev *od)
+{
+ oc->ods[i - oc->first_dev]->od = od;
+}
+
+struct ore_striping_info {
+ u64 offset;
+ u64 obj_offset;
+ u64 length;
+ u64 first_stripe_start; /* only used in raid writes */
+ u64 M; /* for truncate */
+ unsigned bytes_in_stripe;
+ unsigned dev;
+ unsigned par_dev;
+ unsigned unit_off;
+ unsigned cur_pg;
+ unsigned cur_comp;
};
struct ore_io_state;
typedef void (*ore_io_done_fn)(struct ore_io_state *ios, void *private);
+struct _ore_r4w_op {
+ /* @Priv given here is passed ios->private */
+ struct page * (*get_page)(void *priv, u64 page_index, bool *uptodate);
+ void (*put_page)(void *priv, struct page *page);
+};
struct ore_io_state {
struct kref kref;
+ struct ore_striping_info si;
void *private;
ore_io_done_fn done;
struct ore_layout *layout;
- struct ore_components *comps;
+ struct ore_components *oc;
/* Global read/write IO*/
loff_t offset;
@@ -84,6 +139,16 @@ struct ore_io_state {
bool reading;
+ /* House keeping of Parity pages */
+ bool extra_part_alloc;
+ struct page **parity_pages;
+ unsigned max_par_pages;
+ unsigned cur_par_page;
+ unsigned sgs_per_dev;
+ struct __stripe_pages_2d *sp2d;
+ struct ore_io_state *ios_read_4_write;
+ const struct _ore_r4w_op *r4w;
+
/* Variable array of size numdevs */
unsigned numdevs;
struct ore_per_dev_state {
@@ -91,7 +156,10 @@ struct ore_io_state {
struct bio *bio;
loff_t offset;
unsigned length;
+ unsigned last_sgs_total;
unsigned dev;
+ struct osd_sg_entry *sglist;
+ unsigned cur_sg;
} per_dev[];
};
@@ -102,6 +170,9 @@ static inline unsigned ore_io_state_size(unsigned numdevs)
}
/* ore.c */
+int ore_verify_layout(unsigned total_comps, struct ore_layout *layout);
+void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset,
+ u64 length, struct ore_striping_info *si);
int ore_get_rw_state(struct ore_layout *layout, struct ore_components *comps,
bool is_reading, u64 offset, u64 length,
struct ore_io_state **ios);
@@ -109,7 +180,10 @@ int ore_get_io_state(struct ore_layout *layout, struct ore_components *comps,
struct ore_io_state **ios);
void ore_put_io_state(struct ore_io_state *ios);
-int ore_check_io(struct ore_io_state *ios, u64 *resid);
+typedef void (*ore_on_dev_error)(struct ore_io_state *ios, struct ore_dev *od,
+ unsigned dev_index, enum osd_err_priority oep,
+ u64 dev_offset, u64 dev_len);
+int ore_check_io(struct ore_io_state *ios, ore_on_dev_error rep);
int ore_create(struct ore_io_state *ios);
int ore_remove(struct ore_io_state *ios);
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
new file mode 100644
index 0000000..669fbd6
--- /dev/null
+++ b/include/trace/events/rcu.h
@@ -0,0 +1,459 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rcu
+
+#if !defined(_TRACE_RCU_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_RCU_H
+
+#include <linux/tracepoint.h>
+
+/*
+ * Tracepoint for start/end markers used for utilization calculations.
+ * By convention, the string is of the following forms:
+ *
+ * "Start <activity>" -- Mark the start of the specified activity,
+ * such as "context switch". Nesting is permitted.
+ * "End <activity>" -- Mark the end of the specified activity.
+ *
+ * An "@" character within "<activity>" is a comment character: Data
+ * reduction scripts will ignore the "@" and the remainder of the line.
+ */
+TRACE_EVENT(rcu_utilization,
+
+ TP_PROTO(char *s),
+
+ TP_ARGS(s),
+
+ TP_STRUCT__entry(
+ __field(char *, s)
+ ),
+
+ TP_fast_assign(
+ __entry->s = s;
+ ),
+
+ TP_printk("%s", __entry->s)
+);
+
+#ifdef CONFIG_RCU_TRACE
+
+#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
+
+/*
+ * Tracepoint for grace-period events: starting and ending a grace
+ * period ("start" and "end", respectively), a CPU noting the start
+ * of a new grace period or the end of an old grace period ("cpustart"
+ * and "cpuend", respectively), a CPU passing through a quiescent
+ * state ("cpuqs"), a CPU coming online or going offline ("cpuonl"
+ * and "cpuofl", respectively), and a CPU being kicked for being too
+ * long in dyntick-idle mode ("kick").
+ */
+TRACE_EVENT(rcu_grace_period,
+
+ TP_PROTO(char *rcuname, unsigned long gpnum, char *gpevent),
+
+ TP_ARGS(rcuname, gpnum, gpevent),
+
+ TP_STRUCT__entry(
+ __field(char *, rcuname)
+ __field(unsigned long, gpnum)
+ __field(char *, gpevent)
+ ),
+
+ TP_fast_assign(
+ __entry->rcuname = rcuname;
+ __entry->gpnum = gpnum;
+ __entry->gpevent = gpevent;
+ ),
+
+ TP_printk("%s %lu %s",
+ __entry->rcuname, __entry->gpnum, __entry->gpevent)
+);
+
+/*
+ * Tracepoint for grace-period-initialization events. These are
+ * distinguished by the type of RCU, the new grace-period number, the
+ * rcu_node structure level, the starting and ending CPU covered by the
+ * rcu_node structure, and the mask of CPUs that will be waited for.
+ * All but the type of RCU are extracted from the rcu_node structure.
+ */
+TRACE_EVENT(rcu_grace_period_init,
+
+ TP_PROTO(char *rcuname, unsigned long gpnum, u8 level,
+ int grplo, int grphi, unsigned long qsmask),
+
+ TP_ARGS(rcuname, gpnum, level, grplo, grphi, qsmask),
+
+ TP_STRUCT__entry(
+ __field(char *, rcuname)
+ __field(unsigned long, gpnum)
+ __field(u8, level)
+ __field(int, grplo)
+ __field(int, grphi)
+ __field(unsigned long, qsmask)
+ ),
+
+ TP_fast_assign(
+ __entry->rcuname = rcuname;
+ __entry->gpnum = gpnum;
+ __entry->level = level;
+ __entry->grplo = grplo;
+ __entry->grphi = grphi;
+ __entry->qsmask = qsmask;
+ ),
+
+ TP_printk("%s %lu %u %d %d %lx",
+ __entry->rcuname, __entry->gpnum, __entry->level,
+ __entry->grplo, __entry->grphi, __entry->qsmask)
+);
+
+/*
+ * Tracepoint for tasks blocking within preemptible-RCU read-side
+ * critical sections. Track the type of RCU (which one day might
+ * include SRCU), the grace-period number that the task is blocking
+ * (the current or the next), and the task's PID.
+ */
+TRACE_EVENT(rcu_preempt_task,
+
+ TP_PROTO(char *rcuname, int pid, unsigned long gpnum),
+
+ TP_ARGS(rcuname, pid, gpnum),
+
+ TP_STRUCT__entry(
+ __field(char *, rcuname)
+ __field(unsigned long, gpnum)
+ __field(int, pid)
+ ),
+
+ TP_fast_assign(
+ __entry->rcuname = rcuname;
+ __entry->gpnum = gpnum;
+ __entry->pid = pid;
+ ),
+
+ TP_printk("%s %lu %d",
+ __entry->rcuname, __entry->gpnum, __entry->pid)
+);
+
+/*
+ * Tracepoint for tasks that blocked within a given preemptible-RCU
+ * read-side critical section exiting that critical section. Track the
+ * type of RCU (which one day might include SRCU) and the task's PID.
+ */
+TRACE_EVENT(rcu_unlock_preempted_task,
+
+ TP_PROTO(char *rcuname, unsigned long gpnum, int pid),
+
+ TP_ARGS(rcuname, gpnum, pid),
+
+ TP_STRUCT__entry(
+ __field(char *, rcuname)
+ __field(unsigned long, gpnum)
+ __field(int, pid)
+ ),
+
+ TP_fast_assign(
+ __entry->rcuname = rcuname;
+ __entry->gpnum = gpnum;
+ __entry->pid = pid;
+ ),
+
+ TP_printk("%s %lu %d", __entry->rcuname, __entry->gpnum, __entry->pid)
+);
+
+/*
+ * Tracepoint for quiescent-state-reporting events. These are
+ * distinguished by the type of RCU, the grace-period number, the
+ * mask of quiescent lower-level entities, the rcu_node structure level,
+ * the starting and ending CPU covered by the rcu_node structure, and
+ * whether there are any blocked tasks blocking the current grace period.
+ * All but the type of RCU are extracted from the rcu_node structure.
+ */
+TRACE_EVENT(rcu_quiescent_state_report,
+
+ TP_PROTO(char *rcuname, unsigned long gpnum,
+ unsigned long mask, unsigned long qsmask,
+ u8 level, int grplo, int grphi, int gp_tasks),
+
+ TP_ARGS(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks),
+
+ TP_STRUCT__entry(
+ __field(char *, rcuname)
+ __field(unsigned long, gpnum)
+ __field(unsigned long, mask)
+ __field(unsigned long, qsmask)
+ __field(u8, level)
+ __field(int, grplo)
+ __field(int, grphi)
+ __field(u8, gp_tasks)
+ ),
+
+ TP_fast_assign(
+ __entry->rcuname = rcuname;
+ __entry->gpnum = gpnum;
+ __entry->mask = mask;
+ __entry->qsmask = qsmask;
+ __entry->level = level;
+ __entry->grplo = grplo;
+ __entry->grphi = grphi;
+ __entry->gp_tasks = gp_tasks;
+ ),
+
+ TP_printk("%s %lu %lx>%lx %u %d %d %u",
+ __entry->rcuname, __entry->gpnum,
+ __entry->mask, __entry->qsmask, __entry->level,
+ __entry->grplo, __entry->grphi, __entry->gp_tasks)
+);
+
+/*
+ * Tracepoint for quiescent states detected by force_quiescent_state().
+ * These trace events include the type of RCU, the grace-period number
+ * that was blocked by the CPU, the CPU itself, and the type of quiescent
+ * state, which can be "dti" for dyntick-idle mode, "ofl" for CPU offline,
+ * or "kick" when kicking a CPU that has been in dyntick-idle mode for
+ * too long.
+ */
+TRACE_EVENT(rcu_fqs,
+
+ TP_PROTO(char *rcuname, unsigned long gpnum, int cpu, char *qsevent),
+
+ TP_ARGS(rcuname, gpnum, cpu, qsevent),
+
+ TP_STRUCT__entry(
+ __field(char *, rcuname)
+ __field(unsigned long, gpnum)
+ __field(int, cpu)
+ __field(char *, qsevent)
+ ),
+
+ TP_fast_assign(
+ __entry->rcuname = rcuname;
+ __entry->gpnum = gpnum;
+ __entry->cpu = cpu;
+ __entry->qsevent = qsevent;
+ ),
+
+ TP_printk("%s %lu %d %s",
+ __entry->rcuname, __entry->gpnum,
+ __entry->cpu, __entry->qsevent)
+);
+
+#endif /* #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) */
+
+/*
+ * Tracepoint for dyntick-idle entry/exit events. These take a string
+ * as argument: "Start" for entering dyntick-idle mode and "End" for
+ * leaving it.
+ */
+TRACE_EVENT(rcu_dyntick,
+
+ TP_PROTO(char *polarity),
+
+ TP_ARGS(polarity),
+
+ TP_STRUCT__entry(
+ __field(char *, polarity)
+ ),
+
+ TP_fast_assign(
+ __entry->polarity = polarity;
+ ),
+
+ TP_printk("%s", __entry->polarity)
+);
+
+/*
+ * Tracepoint for the registration of a single RCU callback function.
+ * The first argument is the type of RCU, the second argument is
+ * a pointer to the RCU callback itself, and the third element is the
+ * new RCU callback queue length for the current CPU.
+ */
+TRACE_EVENT(rcu_callback,
+
+ TP_PROTO(char *rcuname, struct rcu_head *rhp, long qlen),
+
+ TP_ARGS(rcuname, rhp, qlen),
+
+ TP_STRUCT__entry(
+ __field(char *, rcuname)
+ __field(void *, rhp)
+ __field(void *, func)
+ __field(long, qlen)
+ ),
+
+ TP_fast_assign(
+ __entry->rcuname = rcuname;
+ __entry->rhp = rhp;
+ __entry->func = rhp->func;
+ __entry->qlen = qlen;
+ ),
+
+ TP_printk("%s rhp=%p func=%pf %ld",
+ __entry->rcuname, __entry->rhp, __entry->func, __entry->qlen)
+);
+
+/*
+ * Tracepoint for the registration of a single RCU callback of the special
+ * kfree() form. The first argument is the RCU type, the second argument
+ * is a pointer to the RCU callback, the third argument is the offset
+ * of the callback within the enclosing RCU-protected data structure,
+ * and the fourth argument is the new RCU callback queue length for the
+ * current CPU.
+ */
+TRACE_EVENT(rcu_kfree_callback,
+
+ TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset,
+ long qlen),
+
+ TP_ARGS(rcuname, rhp, offset, qlen),
+
+ TP_STRUCT__entry(
+ __field(char *, rcuname)
+ __field(void *, rhp)
+ __field(unsigned long, offset)
+ __field(long, qlen)
+ ),
+
+ TP_fast_assign(
+ __entry->rcuname = rcuname;
+ __entry->rhp = rhp;
+ __entry->offset = offset;
+ __entry->qlen = qlen;
+ ),
+
+ TP_printk("%s rhp=%p func=%ld %ld",
+ __entry->rcuname, __entry->rhp, __entry->offset,
+ __entry->qlen)
+);
+
+/*
+ * Tracepoint for marking the beginning rcu_do_batch, performed to start
+ * RCU callback invocation. The first argument is the RCU flavor,
+ * the second is the total number of callbacks (including those that
+ * are not yet ready to be invoked), and the third argument is the
+ * current RCU-callback batch limit.
+ */
+TRACE_EVENT(rcu_batch_start,
+
+ TP_PROTO(char *rcuname, long qlen, int blimit),
+
+ TP_ARGS(rcuname, qlen, blimit),
+
+ TP_STRUCT__entry(
+ __field(char *, rcuname)
+ __field(long, qlen)
+ __field(int, blimit)
+ ),
+
+ TP_fast_assign(
+ __entry->rcuname = rcuname;
+ __entry->qlen = qlen;
+ __entry->blimit = blimit;
+ ),
+
+ TP_printk("%s CBs=%ld bl=%d",
+ __entry->rcuname, __entry->qlen, __entry->blimit)
+);
+
+/*
+ * Tracepoint for the invocation of a single RCU callback function.
+ * The first argument is the type of RCU, and the second argument is
+ * a pointer to the RCU callback itself.
+ */
+TRACE_EVENT(rcu_invoke_callback,
+
+ TP_PROTO(char *rcuname, struct rcu_head *rhp),
+
+ TP_ARGS(rcuname, rhp),
+
+ TP_STRUCT__entry(
+ __field(char *, rcuname)
+ __field(void *, rhp)
+ __field(void *, func)
+ ),
+
+ TP_fast_assign(
+ __entry->rcuname = rcuname;
+ __entry->rhp = rhp;
+ __entry->func = rhp->func;
+ ),
+
+ TP_printk("%s rhp=%p func=%pf",
+ __entry->rcuname, __entry->rhp, __entry->func)
+);
+
+/*
+ * Tracepoint for the invocation of a single RCU callback of the special
+ * kfree() form. The first argument is the RCU flavor, the second
+ * argument is a pointer to the RCU callback, and the third argument
+ * is the offset of the callback within the enclosing RCU-protected
+ * data structure.
+ */
+TRACE_EVENT(rcu_invoke_kfree_callback,
+
+ TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset),
+
+ TP_ARGS(rcuname, rhp, offset),
+
+ TP_STRUCT__entry(
+ __field(char *, rcuname)
+ __field(void *, rhp)
+ __field(unsigned long, offset)
+ ),
+
+ TP_fast_assign(
+ __entry->rcuname = rcuname;
+ __entry->rhp = rhp;
+ __entry->offset = offset;
+ ),
+
+ TP_printk("%s rhp=%p func=%ld",
+ __entry->rcuname, __entry->rhp, __entry->offset)
+);
+
+/*
+ * Tracepoint for exiting rcu_do_batch after RCU callbacks have been
+ * invoked. The first argument is the name of the RCU flavor and
+ * the second argument is number of callbacks actually invoked.
+ */
+TRACE_EVENT(rcu_batch_end,
+
+ TP_PROTO(char *rcuname, int callbacks_invoked),
+
+ TP_ARGS(rcuname, callbacks_invoked),
+
+ TP_STRUCT__entry(
+ __field(char *, rcuname)
+ __field(int, callbacks_invoked)
+ ),
+
+ TP_fast_assign(
+ __entry->rcuname = rcuname;
+ __entry->callbacks_invoked = callbacks_invoked;
+ ),
+
+ TP_printk("%s CBs-invoked=%d",
+ __entry->rcuname, __entry->callbacks_invoked)
+);
+
+#else /* #ifdef CONFIG_RCU_TRACE */
+
+#define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0)
+#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, qsmask) do { } while (0)
+#define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0)
+#define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0)
+#define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0)
+#define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0)
+#define trace_rcu_dyntick(polarity) do { } while (0)
+#define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0)
+#define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0)
+#define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0)
+#define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0)
+#define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0)
+#define trace_rcu_batch_end(rcuname, callbacks_invoked) do { } while (0)
+
+#endif /* #else #ifdef CONFIG_RCU_TRACE */
+
+#endif /* _TRACE_RCU_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index f633478..959ff18 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -100,7 +100,7 @@ static inline long __trace_sched_switch_state(struct task_struct *p)
* For all intents and purposes a preempted task is a running task.
*/
if (task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)
- state = TASK_RUNNING;
+ state = TASK_RUNNING | TASK_STATE_MAX;
#endif
return state;
@@ -137,13 +137,14 @@ TRACE_EVENT(sched_switch,
__entry->next_prio = next->prio;
),
- TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s ==> next_comm=%s next_pid=%d next_prio=%d",
+ TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
- __entry->prev_state ?
- __print_flags(__entry->prev_state, "|",
+ __entry->prev_state & (TASK_STATE_MAX-1) ?
+ __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|",
{ 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
{ 16, "Z" }, { 32, "X" }, { 64, "x" },
{ 128, "W" }) : "R",
+ __entry->prev_state & TASK_STATE_MAX ? "+" : "",
__entry->next_comm, __entry->next_pid, __entry->next_prio)
);
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 533c49f..7697249 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -711,6 +711,9 @@ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call
#undef __perf_count
#define __perf_count(c) __count = (c)
+#undef TP_perf_assign
+#define TP_perf_assign(args...) args
+
#undef DECLARE_EVENT_CLASS
#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
static notrace void \