Diffstat (limited to 'kernel/sched/sched.h')
 -rw-r--r--  kernel/sched/sched.h | 139
 1 file changed, 88 insertions, 51 deletions
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ce39224d..88c85b2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -6,12 +6,21 @@
 #include <linux/spinlock.h>
 #include <linux/stop_machine.h>
 #include <linux/tick.h>
+#include <linux/slab.h>
 
 #include "cpupri.h"
 #include "cpuacct.h"
 
+struct rq;
+
 extern __read_mostly int scheduler_running;
 
+extern unsigned long calc_load_update;
+extern atomic_long_t calc_load_tasks;
+
+extern long calc_load_fold_active(struct rq *this_rq);
+extern void update_cpu_load_active(struct rq *this_rq);
+
 /*
  * Convert user-nice values [ -20 ... 0 ... 19 ]
  * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
@@ -140,10 +149,11 @@ struct task_group {
 	struct cfs_rq **cfs_rq;
 	unsigned long shares;
 
-	atomic_t load_weight;
-	atomic64_t load_avg;
+#ifdef	CONFIG_SMP
+	atomic_long_t load_avg;
 	atomic_t runnable_avg;
 #endif
+#endif
 
 #ifdef CONFIG_RT_GROUP_SCHED
 	struct sched_rt_entity **rt_se;
@@ -261,27 +271,21 @@ struct cfs_rq {
 #endif
 
 #ifdef CONFIG_SMP
-/*
- * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be
- * removed when useful for applications beyond shares distribution (e.g.
- * load-balance).
- */
-#ifdef CONFIG_FAIR_GROUP_SCHED
 	/*
 	 * CFS Load tracking
 	 * Under CFS, load is tracked on a per-entity basis and aggregated up.
 	 * This allows for the description of both thread and group usage (in
 	 * the FAIR_GROUP_SCHED case).
 	 */
-	u64 runnable_load_avg, blocked_load_avg;
-	atomic64_t decay_counter, removed_load;
+	unsigned long runnable_load_avg, blocked_load_avg;
+	atomic64_t decay_counter;
 	u64 last_decay;
-#endif /* CONFIG_FAIR_GROUP_SCHED */
-/* These always depend on CONFIG_FAIR_GROUP_SCHED */
+	atomic_long_t removed_load;
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
+	/* Required to track per-cpu representation of a task_group */
 	u32 tg_runnable_contrib;
-	u64 tg_load_contrib;
-#endif /* CONFIG_FAIR_GROUP_SCHED */
+	unsigned long tg_load_contrib;
 
 	/*
 	 * h_load = weight * f(tg)
@@ -290,6 +294,9 @@ struct cfs_rq {
 	 * this group.
 	 */
 	unsigned long h_load;
+	u64 last_h_load_update;
+	struct sched_entity *h_load_next;
+#endif /* CONFIG_FAIR_GROUP_SCHED */
 #endif /* CONFIG_SMP */
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -353,7 +360,6 @@ struct rt_rq {
 	unsigned long rt_nr_boosted;
 
 	struct rq *rq;
-	struct list_head leaf_rt_rq_list;
 	struct task_group *tg;
 #endif
 };
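
The cfs_rq hunk above splits removed_load out of the shared atomic64_t pair into its own atomic_long_t: CPUs that migrate a task away accumulate the departing load contribution there, and the owning CPU later folds the accumulated sum into its blocked load under its own lock. A minimal userspace sketch of that accumulate-and-fold pattern, with stand-in names and C11 atomics (illustrative only, not the kernel's actual update path):

    /* Remote CPUs add removed load without taking the runqueue lock; the
     * owner drains the accumulator with an atomic exchange and applies it. */
    #include <stdatomic.h>
    #include <stdio.h>

    struct cfs_rq_model {
        unsigned long blocked_load_avg;   /* only the owning CPU touches this */
        atomic_long removed_load;         /* remote CPUs accumulate here */
    };

    /* Remote CPU: a task migrated away; record its contribution and leave. */
    static void remote_remove_load(struct cfs_rq_model *cfs_rq, long load)
    {
        atomic_fetch_add(&cfs_rq->removed_load, load);
    }

    /* Owning CPU: drain the accumulator and fold it into blocked load. */
    static void fold_removed_load(struct cfs_rq_model *cfs_rq)
    {
        unsigned long removed = atomic_exchange(&cfs_rq->removed_load, 0);

        if (removed < cfs_rq->blocked_load_avg)
            cfs_rq->blocked_load_avg -= removed;
        else
            cfs_rq->blocked_load_avg = 0;
    }

    int main(void)
    {
        struct cfs_rq_model rq = { .blocked_load_avg = 2048 };

        atomic_init(&rq.removed_load, 0);
        remote_remove_load(&rq, 512);     /* two tasks pulled to other CPUs */
        remote_remove_load(&rq, 256);
        fold_removed_load(&rq);
        printf("blocked_load_avg = %lu\n", rq.blocked_load_avg);  /* 1280 */
        return 0;
    }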
@@ -403,6 +409,10 @@ struct rq {
 	 * remote CPUs use both these fields when doing load calculation.
 	 */
 	unsigned int nr_running;
+#ifdef CONFIG_NUMA_BALANCING
+	unsigned int nr_numa_running;
+	unsigned int nr_preferred_running;
+#endif
 	#define CPU_LOAD_IDX_MAX 5
 	unsigned long cpu_load[CPU_LOAD_IDX_MAX];
 	unsigned long last_load_update_tick;
@@ -426,9 +436,6 @@ struct rq {
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* list of leaf cfs_rq on this cpu: */
 	struct list_head leaf_cfs_rq_list;
-#ifdef CONFIG_SMP
-	unsigned long h_load_throttle;
-#endif /* CONFIG_SMP */
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -474,6 +481,9 @@ struct rq {
 	u64 age_stamp;
 	u64 idle_stamp;
 	u64 avg_idle;
+
+	/* This is used to determine avg_idle's max value */
+	u64 max_idle_balance_cost;
 #endif
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
@@ -540,6 +550,22 @@ DECLARE_PER_CPU(struct rq, runqueues);
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 #define raw_rq()		(&__raw_get_cpu_var(runqueues))
 
+static inline u64 rq_clock(struct rq *rq)
+{
+	return rq->clock;
+}
+
+static inline u64 rq_clock_task(struct rq *rq)
+{
+	return rq->clock_task;
+}
+
+#ifdef CONFIG_NUMA_BALANCING
+extern void sched_setnuma(struct task_struct *p, int node);
+extern int migrate_task_to(struct task_struct *p, int cpu);
+extern int migrate_swap(struct task_struct *, struct task_struct *);
+#endif /* CONFIG_NUMA_BALANCING */
+
 #ifdef CONFIG_SMP
 
 #define rcu_dereference_check_sched_domain(p) \
@@ -581,8 +607,24 @@ static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
 	return hsd;
 }
 
+static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
+{
+	struct sched_domain *sd;
+
+	for_each_domain(cpu, sd) {
+		if (sd->flags & flag)
+			break;
+	}
+
+	return sd;
+}
+
 DECLARE_PER_CPU(struct sched_domain *, sd_llc);
+DECLARE_PER_CPU(int, sd_llc_size);
 DECLARE_PER_CPU(int, sd_llc_id);
+DECLARE_PER_CPU(struct sched_domain *, sd_numa);
+DECLARE_PER_CPU(struct sched_domain *, sd_busy);
+DECLARE_PER_CPU(struct sched_domain *, sd_asym);
 
 struct sched_group_power {
 	atomic_t ref;
@@ -592,6 +634,7 @@ struct sched_group_power {
 	 */
 	unsigned int power, power_orig;
 	unsigned long next_update;
+	int imbalance; /* XXX unrelated to power but shared group state */
 	/*
 	 * Number of busy cpus in this group.
 	 */
@@ -652,9 +695,9 @@ extern int group_balance_cpu(struct sched_group *sg);
 /*
  * Return the group to which this tasks belongs.
  *
- * We cannot use task_subsys_state() and friends because the cgroup
- * subsystem changes that value before the cgroup_subsys::attach() method
- * is called, therefore we cannot pin it and might observe the wrong value.
+ * We cannot use task_css() and friends because the cgroup subsystem
+ * changes that value before the cgroup_subsys::attach() method is called,
+ * therefore we cannot pin it and might observe the wrong value.
  *
  * The same is true for autogroup's p->signal->autogroup->tg, the autogroup
  * core changes this before calling sched_move_task().
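
lowest_flag_domain() added above walks a CPU's sched-domain hierarchy from the smallest span upward and stops at the first domain carrying the flag, while the existing highest_flag_domain() keeps walking and remembers the last match; the new per-cpu pointers (sd_llc_size, sd_numa, sd_busy, sd_asym) cache the results of lookups like these so hot paths need not re-walk the hierarchy. A self-contained sketch of the two walks over a toy parent-linked hierarchy (stand-in types and flags, not the kernel's definitions):

    #include <stdio.h>

    #define SD_FLAG_X 0x1

    struct sd {
        const char *name;
        int flags;
        struct sd *parent;  /* next-larger domain, NULL at the top */
    };

    /* Stop at the first (smallest-span) domain carrying the flag. */
    static struct sd *lowest_flag_domain(struct sd *sd, int flag)
    {
        for (; sd; sd = sd->parent)
            if (sd->flags & flag)
                return sd;
        return NULL;
    }

    /* Remember the last (largest-span) domain carrying the flag; assumes
     * the flag is set contiguously from the bottom of the hierarchy. */
    static struct sd *highest_flag_domain(struct sd *sd, int flag)
    {
        struct sd *hsd = NULL;

        for (; sd; sd = sd->parent) {
            if (!(sd->flags & flag))
                break;
            hsd = sd;
        }
        return hsd;
    }

    int main(void)
    {
        struct sd numa = { "NUMA", 0, NULL };
        struct sd pkg  = { "PKG",  SD_FLAG_X, &numa };
        struct sd smt  = { "SMT",  SD_FLAG_X, &pkg };

        printf("lowest:  %s\n", lowest_flag_domain(&smt, SD_FLAG_X)->name);  /* SMT */
        printf("highest: %s\n", highest_flag_domain(&smt, SD_FLAG_X)->name); /* PKG */
        return 0;
    }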
@@ -706,6 +749,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
 	 */
 	smp_wmb();
 	task_thread_info(p)->cpu = cpu;
+	p->wake_cpu = cpu;
 #endif
 }
 
@@ -884,24 +928,6 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 #define WF_FORK		0x02		/* child wakeup after fork */
 #define WF_MIGRATED	0x4		/* internal use, task got migrated */
 
-static inline void update_load_add(struct load_weight *lw, unsigned long inc)
-{
-	lw->weight += inc;
-	lw->inv_weight = 0;
-}
-
-static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
-{
-	lw->weight -= dec;
-	lw->inv_weight = 0;
-}
-
-static inline void update_load_set(struct load_weight *lw, unsigned long w)
-{
-	lw->weight = w;
-	lw->inv_weight = 0;
-}
-
 /*
  * To aid in avoiding the subversion of "niceness" due to uneven distribution
  * of tasks with abnormal "nice" values across CPUs the contribution that
@@ -979,7 +1005,7 @@ struct sched_class {
 	void (*put_prev_task) (struct rq *rq, struct task_struct *p);
 
 #ifdef CONFIG_SMP
-	int  (*select_task_rq)(struct task_struct *p, int sd_flag, int flags);
+	int  (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
 	void (*migrate_task_rq)(struct task_struct *p, int next_cpu);
 
 	void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
@@ -1028,17 +1054,8 @@ extern void update_group_power(struct sched_domain *sd, int cpu);
 extern void trigger_load_balance(struct rq *rq, int cpu);
 extern void idle_balance(int this_cpu, struct rq *this_rq);
 
-/*
- * Only depends on SMP, FAIR_GROUP_SCHED may be removed when runnable_avg
- * becomes useful in lb
- */
-#if defined(CONFIG_FAIR_GROUP_SCHED)
 extern void idle_enter_fair(struct rq *this_rq);
 extern void idle_exit_fair(struct rq *this_rq);
-#else
-static inline void idle_enter_fair(struct rq *this_rq) {}
-static inline void idle_exit_fair(struct rq *this_rq) {}
-#endif
 
 #else	/* CONFIG_SMP */
 
@@ -1051,7 +1068,6 @@ static inline void idle_balance(int cpu, struct rq *rq)
 extern void sysrq_sched_debug_show(void);
 extern void sched_init_granularity(void);
 extern void update_max_interval(void);
-extern int update_runtime(struct notifier_block *nfb, unsigned long action, void *hcpu);
 extern void init_sched_rt_class(void);
 extern void init_sched_fair_class(void);
 
@@ -1063,6 +1079,8 @@ extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
 
 extern void update_idle_cpu_load(struct rq *this_rq);
 
+extern void init_task_runnable_average(struct task_struct *p);
+
 #ifdef CONFIG_PARAVIRT
 static inline u64 steal_ticks(u64 steal)
 {
@@ -1233,6 +1251,24 @@ static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
 	lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
 }
 
+static inline void double_lock(spinlock_t *l1, spinlock_t *l2)
+{
+	if (l1 > l2)
+		swap(l1, l2);
+
+	spin_lock(l1);
+	spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
+}
+
+static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2)
+{
+	if (l1 > l2)
+		swap(l1, l2);
+
+	raw_spin_lock(l1);
+	raw_spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
+}
+
 /*
  * double_rq_lock - safely lock two runqueues
  *
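
The double_lock()/double_raw_lock() helpers added above always acquire the lock with the lower address first, so any two CPUs locking the same pair agree on acquisition order and cannot deadlock ABBA-style; spin_lock_nested(..., SINGLE_DEPTH_NESTING) only tells lockdep that taking a second lock of the same class is intentional. The same trick, sketched standalone with pthread mutexes standing in for spinlocks:

    #include <pthread.h>
    #include <stdio.h>

    static void swap_ptrs(pthread_mutex_t **a, pthread_mutex_t **b)
    {
        pthread_mutex_t *tmp = *a;
        *a = *b;
        *b = tmp;
    }

    static void double_lock(pthread_mutex_t *l1, pthread_mutex_t *l2)
    {
        if (l1 > l2)                /* canonicalize: lowest address first */
            swap_ptrs(&l1, &l2);

        pthread_mutex_lock(l1);
        pthread_mutex_lock(l2);     /* the kernel uses spin_lock_nested()
                                     * here purely for lockdep's benefit */
    }

    static void double_unlock(pthread_mutex_t *l1, pthread_mutex_t *l2)
    {
        pthread_mutex_unlock(l1);
        pthread_mutex_unlock(l2);
    }

    int main(void)
    {
        pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER;
        pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER;

        /* Both call orders acquire in the same (address) order. */
        double_lock(&a, &b);
        double_unlock(&a, &b);
        double_lock(&b, &a);
        double_unlock(&b, &a);
        puts("acquisition order is canonical: no ABBA deadlock");
        return 0;
    }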
@@ -1318,7 +1354,8 @@ extern void print_rt_stats(struct seq_file *m, int cpu);
 
 extern void init_cfs_rq(struct cfs_rq *cfs_rq);
 extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq);
-extern void account_cfs_bandwidth_used(int enabled, int was_enabled);
+extern void cfs_bandwidth_usage_inc(void);
+extern void cfs_bandwidth_usage_dec(void);
 
 #ifdef CONFIG_NO_HZ_COMMON
 enum rq_nohz_flag_bits {
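
The last visible hunk replaces the single account_cfs_bandwidth_used(enabled, was_enabled) call with paired inc/dec helpers, turning a set-the-flag interface into a reference count so that concurrent enable/disable of CFS bandwidth control cannot race each other. A userspace analogue of that inc/dec pattern (a plain atomic counter stands in for whatever mechanism the kernel uses underneath):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static atomic_int cfs_bandwidth_users;

    /* The feature counts as "used" while any group holds a reference. */
    static void cfs_bandwidth_usage_inc(void) { atomic_fetch_add(&cfs_bandwidth_users, 1); }
    static void cfs_bandwidth_usage_dec(void) { atomic_fetch_sub(&cfs_bandwidth_users, 1); }
    static bool cfs_bandwidth_used(void)      { return atomic_load(&cfs_bandwidth_users) > 0; }

    int main(void)
    {
        cfs_bandwidth_usage_inc();                   /* first limited group */
        cfs_bandwidth_usage_inc();                   /* a second one */
        cfs_bandwidth_usage_dec();                   /* one goes away */
        printf("used: %d\n", cfs_bandwidth_used()); /* 1: still in use */
        cfs_bandwidth_usage_dec();
        printf("used: %d\n", cfs_bandwidth_used()); /* 0 */
        return 0;
    }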