Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (64 commits)
  sched: Fix sched::sched_stat_wait tracepoint field
  sched: Disable NEW_FAIR_SLEEPERS for now
  sched: Keep kthreads at default priority
  sched: Re-tune the scheduler latency defaults to decrease worst-case latencies
  sched: Turn off child_runs_first
  sched: Ensure that a child can't gain time over it's parent after fork()
  sched: enable SD_WAKE_IDLE
  sched: Deal with low-load in wake_affine()
  sched: Remove short cut from select_task_rq_fair()
  sched: Turn on SD_BALANCE_NEWIDLE
  sched: Clean up topology.h
  sched: Fix dynamic power-balancing crash
  sched: Remove reciprocal for cpu_power
  sched: Try to deal with low capacity, fix update_sd_power_savings_stats()
  sched: Try to deal with low capacity
  sched: Scale down cpu_power due to RT tasks
  sched: Implement dynamic cpu_power
  sched: Add smt_gain
  sched: Update the cpu_power sum during load-balance
  sched: Add SD_PREFER_SIBLING
  ...
This commit is contained in: commit 774a694f8c
@@ -129,25 +129,34 @@ extern unsigned long node_remap_size[];
 #endif

 /* sched_domains SD_NODE_INIT for NUMA machines */
-#define SD_NODE_INIT (struct sched_domain) {		\
-	.min_interval		= 8,			\
-	.max_interval		= 32,			\
-	.busy_factor		= 32,			\
-	.imbalance_pct		= 125,			\
-	.cache_nice_tries	= SD_CACHE_NICE_TRIES,	\
-	.busy_idx		= 3,			\
-	.idle_idx		= SD_IDLE_IDX,		\
-	.newidle_idx		= SD_NEWIDLE_IDX,	\
-	.wake_idx		= 1,			\
-	.forkexec_idx		= SD_FORKEXEC_IDX,	\
-	.flags			= SD_LOAD_BALANCE	\
-				| SD_BALANCE_EXEC	\
-				| SD_BALANCE_FORK	\
-				| SD_WAKE_AFFINE	\
-				| SD_WAKE_BALANCE	\
-				| SD_SERIALIZE,		\
-	.last_balance		= jiffies,		\
-	.balance_interval	= 1,			\
+#define SD_NODE_INIT (struct sched_domain) {		\
+	.min_interval		= 8,			\
+	.max_interval		= 32,			\
+	.busy_factor		= 32,			\
+	.imbalance_pct		= 125,			\
+	.cache_nice_tries	= SD_CACHE_NICE_TRIES,	\
+	.busy_idx		= 3,			\
+	.idle_idx		= SD_IDLE_IDX,		\
+	.newidle_idx		= SD_NEWIDLE_IDX,	\
+	.wake_idx		= 1,			\
+	.forkexec_idx		= SD_FORKEXEC_IDX,	\
+							\
+	.flags			= 1*SD_LOAD_BALANCE	\
+				| 1*SD_BALANCE_NEWIDLE	\
+				| 1*SD_BALANCE_EXEC	\
+				| 1*SD_BALANCE_FORK	\
+				| 0*SD_WAKE_IDLE	\
+				| 1*SD_WAKE_AFFINE	\
+				| 1*SD_WAKE_BALANCE	\
+				| 0*SD_SHARE_CPUPOWER	\
+				| 0*SD_POWERSAVINGS_BALANCE \
+				| 0*SD_SHARE_PKG_RESOURCES \
+				| 1*SD_SERIALIZE	\
+				| 1*SD_WAKE_IDLE_FAR	\
+				| 0*SD_PREFER_SIBLING	\
+				,			\
+	.last_balance		= jiffies,		\
+	.balance_interval	= 1,			\
 }

 #ifdef CONFIG_X86_64_ACPI_NUMA
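A note on the new style above: the rewritten SD_*_INIT initializers list every available flag once, multiplied by 0 or 1, so the full policy is readable at a glance and toggling a flag is a one-character change. A minimal user-space sketch (flag values mirror the hex defines further down; the program itself is illustrative only) showing this compiles to the same constant:

    #include <stdio.h>

    #define SD_LOAD_BALANCE    0x0001
    #define SD_BALANCE_NEWIDLE 0x0002
    #define SD_SERIALIZE       0x0400

    int main(void)
    {
            unsigned int flags = 1*SD_LOAD_BALANCE
                               | 1*SD_BALANCE_NEWIDLE
                               | 0*SD_SERIALIZE;   /* off, but still documented */

            printf("flags = %#06x\n", flags);      /* prints 0x0003 */
            return 0;
    }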
@@ -32,6 +32,7 @@
 #include <linux/swap.h>
 #include <linux/bootmem.h>
 #include <linux/fs_struct.h>
+#include <linux/hardirq.h>
 #include "internal.h"

 int sysctl_vfs_cache_pressure __read_mostly = 100;
@@ -768,7 +768,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
 	 * give it the opportunity to lock the file.
 	 */
 	if (found)
-		cond_resched_bkl();
+		cond_resched();

 find_conflict:
 	for_each_lock(inode, before) {
@@ -64,6 +64,12 @@
 #define HARDIRQ_OFFSET	(1UL << HARDIRQ_SHIFT)
 #define NMI_OFFSET	(1UL << NMI_SHIFT)

+#ifndef PREEMPT_ACTIVE
+#define PREEMPT_ACTIVE_BITS	1
+#define PREEMPT_ACTIVE_SHIFT	(NMI_SHIFT + NMI_BITS)
+#define PREEMPT_ACTIVE	(__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT)
+#endif
+
 #if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS))
 #error PREEMPT_ACTIVE is too low!
 #endif
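The default places PREEMPT_ACTIVE one bit above the NMI field of preempt_count(); the #error catches arch overrides that would collide with the lower fields. A standalone sketch of the arithmetic, assuming the generic 8/8/10/1 bit split used by linux/hardirq.h in this era (arches may override any of these):

    #include <stdio.h>

    #define PREEMPT_BITS    8
    #define SOFTIRQ_BITS    8
    #define HARDIRQ_BITS    10
    #define NMI_BITS        1

    #define PREEMPT_SHIFT   0
    #define SOFTIRQ_SHIFT   (PREEMPT_SHIFT + PREEMPT_BITS)
    #define HARDIRQ_SHIFT   (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
    #define NMI_SHIFT       (HARDIRQ_SHIFT + HARDIRQ_BITS)

    #define __IRQ_MASK(x)   ((1UL << (x)) - 1)

    #define PREEMPT_ACTIVE_BITS  1
    #define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS)
    #define PREEMPT_ACTIVE  (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT)

    int main(void)
    {
            /* with the generic layout this is bit 27, i.e. 0x8000000 */
            printf("PREEMPT_ACTIVE = %#lx (bit %d)\n",
                   PREEMPT_ACTIVE, PREEMPT_ACTIVE_SHIFT);
            return 0;
    }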
@@ -125,7 +125,7 @@ extern int _cond_resched(void);
 #endif

 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
-  void __might_sleep(char *file, int line);
+  void __might_sleep(char *file, int line, int preempt_offset);
 /**
  * might_sleep - annotation for functions that can sleep
  *
@@ -137,8 +137,9 @@ extern int _cond_resched(void);
  * supposed to.
  */
 # define might_sleep() \
-	do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0)
+	do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
 #else
+  static inline void __might_sleep(char *file, int line, int preempt_offset) { }
 # define might_sleep() do { might_resched(); } while (0)
 #endif
@@ -38,6 +38,8 @@
 #define SCHED_BATCH		3
 /* SCHED_ISO: reserved but not implemented yet */
 #define SCHED_IDLE		5
+/* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
+#define SCHED_RESET_ON_FORK     0x40000000

 #ifdef __KERNEL__
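From user space, SCHED_RESET_ON_FORK is ORed into the policy argument of sched_setscheduler(). A minimal sketch, assuming a kernel with this merge and CAP_SYS_NICE; older libc headers may lack the define, so it falls back to the value from the hunk above:

    #include <sched.h>
    #include <stdio.h>
    #include <unistd.h>

    #ifndef SCHED_RESET_ON_FORK
    #define SCHED_RESET_ON_FORK 0x40000000  /* from the hunk above */
    #endif

    int main(void)
    {
            struct sched_param sp = { .sched_priority = 10 };

            /* this task becomes SCHED_FIFO, but its children revert */
            if (sched_setscheduler(0, SCHED_FIFO | SCHED_RESET_ON_FORK, &sp)) {
                    perror("sched_setscheduler");
                    return 1;
            }
            if (fork() == 0) {
                    /* the child was reverted to SCHED_OTHER (policy 0) */
                    printf("child policy: %d\n", sched_getscheduler(0));
                    _exit(0);
            }
            return 0;
    }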
@@ -796,18 +798,19 @@ enum cpu_idle_type {
 #define SCHED_LOAD_SCALE_FUZZ	SCHED_LOAD_SCALE

 #ifdef CONFIG_SMP
-#define SD_LOAD_BALANCE		1	/* Do load balancing on this domain. */
-#define SD_BALANCE_NEWIDLE	2	/* Balance when about to become idle */
-#define SD_BALANCE_EXEC		4	/* Balance on exec */
-#define SD_BALANCE_FORK		8	/* Balance on fork, clone */
-#define SD_WAKE_IDLE		16	/* Wake to idle CPU on task wakeup */
-#define SD_WAKE_AFFINE		32	/* Wake task to waking CPU */
-#define SD_WAKE_BALANCE		64	/* Perform balancing at task wakeup */
-#define SD_SHARE_CPUPOWER	128	/* Domain members share cpu power */
-#define SD_POWERSAVINGS_BALANCE	256	/* Balance for power savings */
-#define SD_SHARE_PKG_RESOURCES	512	/* Domain members share cpu pkg resources */
-#define SD_SERIALIZE		1024	/* Only a single load balancing instance */
-#define SD_WAKE_IDLE_FAR	2048	/* Gain latency sacrificing cache hit */
+#define SD_LOAD_BALANCE		0x0001	/* Do load balancing on this domain. */
+#define SD_BALANCE_NEWIDLE	0x0002	/* Balance when about to become idle */
+#define SD_BALANCE_EXEC		0x0004	/* Balance on exec */
+#define SD_BALANCE_FORK		0x0008	/* Balance on fork, clone */
+#define SD_WAKE_IDLE		0x0010	/* Wake to idle CPU on task wakeup */
+#define SD_WAKE_AFFINE		0x0020	/* Wake task to waking CPU */
+#define SD_WAKE_BALANCE		0x0040	/* Perform balancing at task wakeup */
+#define SD_SHARE_CPUPOWER	0x0080	/* Domain members share cpu power */
+#define SD_POWERSAVINGS_BALANCE	0x0100	/* Balance for power savings */
+#define SD_SHARE_PKG_RESOURCES	0x0200	/* Domain members share cpu pkg resources */
+#define SD_SERIALIZE		0x0400	/* Only a single load balancing instance */
+#define SD_WAKE_IDLE_FAR	0x0800	/* Gain latency sacrificing cache hit */
+#define SD_PREFER_SIBLING	0x1000	/* Prefer to place tasks in a sibling domain */

 enum powersavings_balance_level {
 	POWERSAVINGS_BALANCE_NONE = 0,	/* No power saving load balance */
@@ -827,7 +830,7 @@ static inline int sd_balance_for_mc_power(void)
 	if (sched_smt_power_savings)
 		return SD_POWERSAVINGS_BALANCE;

-	return 0;
+	return SD_PREFER_SIBLING;
 }

 static inline int sd_balance_for_package_power(void)
@@ -835,7 +838,7 @@ static inline int sd_balance_for_package_power(void)
 	if (sched_mc_power_savings | sched_smt_power_savings)
 		return SD_POWERSAVINGS_BALANCE;

-	return 0;
+	return SD_PREFER_SIBLING;
 }

 /*
@@ -857,15 +860,9 @@ struct sched_group {

 	/*
 	 * CPU power of this group, SCHED_LOAD_SCALE being max power for a
-	 * single CPU. This is read only (except for setup, hotplug CPU).
-	 * Note : Never change cpu_power without recompute its reciprocal
+	 * single CPU.
 	 */
-	unsigned int __cpu_power;
-	/*
-	 * reciprocal value of cpu_power to avoid expensive divides
-	 * (see include/linux/reciprocal_div.h)
-	 */
-	u32 reciprocal_cpu_power;
+	unsigned int cpu_power;

 	/*
 	 * The CPUs this group covers.
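The removed reciprocal_cpu_power field cached a precomputed multiplier so that dividing by cpu_power avoided a hardware divide (see include/linux/reciprocal_div.h). A standalone model of that trick, for reference now that plain divides are used; the sample value is illustrative:

    #include <stdint.h>
    #include <stdio.h>

    /* same formula as that era's reciprocal_value():
     * R = (2^32 + k - 1) / k, then a/k ~= (a * R) >> 32 for 32-bit a */
    static uint32_t reciprocal_value(uint32_t k)
    {
            return (uint32_t)(((1ULL << 32) + (k - 1)) / k);
    }

    static uint32_t reciprocal_divide(uint32_t a, uint32_t r)
    {
            return (uint32_t)(((uint64_t)a * r) >> 32);
    }

    int main(void)
    {
            uint32_t power = 1178;          /* e.g. an SMT group's cpu_power */
            uint32_t r = reciprocal_value(power);

            printf("10240/%u: div=%u reciprocal=%u\n",
                   power, 10240 / power, reciprocal_divide(10240, r));
            return 0;
    }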
@@ -918,6 +915,7 @@ struct sched_domain {
 	unsigned int newidle_idx;
 	unsigned int wake_idx;
 	unsigned int forkexec_idx;
+	unsigned int smt_gain;
 	int flags;			/* See SD_* */
 	enum sched_domain_level level;
@@ -1045,7 +1043,6 @@ struct sched_class {
 			struct rq *busiest, struct sched_domain *sd,
 			enum cpu_idle_type idle);
 	void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
-	int (*needs_post_schedule) (struct rq *this_rq);
 	void (*post_schedule) (struct rq *this_rq);
 	void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
@@ -1110,6 +1107,8 @@ struct sched_entity {
 	u64			wait_max;
 	u64			wait_count;
 	u64			wait_sum;
+	u64			iowait_count;
+	u64			iowait_sum;

 	u64			sleep_start;
 	u64			sleep_max;
@@ -1234,11 +1233,19 @@ struct task_struct {
 	unsigned did_exec:1;
 	unsigned in_execve:1;	/* Tell the LSMs that the process is doing an
 				 * execve */
+	unsigned in_iowait:1;
+
+
+	/* Revert to default priority/policy when forking */
+	unsigned sched_reset_on_fork:1;
+
 	pid_t pid;
 	pid_t tgid;

 #ifdef CONFIG_CC_STACKPROTECTOR
 	/* Canary value for the -fstack-protector gcc feature */
 	unsigned long stack_canary;
 #endif

 	/*
 	 * pointers to (original) parent process, youngest child, younger sibling,
@@ -1840,11 +1847,12 @@ extern unsigned int sysctl_sched_min_granularity;
 extern unsigned int sysctl_sched_wakeup_granularity;
 extern unsigned int sysctl_sched_shares_ratelimit;
 extern unsigned int sysctl_sched_shares_thresh;
-#ifdef CONFIG_SCHED_DEBUG
 extern unsigned int sysctl_sched_child_runs_first;
+#ifdef CONFIG_SCHED_DEBUG
 extern unsigned int sysctl_sched_features;
 extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
+extern unsigned int sysctl_sched_time_avg;
 extern unsigned int sysctl_timer_migration;

 int sched_nr_latency_handler(struct ctl_table *table, int write,
@@ -2308,23 +2316,31 @@ static inline int need_resched(void)
  * cond_resched_softirq() will enable bhs before scheduling.
  */
 extern int _cond_resched(void);
-#ifdef CONFIG_PREEMPT_BKL
-static inline int cond_resched(void)
-{
-	return 0;
-}
+
+#define cond_resched() ({			\
+	__might_sleep(__FILE__, __LINE__, 0);	\
+	_cond_resched();			\
+})
+
+extern int __cond_resched_lock(spinlock_t *lock);
+
+#ifdef CONFIG_PREEMPT
+#define PREEMPT_LOCK_OFFSET	PREEMPT_OFFSET
 #else
-static inline int cond_resched(void)
-{
-	return _cond_resched();
-}
+#define PREEMPT_LOCK_OFFSET	0
 #endif
-extern int cond_resched_lock(spinlock_t * lock);
-extern int cond_resched_softirq(void);
-static inline int cond_resched_bkl(void)
-{
-	return _cond_resched();
-}
+
+#define cond_resched_lock(lock) ({				\
+	__might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);	\
+	__cond_resched_lock(lock);				\
+})
+
+extern int __cond_resched_softirq(void);
+
+#define cond_resched_softirq() ({				\
+	__might_sleep(__FILE__, __LINE__, SOFTIRQ_OFFSET);	\
+	__cond_resched_softirq();				\
+})

 /*
  * Does a critical section need to be broken due to another
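cond_resched() and friends become statement-expression macros, so every call site gets a might_sleep check with the right preempt_offset pulled up into the macro. A user-space sketch of the GNU C `({ ... })` pattern with the kernel internals stubbed out (stub names are made up for illustration):

    #include <stdio.h>

    /* stand-ins for the kernel's __might_sleep()/_cond_resched() */
    static void might_sleep_stub(const char *file, int line, int preempt_offset)
    {
            printf("might_sleep check at %s:%d (offset %d)\n",
                   file, line, preempt_offset);
    }

    static int cond_resched_stub(void)
    {
            return 0;       /* "did not reschedule" */
    }

    /* same shape as the new cond_resched(): a statement expression,
     * so it still yields a value like the old inline function did */
    #define cond_resched_model() ({                         \
            might_sleep_stub(__FILE__, __LINE__, 0);        \
            cond_resched_stub();                            \
    })

    int main(void)
    {
            if (!cond_resched_model())
                    puts("no resched needed");
            return 0;
    }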
@@ -85,20 +85,29 @@ int arch_update_cpu_topology(void);
 #define ARCH_HAS_SCHED_WAKE_IDLE
 /* Common values for SMT siblings */
 #ifndef SD_SIBLING_INIT
-#define SD_SIBLING_INIT (struct sched_domain) {	\
-	.min_interval		= 1,			\
-	.max_interval		= 2,			\
-	.busy_factor		= 64,			\
-	.imbalance_pct		= 110,			\
-	.flags			= SD_LOAD_BALANCE	\
-				| SD_BALANCE_NEWIDLE	\
-				| SD_BALANCE_FORK	\
-				| SD_BALANCE_EXEC	\
-				| SD_WAKE_AFFINE	\
-				| SD_WAKE_BALANCE	\
-				| SD_SHARE_CPUPOWER,	\
-	.last_balance		= jiffies,		\
-	.balance_interval	= 1,			\
+#define SD_SIBLING_INIT (struct sched_domain) {	\
+	.min_interval		= 1,			\
+	.max_interval		= 2,			\
+	.busy_factor		= 64,			\
+	.imbalance_pct		= 110,			\
+							\
+	.flags			= 1*SD_LOAD_BALANCE	\
+				| 1*SD_BALANCE_NEWIDLE	\
+				| 1*SD_BALANCE_EXEC	\
+				| 1*SD_BALANCE_FORK	\
+				| 0*SD_WAKE_IDLE	\
+				| 1*SD_WAKE_AFFINE	\
+				| 1*SD_WAKE_BALANCE	\
+				| 1*SD_SHARE_CPUPOWER	\
+				| 0*SD_POWERSAVINGS_BALANCE \
+				| 0*SD_SHARE_PKG_RESOURCES \
+				| 0*SD_SERIALIZE	\
+				| 0*SD_WAKE_IDLE_FAR	\
+				| 0*SD_PREFER_SIBLING	\
+				,			\
+	.last_balance		= jiffies,		\
+	.balance_interval	= 1,			\
+	.smt_gain		= 1178,	/* 15% */	\
 }
 #endif
 #endif /* CONFIG_SCHED_SMT */
@@ -106,69 +115,94 @@ int arch_update_cpu_topology(void);
 #ifdef CONFIG_SCHED_MC
 /* Common values for MC siblings. for now mostly derived from SD_CPU_INIT */
 #ifndef SD_MC_INIT
-#define SD_MC_INIT (struct sched_domain) {		\
-	.min_interval		= 1,			\
-	.max_interval		= 4,			\
-	.busy_factor		= 64,			\
-	.imbalance_pct		= 125,			\
-	.cache_nice_tries	= 1,			\
-	.busy_idx		= 2,			\
-	.wake_idx		= 1,			\
-	.forkexec_idx		= 1,			\
-	.flags			= SD_LOAD_BALANCE	\
-				| SD_BALANCE_FORK	\
-				| SD_BALANCE_EXEC	\
-				| SD_WAKE_AFFINE	\
-				| SD_WAKE_BALANCE	\
-				| SD_SHARE_PKG_RESOURCES\
-				| sd_balance_for_mc_power()\
-				| sd_power_saving_flags(),\
-	.last_balance		= jiffies,		\
-	.balance_interval	= 1,			\
+#define SD_MC_INIT (struct sched_domain) {		\
+	.min_interval		= 1,			\
+	.max_interval		= 4,			\
+	.busy_factor		= 64,			\
+	.imbalance_pct		= 125,			\
+	.cache_nice_tries	= 1,			\
+	.busy_idx		= 2,			\
+	.wake_idx		= 1,			\
+	.forkexec_idx		= 1,			\
+							\
+	.flags			= 1*SD_LOAD_BALANCE	\
+				| 1*SD_BALANCE_NEWIDLE	\
+				| 1*SD_BALANCE_EXEC	\
+				| 1*SD_BALANCE_FORK	\
+				| 1*SD_WAKE_IDLE	\
+				| 1*SD_WAKE_AFFINE	\
+				| 1*SD_WAKE_BALANCE	\
+				| 0*SD_SHARE_CPUPOWER	\
+				| 1*SD_SHARE_PKG_RESOURCES \
+				| 0*SD_SERIALIZE	\
+				| 0*SD_WAKE_IDLE_FAR	\
+				| sd_balance_for_mc_power() \
+				| sd_power_saving_flags() \
+				,			\
+	.last_balance		= jiffies,		\
+	.balance_interval	= 1,			\
 }
 #endif
 #endif /* CONFIG_SCHED_MC */

 /* Common values for CPUs */
 #ifndef SD_CPU_INIT
-#define SD_CPU_INIT (struct sched_domain) {		\
-	.min_interval		= 1,			\
-	.max_interval		= 4,			\
-	.busy_factor		= 64,			\
-	.imbalance_pct		= 125,			\
-	.cache_nice_tries	= 1,			\
-	.busy_idx		= 2,			\
-	.idle_idx		= 1,			\
-	.newidle_idx		= 2,			\
-	.wake_idx		= 1,			\
-	.forkexec_idx		= 1,			\
-	.flags			= SD_LOAD_BALANCE	\
-				| SD_BALANCE_EXEC	\
-				| SD_BALANCE_FORK	\
-				| SD_WAKE_AFFINE	\
-				| SD_WAKE_BALANCE	\
-				| sd_balance_for_package_power()\
-				| sd_power_saving_flags(),\
-	.last_balance		= jiffies,		\
-	.balance_interval	= 1,			\
+#define SD_CPU_INIT (struct sched_domain) {		\
+	.min_interval		= 1,			\
+	.max_interval		= 4,			\
+	.busy_factor		= 64,			\
+	.imbalance_pct		= 125,			\
+	.cache_nice_tries	= 1,			\
+	.busy_idx		= 2,			\
+	.idle_idx		= 1,			\
+	.newidle_idx		= 2,			\
+	.wake_idx		= 1,			\
+	.forkexec_idx		= 1,			\
+							\
+	.flags			= 1*SD_LOAD_BALANCE	\
+				| 1*SD_BALANCE_NEWIDLE	\
+				| 1*SD_BALANCE_EXEC	\
+				| 1*SD_BALANCE_FORK	\
+				| 1*SD_WAKE_IDLE	\
+				| 0*SD_WAKE_AFFINE	\
+				| 1*SD_WAKE_BALANCE	\
+				| 0*SD_SHARE_CPUPOWER	\
+				| 0*SD_SHARE_PKG_RESOURCES \
+				| 0*SD_SERIALIZE	\
+				| 0*SD_WAKE_IDLE_FAR	\
+				| sd_balance_for_package_power() \
+				| sd_power_saving_flags() \
+				,			\
+	.last_balance		= jiffies,		\
+	.balance_interval	= 1,			\
 }
 #endif

 /* sched_domains SD_ALLNODES_INIT for NUMA machines */
-#define SD_ALLNODES_INIT (struct sched_domain) {	\
-	.min_interval		= 64,			\
-	.max_interval		= 64*num_online_cpus(),	\
-	.busy_factor		= 128,			\
-	.imbalance_pct		= 133,			\
-	.cache_nice_tries	= 1,			\
-	.busy_idx		= 3,			\
-	.idle_idx		= 3,			\
-	.flags			= SD_LOAD_BALANCE	\
-				| SD_BALANCE_NEWIDLE	\
-				| SD_WAKE_AFFINE	\
-				| SD_SERIALIZE,		\
-	.last_balance		= jiffies,		\
-	.balance_interval	= 64,			\
+#define SD_ALLNODES_INIT (struct sched_domain) {	\
+	.min_interval		= 64,			\
+	.max_interval		= 64*num_online_cpus(),	\
+	.busy_factor		= 128,			\
+	.imbalance_pct		= 133,			\
+	.cache_nice_tries	= 1,			\
+	.busy_idx		= 3,			\
+	.idle_idx		= 3,			\
+	.flags			= 1*SD_LOAD_BALANCE	\
+				| 1*SD_BALANCE_NEWIDLE	\
+				| 0*SD_BALANCE_EXEC	\
+				| 0*SD_BALANCE_FORK	\
+				| 0*SD_WAKE_IDLE	\
+				| 1*SD_WAKE_AFFINE	\
+				| 0*SD_WAKE_BALANCE	\
+				| 0*SD_SHARE_CPUPOWER	\
+				| 0*SD_POWERSAVINGS_BALANCE \
+				| 0*SD_SHARE_PKG_RESOURCES \
+				| 1*SD_SERIALIZE	\
+				| 1*SD_WAKE_IDLE_FAR	\
+				| 0*SD_PREFER_SIBLING	\
+				,			\
+	.last_balance		= jiffies,		\
+	.balance_interval	= 64,			\
 }

 #ifdef CONFIG_NUMA
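The `.smt_gain = 1178` in SD_SIBLING_INIT is 15% over SCHED_LOAD_SCALE (1024): a physical core with SMT siblings is modeled as worth ~1.15 CPUs of capacity, split among the siblings. The arithmetic as a standalone check:

    #include <stdio.h>

    #define SCHED_LOAD_SCALE 1024UL   /* one full CPU of capacity */

    int main(void)
    {
            unsigned long smt_gain = 1178;  /* 1024 * 1.15, rounded */

            printf("gain over one CPU: %.1f%%\n",
                   100.0 * (smt_gain - SCHED_LOAD_SCALE) / SCHED_LOAD_SCALE);
            /* with two siblings, each ends up with smt_gain / 2 = 589 */
            printf("per-sibling power (2 threads): %lu\n", smt_gain / 2);
            return 0;
    }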
@@ -340,6 +340,101 @@ TRACE_EVENT(sched_signal_send,
 		  __entry->sig, __entry->comm, __entry->pid)
 );

+/*
+ * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
+ *     adding sched_stat support to SCHED_FIFO/RR would be welcome.
+ */
+
+/*
+ * Tracepoint for accounting wait time (time the task is runnable
+ * but not actually running due to scheduler contention).
+ */
+TRACE_EVENT(sched_stat_wait,
+
+	TP_PROTO(struct task_struct *tsk, u64 delay),
+
+	TP_ARGS(tsk, delay),
+
+	TP_STRUCT__entry(
+		__array( char,	comm,	TASK_COMM_LEN	)
+		__field( pid_t,	pid			)
+		__field( u64,	delay			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+		__entry->pid	= tsk->pid;
+		__entry->delay	= delay;
+	)
+	TP_perf_assign(
+		__perf_count(delay);
+	),
+
+	TP_printk("task: %s:%d wait: %Lu [ns]",
+			__entry->comm, __entry->pid,
+			(unsigned long long)__entry->delay)
+);
+
+/*
+ * Tracepoint for accounting sleep time (time the task is not runnable,
+ * including iowait, see below).
+ */
+TRACE_EVENT(sched_stat_sleep,
+
+	TP_PROTO(struct task_struct *tsk, u64 delay),
+
+	TP_ARGS(tsk, delay),
+
+	TP_STRUCT__entry(
+		__array( char,	comm,	TASK_COMM_LEN	)
+		__field( pid_t,	pid			)
+		__field( u64,	delay			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+		__entry->pid	= tsk->pid;
+		__entry->delay	= delay;
+	)
+	TP_perf_assign(
+		__perf_count(delay);
+	),
+
+	TP_printk("task: %s:%d sleep: %Lu [ns]",
+			__entry->comm, __entry->pid,
+			(unsigned long long)__entry->delay)
+);
+
+/*
+ * Tracepoint for accounting iowait time (time the task is not runnable
+ * due to waiting on IO to complete).
+ */
+TRACE_EVENT(sched_stat_iowait,
+
+	TP_PROTO(struct task_struct *tsk, u64 delay),
+
+	TP_ARGS(tsk, delay),
+
+	TP_STRUCT__entry(
+		__array( char,	comm,	TASK_COMM_LEN	)
+		__field( pid_t,	pid			)
+		__field( u64,	delay			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+		__entry->pid	= tsk->pid;
+		__entry->delay	= delay;
+	)
+	TP_perf_assign(
+		__perf_count(delay);
+	),
+
+	TP_printk("task: %s:%d iowait: %Lu [ns]",
+			__entry->comm, __entry->pid,
+			(unsigned long long)__entry->delay)
+);
+
 #endif /* _TRACE_SCHED_H */

 /* This part must be outside protection */
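Once merged, the new sched_stat tracepoints appear under the tracing debugfs tree. A minimal sketch that enables sched_stat_wait and streams the formatted output (paths assume debugfs mounted at /sys/kernel/debug; run as root):

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            const char *ev =
                "/sys/kernel/debug/tracing/events/sched/sched_stat_wait/enable";
            char buf[4096];
            ssize_t n;
            int fd;

            fd = open(ev, O_WRONLY);
            if (fd < 0 || write(fd, "1", 1) != 1)
                    return perror("enable"), 1;
            close(fd);

            fd = open("/sys/kernel/debug/tracing/trace_pipe", O_RDONLY);
            if (fd < 0)
                    return perror("trace_pipe"), 1;
            /* per TP_printk above, lines contain: "task: <comm>:<pid> wait: <ns> [ns]" */
            while ((n = read(fd, buf, sizeof(buf))) > 0)
                    write(STDOUT_FILENO, buf, n);
            return 0;
    }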
@@ -631,7 +631,6 @@ asmlinkage void __init start_kernel(void)
 	softirq_init();
 	timekeeping_init();
 	time_init();
-	sched_clock_init();
 	profile_init();
 	if (!irqs_disabled())
 		printk(KERN_CRIT "start_kernel(): bug: interrupts were "
@@ -682,6 +681,7 @@ asmlinkage void __init start_kernel(void)
 	numa_policy_init();
 	if (late_time_init)
 		late_time_init();
+	sched_clock_init();
 	calibrate_delay();
 	pidmap_init();
 	anon_vma_init();
@@ -16,8 +16,6 @@
 #include <linux/mutex.h>
 #include <trace/events/sched.h>

-#define KTHREAD_NICE_LEVEL (-5)
-
 static DEFINE_SPINLOCK(kthread_create_lock);
 static LIST_HEAD(kthread_create_list);
 struct task_struct *kthreadd_task;
@@ -145,7 +143,6 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
 		 * The kernel thread should not inherit these properties.
 		 */
 		sched_setscheduler_nocheck(create.result, SCHED_NORMAL, &param);
-		set_user_nice(create.result, KTHREAD_NICE_LEVEL);
 		set_cpus_allowed_ptr(create.result, cpu_all_mask);
 	}
 	return create.result;
@@ -221,7 +218,6 @@ int kthreadd(void *unused)
 	/* Setup a clean context for our children to inherit. */
 	set_task_comm(tsk, "kthreadd");
 	ignore_signals(tsk);
-	set_user_nice(tsk, KTHREAD_NICE_LEVEL);
 	set_cpus_allowed_ptr(tsk, cpu_all_mask);
 	set_mems_allowed(node_possible_map);
kernel/sched.c (1111 changed lines): file diff suppressed because it is too large.
@@ -127,21 +127,11 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)

 	/*
 	 * If the cpu was currently mapped to a different value, we
-	 * first need to unmap the old value
+	 * need to map it to the new value then remove the old value.
+	 * Note, we must add the new value first, otherwise we risk the
+	 * cpu being cleared from pri_active, and this cpu could be
+	 * missed for a push or pull.
 	 */
-	if (likely(oldpri != CPUPRI_INVALID)) {
-		struct cpupri_vec *vec  = &cp->pri_to_cpu[oldpri];
-
-		spin_lock_irqsave(&vec->lock, flags);
-
-		vec->count--;
-		if (!vec->count)
-			clear_bit(oldpri, cp->pri_active);
-		cpumask_clear_cpu(cpu, vec->mask);
-
-		spin_unlock_irqrestore(&vec->lock, flags);
-	}
-
 	if (likely(newpri != CPUPRI_INVALID)) {
 		struct cpupri_vec *vec = &cp->pri_to_cpu[newpri];

@@ -154,6 +144,18 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)

 		spin_unlock_irqrestore(&vec->lock, flags);
 	}
+	if (likely(oldpri != CPUPRI_INVALID)) {
+		struct cpupri_vec *vec  = &cp->pri_to_cpu[oldpri];
+
+		spin_lock_irqsave(&vec->lock, flags);
+
+		vec->count--;
+		if (!vec->count)
+			clear_bit(oldpri, cp->pri_active);
+		cpumask_clear_cpu(cpu, vec->mask);
+
+		spin_unlock_irqrestore(&vec->lock, flags);
+	}

 	*currpri = newpri;
 }
@@ -409,6 +409,8 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 	PN(se.wait_max);
 	PN(se.wait_sum);
 	P(se.wait_count);
+	PN(se.iowait_sum);
+	P(se.iowait_count);
 	P(sched_info.bkl_count);
 	P(se.nr_migrations);
 	P(se.nr_migrations_cold);
@@ -479,6 +481,8 @@ void proc_sched_set_task(struct task_struct *p)
 	p->se.wait_max				= 0;
 	p->se.wait_sum				= 0;
 	p->se.wait_count			= 0;
+	p->se.iowait_sum			= 0;
+	p->se.iowait_count			= 0;
 	p->se.sleep_max				= 0;
 	p->se.sum_sleep_runtime			= 0;
 	p->se.block_max				= 0;
@@ -24,7 +24,7 @@

 /*
  * Targeted preemption latency for CPU-bound tasks:
- * (default: 20ms * (1 + ilog(ncpus)), units: nanoseconds)
+ * (default: 5ms * (1 + ilog(ncpus)), units: nanoseconds)
  *
  * NOTE: this latency value is not the same as the concept of
  * 'timeslice length' - timeslices in CFS are of variable length
@@ -34,13 +34,13 @@
  * (to see the precise effective timeslice length of your workload,
  *  run vmstat and monitor the context-switches (cs) field)
  */
-unsigned int sysctl_sched_latency = 20000000ULL;
+unsigned int sysctl_sched_latency = 5000000ULL;

 /*
  * Minimal preemption granularity for CPU-bound tasks:
- * (default: 4 msec * (1 + ilog(ncpus)), units: nanoseconds)
+ * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
  */
-unsigned int sysctl_sched_min_granularity = 4000000ULL;
+unsigned int sysctl_sched_min_granularity = 1000000ULL;

 /*
  * is kept at sysctl_sched_latency / sysctl_sched_min_granularity
@@ -48,10 +48,10 @@ unsigned int sysctl_sched_min_granularity = 4000000ULL;
 static unsigned int sched_nr_latency = 5;

 /*
- * After fork, child runs first. (default) If set to 0 then
+ * After fork, child runs first. If set to 0 (default) then
  * parent will (try to) run first.
  */
-const_debug unsigned int sysctl_sched_child_runs_first = 1;
+unsigned int sysctl_sched_child_runs_first __read_mostly;

 /*
  * sys_sched_yield() compat mode
@@ -63,13 +63,13 @@ unsigned int __read_mostly sysctl_sched_compat_yield;

 /*
  * SCHED_OTHER wake-up granularity.
- * (default: 5 msec * (1 + ilog(ncpus)), units: nanoseconds)
+ * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
  *
  * This option delays the preemption effects of decoupled workloads
  * and reduces their over-scheduling. Synchronous workloads will still
  * have immediate wakeup/sleep latencies.
 */
-unsigned int sysctl_sched_wakeup_granularity = 5000000UL;
+unsigned int sysctl_sched_wakeup_granularity = 1000000UL;

 const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
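These base values are still scaled by the factor the comments name, roughly 1 + ilog(ncpus). A standalone sketch of the resulting effective defaults; note that 5ms / 1ms also matches the `sched_nr_latency = 5` above:

    #include <stdio.h>

    /* integer log2, like the kernel's ilog2() for the inputs used here */
    static unsigned int ilog2u(unsigned int n)
    {
            unsigned int l = 0;
            while (n >>= 1)
                    l++;
            return l;
    }

    int main(void)
    {
            unsigned long latency  = 5000000UL;  /* new 5ms base */
            unsigned long min_gran = 1000000UL;  /* new 1ms base */
            unsigned int cpus;

            for (cpus = 1; cpus <= 16; cpus *= 4) {
                    unsigned int factor = 1 + ilog2u(cpus);

                    printf("%2u cpus: latency %lu ns, min_granularity %lu ns, "
                           "nr_latency %lu\n", cpus,
                           latency * factor, min_gran * factor,
                           latency / min_gran);
            }
            return 0;
    }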
@@ -79,11 +79,6 @@ static const struct sched_class fair_sched_class;
  * CFS operations on generic schedulable entities:
  */

-static inline struct task_struct *task_of(struct sched_entity *se)
-{
-	return container_of(se, struct task_struct, se);
-}
-
 #ifdef CONFIG_FAIR_GROUP_SCHED

 /* cpu runqueue to which this cfs_rq is attached */
@@ -95,6 +90,14 @@ static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
 /* An entity is a task if it doesn't "own" a runqueue */
 #define entity_is_task(se)	(!se->my_q)

+static inline struct task_struct *task_of(struct sched_entity *se)
+{
+#ifdef CONFIG_SCHED_DEBUG
+	WARN_ON_ONCE(!entity_is_task(se));
+#endif
+	return container_of(se, struct task_struct, se);
+}
+
 /* Walk up scheduling entities hierarchy */
 #define for_each_sched_entity(se) \
 		for (; se; se = se->parent)
@@ -186,7 +189,12 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse)
 	}
 }

-#else	/* CONFIG_FAIR_GROUP_SCHED */
+#else	/* !CONFIG_FAIR_GROUP_SCHED */
+
+static inline struct task_struct *task_of(struct sched_entity *se)
+{
+	return container_of(se, struct task_struct, se);
+}

 static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
 {
@@ -537,6 +545,12 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	schedstat_set(se->wait_count, se->wait_count + 1);
 	schedstat_set(se->wait_sum, se->wait_sum +
 			rq_of(cfs_rq)->clock - se->wait_start);
+#ifdef CONFIG_SCHEDSTATS
+	if (entity_is_task(se)) {
+		trace_sched_stat_wait(task_of(se),
+			rq_of(cfs_rq)->clock - se->wait_start);
+	}
+#endif
 	schedstat_set(se->wait_start, 0);
 }

@@ -628,8 +642,10 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		se->sleep_start = 0;
 		se->sum_sleep_runtime += delta;

-		if (tsk)
+		if (tsk) {
 			account_scheduler_latency(tsk, delta >> 10, 1);
+			trace_sched_stat_sleep(tsk, delta);
+		}
 	}
 	if (se->block_start) {
 		u64 delta = rq_of(cfs_rq)->clock - se->block_start;
@@ -644,6 +660,12 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		se->sum_sleep_runtime += delta;

 		if (tsk) {
+			if (tsk->in_iowait) {
+				se->iowait_sum += delta;
+				se->iowait_count++;
+				trace_sched_stat_iowait(tsk, delta);
+			}
+
 			/*
 			 * Blocking time is in units of nanosecs, so shift by
 			 * 20 to get a milliseconds-range estimation of the
@@ -705,11 +727,11 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)

 		vruntime -= thresh;
-
-		/* ensure we never gain time by being placed backwards. */
-		vruntime = max_vruntime(se->vruntime, vruntime);
 	}

+	/* ensure we never gain time by being placed backwards. */
+	vruntime = max_vruntime(se->vruntime, vruntime);
+
 	se->vruntime = vruntime;
 }
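The place_entity() change moves the max_vruntime() clamp out of the !initial branch, so a freshly forked child is clamped too and cannot be placed to the left of (i.e. gain time over) its parent. A standalone model of the wraparound-safe clamp, with illustrative numbers:

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t u64;
    typedef int64_t s64;

    /* same comparison style as kernel/sched_fair.c: the subtraction is
     * interpreted as signed so it stays correct across u64 wraparound */
    static u64 max_vruntime(u64 min_vruntime, u64 vruntime)
    {
            if ((s64)(vruntime - min_vruntime) > 0)
                    return vruntime;
            return min_vruntime;
    }

    int main(void)
    {
            u64 parent_vruntime = 1000000;  /* inherited by the child */
            u64 placed = 400000;            /* naive placement, pre-clamp */

            /* with the fix, the child cannot jump ahead of the parent */
            printf("child vruntime: %llu\n",
                   (unsigned long long)max_vruntime(parent_vruntime, placed));
            return 0;
    }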
@@ -1046,17 +1068,21 @@ static void yield_task_fair(struct rq *rq)
  * search starts with cpus closest then further out as needed,
  * so we always favor a closer, idle cpu.
  * Domains may include CPUs that are not usable for migration,
- * hence we need to mask them out (cpu_active_mask)
+ * hence we need to mask them out (rq->rd->online)
  *
  * Returns the CPU we should wake onto.
  */
 #if defined(ARCH_HAS_SCHED_WAKE_IDLE)
+
+#define cpu_rd_active(cpu, rq) cpumask_test_cpu(cpu, rq->rd->online)
+
 static int wake_idle(int cpu, struct task_struct *p)
 {
 	struct sched_domain *sd;
 	int i;
 	unsigned int chosen_wakeup_cpu;
 	int this_cpu;
+	struct rq *task_rq = task_rq(p);

 	/*
 	 * At POWERSAVINGS_BALANCE_WAKEUP level, if both this_cpu and prev_cpu
@@ -1089,10 +1115,10 @@ static int wake_idle(int cpu, struct task_struct *p)
 	for_each_domain(cpu, sd) {
 		if ((sd->flags & SD_WAKE_IDLE)
 		    || ((sd->flags & SD_WAKE_IDLE_FAR)
-			&& !task_hot(p, task_rq(p)->clock, sd))) {
+			&& !task_hot(p, task_rq->clock, sd))) {
 			for_each_cpu_and(i, sched_domain_span(sd),
 					 &p->cpus_allowed) {
-				if (cpu_active(i) && idle_cpu(i)) {
+				if (cpu_rd_active(i, task_rq) && idle_cpu(i)) {
 					if (i != task_cpu(p)) {
 						schedstat_inc(p,
 						       se.nr_wakeups_idle);
@@ -1235,7 +1261,17 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
 	tg = task_group(p);
 	weight = p->se.load.weight;

-	balanced = 100*(tl + effective_load(tg, this_cpu, weight, weight)) <=
+	/*
+	 * In low-load situations, where prev_cpu is idle and this_cpu is idle
+	 * due to the sync cause above having dropped tl to 0, we'll always have
+	 * an imbalance, but there's really nothing you can do about that, so
+	 * that's good too.
+	 *
+	 * Otherwise check if either cpus are near enough in load to allow this
+	 * task to be woken on this_cpu.
+	 */
+	balanced = !tl ||
+		100*(tl + effective_load(tg, this_cpu, weight, weight)) <=
 		imbalance*(load + effective_load(tg, prev_cpu, 0, weight));

 	/*
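The new `balanced` test first short-circuits on tl == 0 (the low-load case the comment describes), then falls back to the scaled-load comparison. A worked example of the inequality with made-up numbers (tl, load, and effective_load values are illustrative only):

    #include <stdio.h>

    int main(void)
    {
            unsigned long tl = 0;           /* this_cpu load after sync discount */
            unsigned long load = 2048;      /* prev_cpu load */
            unsigned long eff_this = 1024;  /* effective_load() on this_cpu */
            unsigned long eff_prev = 1024;  /* effective_load() on prev_cpu */
            unsigned long imbalance = 125;  /* scaled imbalance percentage */

            int balanced = !tl ||
                    100 * (tl + eff_this) <= imbalance * (load + eff_prev);

            /* tl == 0: affine wakeup allowed despite apparent imbalance */
            printf("balanced = %d\n", balanced);

            tl = 4096;  /* busy this_cpu: 100*5120 > 125*3072 -> not balanced */
            balanced = !tl ||
                    100 * (tl + eff_this) <= imbalance * (load + eff_prev);
            printf("balanced = %d\n", balanced);
            return 0;
    }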
@@ -1278,8 +1314,6 @@ static int select_task_rq_fair(struct task_struct *p, int sync)
 	this_rq		= cpu_rq(this_cpu);
 	new_cpu		= prev_cpu;

-	if (prev_cpu == this_cpu)
-		goto out;
 	/*
 	 * 'this_sd' is the first domain that both
 	 * this_cpu and prev_cpu are present in:
@@ -1721,6 +1755,8 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
 	sched_info_queued(p);

 	update_curr(cfs_rq);
+	if (curr)
+		se->vruntime = curr->vruntime;
 	place_entity(cfs_rq, se, 1);

 	/* 'curr' will be NULL if the child belongs to a different group */
@@ -1,4 +1,4 @@
-SCHED_FEAT(NEW_FAIR_SLEEPERS, 1)
+SCHED_FEAT(NEW_FAIR_SLEEPERS, 0)
 SCHED_FEAT(NORMALIZED_SLEEPER, 0)
 SCHED_FEAT(ADAPTIVE_GRAN, 1)
 SCHED_FEAT(WAKEUP_PREEMPT, 1)
@@ -3,15 +3,18 @@
  * policies)
  */

-static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
-{
-	return container_of(rt_se, struct task_struct, rt);
-}
-
 #ifdef CONFIG_RT_GROUP_SCHED

+#define rt_entity_is_task(rt_se) (!(rt_se)->my_q)
+
+static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
+{
+#ifdef CONFIG_SCHED_DEBUG
+	WARN_ON_ONCE(!rt_entity_is_task(rt_se));
+#endif
+	return container_of(rt_se, struct task_struct, rt);
+}
+
 static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
 {
 	return rt_rq->rq;
@@ -26,6 +29,11 @@ static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)

+#define rt_entity_is_task(rt_se) (1)
+
+static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
+{
+	return container_of(rt_se, struct task_struct, rt);
+}
+
 static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
 {
 	return container_of(rt_rq, struct rq, rt);
@@ -128,6 +136,11 @@ static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
 	plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
 }

+static inline int has_pushable_tasks(struct rq *rq)
+{
+	return !plist_head_empty(&rq->rt.pushable_tasks);
+}
+
 #else

 static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
@@ -602,6 +615,8 @@ static void update_curr_rt(struct rq *rq)
 	curr->se.exec_start = rq->clock;
 	cpuacct_charge(curr, delta_exec);

+	sched_rt_avg_update(rq, delta_exec);
+
 	if (!rt_bandwidth_enabled())
 		return;

@@ -874,8 +889,6 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)

 	if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
 		enqueue_pushable_task(rq, p);
-
-	inc_cpu_load(rq, p->se.load.weight);
 }

 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
@@ -886,8 +899,6 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
 	dequeue_rt_entity(rt_se);

 	dequeue_pushable_task(rq, p);
-
-	dec_cpu_load(rq, p->se.load.weight);
 }

 /*
@@ -1064,6 +1075,14 @@ static struct task_struct *pick_next_task_rt(struct rq *rq)
 	if (p)
 		dequeue_pushable_task(rq, p);

+#ifdef CONFIG_SMP
+	/*
+	 * We detect this state here so that we can avoid taking the RQ
+	 * lock again later if there is no need to push
+	 */
+	rq->post_schedule = has_pushable_tasks(rq);
+#endif
+
 	return p;
 }

@@ -1161,13 +1180,6 @@ static int find_lowest_rq(struct task_struct *task)
 	if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
 		return -1; /* No targets found */

-	/*
-	 * Only consider CPUs that are usable for migration.
-	 * I guess we might want to change cpupri_find() to ignore those
-	 * in the first place.
-	 */
-	cpumask_and(lowest_mask, lowest_mask, cpu_active_mask);
-
 	/*
 	 * At this point we have built a mask of cpus representing the
 	 * lowest priority tasks in the system. Now we want to elect
@@ -1262,11 +1274,6 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
 	return lowest_rq;
 }

-static inline int has_pushable_tasks(struct rq *rq)
-{
-	return !plist_head_empty(&rq->rt.pushable_tasks);
-}
-
 static struct task_struct *pick_next_pushable_task(struct rq *rq)
 {
 	struct task_struct *p;
@@ -1466,23 +1473,9 @@ static void pre_schedule_rt(struct rq *rq, struct task_struct *prev)
 		pull_rt_task(rq);
 }

-/*
- * assumes rq->lock is held
- */
-static int needs_post_schedule_rt(struct rq *rq)
-{
-	return has_pushable_tasks(rq);
-}
-
 static void post_schedule_rt(struct rq *rq)
 {
-	/*
-	 * This is only called if needs_post_schedule_rt() indicates that
-	 * we need to push tasks away
-	 */
 	spin_lock_irq(&rq->lock);
 	push_rt_tasks(rq);
 	spin_unlock_irq(&rq->lock);
 }

 /*
@@ -1758,7 +1751,6 @@ static const struct sched_class rt_sched_class = {
 	.rq_online		= rq_online_rt,
 	.rq_offline		= rq_offline_rt,
 	.pre_schedule		= pre_schedule_rt,
-	.needs_post_schedule	= needs_post_schedule_rt,
 	.post_schedule		= post_schedule_rt,
 	.task_wake_up		= task_wake_up_rt,
 	.switched_from		= switched_from_rt,
@@ -245,6 +245,14 @@ static int max_wakeup_granularity_ns = NSEC_PER_SEC;	/* 1 second */
 #endif

 static struct ctl_table kern_table[] = {
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "sched_child_runs_first",
+		.data		= &sysctl_sched_child_runs_first,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 #ifdef CONFIG_SCHED_DEBUG
 	{
 		.ctl_name	= CTL_UNNUMBERED,
@@ -297,14 +305,6 @@ static struct ctl_table kern_table[] = {
 		.strategy	= &sysctl_intvec,
 		.extra1		= &zero,
 	},
-	{
-		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "sched_child_runs_first",
-		.data		= &sysctl_sched_child_runs_first,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "sched_features",
@@ -329,6 +329,14 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "sched_time_avg",
+		.data		= &sysctl_sched_time_avg,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "timer_migration",
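With the table entry moved out of the CONFIG_SCHED_DEBUG block, /proc/sys/kernel/sched_child_runs_first is always available. A small sketch that reads and reports it:

    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/proc/sys/kernel/sched_child_runs_first", "r");
            int val;

            if (!f || fscanf(f, "%d", &val) != 1) {
                    perror("sched_child_runs_first");
                    return 1;
            }
            printf("child runs first after fork: %s\n", val ? "yes" : "no");
            fclose(f);
            return 0;
    }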
@@ -317,8 +317,6 @@ static int worker_thread(void *__cwq)
 	if (cwq->wq->freezeable)
 		set_freezable();

-	set_user_nice(current, -5);
-
 	for (;;) {
 		prepare_to_wait(&cwq->more_work, &wait, TASK_INTERRUPTIBLE);
 		if (!freezing(current) &&