Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar: - Remove the unused per rq load array and all its infrastructure, by Dietmar Eggemann. - Add utilization clamping support by Patrick Bellasi. This is a refinement of the energy aware scheduling framework with support for boosting of interactive and capping of background workloads: to make sure critical GUI threads get maximum frequency ASAP, and to make sure background processing doesn't unnecessarily move the cpufreq governor to higher frequencies and less energy efficient CPU modes. - Add the bare minimum of tracepoints required for LISA EAS regression testing, by Qais Yousef - which allows automated testing of various power management features, including energy aware scheduling. - Restructure the former tsk_nr_cpus_allowed() facility that the -rt kernel used to modify the scheduler's CPU affinity logic such as migrate_disable() - introduce the task->cpus_ptr value instead of taking the address of &task->cpus_allowed directly - by Sebastian Andrzej Siewior. - Misc optimizations, fixes, cleanups and small enhancements - see the Git log for details. 
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (33 commits) sched/uclamp: Add uclamp support to energy_compute() sched/uclamp: Add uclamp_util_with() sched/cpufreq, sched/uclamp: Add clamps for FAIR and RT tasks sched/uclamp: Set default clamps for RT tasks sched/uclamp: Reset uclamp values on RESET_ON_FORK sched/uclamp: Extend sched_setattr() to support utilization clamping sched/core: Allow sched_setattr() to use the current policy sched/uclamp: Add system default clamps sched/uclamp: Enforce last task's UCLAMP_MAX sched/uclamp: Add bucket local max tracking sched/uclamp: Add CPU's clamp buckets refcounting sched/fair: Rename weighted_cpuload() to cpu_runnable_load() sched/debug: Export the newly added tracepoints sched/debug: Add sched_overutilized tracepoint sched/debug: Add new tracepoint to track PELT at se level sched/debug: Add new tracepoints to track PELT at rq level sched/debug: Add a new sched_trace_*() helper functions sched/autogroup: Make autogroup_path() always available sched/wait: Deduplicate code with do-while sched/topology: Remove unused 'sd' parameter from arch_scale_cpu_capacity() ...
This commit is contained in:
commit
dad1c12ed8
@ -20,7 +20,8 @@ void calc_runnable_avg_yN_inv(void)
|
||||
int i;
|
||||
unsigned int x;
|
||||
|
||||
printf("static const u32 runnable_avg_yN_inv[] = {");
|
||||
/* To silence -Wunused-but-set-variable warnings. */
|
||||
printf("static const u32 runnable_avg_yN_inv[] __maybe_unused = {");
|
||||
for (i = 0; i < HALFLIFE; i++) {
|
||||
x = ((1UL<<32)-1)*pow(y, i);
|
||||
|
||||
|
@ -169,7 +169,7 @@ static void update_cpu_capacity(unsigned int cpu)
|
||||
topology_set_cpu_scale(cpu, cpu_capacity(cpu) / middle_capacity);
|
||||
|
||||
pr_info("CPU%u: update cpu_capacity %lu\n",
|
||||
cpu, topology_get_cpu_scale(NULL, cpu));
|
||||
cpu, topology_get_cpu_scale(cpu));
|
||||
}
|
||||
|
||||
#else
|
||||
|
@ -1831,7 +1831,7 @@ format_mca_init_stack(void *mca_data, unsigned long offset,
|
||||
ti->cpu = cpu;
|
||||
p->stack = ti;
|
||||
p->state = TASK_UNINTERRUPTIBLE;
|
||||
cpumask_set_cpu(cpu, &p->cpus_allowed);
|
||||
cpumask_set_cpu(cpu, &p->cpus_mask);
|
||||
INIT_LIST_HEAD(&p->tasks);
|
||||
p->parent = p->real_parent = p->group_leader = p;
|
||||
INIT_LIST_HEAD(&p->children);
|
||||
|
@ -42,7 +42,7 @@ extern struct task_struct *ll_task;
|
||||
* inline to try to keep the overhead down. If we have been forced to run on
|
||||
* a "CPU" with an FPU because of a previous high level of FP computation,
|
||||
* but did not actually use the FPU during the most recent time-slice (CU1
|
||||
* isn't set), we undo the restriction on cpus_allowed.
|
||||
* isn't set), we undo the restriction on cpus_mask.
|
||||
*
|
||||
* We're not calling set_cpus_allowed() here, because we have no need to
|
||||
* force prompt migration - we're already switching the current CPU to a
|
||||
@ -57,7 +57,7 @@ do { \
|
||||
test_ti_thread_flag(__prev_ti, TIF_FPUBOUND) && \
|
||||
(!(KSTK_STATUS(prev) & ST0_CU1))) { \
|
||||
clear_ti_thread_flag(__prev_ti, TIF_FPUBOUND); \
|
||||
prev->cpus_allowed = prev->thread.user_cpus_allowed; \
|
||||
prev->cpus_mask = prev->thread.user_cpus_allowed; \
|
||||
} \
|
||||
next->thread.emulated_fp = 0; \
|
||||
} while(0)
|
||||
|
@ -177,7 +177,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len,
|
||||
if (retval)
|
||||
goto out_unlock;
|
||||
|
||||
cpumask_or(&allowed, &p->thread.user_cpus_allowed, &p->cpus_allowed);
|
||||
cpumask_or(&allowed, &p->thread.user_cpus_allowed, p->cpus_ptr);
|
||||
cpumask_and(&mask, &allowed, cpu_active_mask);
|
||||
|
||||
out_unlock:
|
||||
|
@ -891,12 +891,12 @@ static void mt_ase_fp_affinity(void)
|
||||
* restricted the allowed set to exclude any CPUs with FPUs,
|
||||
* we'll skip the procedure.
|
||||
*/
|
||||
if (cpumask_intersects(&current->cpus_allowed, &mt_fpu_cpumask)) {
|
||||
if (cpumask_intersects(&current->cpus_mask, &mt_fpu_cpumask)) {
|
||||
cpumask_t tmask;
|
||||
|
||||
current->thread.user_cpus_allowed
|
||||
= current->cpus_allowed;
|
||||
cpumask_and(&tmask, &current->cpus_allowed,
|
||||
= current->cpus_mask;
|
||||
cpumask_and(&tmask, &current->cpus_mask,
|
||||
&mt_fpu_cpumask);
|
||||
set_cpus_allowed_ptr(current, &tmask);
|
||||
set_thread_flag(TIF_FPUBOUND);
|
||||
|
@ -128,7 +128,7 @@ void __spu_update_sched_info(struct spu_context *ctx)
|
||||
* runqueue. The context will be rescheduled on the proper node
|
||||
* if it is timesliced or preempted.
|
||||
*/
|
||||
cpumask_copy(&ctx->cpus_allowed, &current->cpus_allowed);
|
||||
cpumask_copy(&ctx->cpus_allowed, current->cpus_ptr);
|
||||
|
||||
/* Save the current cpu id for spu interrupt routing. */
|
||||
ctx->last_ran = raw_smp_processor_id();
|
||||
|
@ -1503,7 +1503,7 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
* may be scheduled elsewhere and invalidate entries in the
|
||||
* pseudo-locked region.
|
||||
*/
|
||||
if (!cpumask_subset(&current->cpus_allowed, &plr->d->cpu_mask)) {
|
||||
if (!cpumask_subset(current->cpus_ptr, &plr->d->cpu_mask)) {
|
||||
mutex_unlock(&rdtgroup_mutex);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@ -43,7 +43,7 @@ static ssize_t cpu_capacity_show(struct device *dev,
|
||||
{
|
||||
struct cpu *cpu = container_of(dev, struct cpu, dev);
|
||||
|
||||
return sprintf(buf, "%lu\n", topology_get_cpu_scale(NULL, cpu->dev.id));
|
||||
return sprintf(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id));
|
||||
}
|
||||
|
||||
static void update_topology_flags_workfn(struct work_struct *work);
|
||||
@ -116,7 +116,7 @@ void topology_normalize_cpu_scale(void)
|
||||
/ capacity_scale;
|
||||
topology_set_cpu_scale(cpu, capacity);
|
||||
pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
|
||||
cpu, topology_get_cpu_scale(NULL, cpu));
|
||||
cpu, topology_get_cpu_scale(cpu));
|
||||
}
|
||||
}
|
||||
|
||||
@ -185,7 +185,7 @@ init_cpu_capacity_callback(struct notifier_block *nb,
|
||||
cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus);
|
||||
|
||||
for_each_cpu(cpu, policy->related_cpus) {
|
||||
raw_capacity[cpu] = topology_get_cpu_scale(NULL, cpu) *
|
||||
raw_capacity[cpu] = topology_get_cpu_scale(cpu) *
|
||||
policy->cpuinfo.max_freq / 1000UL;
|
||||
capacity_scale = max(raw_capacity[cpu], capacity_scale);
|
||||
}
|
||||
|
@ -1038,7 +1038,7 @@ int hfi1_get_proc_affinity(int node)
|
||||
struct hfi1_affinity_node *entry;
|
||||
cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask;
|
||||
const struct cpumask *node_mask,
|
||||
*proc_mask = &current->cpus_allowed;
|
||||
*proc_mask = current->cpus_ptr;
|
||||
struct hfi1_affinity_node_list *affinity = &node_affinity;
|
||||
struct cpu_mask_set *set = &affinity->proc;
|
||||
|
||||
@ -1046,7 +1046,7 @@ int hfi1_get_proc_affinity(int node)
|
||||
* check whether process/context affinity has already
|
||||
* been set
|
||||
*/
|
||||
if (cpumask_weight(proc_mask) == 1) {
|
||||
if (current->nr_cpus_allowed == 1) {
|
||||
hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl",
|
||||
current->pid, current->comm,
|
||||
cpumask_pr_args(proc_mask));
|
||||
@ -1057,7 +1057,7 @@ int hfi1_get_proc_affinity(int node)
|
||||
cpu = cpumask_first(proc_mask);
|
||||
cpumask_set_cpu(cpu, &set->used);
|
||||
goto done;
|
||||
} else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) {
|
||||
} else if (current->nr_cpus_allowed < cpumask_weight(&set->mask)) {
|
||||
hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl",
|
||||
current->pid, current->comm,
|
||||
cpumask_pr_args(proc_mask));
|
||||
|
@ -869,14 +869,13 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
|
||||
{
|
||||
struct sdma_rht_node *rht_node;
|
||||
struct sdma_engine *sde = NULL;
|
||||
const struct cpumask *current_mask = &current->cpus_allowed;
|
||||
unsigned long cpu_id;
|
||||
|
||||
/*
|
||||
* To ensure that always the same sdma engine(s) will be
|
||||
* selected make sure the process is pinned to this CPU only.
|
||||
*/
|
||||
if (cpumask_weight(current_mask) != 1)
|
||||
if (current->nr_cpus_allowed != 1)
|
||||
goto out;
|
||||
|
||||
cpu_id = smp_processor_id();
|
||||
|
@ -1142,7 +1142,7 @@ static __poll_t qib_poll(struct file *fp, struct poll_table_struct *pt)
|
||||
static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd)
|
||||
{
|
||||
struct qib_filedata *fd = fp->private_data;
|
||||
const unsigned int weight = cpumask_weight(&current->cpus_allowed);
|
||||
const unsigned int weight = current->nr_cpus_allowed;
|
||||
const struct cpumask *local_mask = cpumask_of_pcibus(dd->pcidev->bus);
|
||||
int local_cpu;
|
||||
|
||||
@ -1623,9 +1623,8 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
|
||||
ret = find_free_ctxt(i_minor - 1, fp, uinfo);
|
||||
else {
|
||||
int unit;
|
||||
const unsigned int cpu = cpumask_first(&current->cpus_allowed);
|
||||
const unsigned int weight =
|
||||
cpumask_weight(&current->cpus_allowed);
|
||||
const unsigned int cpu = cpumask_first(current->cpus_ptr);
|
||||
const unsigned int weight = current->nr_cpus_allowed;
|
||||
|
||||
if (weight == 1 && !test_bit(cpu, qib_cpulist))
|
||||
if (!find_hca(cpu, &unit) && unit >= 0)
|
||||
|
@ -381,9 +381,9 @@ static inline void task_context_switch_counts(struct seq_file *m,
|
||||
static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
|
||||
{
|
||||
seq_printf(m, "Cpus_allowed:\t%*pb\n",
|
||||
cpumask_pr_args(&task->cpus_allowed));
|
||||
cpumask_pr_args(task->cpus_ptr));
|
||||
seq_printf(m, "Cpus_allowed_list:\t%*pbl\n",
|
||||
cpumask_pr_args(&task->cpus_allowed));
|
||||
cpumask_pr_args(task->cpus_ptr));
|
||||
}
|
||||
|
||||
static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm)
|
||||
|
@ -18,7 +18,7 @@ DECLARE_PER_CPU(unsigned long, cpu_scale);
|
||||
|
||||
struct sched_domain;
|
||||
static inline
|
||||
unsigned long topology_get_cpu_scale(struct sched_domain *sd, int cpu)
|
||||
unsigned long topology_get_cpu_scale(int cpu)
|
||||
{
|
||||
return per_cpu(cpu_scale, cpu);
|
||||
}
|
||||
|
@ -89,7 +89,7 @@ static inline unsigned long em_pd_energy(struct em_perf_domain *pd,
|
||||
* like schedutil.
|
||||
*/
|
||||
cpu = cpumask_first(to_cpumask(pd->cpus));
|
||||
scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
|
||||
scale_cpu = arch_scale_cpu_capacity(cpu);
|
||||
cs = &pd->table[pd->nr_cap_states - 1];
|
||||
freq = map_util_freq(max_util, cs->frequency, scale_cpu);
|
||||
|
||||
|
@ -220,4 +220,38 @@ int __order_base_2(unsigned long n)
|
||||
ilog2((n) - 1) + 1) : \
|
||||
__order_base_2(n) \
|
||||
)
|
||||
|
||||
static inline __attribute__((const))
|
||||
int __bits_per(unsigned long n)
|
||||
{
|
||||
if (n < 2)
|
||||
return 1;
|
||||
if (is_power_of_2(n))
|
||||
return order_base_2(n) + 1;
|
||||
return order_base_2(n);
|
||||
}
|
||||
|
||||
/**
|
||||
* bits_per - calculate the number of bits required for the argument
|
||||
* @n: parameter
|
||||
*
|
||||
* This is constant-capable and can be used for compile time
|
||||
* initializations, e.g. bitfields.
|
||||
*
|
||||
* The first few values calculated by this routine:
|
||||
* bits_per(0) = 1
|
||||
* bits_per(1) = 1
|
||||
* bits_per(2) = 2
|
||||
* bits_per(3) = 2
|
||||
* bits_per(4) = 3
|
||||
* ... and so on.
|
||||
*/
|
||||
#define bits_per(n) \
|
||||
( \
|
||||
__builtin_constant_p(n) ? ( \
|
||||
((n) == 0 || (n) == 1) \
|
||||
? 1 : ilog2(n) + 1 \
|
||||
) : \
|
||||
__bits_per(n) \
|
||||
)
|
||||
#endif /* _LINUX_LOG2_H */
|
||||
|
@ -35,6 +35,7 @@ struct audit_context;
|
||||
struct backing_dev_info;
|
||||
struct bio_list;
|
||||
struct blk_plug;
|
||||
struct capture_control;
|
||||
struct cfs_rq;
|
||||
struct fs_struct;
|
||||
struct futex_pi_state;
|
||||
@ -47,8 +48,9 @@ struct pid_namespace;
|
||||
struct pipe_inode_info;
|
||||
struct rcu_node;
|
||||
struct reclaim_state;
|
||||
struct capture_control;
|
||||
struct robust_list_head;
|
||||
struct root_domain;
|
||||
struct rq;
|
||||
struct sched_attr;
|
||||
struct sched_param;
|
||||
struct seq_file;
|
||||
@ -281,6 +283,18 @@ struct vtime {
|
||||
u64 gtime;
|
||||
};
|
||||
|
||||
/*
|
||||
* Utilization clamp constraints.
|
||||
* @UCLAMP_MIN: Minimum utilization
|
||||
* @UCLAMP_MAX: Maximum utilization
|
||||
* @UCLAMP_CNT: Utilization clamp constraints count
|
||||
*/
|
||||
enum uclamp_id {
|
||||
UCLAMP_MIN = 0,
|
||||
UCLAMP_MAX,
|
||||
UCLAMP_CNT
|
||||
};
|
||||
|
||||
struct sched_info {
|
||||
#ifdef CONFIG_SCHED_INFO
|
||||
/* Cumulative counters: */
|
||||
@ -312,6 +326,10 @@ struct sched_info {
|
||||
# define SCHED_FIXEDPOINT_SHIFT 10
|
||||
# define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT)
|
||||
|
||||
/* Increase resolution of cpu_capacity calculations */
|
||||
# define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT
|
||||
# define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT)
|
||||
|
||||
struct load_weight {
|
||||
unsigned long weight;
|
||||
u32 inv_weight;
|
||||
@ -560,6 +578,41 @@ struct sched_dl_entity {
|
||||
struct hrtimer inactive_timer;
|
||||
};
|
||||
|
||||
#ifdef CONFIG_UCLAMP_TASK
|
||||
/* Number of utilization clamp buckets (shorter alias) */
|
||||
#define UCLAMP_BUCKETS CONFIG_UCLAMP_BUCKETS_COUNT
|
||||
|
||||
/*
|
||||
* Utilization clamp for a scheduling entity
|
||||
* @value: clamp value "assigned" to a se
|
||||
* @bucket_id: bucket index corresponding to the "assigned" value
|
||||
* @active: the se is currently refcounted in a rq's bucket
|
||||
* @user_defined: the requested clamp value comes from user-space
|
||||
*
|
||||
* The bucket_id is the index of the clamp bucket matching the clamp value
|
||||
* which is pre-computed and stored to avoid expensive integer divisions from
|
||||
* the fast path.
|
||||
*
|
||||
* The active bit is set whenever a task has got an "effective" value assigned,
|
||||
* which can be different from the clamp value "requested" from user-space.
|
||||
* This allows to know a task is refcounted in the rq's bucket corresponding
|
||||
* to the "effective" bucket_id.
|
||||
*
|
||||
* The user_defined bit is set whenever a task has got a task-specific clamp
|
||||
* value requested from userspace, i.e. the system defaults apply to this task
|
||||
* just as a restriction. This allows to relax default clamps when a less
|
||||
* restrictive task-specific value has been requested, thus allowing to
|
||||
* implement a "nice" semantic. For example, a task running with a 20%
|
||||
* default boost can still drop its own boosting to 0%.
|
||||
*/
|
||||
struct uclamp_se {
|
||||
unsigned int value : bits_per(SCHED_CAPACITY_SCALE);
|
||||
unsigned int bucket_id : bits_per(UCLAMP_BUCKETS);
|
||||
unsigned int active : 1;
|
||||
unsigned int user_defined : 1;
|
||||
};
|
||||
#endif /* CONFIG_UCLAMP_TASK */
|
||||
|
||||
union rcu_special {
|
||||
struct {
|
||||
u8 blocked;
|
||||
@ -640,6 +693,13 @@ struct task_struct {
|
||||
#endif
|
||||
struct sched_dl_entity dl;
|
||||
|
||||
#ifdef CONFIG_UCLAMP_TASK
|
||||
/* Clamp values requested for a scheduling entity */
|
||||
struct uclamp_se uclamp_req[UCLAMP_CNT];
|
||||
/* Effective clamp values used for a scheduling entity */
|
||||
struct uclamp_se uclamp[UCLAMP_CNT];
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PREEMPT_NOTIFIERS
|
||||
/* List of struct preempt_notifier: */
|
||||
struct hlist_head preempt_notifiers;
|
||||
@ -651,7 +711,8 @@ struct task_struct {
|
||||
|
||||
unsigned int policy;
|
||||
int nr_cpus_allowed;
|
||||
cpumask_t cpus_allowed;
|
||||
const cpumask_t *cpus_ptr;
|
||||
cpumask_t cpus_mask;
|
||||
|
||||
#ifdef CONFIG_PREEMPT_RCU
|
||||
int rcu_read_lock_nesting;
|
||||
@ -1399,7 +1460,7 @@ extern struct pid *cad_pid;
|
||||
#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
|
||||
#define PF_MEMSTALL 0x01000000 /* Stalled due to lack of memory */
|
||||
#define PF_UMH 0x02000000 /* I'm an Usermodehelper process */
|
||||
#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */
|
||||
#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */
|
||||
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
|
||||
#define PF_MEMALLOC_NOCMA 0x10000000 /* All allocation request will have _GFP_MOVABLE cleared */
|
||||
#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
|
||||
@ -1915,4 +1976,16 @@ static inline void rseq_syscall(struct pt_regs *regs)
|
||||
|
||||
#endif
|
||||
|
||||
const struct sched_avg *sched_trace_cfs_rq_avg(struct cfs_rq *cfs_rq);
|
||||
char *sched_trace_cfs_rq_path(struct cfs_rq *cfs_rq, char *str, int len);
|
||||
int sched_trace_cfs_rq_cpu(struct cfs_rq *cfs_rq);
|
||||
|
||||
const struct sched_avg *sched_trace_rq_avg_rt(struct rq *rq);
|
||||
const struct sched_avg *sched_trace_rq_avg_dl(struct rq *rq);
|
||||
const struct sched_avg *sched_trace_rq_avg_irq(struct rq *rq);
|
||||
|
||||
int sched_trace_rq_cpu(struct rq *rq);
|
||||
|
||||
const struct cpumask *sched_trace_rd_span(struct root_domain *rd);
|
||||
|
||||
#endif
|
||||
|
@ -6,14 +6,6 @@
|
||||
* This is the interface between the scheduler and nohz/dynticks:
|
||||
*/
|
||||
|
||||
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
|
||||
extern void cpu_load_update_nohz_start(void);
|
||||
extern void cpu_load_update_nohz_stop(void);
|
||||
#else
|
||||
static inline void cpu_load_update_nohz_start(void) { }
|
||||
static inline void cpu_load_update_nohz_stop(void) { }
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
|
||||
extern void nohz_balance_enter_idle(int cpu);
|
||||
extern int get_nohz_timer_target(void);
|
||||
|
@ -56,6 +56,11 @@ int sched_proc_update_handler(struct ctl_table *table, int write,
|
||||
extern unsigned int sysctl_sched_rt_period;
|
||||
extern int sysctl_sched_rt_runtime;
|
||||
|
||||
#ifdef CONFIG_UCLAMP_TASK
|
||||
extern unsigned int sysctl_sched_uclamp_util_min;
|
||||
extern unsigned int sysctl_sched_uclamp_util_max;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CFS_BANDWIDTH
|
||||
extern unsigned int sysctl_sched_cfs_bandwidth_slice;
|
||||
#endif
|
||||
@ -75,6 +80,12 @@ extern int sched_rt_handler(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *lenp,
|
||||
loff_t *ppos);
|
||||
|
||||
#ifdef CONFIG_UCLAMP_TASK
|
||||
extern int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *lenp,
|
||||
loff_t *ppos);
|
||||
#endif
|
||||
|
||||
extern int sysctl_numa_balancing(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *lenp,
|
||||
loff_t *ppos);
|
||||
|
@ -6,12 +6,6 @@
|
||||
|
||||
#include <linux/sched/idle.h>
|
||||
|
||||
/*
|
||||
* Increase resolution of cpu_capacity calculations
|
||||
*/
|
||||
#define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT
|
||||
#define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT)
|
||||
|
||||
/*
|
||||
* sched-domains (multiprocessor balancing) declarations:
|
||||
*/
|
||||
@ -84,11 +78,6 @@ struct sched_domain {
|
||||
unsigned int busy_factor; /* less balancing by factor if busy */
|
||||
unsigned int imbalance_pct; /* No balance until over watermark */
|
||||
unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */
|
||||
unsigned int busy_idx;
|
||||
unsigned int idle_idx;
|
||||
unsigned int newidle_idx;
|
||||
unsigned int wake_idx;
|
||||
unsigned int forkexec_idx;
|
||||
|
||||
int nohz_idle; /* NOHZ IDLE status */
|
||||
int flags; /* See SD_* */
|
||||
@ -201,14 +190,6 @@ extern void set_sched_topology(struct sched_domain_topology_level *tl);
|
||||
# define SD_INIT_NAME(type)
|
||||
#endif
|
||||
|
||||
#ifndef arch_scale_cpu_capacity
|
||||
static __always_inline
|
||||
unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
|
||||
{
|
||||
return SCHED_CAPACITY_SCALE;
|
||||
}
|
||||
#endif
|
||||
|
||||
#else /* CONFIG_SMP */
|
||||
|
||||
struct sched_domain_attr;
|
||||
@ -224,16 +205,16 @@ static inline bool cpus_share_cache(int this_cpu, int that_cpu)
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif /* !CONFIG_SMP */
|
||||
|
||||
#ifndef arch_scale_cpu_capacity
|
||||
static __always_inline
|
||||
unsigned long arch_scale_cpu_capacity(void __always_unused *sd, int cpu)
|
||||
unsigned long arch_scale_cpu_capacity(int cpu)
|
||||
{
|
||||
return SCHED_CAPACITY_SCALE;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* !CONFIG_SMP */
|
||||
|
||||
static inline int task_node(const struct task_struct *p)
|
||||
{
|
||||
return cpu_to_node(task_cpu(p));
|
||||
|
@ -594,6 +594,37 @@ TRACE_EVENT(sched_wake_idle_without_ipi,
|
||||
|
||||
TP_printk("cpu=%d", __entry->cpu)
|
||||
);
|
||||
|
||||
/*
|
||||
* Following tracepoints are not exported in tracefs and provide hooking
|
||||
* mechanisms only for testing and debugging purposes.
|
||||
*
|
||||
* Postfixed with _tp to make them easily identifiable in the code.
|
||||
*/
|
||||
DECLARE_TRACE(pelt_cfs_tp,
|
||||
TP_PROTO(struct cfs_rq *cfs_rq),
|
||||
TP_ARGS(cfs_rq));
|
||||
|
||||
DECLARE_TRACE(pelt_rt_tp,
|
||||
TP_PROTO(struct rq *rq),
|
||||
TP_ARGS(rq));
|
||||
|
||||
DECLARE_TRACE(pelt_dl_tp,
|
||||
TP_PROTO(struct rq *rq),
|
||||
TP_ARGS(rq));
|
||||
|
||||
DECLARE_TRACE(pelt_irq_tp,
|
||||
TP_PROTO(struct rq *rq),
|
||||
TP_ARGS(rq));
|
||||
|
||||
DECLARE_TRACE(pelt_se_tp,
|
||||
TP_PROTO(struct sched_entity *se),
|
||||
TP_ARGS(se));
|
||||
|
||||
DECLARE_TRACE(sched_overutilized_tp,
|
||||
TP_PROTO(struct root_domain *rd, bool overutilized),
|
||||
TP_ARGS(rd, overutilized));
|
||||
|
||||
#endif /* _TRACE_SCHED_H */
|
||||
|
||||
/* This part must be outside protection */
|
||||
|
@ -51,9 +51,21 @@
|
||||
#define SCHED_FLAG_RESET_ON_FORK 0x01
|
||||
#define SCHED_FLAG_RECLAIM 0x02
|
||||
#define SCHED_FLAG_DL_OVERRUN 0x04
|
||||
#define SCHED_FLAG_KEEP_POLICY 0x08
|
||||
#define SCHED_FLAG_KEEP_PARAMS 0x10
|
||||
#define SCHED_FLAG_UTIL_CLAMP_MIN 0x20
|
||||
#define SCHED_FLAG_UTIL_CLAMP_MAX 0x40
|
||||
|
||||
#define SCHED_FLAG_KEEP_ALL (SCHED_FLAG_KEEP_POLICY | \
|
||||
SCHED_FLAG_KEEP_PARAMS)
|
||||
|
||||
#define SCHED_FLAG_UTIL_CLAMP (SCHED_FLAG_UTIL_CLAMP_MIN | \
|
||||
SCHED_FLAG_UTIL_CLAMP_MAX)
|
||||
|
||||
#define SCHED_FLAG_ALL (SCHED_FLAG_RESET_ON_FORK | \
|
||||
SCHED_FLAG_RECLAIM | \
|
||||
SCHED_FLAG_DL_OVERRUN)
|
||||
SCHED_FLAG_DL_OVERRUN | \
|
||||
SCHED_FLAG_KEEP_ALL | \
|
||||
SCHED_FLAG_UTIL_CLAMP)
|
||||
|
||||
#endif /* _UAPI_LINUX_SCHED_H */
|
||||
|
@ -9,6 +9,7 @@ struct sched_param {
|
||||
};
|
||||
|
||||
#define SCHED_ATTR_SIZE_VER0 48 /* sizeof first published struct */
|
||||
#define SCHED_ATTR_SIZE_VER1 56 /* add: util_{min,max} */
|
||||
|
||||
/*
|
||||
* Extended scheduling parameters data structure.
|
||||
@ -21,8 +22,33 @@ struct sched_param {
|
||||
* the tasks may be useful for a wide variety of application fields, e.g.,
|
||||
* multimedia, streaming, automation and control, and many others.
|
||||
*
|
||||
* This variant (sched_attr) is meant at describing a so-called
|
||||
* sporadic time-constrained task. In such model a task is specified by:
|
||||
* This variant (sched_attr) allows to define additional attributes to
|
||||
* improve the scheduler knowledge about task requirements.
|
||||
*
|
||||
* Scheduling Class Attributes
|
||||
* ===========================
|
||||
*
|
||||
* A subset of sched_attr attributes specifies the
|
||||
* scheduling policy and relative POSIX attributes:
|
||||
*
|
||||
* @size size of the structure, for fwd/bwd compat.
|
||||
*
|
||||
* @sched_policy task's scheduling policy
|
||||
* @sched_nice task's nice value (SCHED_NORMAL/BATCH)
|
||||
* @sched_priority task's static priority (SCHED_FIFO/RR)
|
||||
*
|
||||
* Certain more advanced scheduling features can be controlled by a
|
||||
* predefined set of flags via the attribute:
|
||||
*
|
||||
* @sched_flags for customizing the scheduler behaviour
|
||||
*
|
||||
* Sporadic Time-Constrained Task Attributes
|
||||
* =========================================
|
||||
*
|
||||
* A subset of sched_attr attributes allows to describe a so-called
|
||||
* sporadic time-constrained task.
|
||||
*
|
||||
* In such a model a task is specified by:
|
||||
* - the activation period or minimum instance inter-arrival time;
|
||||
* - the maximum (or average, depending on the actual scheduling
|
||||
* discipline) computation time of all instances, a.k.a. runtime;
|
||||
@ -34,14 +60,8 @@ struct sched_param {
|
||||
* than the runtime and must be completed by time instant t equal to
|
||||
* the instance activation time + the deadline.
|
||||
*
|
||||
* This is reflected by the actual fields of the sched_attr structure:
|
||||
* This is reflected by the following fields of the sched_attr structure:
|
||||
*
|
||||
* @size size of the structure, for fwd/bwd compat.
|
||||
*
|
||||
* @sched_policy task's scheduling policy
|
||||
* @sched_flags for customizing the scheduler behaviour
|
||||
* @sched_nice task's nice value (SCHED_NORMAL/BATCH)
|
||||
* @sched_priority task's static priority (SCHED_FIFO/RR)
|
||||
* @sched_deadline representative of the task's deadline
|
||||
* @sched_runtime representative of the task's runtime
|
||||
* @sched_period representative of the task's period
|
||||
@ -53,6 +73,29 @@ struct sched_param {
|
||||
* As of now, the SCHED_DEADLINE policy (sched_dl scheduling class) is the
|
||||
* only user of this new interface. More information about the algorithm
|
||||
* is available in the scheduling class file or in Documentation/.
|
||||
*
|
||||
* Task Utilization Attributes
|
||||
* ===========================
|
||||
*
|
||||
* A subset of sched_attr attributes allows to specify the utilization
|
||||
* expected for a task. These attributes allow to inform the scheduler about
|
||||
* the utilization boundaries within which it should schedule the task. These
|
||||
* boundaries are valuable hints to support scheduler decisions on both task
|
||||
* placement and frequency selection.
|
||||
*
|
||||
* @sched_util_min represents the minimum utilization
|
||||
* @sched_util_max represents the maximum utilization
|
||||
*
|
||||
* Utilization is a value in the range [0..SCHED_CAPACITY_SCALE]. It
|
||||
* represents the percentage of CPU time used by a task when running at the
|
||||
* maximum frequency on the highest capacity CPU of the system. For example, a
|
||||
* 20% utilization task is a task running for 2ms every 10ms at maximum
|
||||
* frequency.
|
||||
*
|
||||
* A task with a min utilization value bigger than 0 is more likely scheduled
|
||||
* on a CPU with a capacity big enough to fit the specified value.
|
||||
* A task with a max utilization value smaller than 1024 is more likely
|
||||
* scheduled on a CPU with no more capacity than the specified value.
|
||||
*/
|
||||
struct sched_attr {
|
||||
__u32 size;
|
||||
@ -70,6 +113,11 @@ struct sched_attr {
|
||||
__u64 sched_runtime;
|
||||
__u64 sched_deadline;
|
||||
__u64 sched_period;
|
||||
|
||||
/* Utilization hints */
|
||||
__u32 sched_util_min;
|
||||
__u32 sched_util_max;
|
||||
|
||||
};
|
||||
|
||||
#endif /* _UAPI_LINUX_SCHED_TYPES_H */
|
||||
|
53
init/Kconfig
53
init/Kconfig
@ -677,6 +677,59 @@ config HAVE_UNSTABLE_SCHED_CLOCK
|
||||
config GENERIC_SCHED_CLOCK
|
||||
bool
|
||||
|
||||
menu "Scheduler features"
|
||||
|
||||
config UCLAMP_TASK
|
||||
bool "Enable utilization clamping for RT/FAIR tasks"
|
||||
depends on CPU_FREQ_GOV_SCHEDUTIL
|
||||
help
|
||||
This feature enables the scheduler to track the clamped utilization
|
||||
of each CPU based on RUNNABLE tasks scheduled on that CPU.
|
||||
|
||||
With this option, the user can specify the min and max CPU
|
||||
utilization allowed for RUNNABLE tasks. The max utilization defines
|
||||
the maximum frequency a task should use while the min utilization
|
||||
defines the minimum frequency it should use.
|
||||
|
||||
Both min and max utilization clamp values are hints to the scheduler,
|
||||
aiming at improving its frequency selection policy, but they do not
|
||||
enforce or grant any specific bandwidth for tasks.
|
||||
|
||||
If in doubt, say N.
|
||||
|
||||
config UCLAMP_BUCKETS_COUNT
|
||||
int "Number of supported utilization clamp buckets"
|
||||
range 5 20
|
||||
default 5
|
||||
depends on UCLAMP_TASK
|
||||
help
|
||||
Defines the number of clamp buckets to use. The range of each bucket
|
||||
will be SCHED_CAPACITY_SCALE/UCLAMP_BUCKETS_COUNT. The higher the
|
||||
number of clamp buckets the finer their granularity and the higher
|
||||
the precision of clamping aggregation and tracking at run-time.
|
||||
|
||||
For example, with the minimum configuration value we will have 5
|
||||
clamp buckets tracking 20% utilization each. A 25% boosted task will
|
||||
be refcounted in the [20..39]% bucket and will set the bucket clamp
|
||||
effective value to 25%.
|
||||
If a second 30% boosted task should be co-scheduled on the same CPU,
|
||||
that task will be refcounted in the same bucket of the first task and
|
||||
it will boost the bucket clamp effective value to 30%.
|
||||
The clamp effective value of a bucket is reset to its nominal value
|
||||
(20% in the example above) when there are no more tasks refcounted in
|
||||
that bucket.
|
||||
|
||||
An additional boost/capping margin can be added to some tasks. In the
|
||||
example above the 25% task will be boosted to 30% until it exits the
|
||||
CPU. If that should be considered not acceptable on certain systems,
|
||||
it's always possible to reduce the margin by increasing the number of
|
||||
clamp buckets to trade off used memory for run-time tracking
|
||||
precision.
|
||||
|
||||
If in doubt, use the default value.
|
||||
|
||||
endmenu
|
||||
|
||||
#
|
||||
# For architectures that want to enable the support for NUMA-affine scheduler
|
||||
# balancing logic:
|
||||
|
@ -72,7 +72,8 @@ struct task_struct init_task
|
||||
.static_prio = MAX_PRIO - 20,
|
||||
.normal_prio = MAX_PRIO - 20,
|
||||
.policy = SCHED_NORMAL,
|
||||
.cpus_allowed = CPU_MASK_ALL,
|
||||
.cpus_ptr = &init_task.cpus_mask,
|
||||
.cpus_mask = CPU_MASK_ALL,
|
||||
.nr_cpus_allowed= NR_CPUS,
|
||||
.mm = NULL,
|
||||
.active_mm = &init_mm,
|
||||
|
@ -2829,7 +2829,7 @@ static void cpuset_fork(struct task_struct *task)
|
||||
if (task_css_is_root(task, cpuset_cgrp_id))
|
||||
return;
|
||||
|
||||
set_cpus_allowed_ptr(task, &current->cpus_allowed);
|
||||
set_cpus_allowed_ptr(task, current->cpus_ptr);
|
||||
task->mems_allowed = current->mems_allowed;
|
||||
}
|
||||
|
||||
|
@ -898,6 +898,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
|
||||
#ifdef CONFIG_STACKPROTECTOR
|
||||
tsk->stack_canary = get_random_canary();
|
||||
#endif
|
||||
if (orig->cpus_ptr == &orig->cpus_mask)
|
||||
tsk->cpus_ptr = &tsk->cpus_mask;
|
||||
|
||||
/*
|
||||
* One for us, one for whoever does the "release_task()" (usually
|
||||
|
@ -223,7 +223,7 @@ int em_register_perf_domain(cpumask_t *span, unsigned int nr_states,
|
||||
* All CPUs of a domain must have the same micro-architecture
|
||||
* since they all share the same table.
|
||||
*/
|
||||
cap = arch_scale_cpu_capacity(NULL, cpu);
|
||||
cap = arch_scale_cpu_capacity(cpu);
|
||||
if (prev_cap && prev_cap != cap) {
|
||||
pr_err("CPUs of %*pbl must have the same capacity\n",
|
||||
cpumask_pr_args(span));
|
||||
|
@ -259,7 +259,6 @@ out:
|
||||
}
|
||||
#endif /* CONFIG_PROC_FS */
|
||||
|
||||
#ifdef CONFIG_SCHED_DEBUG
|
||||
int autogroup_path(struct task_group *tg, char *buf, int buflen)
|
||||
{
|
||||
if (!task_group_is_autogroup(tg))
|
||||
@ -267,4 +266,3 @@ int autogroup_path(struct task_group *tg, char *buf, int buflen)
|
||||
|
||||
return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id);
|
||||
}
|
||||
#endif
|
||||
|
@ -23,6 +23,17 @@
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/sched.h>
|
||||
|
||||
/*
|
||||
* Export tracepoints that act as a bare tracehook (ie: have no trace event
|
||||
* associated with them) to allow external modules to probe them.
|
||||
*/
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_cfs_tp);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_rt_tp);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_dl_tp);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_se_tp);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(sched_overutilized_tp);
|
||||
|
||||
DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
|
||||
|
||||
#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_JUMP_LABEL)
|
||||
@ -761,6 +772,401 @@ static void set_load_weight(struct task_struct *p, bool update_load)
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_UCLAMP_TASK
|
||||
/* Max allowed minimum utilization */
|
||||
unsigned int sysctl_sched_uclamp_util_min = SCHED_CAPACITY_SCALE;
|
||||
|
||||
/* Max allowed maximum utilization */
|
||||
unsigned int sysctl_sched_uclamp_util_max = SCHED_CAPACITY_SCALE;
|
||||
|
||||
/* All clamps are required to be less than or equal to these values */
|
||||
static struct uclamp_se uclamp_default[UCLAMP_CNT];
|
||||
|
||||
/* Integer rounded range for each bucket */
|
||||
#define UCLAMP_BUCKET_DELTA DIV_ROUND_CLOSEST(SCHED_CAPACITY_SCALE, UCLAMP_BUCKETS)
|
||||
|
||||
#define for_each_clamp_id(clamp_id) \
|
||||
for ((clamp_id) = 0; (clamp_id) < UCLAMP_CNT; (clamp_id)++)
|
||||
|
||||
static inline unsigned int uclamp_bucket_id(unsigned int clamp_value)
|
||||
{
|
||||
return clamp_value / UCLAMP_BUCKET_DELTA;
|
||||
}
|
||||
|
||||
static inline unsigned int uclamp_bucket_base_value(unsigned int clamp_value)
|
||||
{
|
||||
return UCLAMP_BUCKET_DELTA * uclamp_bucket_id(clamp_value);
|
||||
}
|
||||
|
||||
static inline unsigned int uclamp_none(int clamp_id)
|
||||
{
|
||||
if (clamp_id == UCLAMP_MIN)
|
||||
return 0;
|
||||
return SCHED_CAPACITY_SCALE;
|
||||
}
|
||||
|
||||
/*
 * Store @value in @uc_se together with the bucket index derived from it,
 * and record whether the value was explicitly requested (@user_defined).
 */
static inline void uclamp_se_set(struct uclamp_se *uc_se,
				 unsigned int value, bool user_defined)
{
	uc_se->user_defined = user_defined;
	uc_se->bucket_id = uclamp_bucket_id(value);
	uc_se->value = value;
}
|
||||
|
||||
static inline unsigned int
|
||||
uclamp_idle_value(struct rq *rq, unsigned int clamp_id,
|
||||
unsigned int clamp_value)
|
||||
{
|
||||
/*
|
||||
* Avoid blocked utilization pushing up the frequency when we go
|
||||
* idle (which drops the max-clamp) by retaining the last known
|
||||
* max-clamp.
|
||||
*/
|
||||
if (clamp_id == UCLAMP_MAX) {
|
||||
rq->uclamp_flags |= UCLAMP_FLAG_IDLE;
|
||||
return clamp_value;
|
||||
}
|
||||
|
||||
return uclamp_none(UCLAMP_MIN);
|
||||
}
|
||||
|
||||
static inline void uclamp_idle_reset(struct rq *rq, unsigned int clamp_id,
|
||||
unsigned int clamp_value)
|
||||
{
|
||||
/* Reset max-clamp retention only on idle exit */
|
||||
if (!(rq->uclamp_flags & UCLAMP_FLAG_IDLE))
|
||||
return;
|
||||
|
||||
WRITE_ONCE(rq->uclamp[clamp_id].value, clamp_value);
|
||||
}
|
||||
|
||||
static inline
|
||||
unsigned int uclamp_rq_max_value(struct rq *rq, unsigned int clamp_id,
|
||||
unsigned int clamp_value)
|
||||
{
|
||||
struct uclamp_bucket *bucket = rq->uclamp[clamp_id].bucket;
|
||||
int bucket_id = UCLAMP_BUCKETS - 1;
|
||||
|
||||
/*
|
||||
* Since both min and max clamps are max aggregated, find the
|
||||
* top most bucket with tasks in.
|
||||
*/
|
||||
for ( ; bucket_id >= 0; bucket_id--) {
|
||||
if (!bucket[bucket_id].tasks)
|
||||
continue;
|
||||
return bucket[bucket_id].value;
|
||||
}
|
||||
|
||||
/* No tasks -- default clamp values */
|
||||
return uclamp_idle_value(rq, clamp_id, clamp_value);
|
||||
}
|
||||
|
||||
/*
|
||||
* The effective clamp bucket index of a task depends on, by increasing
|
||||
* priority:
|
||||
* - the task specific clamp value, when explicitly requested from userspace
|
||||
* - the system default clamp value, defined by the sysadmin
|
||||
*/
|
||||
static inline struct uclamp_se
|
||||
uclamp_eff_get(struct task_struct *p, unsigned int clamp_id)
|
||||
{
|
||||
struct uclamp_se uc_req = p->uclamp_req[clamp_id];
|
||||
struct uclamp_se uc_max = uclamp_default[clamp_id];
|
||||
|
||||
/* System default restrictions always apply */
|
||||
if (unlikely(uc_req.value > uc_max.value))
|
||||
return uc_max;
|
||||
|
||||
return uc_req;
|
||||
}
|
||||
|
||||
unsigned int uclamp_eff_value(struct task_struct *p, unsigned int clamp_id)
|
||||
{
|
||||
struct uclamp_se uc_eff;
|
||||
|
||||
/* Task currently refcounted: use back-annotated (effective) value */
|
||||
if (p->uclamp[clamp_id].active)
|
||||
return p->uclamp[clamp_id].value;
|
||||
|
||||
uc_eff = uclamp_eff_get(p, clamp_id);
|
||||
|
||||
return uc_eff.value;
|
||||
}
|
||||
|
||||
/*
|
||||
* When a task is enqueued on a rq, the clamp bucket currently defined by the
|
||||
* task's uclamp::bucket_id is refcounted on that rq. This also immediately
|
||||
* updates the rq's clamp value if required.
|
||||
*
|
||||
* Tasks can have a task-specific value requested from user-space, track
|
||||
* within each bucket the maximum value for tasks refcounted in it.
|
||||
* This "local max aggregation" allows to track the exact "requested" value
|
||||
* for each bucket when all its RUNNABLE tasks require the same clamp.
|
||||
*/
|
||||
static inline void uclamp_rq_inc_id(struct rq *rq, struct task_struct *p,
|
||||
unsigned int clamp_id)
|
||||
{
|
||||
struct uclamp_rq *uc_rq = &rq->uclamp[clamp_id];
|
||||
struct uclamp_se *uc_se = &p->uclamp[clamp_id];
|
||||
struct uclamp_bucket *bucket;
|
||||
|
||||
lockdep_assert_held(&rq->lock);
|
||||
|
||||
/* Update task effective clamp */
|
||||
p->uclamp[clamp_id] = uclamp_eff_get(p, clamp_id);
|
||||
|
||||
bucket = &uc_rq->bucket[uc_se->bucket_id];
|
||||
bucket->tasks++;
|
||||
uc_se->active = true;
|
||||
|
||||
uclamp_idle_reset(rq, clamp_id, uc_se->value);
|
||||
|
||||
/*
|
||||
* Local max aggregation: rq buckets always track the max
|
||||
* "requested" clamp value of its RUNNABLE tasks.
|
||||
*/
|
||||
if (bucket->tasks == 1 || uc_se->value > bucket->value)
|
||||
bucket->value = uc_se->value;
|
||||
|
||||
if (uc_se->value > READ_ONCE(uc_rq->value))
|
||||
WRITE_ONCE(uc_rq->value, uc_se->value);
|
||||
}
|
||||
|
||||
/*
|
||||
* When a task is dequeued from a rq, the clamp bucket refcounted by the task
|
||||
* is released. If this is the last task reference counting the rq's max
|
||||
* active clamp value, then the rq's clamp value is updated.
|
||||
*
|
||||
* Both refcounted tasks and rq's cached clamp values are expected to be
|
||||
* always valid. If it's detected they are not, as defensive programming,
|
||||
* enforce the expected state and warn.
|
||||
*/
|
||||
static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p,
|
||||
unsigned int clamp_id)
|
||||
{
|
||||
struct uclamp_rq *uc_rq = &rq->uclamp[clamp_id];
|
||||
struct uclamp_se *uc_se = &p->uclamp[clamp_id];
|
||||
struct uclamp_bucket *bucket;
|
||||
unsigned int bkt_clamp;
|
||||
unsigned int rq_clamp;
|
||||
|
||||
lockdep_assert_held(&rq->lock);
|
||||
|
||||
bucket = &uc_rq->bucket[uc_se->bucket_id];
|
||||
SCHED_WARN_ON(!bucket->tasks);
|
||||
if (likely(bucket->tasks))
|
||||
bucket->tasks--;
|
||||
uc_se->active = false;
|
||||
|
||||
/*
|
||||
* Keep "local max aggregation" simple and accept to (possibly)
|
||||
* overboost some RUNNABLE tasks in the same bucket.
|
||||
* The rq clamp bucket value is reset to its base value whenever
|
||||
* there are no more RUNNABLE tasks refcounting it.
|
||||
*/
|
||||
if (likely(bucket->tasks))
|
||||
return;
|
||||
|
||||
rq_clamp = READ_ONCE(uc_rq->value);
|
||||
/*
|
||||
* Defensive programming: this should never happen. If it happens,
|
||||
* e.g. due to future modification, warn and fixup the expected value.
|
||||
*/
|
||||
SCHED_WARN_ON(bucket->value > rq_clamp);
|
||||
if (bucket->value >= rq_clamp) {
|
||||
bkt_clamp = uclamp_rq_max_value(rq, clamp_id, uc_se->value);
|
||||
WRITE_ONCE(uc_rq->value, bkt_clamp);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
unsigned int clamp_id;
|
||||
|
||||
if (unlikely(!p->sched_class->uclamp_enabled))
|
||||
return;
|
||||
|
||||
for_each_clamp_id(clamp_id)
|
||||
uclamp_rq_inc_id(rq, p, clamp_id);
|
||||
|
||||
/* Reset clamp idle holding when there is one RUNNABLE task */
|
||||
if (rq->uclamp_flags & UCLAMP_FLAG_IDLE)
|
||||
rq->uclamp_flags &= ~UCLAMP_FLAG_IDLE;
|
||||
}
|
||||
|
||||
static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
unsigned int clamp_id;
|
||||
|
||||
if (unlikely(!p->sched_class->uclamp_enabled))
|
||||
return;
|
||||
|
||||
for_each_clamp_id(clamp_id)
|
||||
uclamp_rq_dec_id(rq, p, clamp_id);
|
||||
}
|
||||
|
||||
int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *lenp,
|
||||
loff_t *ppos)
|
||||
{
|
||||
int old_min, old_max;
|
||||
static DEFINE_MUTEX(mutex);
|
||||
int result;
|
||||
|
||||
mutex_lock(&mutex);
|
||||
old_min = sysctl_sched_uclamp_util_min;
|
||||
old_max = sysctl_sched_uclamp_util_max;
|
||||
|
||||
result = proc_dointvec(table, write, buffer, lenp, ppos);
|
||||
if (result)
|
||||
goto undo;
|
||||
if (!write)
|
||||
goto done;
|
||||
|
||||
if (sysctl_sched_uclamp_util_min > sysctl_sched_uclamp_util_max ||
|
||||
sysctl_sched_uclamp_util_max > SCHED_CAPACITY_SCALE) {
|
||||
result = -EINVAL;
|
||||
goto undo;
|
||||
}
|
||||
|
||||
if (old_min != sysctl_sched_uclamp_util_min) {
|
||||
uclamp_se_set(&uclamp_default[UCLAMP_MIN],
|
||||
sysctl_sched_uclamp_util_min, false);
|
||||
}
|
||||
if (old_max != sysctl_sched_uclamp_util_max) {
|
||||
uclamp_se_set(&uclamp_default[UCLAMP_MAX],
|
||||
sysctl_sched_uclamp_util_max, false);
|
||||
}
|
||||
|
||||
/*
|
||||
* Updating all the RUNNABLE task is expensive, keep it simple and do
|
||||
* just a lazy update at each next enqueue time.
|
||||
*/
|
||||
goto done;
|
||||
|
||||
undo:
|
||||
sysctl_sched_uclamp_util_min = old_min;
|
||||
sysctl_sched_uclamp_util_max = old_max;
|
||||
done:
|
||||
mutex_unlock(&mutex);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static int uclamp_validate(struct task_struct *p,
|
||||
const struct sched_attr *attr)
|
||||
{
|
||||
unsigned int lower_bound = p->uclamp_req[UCLAMP_MIN].value;
|
||||
unsigned int upper_bound = p->uclamp_req[UCLAMP_MAX].value;
|
||||
|
||||
if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN)
|
||||
lower_bound = attr->sched_util_min;
|
||||
if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX)
|
||||
upper_bound = attr->sched_util_max;
|
||||
|
||||
if (lower_bound > upper_bound)
|
||||
return -EINVAL;
|
||||
if (upper_bound > SCHED_CAPACITY_SCALE)
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __setscheduler_uclamp(struct task_struct *p,
|
||||
const struct sched_attr *attr)
|
||||
{
|
||||
unsigned int clamp_id;
|
||||
|
||||
/*
|
||||
* On scheduling class change, reset to default clamps for tasks
|
||||
* without a task-specific value.
|
||||
*/
|
||||
for_each_clamp_id(clamp_id) {
|
||||
struct uclamp_se *uc_se = &p->uclamp_req[clamp_id];
|
||||
unsigned int clamp_value = uclamp_none(clamp_id);
|
||||
|
||||
/* Keep using defined clamps across class changes */
|
||||
if (uc_se->user_defined)
|
||||
continue;
|
||||
|
||||
/* By default, RT tasks always get 100% boost */
|
||||
if (unlikely(rt_task(p) && clamp_id == UCLAMP_MIN))
|
||||
clamp_value = uclamp_none(UCLAMP_MAX);
|
||||
|
||||
uclamp_se_set(uc_se, clamp_value, false);
|
||||
}
|
||||
|
||||
if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)))
|
||||
return;
|
||||
|
||||
if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
|
||||
uclamp_se_set(&p->uclamp_req[UCLAMP_MIN],
|
||||
attr->sched_util_min, true);
|
||||
}
|
||||
|
||||
if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) {
|
||||
uclamp_se_set(&p->uclamp_req[UCLAMP_MAX],
|
||||
attr->sched_util_max, true);
|
||||
}
|
||||
}
|
||||
|
||||
static void uclamp_fork(struct task_struct *p)
|
||||
{
|
||||
unsigned int clamp_id;
|
||||
|
||||
for_each_clamp_id(clamp_id)
|
||||
p->uclamp[clamp_id].active = false;
|
||||
|
||||
if (likely(!p->sched_reset_on_fork))
|
||||
return;
|
||||
|
||||
for_each_clamp_id(clamp_id) {
|
||||
unsigned int clamp_value = uclamp_none(clamp_id);
|
||||
|
||||
/* By default, RT tasks always get 100% boost */
|
||||
if (unlikely(rt_task(p) && clamp_id == UCLAMP_MIN))
|
||||
clamp_value = uclamp_none(UCLAMP_MAX);
|
||||
|
||||
uclamp_se_set(&p->uclamp_req[clamp_id], clamp_value, false);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Boot-time setup of the utilization clamping state:
 *  - clear the clamp tracking data and clamp flags of every possible CPU's
 *    runqueue,
 *  - give init_task the "none" requested value for each clamp index,
 *  - set the system default of each clamp index to the max "none" value.
 */
static void __init init_uclamp(void)
{
	struct uclamp_se uc_max = {};
	unsigned int clamp_id;
	int cpu;

	for_each_possible_cpu(cpu) {
		/*
		 * rq::uclamp holds one struct uclamp_rq per clamp index:
		 * clear all UCLAMP_CNT entries, not just the first one.
		 */
		memset(&cpu_rq(cpu)->uclamp, 0,
		       sizeof(struct uclamp_rq) * UCLAMP_CNT);
		cpu_rq(cpu)->uclamp_flags = 0;
	}

	for_each_clamp_id(clamp_id) {
		uclamp_se_set(&init_task.uclamp_req[clamp_id],
			      uclamp_none(clamp_id), false);
	}

	/* System defaults allow max clamp values for both indexes */
	uclamp_se_set(&uc_max, uclamp_none(UCLAMP_MAX), false);
	for_each_clamp_id(clamp_id)
		uclamp_default[clamp_id] = uc_max;
}
|
||||
|
||||
#else /* CONFIG_UCLAMP_TASK */
|
||||
static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) { }
|
||||
static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { }
|
||||
static inline int uclamp_validate(struct task_struct *p,
|
||||
const struct sched_attr *attr)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
static void __setscheduler_uclamp(struct task_struct *p,
|
||||
const struct sched_attr *attr) { }
|
||||
static inline void uclamp_fork(struct task_struct *p) { }
|
||||
static inline void init_uclamp(void) { }
|
||||
#endif /* CONFIG_UCLAMP_TASK */
|
||||
|
||||
static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
|
||||
{
|
||||
if (!(flags & ENQUEUE_NOCLOCK))
|
||||
@ -771,6 +1177,7 @@ static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
|
||||
psi_enqueue(p, flags & ENQUEUE_WAKEUP);
|
||||
}
|
||||
|
||||
uclamp_rq_inc(rq, p);
|
||||
p->sched_class->enqueue_task(rq, p, flags);
|
||||
}
|
||||
|
||||
@ -784,6 +1191,7 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
|
||||
psi_dequeue(p, flags & DEQUEUE_SLEEP);
|
||||
}
|
||||
|
||||
uclamp_rq_dec(rq, p);
|
||||
p->sched_class->dequeue_task(rq, p, flags);
|
||||
}
|
||||
|
||||
@ -930,7 +1338,7 @@ static inline bool is_per_cpu_kthread(struct task_struct *p)
|
||||
*/
|
||||
static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
|
||||
{
|
||||
if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
|
||||
if (!cpumask_test_cpu(cpu, p->cpus_ptr))
|
||||
return false;
|
||||
|
||||
if (is_per_cpu_kthread(p))
|
||||
@ -1025,7 +1433,7 @@ static int migration_cpu_stop(void *data)
|
||||
local_irq_disable();
|
||||
/*
|
||||
* We need to explicitly wake pending tasks before running
|
||||
* __migrate_task() such that we will not miss enforcing cpus_allowed
|
||||
* __migrate_task() such that we will not miss enforcing cpus_ptr
|
||||
* during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
|
||||
*/
|
||||
sched_ttwu_pending();
|
||||
@ -1056,7 +1464,7 @@ static int migration_cpu_stop(void *data)
|
||||
*/
|
||||
void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask)
|
||||
{
|
||||
cpumask_copy(&p->cpus_allowed, new_mask);
|
||||
cpumask_copy(&p->cpus_mask, new_mask);
|
||||
p->nr_cpus_allowed = cpumask_weight(new_mask);
|
||||
}
|
||||
|
||||
@ -1126,7 +1534,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (cpumask_equal(&p->cpus_allowed, new_mask))
|
||||
if (cpumask_equal(p->cpus_ptr, new_mask))
|
||||
goto out;
|
||||
|
||||
if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
|
||||
@ -1286,10 +1694,10 @@ static int migrate_swap_stop(void *data)
|
||||
if (task_cpu(arg->src_task) != arg->src_cpu)
|
||||
goto unlock;
|
||||
|
||||
if (!cpumask_test_cpu(arg->dst_cpu, &arg->src_task->cpus_allowed))
|
||||
if (!cpumask_test_cpu(arg->dst_cpu, arg->src_task->cpus_ptr))
|
||||
goto unlock;
|
||||
|
||||
if (!cpumask_test_cpu(arg->src_cpu, &arg->dst_task->cpus_allowed))
|
||||
if (!cpumask_test_cpu(arg->src_cpu, arg->dst_task->cpus_ptr))
|
||||
goto unlock;
|
||||
|
||||
__migrate_swap_task(arg->src_task, arg->dst_cpu);
|
||||
@ -1331,10 +1739,10 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p,
|
||||
if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu))
|
||||
goto out;
|
||||
|
||||
if (!cpumask_test_cpu(arg.dst_cpu, &arg.src_task->cpus_allowed))
|
||||
if (!cpumask_test_cpu(arg.dst_cpu, arg.src_task->cpus_ptr))
|
||||
goto out;
|
||||
|
||||
if (!cpumask_test_cpu(arg.src_cpu, &arg.dst_task->cpus_allowed))
|
||||
if (!cpumask_test_cpu(arg.src_cpu, arg.dst_task->cpus_ptr))
|
||||
goto out;
|
||||
|
||||
trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu);
|
||||
@ -1479,7 +1887,7 @@ void kick_process(struct task_struct *p)
|
||||
EXPORT_SYMBOL_GPL(kick_process);
|
||||
|
||||
/*
|
||||
* ->cpus_allowed is protected by both rq->lock and p->pi_lock
|
||||
* ->cpus_ptr is protected by both rq->lock and p->pi_lock
|
||||
*
|
||||
* A few notes on cpu_active vs cpu_online:
|
||||
*
|
||||
@ -1519,14 +1927,14 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
|
||||
for_each_cpu(dest_cpu, nodemask) {
|
||||
if (!cpu_active(dest_cpu))
|
||||
continue;
|
||||
if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
|
||||
if (cpumask_test_cpu(dest_cpu, p->cpus_ptr))
|
||||
return dest_cpu;
|
||||
}
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
/* Any allowed, online CPU? */
|
||||
for_each_cpu(dest_cpu, &p->cpus_allowed) {
|
||||
for_each_cpu(dest_cpu, p->cpus_ptr) {
|
||||
if (!is_cpu_allowed(p, dest_cpu))
|
||||
continue;
|
||||
|
||||
@ -1570,7 +1978,7 @@ out:
|
||||
}
|
||||
|
||||
/*
|
||||
* The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable.
|
||||
* The caller (fork, wakeup) owns p->pi_lock, ->cpus_ptr is stable.
|
||||
*/
|
||||
static inline
|
||||
int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
|
||||
@ -1580,11 +1988,11 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
|
||||
if (p->nr_cpus_allowed > 1)
|
||||
cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
|
||||
else
|
||||
cpu = cpumask_any(&p->cpus_allowed);
|
||||
cpu = cpumask_any(p->cpus_ptr);
|
||||
|
||||
/*
|
||||
* In order not to call set_task_cpu() on a blocking task we need
|
||||
* to rely on ttwu() to place the task on a valid ->cpus_allowed
|
||||
* to rely on ttwu() to place the task on a valid ->cpus_ptr
|
||||
* CPU.
|
||||
*
|
||||
* Since this is common to all placement strategies, this lives here.
|
||||
@ -1991,6 +2399,29 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
|
||||
unsigned long flags;
|
||||
int cpu, success = 0;
|
||||
|
||||
if (p == current) {
|
||||
/*
|
||||
* We're waking current, this means 'p->on_rq' and 'task_cpu(p)
|
||||
* == smp_processor_id()'. Together this means we can special
|
||||
* case the whole 'p->on_rq && ttwu_remote()' case below
|
||||
* without taking any locks.
|
||||
*
|
||||
* In particular:
|
||||
* - we rely on Program-Order guarantees for all the ordering,
|
||||
* - we're serialized against set_special_state() by virtue of
|
||||
* it disabling IRQs (this allows not taking ->pi_lock).
|
||||
*/
|
||||
if (!(p->state & state))
|
||||
return false;
|
||||
|
||||
success = 1;
|
||||
cpu = task_cpu(p);
|
||||
trace_sched_waking(p);
|
||||
p->state = TASK_RUNNING;
|
||||
trace_sched_wakeup(p);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we are going to wake up a thread waiting for CONDITION we
|
||||
* need to ensure that CONDITION=1 done by the caller can not be
|
||||
@ -2000,7 +2431,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
|
||||
raw_spin_lock_irqsave(&p->pi_lock, flags);
|
||||
smp_mb__after_spinlock();
|
||||
if (!(p->state & state))
|
||||
goto out;
|
||||
goto unlock;
|
||||
|
||||
trace_sched_waking(p);
|
||||
|
||||
@ -2030,7 +2461,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
|
||||
*/
|
||||
smp_rmb();
|
||||
if (p->on_rq && ttwu_remote(p, wake_flags))
|
||||
goto stat;
|
||||
goto unlock;
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
@ -2090,10 +2521,11 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
ttwu_queue(p, cpu, wake_flags);
|
||||
stat:
|
||||
ttwu_stat(p, cpu, wake_flags);
|
||||
out:
|
||||
unlock:
|
||||
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
|
||||
out:
|
||||
if (success)
|
||||
ttwu_stat(p, cpu, wake_flags);
|
||||
|
||||
return success;
|
||||
}
|
||||
@ -2300,6 +2732,8 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
|
||||
*/
|
||||
p->prio = current->normal_prio;
|
||||
|
||||
uclamp_fork(p);
|
||||
|
||||
/*
|
||||
* Revert to default priority/policy on fork if requested.
|
||||
*/
|
||||
@ -2395,7 +2829,7 @@ void wake_up_new_task(struct task_struct *p)
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* Fork balancing, do it here and not earlier because:
|
||||
* - cpus_allowed can change in the fork path
|
||||
* - cpus_ptr can change in the fork path
|
||||
* - any previously selected CPU might disappear through hotplug
|
||||
*
|
||||
* Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq,
|
||||
@ -3033,7 +3467,6 @@ void scheduler_tick(void)
|
||||
|
||||
update_rq_clock(rq);
|
||||
curr->sched_class->task_tick(rq, curr, 0);
|
||||
cpu_load_update_active(rq);
|
||||
calc_global_load_tick(rq);
|
||||
psi_task_tick(rq);
|
||||
|
||||
@ -4071,6 +4504,13 @@ static void __setscheduler_params(struct task_struct *p,
|
||||
static void __setscheduler(struct rq *rq, struct task_struct *p,
|
||||
const struct sched_attr *attr, bool keep_boost)
|
||||
{
|
||||
/*
|
||||
* If params can't change scheduling class changes aren't allowed
|
||||
* either.
|
||||
*/
|
||||
if (attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)
|
||||
return;
|
||||
|
||||
__setscheduler_params(p, attr);
|
||||
|
||||
/*
|
||||
@ -4208,6 +4648,13 @@ recheck:
|
||||
return retval;
|
||||
}
|
||||
|
||||
/* Update task specific "requested" clamps */
|
||||
if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) {
|
||||
retval = uclamp_validate(p, attr);
|
||||
if (retval)
|
||||
return retval;
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure no PI-waiters arrive (or leave) while we are
|
||||
* changing the priority of the task:
|
||||
@ -4237,6 +4684,8 @@ recheck:
|
||||
goto change;
|
||||
if (dl_policy(policy) && dl_param_changed(p, attr))
|
||||
goto change;
|
||||
if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)
|
||||
goto change;
|
||||
|
||||
p->sched_reset_on_fork = reset_on_fork;
|
||||
task_rq_unlock(rq, p, &rf);
|
||||
@ -4267,7 +4716,7 @@ change:
|
||||
* the entire root_domain to become SCHED_DEADLINE. We
|
||||
* will also fail if there's no bandwidth available.
|
||||
*/
|
||||
if (!cpumask_subset(span, &p->cpus_allowed) ||
|
||||
if (!cpumask_subset(span, p->cpus_ptr) ||
|
||||
rq->rd->dl_bw.bw == 0) {
|
||||
task_rq_unlock(rq, p, &rf);
|
||||
return -EPERM;
|
||||
@ -4317,7 +4766,9 @@ change:
|
||||
put_prev_task(rq, p);
|
||||
|
||||
prev_class = p->sched_class;
|
||||
|
||||
__setscheduler(rq, p, attr, pi);
|
||||
__setscheduler_uclamp(p, attr);
|
||||
|
||||
if (queued) {
|
||||
/*
|
||||
@ -4493,6 +4944,10 @@ static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *a
|
||||
if (ret)
|
||||
return -EFAULT;
|
||||
|
||||
if ((attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) &&
|
||||
size < SCHED_ATTR_SIZE_VER1)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* XXX: Do we want to be lenient like existing syscalls; or do we want
|
||||
* to be strict and return an error on out-of-bounds values?
|
||||
@ -4556,14 +5011,21 @@ SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr,
|
||||
|
||||
if ((int)attr.sched_policy < 0)
|
||||
return -EINVAL;
|
||||
if (attr.sched_flags & SCHED_FLAG_KEEP_POLICY)
|
||||
attr.sched_policy = SETPARAM_POLICY;
|
||||
|
||||
rcu_read_lock();
|
||||
retval = -ESRCH;
|
||||
p = find_process_by_pid(pid);
|
||||
if (p != NULL)
|
||||
retval = sched_setattr(p, &attr);
|
||||
if (likely(p))
|
||||
get_task_struct(p);
|
||||
rcu_read_unlock();
|
||||
|
||||
if (likely(p)) {
|
||||
retval = sched_setattr(p, &attr);
|
||||
put_task_struct(p);
|
||||
}
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
@ -4714,6 +5176,11 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
|
||||
else
|
||||
attr.sched_nice = task_nice(p);
|
||||
|
||||
#ifdef CONFIG_UCLAMP_TASK
|
||||
attr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value;
|
||||
attr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value;
|
||||
#endif
|
||||
|
||||
rcu_read_unlock();
|
||||
|
||||
retval = sched_read_attr(uattr, &attr, size);
|
||||
@ -4866,7 +5333,7 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
|
||||
goto out_unlock;
|
||||
|
||||
raw_spin_lock_irqsave(&p->pi_lock, flags);
|
||||
cpumask_and(mask, &p->cpus_allowed, cpu_active_mask);
|
||||
cpumask_and(mask, &p->cpus_mask, cpu_active_mask);
|
||||
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
|
||||
|
||||
out_unlock:
|
||||
@ -5123,7 +5590,7 @@ long __sched io_schedule_timeout(long timeout)
|
||||
}
|
||||
EXPORT_SYMBOL(io_schedule_timeout);
|
||||
|
||||
void io_schedule(void)
|
||||
void __sched io_schedule(void)
|
||||
{
|
||||
int token;
|
||||
|
||||
@ -5443,7 +5910,7 @@ int task_can_attach(struct task_struct *p,
|
||||
* allowed nodes is unnecessary. Thus, cpusets are not
|
||||
* applicable for such threads. This prevents checking for
|
||||
* success of set_cpus_allowed_ptr() on all attached tasks
|
||||
* before cpus_allowed may be changed.
|
||||
* before cpus_mask may be changed.
|
||||
*/
|
||||
if (p->flags & PF_NO_SETAFFINITY) {
|
||||
ret = -EINVAL;
|
||||
@ -5470,7 +5937,7 @@ int migrate_task_to(struct task_struct *p, int target_cpu)
|
||||
if (curr_cpu == target_cpu)
|
||||
return 0;
|
||||
|
||||
if (!cpumask_test_cpu(target_cpu, &p->cpus_allowed))
|
||||
if (!cpumask_test_cpu(target_cpu, p->cpus_ptr))
|
||||
return -EINVAL;
|
||||
|
||||
/* TODO: This is not properly updating schedstats */
|
||||
@ -5608,7 +6075,7 @@ static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
|
||||
put_prev_task(rq, next);
|
||||
|
||||
/*
|
||||
* Rules for changing task_struct::cpus_allowed are holding
|
||||
* Rules for changing task_struct::cpus_mask are holding
|
||||
* both pi_lock and rq->lock, such that holding either
|
||||
* stabilizes the mask.
|
||||
*
|
||||
@ -5902,8 +6369,8 @@ DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);
|
||||
|
||||
void __init sched_init(void)
|
||||
{
|
||||
int i, j;
|
||||
unsigned long alloc_size = 0, ptr;
|
||||
int i;
|
||||
|
||||
wait_bit_init();
|
||||
|
||||
@ -6005,10 +6472,6 @@ void __init sched_init(void)
|
||||
#ifdef CONFIG_RT_GROUP_SCHED
|
||||
init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL);
|
||||
#endif
|
||||
|
||||
for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
|
||||
rq->cpu_load[j] = 0;
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
rq->sd = NULL;
|
||||
rq->rd = NULL;
|
||||
@ -6063,6 +6526,8 @@ void __init sched_init(void)
|
||||
|
||||
psi_init();
|
||||
|
||||
init_uclamp();
|
||||
|
||||
scheduler_running = 1;
|
||||
}
|
||||
|
||||
|
@ -120,14 +120,14 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
|
||||
const struct sched_dl_entity *dl_se = &p->dl;
|
||||
|
||||
if (later_mask &&
|
||||
cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) {
|
||||
cpumask_and(later_mask, cp->free_cpus, p->cpus_ptr)) {
|
||||
return 1;
|
||||
} else {
|
||||
int best_cpu = cpudl_maximum(cp);
|
||||
|
||||
WARN_ON(best_cpu != -1 && !cpu_present(best_cpu));
|
||||
|
||||
if (cpumask_test_cpu(best_cpu, &p->cpus_allowed) &&
|
||||
if (cpumask_test_cpu(best_cpu, p->cpus_ptr) &&
|
||||
dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
|
||||
if (later_mask)
|
||||
cpumask_set_cpu(best_cpu, later_mask);
|
||||
|
@ -196,14 +196,17 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy,
|
||||
* based on the task model parameters and gives the minimal utilization
|
||||
* required to meet deadlines.
|
||||
*/
|
||||
unsigned long schedutil_freq_util(int cpu, unsigned long util_cfs,
|
||||
unsigned long max, enum schedutil_type type)
|
||||
unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs,
|
||||
unsigned long max, enum schedutil_type type,
|
||||
struct task_struct *p)
|
||||
{
|
||||
unsigned long dl_util, util, irq;
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
|
||||
if (type == FREQUENCY_UTIL && rt_rq_is_runnable(&rq->rt))
|
||||
if (!IS_BUILTIN(CONFIG_UCLAMP_TASK) &&
|
||||
type == FREQUENCY_UTIL && rt_rq_is_runnable(&rq->rt)) {
|
||||
return max;
|
||||
}
|
||||
|
||||
/*
|
||||
* Early check to see if IRQ/steal time saturates the CPU, can be
|
||||
@ -219,9 +222,16 @@ unsigned long schedutil_freq_util(int cpu, unsigned long util_cfs,
|
||||
* CFS tasks and we use the same metric to track the effective
|
||||
* utilization (PELT windows are synchronized) we can directly add them
|
||||
* to obtain the CPU's actual utilization.
|
||||
*
|
||||
* CFS and RT utilization can be boosted or capped, depending on
|
||||
* utilization clamp constraints requested by currently RUNNABLE
|
||||
* tasks.
|
||||
* When there are no CFS RUNNABLE tasks, clamps are released and
|
||||
* frequency will be gracefully reduced with the utilization decay.
|
||||
*/
|
||||
util = util_cfs;
|
||||
util += cpu_util_rt(rq);
|
||||
util = util_cfs + cpu_util_rt(rq);
|
||||
if (type == FREQUENCY_UTIL)
|
||||
util = uclamp_util_with(rq, util, p);
|
||||
|
||||
dl_util = cpu_util_dl(rq);
|
||||
|
||||
@ -276,12 +286,12 @@ static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu)
|
||||
{
|
||||
struct rq *rq = cpu_rq(sg_cpu->cpu);
|
||||
unsigned long util = cpu_util_cfs(rq);
|
||||
unsigned long max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu);
|
||||
unsigned long max = arch_scale_cpu_capacity(sg_cpu->cpu);
|
||||
|
||||
sg_cpu->max = max;
|
||||
sg_cpu->bw_dl = cpu_bw_dl(rq);
|
||||
|
||||
return schedutil_freq_util(sg_cpu->cpu, util, max, FREQUENCY_UTIL);
|
||||
return schedutil_cpu_util(sg_cpu->cpu, util, max, FREQUENCY_UTIL, NULL);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -94,11 +94,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
|
||||
if (skip)
|
||||
continue;
|
||||
|
||||
if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
|
||||
if (cpumask_any_and(p->cpus_ptr, vec->mask) >= nr_cpu_ids)
|
||||
continue;
|
||||
|
||||
if (lowest_mask) {
|
||||
cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
|
||||
cpumask_and(lowest_mask, p->cpus_ptr, vec->mask);
|
||||
|
||||
/*
|
||||
* We have to ensure that we have at least one bit
|
||||
|
@ -538,7 +538,7 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p
|
||||
* If we cannot preempt any rq, fall back to pick any
|
||||
* online CPU:
|
||||
*/
|
||||
cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
|
||||
cpu = cpumask_any_and(cpu_active_mask, p->cpus_ptr);
|
||||
if (cpu >= nr_cpu_ids) {
|
||||
/*
|
||||
* Failed to find any suitable CPU.
|
||||
@ -1195,7 +1195,7 @@ static void update_curr_dl(struct rq *rq)
|
||||
&curr->dl);
|
||||
} else {
|
||||
unsigned long scale_freq = arch_scale_freq_capacity(cpu);
|
||||
unsigned long scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
|
||||
unsigned long scale_cpu = arch_scale_cpu_capacity(cpu);
|
||||
|
||||
scaled_delta_exec = cap_scale(delta_exec, scale_freq);
|
||||
scaled_delta_exec = cap_scale(scaled_delta_exec, scale_cpu);
|
||||
@ -1824,7 +1824,7 @@ static void set_curr_task_dl(struct rq *rq)
|
||||
static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
|
||||
{
|
||||
if (!task_running(rq, p) &&
|
||||
cpumask_test_cpu(cpu, &p->cpus_allowed))
|
||||
cpumask_test_cpu(cpu, p->cpus_ptr))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
@ -1974,7 +1974,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
|
||||
/* Retry if something changed. */
|
||||
if (double_lock_balance(rq, later_rq)) {
|
||||
if (unlikely(task_rq(task) != rq ||
|
||||
!cpumask_test_cpu(later_rq->cpu, &task->cpus_allowed) ||
|
||||
!cpumask_test_cpu(later_rq->cpu, task->cpus_ptr) ||
|
||||
task_running(rq, task) ||
|
||||
!dl_task(task) ||
|
||||
!task_on_rq_queued(task))) {
|
||||
|
@ -233,49 +233,35 @@ static void sd_free_ctl_entry(struct ctl_table **tablep)
|
||||
*tablep = NULL;
|
||||
}
|
||||
|
||||
static int min_load_idx = 0;
|
||||
static int max_load_idx = CPU_LOAD_IDX_MAX-1;
|
||||
|
||||
static void
|
||||
set_table_entry(struct ctl_table *entry,
|
||||
const char *procname, void *data, int maxlen,
|
||||
umode_t mode, proc_handler *proc_handler,
|
||||
bool load_idx)
|
||||
umode_t mode, proc_handler *proc_handler)
|
||||
{
|
||||
entry->procname = procname;
|
||||
entry->data = data;
|
||||
entry->maxlen = maxlen;
|
||||
entry->mode = mode;
|
||||
entry->proc_handler = proc_handler;
|
||||
|
||||
if (load_idx) {
|
||||
entry->extra1 = &min_load_idx;
|
||||
entry->extra2 = &max_load_idx;
|
||||
}
|
||||
}
|
||||
|
||||
static struct ctl_table *
|
||||
sd_alloc_ctl_domain_table(struct sched_domain *sd)
|
||||
{
|
||||
struct ctl_table *table = sd_alloc_ctl_entry(14);
|
||||
struct ctl_table *table = sd_alloc_ctl_entry(9);
|
||||
|
||||
if (table == NULL)
|
||||
return NULL;
|
||||
|
||||
set_table_entry(&table[0] , "min_interval", &sd->min_interval, sizeof(long), 0644, proc_doulongvec_minmax, false);
|
||||
set_table_entry(&table[1] , "max_interval", &sd->max_interval, sizeof(long), 0644, proc_doulongvec_minmax, false);
|
||||
set_table_entry(&table[2] , "busy_idx", &sd->busy_idx, sizeof(int) , 0644, proc_dointvec_minmax, true );
|
||||
set_table_entry(&table[3] , "idle_idx", &sd->idle_idx, sizeof(int) , 0644, proc_dointvec_minmax, true );
|
||||
set_table_entry(&table[4] , "newidle_idx", &sd->newidle_idx, sizeof(int) , 0644, proc_dointvec_minmax, true );
|
||||
set_table_entry(&table[5] , "wake_idx", &sd->wake_idx, sizeof(int) , 0644, proc_dointvec_minmax, true );
|
||||
set_table_entry(&table[6] , "forkexec_idx", &sd->forkexec_idx, sizeof(int) , 0644, proc_dointvec_minmax, true );
|
||||
set_table_entry(&table[7] , "busy_factor", &sd->busy_factor, sizeof(int) , 0644, proc_dointvec_minmax, false);
|
||||
set_table_entry(&table[8] , "imbalance_pct", &sd->imbalance_pct, sizeof(int) , 0644, proc_dointvec_minmax, false);
|
||||
set_table_entry(&table[9] , "cache_nice_tries", &sd->cache_nice_tries, sizeof(int) , 0644, proc_dointvec_minmax, false);
|
||||
set_table_entry(&table[10], "flags", &sd->flags, sizeof(int) , 0644, proc_dointvec_minmax, false);
|
||||
set_table_entry(&table[11], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax, false);
|
||||
set_table_entry(&table[12], "name", sd->name, CORENAME_MAX_SIZE, 0444, proc_dostring, false);
|
||||
/* &table[13] is terminator */
|
||||
set_table_entry(&table[0], "min_interval", &sd->min_interval, sizeof(long), 0644, proc_doulongvec_minmax);
|
||||
set_table_entry(&table[1], "max_interval", &sd->max_interval, sizeof(long), 0644, proc_doulongvec_minmax);
|
||||
set_table_entry(&table[2], "busy_factor", &sd->busy_factor, sizeof(int), 0644, proc_dointvec_minmax);
|
||||
set_table_entry(&table[3], "imbalance_pct", &sd->imbalance_pct, sizeof(int), 0644, proc_dointvec_minmax);
|
||||
set_table_entry(&table[4], "cache_nice_tries", &sd->cache_nice_tries, sizeof(int), 0644, proc_dointvec_minmax);
|
||||
set_table_entry(&table[5], "flags", &sd->flags, sizeof(int), 0644, proc_dointvec_minmax);
|
||||
set_table_entry(&table[6], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax);
|
||||
set_table_entry(&table[7], "name", sd->name, CORENAME_MAX_SIZE, 0444, proc_dostring);
|
||||
/* &table[8] is terminator */
|
||||
|
||||
return table;
|
||||
}
|
||||
@ -653,8 +639,6 @@ do { \
|
||||
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x))
|
||||
|
||||
P(nr_running);
|
||||
SEQ_printf(m, " .%-30s: %lu\n", "load",
|
||||
rq->load.weight);
|
||||
P(nr_switches);
|
||||
P(nr_load_updates);
|
||||
P(nr_uninterruptible);
|
||||
@ -662,11 +646,6 @@ do { \
|
||||
SEQ_printf(m, " .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr)));
|
||||
PN(clock);
|
||||
PN(clock_task);
|
||||
P(cpu_load[0]);
|
||||
P(cpu_load[1]);
|
||||
P(cpu_load[2]);
|
||||
P(cpu_load[3]);
|
||||
P(cpu_load[4]);
|
||||
#undef P
|
||||
#undef PN
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -39,7 +39,6 @@ SCHED_FEAT(WAKEUP_PREEMPTION, true)
|
||||
|
||||
SCHED_FEAT(HRTICK, false)
|
||||
SCHED_FEAT(DOUBLE_TICK, false)
|
||||
SCHED_FEAT(LB_BIAS, false)
|
||||
|
||||
/*
|
||||
* Decrement CPU capacity based on time not spent running tasks
|
||||
|
@ -28,6 +28,8 @@
|
||||
#include "sched.h"
|
||||
#include "pelt.h"
|
||||
|
||||
#include <trace/events/sched.h>
|
||||
|
||||
/*
|
||||
* Approximate:
|
||||
* val * y^n, where y^32 ~= 0.5 (~1 scheduling period)
|
||||
@ -265,6 +267,7 @@ int __update_load_avg_blocked_se(u64 now, struct sched_entity *se)
|
||||
{
|
||||
if (___update_load_sum(now, &se->avg, 0, 0, 0)) {
|
||||
___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
|
||||
trace_pelt_se_tp(se);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -278,6 +281,7 @@ int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se
|
||||
|
||||
___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
|
||||
cfs_se_util_change(&se->avg);
|
||||
trace_pelt_se_tp(se);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -292,6 +296,7 @@ int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq)
|
||||
cfs_rq->curr != NULL)) {
|
||||
|
||||
___update_load_avg(&cfs_rq->avg, 1, 1);
|
||||
trace_pelt_cfs_tp(cfs_rq);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -317,6 +322,7 @@ int update_rt_rq_load_avg(u64 now, struct rq *rq, int running)
|
||||
running)) {
|
||||
|
||||
___update_load_avg(&rq->avg_rt, 1, 1);
|
||||
trace_pelt_rt_tp(rq);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -340,6 +346,7 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running)
|
||||
running)) {
|
||||
|
||||
___update_load_avg(&rq->avg_dl, 1, 1);
|
||||
trace_pelt_dl_tp(rq);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -366,7 +373,7 @@ int update_irq_load_avg(struct rq *rq, u64 running)
|
||||
* reflect the real amount of computation
|
||||
*/
|
||||
running = cap_scale(running, arch_scale_freq_capacity(cpu_of(rq)));
|
||||
running = cap_scale(running, arch_scale_cpu_capacity(NULL, cpu_of(rq)));
|
||||
running = cap_scale(running, arch_scale_cpu_capacity(cpu_of(rq)));
|
||||
|
||||
/*
|
||||
* We know the time that has been used by interrupt since last update
|
||||
@ -388,8 +395,10 @@ int update_irq_load_avg(struct rq *rq, u64 running)
|
||||
1,
|
||||
1);
|
||||
|
||||
if (ret)
|
||||
if (ret) {
|
||||
___update_load_avg(&rq->avg_irq, 1, 1);
|
||||
trace_pelt_irq_tp(rq);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -79,7 +79,7 @@ static inline void update_rq_clock_pelt(struct rq *rq, s64 delta)
|
||||
* Scale the elapsed time to reflect the real amount of
|
||||
* computation
|
||||
*/
|
||||
delta = cap_scale(delta, arch_scale_cpu_capacity(NULL, cpu_of(rq)));
|
||||
delta = cap_scale(delta, arch_scale_cpu_capacity(cpu_of(rq)));
|
||||
delta = cap_scale(delta, arch_scale_freq_capacity(cpu_of(rq)));
|
||||
|
||||
rq->clock_pelt += delta;
|
||||
|
@ -1614,7 +1614,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
|
||||
static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
|
||||
{
|
||||
if (!task_running(rq, p) &&
|
||||
cpumask_test_cpu(cpu, &p->cpus_allowed))
|
||||
cpumask_test_cpu(cpu, p->cpus_ptr))
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
@ -1751,7 +1751,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
|
||||
* Also make sure that it wasn't scheduled on its rq.
|
||||
*/
|
||||
if (unlikely(task_rq(task) != rq ||
|
||||
!cpumask_test_cpu(lowest_rq->cpu, &task->cpus_allowed) ||
|
||||
!cpumask_test_cpu(lowest_rq->cpu, task->cpus_ptr) ||
|
||||
task_running(rq, task) ||
|
||||
!rt_task(task) ||
|
||||
!task_on_rq_queued(task))) {
|
||||
@ -2400,6 +2400,10 @@ const struct sched_class rt_sched_class = {
|
||||
.switched_to = switched_to_rt,
|
||||
|
||||
.update_curr = update_curr_rt,
|
||||
|
||||
#ifdef CONFIG_UCLAMP_TASK
|
||||
.uclamp_enabled = 1,
|
||||
#endif
|
||||
};
|
||||
|
||||
#ifdef CONFIG_RT_GROUP_SCHED
|
||||
|
@ -1,7 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/* Generated by Documentation/scheduler/sched-pelt; do not modify. */
|
||||
|
||||
static const u32 runnable_avg_yN_inv[] = {
|
||||
static const u32 runnable_avg_yN_inv[] __maybe_unused = {
|
||||
0xffffffff, 0xfa83b2da, 0xf5257d14, 0xefe4b99a, 0xeac0c6e6, 0xe5b906e6,
|
||||
0xe0ccdeeb, 0xdbfbb796, 0xd744fcc9, 0xd2a81d91, 0xce248c14, 0xc9b9bd85,
|
||||
0xc5672a10, 0xc12c4cc9, 0xbd08a39e, 0xb8fbaf46, 0xb504f333, 0xb123f581,
|
||||
|
@ -96,12 +96,6 @@ extern atomic_long_t calc_load_tasks;
|
||||
extern void calc_global_load_tick(struct rq *this_rq);
|
||||
extern long calc_load_fold_active(struct rq *this_rq, long adjust);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
extern void cpu_load_update_active(struct rq *this_rq);
|
||||
#else
|
||||
static inline void cpu_load_update_active(struct rq *this_rq) { }
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Helpers for converting nanosecond timing to jiffy resolution
|
||||
*/
|
||||
@ -344,8 +338,10 @@ struct cfs_bandwidth {
|
||||
u64 runtime_expires;
|
||||
int expires_seq;
|
||||
|
||||
short idle;
|
||||
short period_active;
|
||||
u8 idle;
|
||||
u8 period_active;
|
||||
u8 distribute_running;
|
||||
u8 slack_started;
|
||||
struct hrtimer period_timer;
|
||||
struct hrtimer slack_timer;
|
||||
struct list_head throttled_cfs_rq;
|
||||
@ -354,8 +350,6 @@ struct cfs_bandwidth {
|
||||
int nr_periods;
|
||||
int nr_throttled;
|
||||
u64 throttled_time;
|
||||
|
||||
bool distribute_running;
|
||||
#endif
|
||||
};
|
||||
|
||||
@ -797,6 +791,48 @@ extern void rto_push_irq_work_func(struct irq_work *work);
|
||||
#endif
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
#ifdef CONFIG_UCLAMP_TASK
|
||||
/*
|
||||
* struct uclamp_bucket - Utilization clamp bucket
|
||||
* @value: utilization clamp value for tasks on this clamp bucket
|
||||
* @tasks: number of RUNNABLE tasks on this clamp bucket
|
||||
*
|
||||
* Keep track of how many tasks are RUNNABLE for a given utilization
|
||||
* clamp value.
|
||||
*/
|
||||
struct uclamp_bucket {
|
||||
unsigned long value : bits_per(SCHED_CAPACITY_SCALE);
|
||||
unsigned long tasks : BITS_PER_LONG - bits_per(SCHED_CAPACITY_SCALE);
|
||||
};
|
||||
|
||||
/*
|
||||
* struct uclamp_rq - rq's utilization clamp
|
||||
* @value: currently active clamp values for a rq
|
||||
* @bucket: utilization clamp buckets affecting a rq
|
||||
*
|
||||
* Keep track of RUNNABLE tasks on a rq to aggregate their clamp values.
|
||||
* A clamp value is affecting a rq when there is at least one task RUNNABLE
|
||||
* (or actually running) with that value.
|
||||
*
|
||||
* There are up to UCLAMP_CNT possible different clamp values, currently there
|
||||
* are only two: minimum utilization and maximum utilization.
|
||||
*
|
||||
* All utilization clamping values are MAX aggregated, since:
|
||||
* - for util_min: we want to run the CPU at least at the max of the minimum
|
||||
* utilization required by its currently RUNNABLE tasks.
|
||||
* - for util_max: we want to allow the CPU to run up to the max of the
|
||||
* maximum utilization allowed by its currently RUNNABLE tasks.
|
||||
*
|
||||
* Since on each system we expect only a limited number of different
|
||||
* utilization clamp values (UCLAMP_BUCKETS), use a simple array to track
|
||||
* the metrics required to compute all the per-rq utilization clamp values.
|
||||
*/
|
||||
struct uclamp_rq {
|
||||
unsigned int value;
|
||||
struct uclamp_bucket bucket[UCLAMP_BUCKETS];
|
||||
};
|
||||
#endif /* CONFIG_UCLAMP_TASK */
|
||||
|
||||
/*
|
||||
* This is the main, per-CPU runqueue data structure.
|
||||
*
|
||||
@ -818,8 +854,6 @@ struct rq {
|
||||
unsigned int nr_preferred_running;
|
||||
unsigned int numa_migrate_on;
|
||||
#endif
|
||||
#define CPU_LOAD_IDX_MAX 5
|
||||
unsigned long cpu_load[CPU_LOAD_IDX_MAX];
|
||||
#ifdef CONFIG_NO_HZ_COMMON
|
||||
#ifdef CONFIG_SMP
|
||||
unsigned long last_load_update_tick;
|
||||
@ -830,11 +864,16 @@ struct rq {
|
||||
atomic_t nohz_flags;
|
||||
#endif /* CONFIG_NO_HZ_COMMON */
|
||||
|
||||
/* capture load from *all* tasks on this CPU: */
|
||||
struct load_weight load;
|
||||
unsigned long nr_load_updates;
|
||||
u64 nr_switches;
|
||||
|
||||
#ifdef CONFIG_UCLAMP_TASK
|
||||
/* Utilization clamp values based on CPU's RUNNABLE tasks */
|
||||
struct uclamp_rq uclamp[UCLAMP_CNT] ____cacheline_aligned;
|
||||
unsigned int uclamp_flags;
|
||||
#define UCLAMP_FLAG_IDLE 0x01
|
||||
#endif
|
||||
|
||||
struct cfs_rq cfs;
|
||||
struct rt_rq rt;
|
||||
struct dl_rq dl;
|
||||
@ -1649,6 +1688,10 @@ extern const u32 sched_prio_to_wmult[40];
|
||||
struct sched_class {
|
||||
const struct sched_class *next;
|
||||
|
||||
#ifdef CONFIG_UCLAMP_TASK
|
||||
int uclamp_enabled;
|
||||
#endif
|
||||
|
||||
void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
|
||||
void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
|
||||
void (*yield_task) (struct rq *rq);
|
||||
@ -2222,6 +2265,48 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
|
||||
static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
|
||||
#endif /* CONFIG_CPU_FREQ */
|
||||
|
||||
#ifdef CONFIG_UCLAMP_TASK
|
||||
unsigned int uclamp_eff_value(struct task_struct *p, unsigned int clamp_id);
|
||||
|
||||
static __always_inline
|
||||
unsigned int uclamp_util_with(struct rq *rq, unsigned int util,
|
||||
struct task_struct *p)
|
||||
{
|
||||
unsigned int min_util = READ_ONCE(rq->uclamp[UCLAMP_MIN].value);
|
||||
unsigned int max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value);
|
||||
|
||||
if (p) {
|
||||
min_util = max(min_util, uclamp_eff_value(p, UCLAMP_MIN));
|
||||
max_util = max(max_util, uclamp_eff_value(p, UCLAMP_MAX));
|
||||
}
|
||||
|
||||
/*
|
||||
* Since CPU's {min,max}_util clamps are MAX aggregated considering
|
||||
* RUNNABLE tasks with _different_ clamps, we can end up with an
|
||||
* inversion. Fix it now when the clamps are applied.
|
||||
*/
|
||||
if (unlikely(min_util >= max_util))
|
||||
return min_util;
|
||||
|
||||
return clamp(util, min_util, max_util);
|
||||
}
|
||||
|
||||
static inline unsigned int uclamp_util(struct rq *rq, unsigned int util)
|
||||
{
|
||||
return uclamp_util_with(rq, util, NULL);
|
||||
}
|
||||
#else /* CONFIG_UCLAMP_TASK */
|
||||
static inline unsigned int uclamp_util_with(struct rq *rq, unsigned int util,
|
||||
struct task_struct *p)
|
||||
{
|
||||
return util;
|
||||
}
|
||||
static inline unsigned int uclamp_util(struct rq *rq, unsigned int util)
|
||||
{
|
||||
return util;
|
||||
}
|
||||
#endif /* CONFIG_UCLAMP_TASK */
|
||||
|
||||
#ifdef arch_scale_freq_capacity
|
||||
# ifndef arch_scale_freq_invariant
|
||||
# define arch_scale_freq_invariant() true
|
||||
@ -2237,7 +2322,6 @@ static inline unsigned long capacity_orig_of(int cpu)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
|
||||
/**
|
||||
* enum schedutil_type - CPU utilization type
|
||||
* @FREQUENCY_UTIL: Utilization used to select frequency
|
||||
@ -2253,15 +2337,11 @@ enum schedutil_type {
|
||||
ENERGY_UTIL,
|
||||
};
|
||||
|
||||
unsigned long schedutil_freq_util(int cpu, unsigned long util_cfs,
|
||||
unsigned long max, enum schedutil_type type);
|
||||
#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
|
||||
|
||||
static inline unsigned long schedutil_energy_util(int cpu, unsigned long cfs)
|
||||
{
|
||||
unsigned long max = arch_scale_cpu_capacity(NULL, cpu);
|
||||
|
||||
return schedutil_freq_util(cpu, cfs, max, ENERGY_UTIL);
|
||||
}
|
||||
unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs,
|
||||
unsigned long max, enum schedutil_type type,
|
||||
struct task_struct *p);
|
||||
|
||||
static inline unsigned long cpu_bw_dl(struct rq *rq)
|
||||
{
|
||||
@ -2290,11 +2370,13 @@ static inline unsigned long cpu_util_rt(struct rq *rq)
|
||||
return READ_ONCE(rq->avg_rt.util_avg);
|
||||
}
|
||||
#else /* CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
|
||||
static inline unsigned long schedutil_energy_util(int cpu, unsigned long cfs)
|
||||
static inline unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs,
|
||||
unsigned long max, enum schedutil_type type,
|
||||
struct task_struct *p)
|
||||
{
|
||||
return cfs;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
#endif /* CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
|
||||
|
||||
#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
|
||||
static inline unsigned long cpu_util_irq(struct rq *rq)
|
||||
|
@ -1344,11 +1344,6 @@ sd_init(struct sched_domain_topology_level *tl,
|
||||
.imbalance_pct = 125,
|
||||
|
||||
.cache_nice_tries = 0,
|
||||
.busy_idx = 0,
|
||||
.idle_idx = 0,
|
||||
.newidle_idx = 0,
|
||||
.wake_idx = 0,
|
||||
.forkexec_idx = 0,
|
||||
|
||||
.flags = 1*SD_LOAD_BALANCE
|
||||
| 1*SD_BALANCE_NEWIDLE
|
||||
@ -1400,13 +1395,10 @@ sd_init(struct sched_domain_topology_level *tl,
|
||||
} else if (sd->flags & SD_SHARE_PKG_RESOURCES) {
|
||||
sd->imbalance_pct = 117;
|
||||
sd->cache_nice_tries = 1;
|
||||
sd->busy_idx = 2;
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
} else if (sd->flags & SD_NUMA) {
|
||||
sd->cache_nice_tries = 2;
|
||||
sd->busy_idx = 3;
|
||||
sd->idle_idx = 2;
|
||||
|
||||
sd->flags &= ~SD_PREFER_SIBLING;
|
||||
sd->flags |= SD_SERIALIZE;
|
||||
@ -1419,8 +1411,6 @@ sd_init(struct sched_domain_topology_level *tl,
|
||||
#endif
|
||||
} else {
|
||||
sd->cache_nice_tries = 1;
|
||||
sd->busy_idx = 2;
|
||||
sd->idle_idx = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1884,10 +1874,10 @@ static struct sched_domain_topology_level
|
||||
unsigned long cap;
|
||||
|
||||
/* Is there any asymmetry? */
|
||||
cap = arch_scale_cpu_capacity(NULL, cpumask_first(cpu_map));
|
||||
cap = arch_scale_cpu_capacity(cpumask_first(cpu_map));
|
||||
|
||||
for_each_cpu(i, cpu_map) {
|
||||
if (arch_scale_cpu_capacity(NULL, i) != cap) {
|
||||
if (arch_scale_cpu_capacity(i) != cap) {
|
||||
asym = true;
|
||||
break;
|
||||
}
|
||||
@ -1902,7 +1892,7 @@ static struct sched_domain_topology_level
|
||||
* to everyone.
|
||||
*/
|
||||
for_each_cpu(i, cpu_map) {
|
||||
unsigned long max_capacity = arch_scale_cpu_capacity(NULL, i);
|
||||
unsigned long max_capacity = arch_scale_cpu_capacity(i);
|
||||
int tl_id = 0;
|
||||
|
||||
for_each_sd_topology(tl) {
|
||||
@ -1912,7 +1902,7 @@ static struct sched_domain_topology_level
|
||||
for_each_cpu_and(j, tl->mask(i), cpu_map) {
|
||||
unsigned long capacity;
|
||||
|
||||
capacity = arch_scale_cpu_capacity(NULL, j);
|
||||
capacity = arch_scale_cpu_capacity(j);
|
||||
|
||||
if (capacity <= max_capacity)
|
||||
continue;
|
||||
|
@ -118,16 +118,12 @@ static void __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int
|
||||
bookmark.func = NULL;
|
||||
INIT_LIST_HEAD(&bookmark.entry);
|
||||
|
||||
spin_lock_irqsave(&wq_head->lock, flags);
|
||||
nr_exclusive = __wake_up_common(wq_head, mode, nr_exclusive, wake_flags, key, &bookmark);
|
||||
spin_unlock_irqrestore(&wq_head->lock, flags);
|
||||
|
||||
while (bookmark.flags & WQ_FLAG_BOOKMARK) {
|
||||
do {
|
||||
spin_lock_irqsave(&wq_head->lock, flags);
|
||||
nr_exclusive = __wake_up_common(wq_head, mode, nr_exclusive,
|
||||
wake_flags, key, &bookmark);
|
||||
spin_unlock_irqrestore(&wq_head->lock, flags);
|
||||
}
|
||||
} while (bookmark.flags & WQ_FLAG_BOOKMARK);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -452,6 +452,22 @@ static struct ctl_table kern_table[] = {
|
||||
.mode = 0644,
|
||||
.proc_handler = sched_rr_handler,
|
||||
},
|
||||
#ifdef CONFIG_UCLAMP_TASK
|
||||
{
|
||||
.procname = "sched_util_clamp_min",
|
||||
.data = &sysctl_sched_uclamp_util_min,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = sysctl_sched_uclamp_handler,
|
||||
},
|
||||
{
|
||||
.procname = "sched_util_clamp_max",
|
||||
.data = &sysctl_sched_uclamp_util_max,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = sysctl_sched_uclamp_handler,
|
||||
},
|
||||
#endif
|
||||
#ifdef CONFIG_SCHED_AUTOGROUP
|
||||
{
|
||||
.procname = "sched_autogroup_enabled",
|
||||
|
@ -782,7 +782,6 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
|
||||
*/
|
||||
if (!ts->tick_stopped) {
|
||||
calc_load_nohz_start();
|
||||
cpu_load_update_nohz_start();
|
||||
quiet_vmstat();
|
||||
|
||||
ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
|
||||
@ -829,7 +828,6 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
|
||||
{
|
||||
/* Update jiffies first */
|
||||
tick_do_update_jiffies64(now);
|
||||
cpu_load_update_nohz_stop();
|
||||
|
||||
/*
|
||||
* Clear the timer idle flag, so we avoid IPIs on remote queueing and
|
||||
|
@ -277,7 +277,7 @@ static void move_to_next_cpu(void)
|
||||
* of this thread, than stop migrating for the duration
|
||||
* of the current test.
|
||||
*/
|
||||
if (!cpumask_equal(current_mask, &current->cpus_allowed))
|
||||
if (!cpumask_equal(current_mask, current->cpus_ptr))
|
||||
goto disable;
|
||||
|
||||
get_online_cpus();
|
||||
|
@ -23,7 +23,7 @@ unsigned int check_preemption_disabled(const char *what1, const char *what2)
|
||||
* Kernel threads bound to a single CPU can safely use
|
||||
* smp_processor_id():
|
||||
*/
|
||||
if (cpumask_equal(&current->cpus_allowed, cpumask_of(this_cpu)))
|
||||
if (cpumask_equal(current->cpus_ptr, cpumask_of(this_cpu)))
|
||||
goto out;
|
||||
|
||||
/*
|
||||
|
@ -34,7 +34,7 @@ static void simple_thread_func(int cnt)
|
||||
|
||||
/* Silly tracepoints */
|
||||
trace_foo_bar("hello", cnt, array, random_strings[len],
|
||||
&current->cpus_allowed);
|
||||
current->cpus_ptr);
|
||||
|
||||
trace_foo_with_template_simple("HELLO", cnt);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user