Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:

 - Remove the unused per rq load array and all its infrastructure, by
   Dietmar Eggemann.

 - Add utilization clamping support by Patrick Bellasi. This is a
   refinement of the energy aware scheduling framework with support for
   boosting of interactive and capping of background workloads: to make
   sure critical GUI threads get maximum frequency ASAP, and to make
   sure background processing doesn't unnecessarily push the cpufreq
   governor to higher frequencies and less energy efficient CPU modes.

 - Add the bare minimum of tracepoints required for LISA EAS regression
   testing, by Qais Yousef - which allows automated testing of various
   power management features, including energy aware scheduling.

 - Restructure the former tsk_nr_cpus_allowed() facility that the -rt
   kernel used to modify the scheduler's CPU affinity logic such as
   migrate_disable() - introduce the task->cpus_ptr value instead of
   taking the address of &task->cpus_allowed directly - by Sebastian
   Andrzej Siewior.

 - Misc optimizations, fixes, cleanups and small enhancements - see the
   Git log for details.

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (33 commits)
  sched/uclamp: Add uclamp support to energy_compute()
  sched/uclamp: Add uclamp_util_with()
  sched/cpufreq, sched/uclamp: Add clamps for FAIR and RT tasks
  sched/uclamp: Set default clamps for RT tasks
  sched/uclamp: Reset uclamp values on RESET_ON_FORK
  sched/uclamp: Extend sched_setattr() to support utilization clamping
  sched/core: Allow sched_setattr() to use the current policy
  sched/uclamp: Add system default clamps
  sched/uclamp: Enforce last task's UCLAMP_MAX
  sched/uclamp: Add bucket local max tracking
  sched/uclamp: Add CPU's clamp buckets refcounting
  sched/fair: Rename weighted_cpuload() to cpu_runnable_load()
  sched/debug: Export the newly added tracepoints
  sched/debug: Add sched_overutilized tracepoint
  sched/debug: Add new tracepoint to track PELT at se level
  sched/debug: Add new tracepoints to track PELT at rq level
  sched/debug: Add a new sched_trace_*() helper functions
  sched/autogroup: Make autogroup_path() always available
  sched/wait: Deduplicate code with do-while
  sched/topology: Remove unused 'sd' parameter from arch_scale_cpu_capacity()
  ...
commit dad1c12ed8
				| @ -20,7 +20,8 @@ void calc_runnable_avg_yN_inv(void) | ||||
| 	int i; | ||||
| 	unsigned int x; | ||||
| 
 | ||||
| 	printf("static const u32 runnable_avg_yN_inv[] = {"); | ||||
| 	/* To silence -Wunused-but-set-variable warnings. */ | ||||
| 	printf("static const u32 runnable_avg_yN_inv[] __maybe_unused = {"); | ||||
| 	for (i = 0; i < HALFLIFE; i++) { | ||||
| 		x = ((1UL<<32)-1)*pow(y, i); | ||||
| 
 | ||||
|  | ||||
| @ -169,7 +169,7 @@ static void update_cpu_capacity(unsigned int cpu) | ||||
| 	topology_set_cpu_scale(cpu, cpu_capacity(cpu) / middle_capacity); | ||||
| 
 | ||||
| 	pr_info("CPU%u: update cpu_capacity %lu\n", | ||||
| 		cpu, topology_get_cpu_scale(NULL, cpu)); | ||||
| 		cpu, topology_get_cpu_scale(cpu)); | ||||
| } | ||||
| 
 | ||||
| #else | ||||
|  | ||||
| @ -1831,7 +1831,7 @@ format_mca_init_stack(void *mca_data, unsigned long offset, | ||||
| 	ti->cpu = cpu; | ||||
| 	p->stack = ti; | ||||
| 	p->state = TASK_UNINTERRUPTIBLE; | ||||
| 	cpumask_set_cpu(cpu, &p->cpus_allowed); | ||||
| 	cpumask_set_cpu(cpu, &p->cpus_mask); | ||||
| 	INIT_LIST_HEAD(&p->tasks); | ||||
| 	p->parent = p->real_parent = p->group_leader = p; | ||||
| 	INIT_LIST_HEAD(&p->children); | ||||
|  | ||||
| @ -42,7 +42,7 @@ extern struct task_struct *ll_task; | ||||
|  * inline to try to keep the overhead down. If we have been forced to run on | ||||
|  * a "CPU" with an FPU because of a previous high level of FP computation, | ||||
|  * but did not actually use the FPU during the most recent time-slice (CU1 | ||||
|  * isn't set), we undo the restriction on cpus_allowed. | ||||
|  * isn't set), we undo the restriction on cpus_mask. | ||||
|  * | ||||
|  * We're not calling set_cpus_allowed() here, because we have no need to | ||||
|  * force prompt migration - we're already switching the current CPU to a | ||||
| @ -57,7 +57,7 @@ do {									\ | ||||
| 	    test_ti_thread_flag(__prev_ti, TIF_FPUBOUND) &&		\ | ||||
| 	    (!(KSTK_STATUS(prev) & ST0_CU1))) {				\ | ||||
| 		clear_ti_thread_flag(__prev_ti, TIF_FPUBOUND);		\ | ||||
| 		prev->cpus_allowed = prev->thread.user_cpus_allowed;	\ | ||||
| 		prev->cpus_mask = prev->thread.user_cpus_allowed;	\ | ||||
| 	}								\ | ||||
| 	next->thread.emulated_fp = 0;					\ | ||||
| } while(0) | ||||
|  | ||||
| @ -177,7 +177,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len, | ||||
| 	if (retval) | ||||
| 		goto out_unlock; | ||||
| 
 | ||||
| 	cpumask_or(&allowed, &p->thread.user_cpus_allowed, &p->cpus_allowed); | ||||
| 	cpumask_or(&allowed, &p->thread.user_cpus_allowed, p->cpus_ptr); | ||||
| 	cpumask_and(&mask, &allowed, cpu_active_mask); | ||||
| 
 | ||||
| out_unlock: | ||||
|  | ||||
| @ -891,12 +891,12 @@ static void mt_ase_fp_affinity(void) | ||||
| 		 * restricted the allowed set to exclude any CPUs with FPUs, | ||||
| 		 * we'll skip the procedure. | ||||
| 		 */ | ||||
| 		if (cpumask_intersects(¤t->cpus_allowed, &mt_fpu_cpumask)) { | ||||
| 		if (cpumask_intersects(¤t->cpus_mask, &mt_fpu_cpumask)) { | ||||
| 			cpumask_t tmask; | ||||
| 
 | ||||
| 			current->thread.user_cpus_allowed | ||||
| 				= current->cpus_allowed; | ||||
| 			cpumask_and(&tmask, ¤t->cpus_allowed, | ||||
| 				= current->cpus_mask; | ||||
| 			cpumask_and(&tmask, ¤t->cpus_mask, | ||||
| 				    &mt_fpu_cpumask); | ||||
| 			set_cpus_allowed_ptr(current, &tmask); | ||||
| 			set_thread_flag(TIF_FPUBOUND); | ||||
|  | ||||
| @ -128,7 +128,7 @@ void __spu_update_sched_info(struct spu_context *ctx) | ||||
| 	 * runqueue. The context will be rescheduled on the proper node | ||||
| 	 * if it is timesliced or preempted. | ||||
| 	 */ | ||||
| 	cpumask_copy(&ctx->cpus_allowed, ¤t->cpus_allowed); | ||||
| 	cpumask_copy(&ctx->cpus_allowed, current->cpus_ptr); | ||||
| 
 | ||||
| 	/* Save the current cpu id for spu interrupt routing. */ | ||||
| 	ctx->last_ran = raw_smp_processor_id(); | ||||
|  | ||||
| @ -1503,7 +1503,7 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma) | ||||
| 	 * may be scheduled elsewhere and invalidate entries in the | ||||
| 	 * pseudo-locked region. | ||||
| 	 */ | ||||
| 	if (!cpumask_subset(¤t->cpus_allowed, &plr->d->cpu_mask)) { | ||||
| 	if (!cpumask_subset(current->cpus_ptr, &plr->d->cpu_mask)) { | ||||
| 		mutex_unlock(&rdtgroup_mutex); | ||||
| 		return -EINVAL; | ||||
| 	} | ||||
|  | ||||
| @ -43,7 +43,7 @@ static ssize_t cpu_capacity_show(struct device *dev, | ||||
| { | ||||
| 	struct cpu *cpu = container_of(dev, struct cpu, dev); | ||||
| 
 | ||||
| 	return sprintf(buf, "%lu\n", topology_get_cpu_scale(NULL, cpu->dev.id)); | ||||
| 	return sprintf(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id)); | ||||
| } | ||||
| 
 | ||||
| static void update_topology_flags_workfn(struct work_struct *work); | ||||
| @ -116,7 +116,7 @@ void topology_normalize_cpu_scale(void) | ||||
| 			/ capacity_scale; | ||||
| 		topology_set_cpu_scale(cpu, capacity); | ||||
| 		pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n", | ||||
| 			cpu, topology_get_cpu_scale(NULL, cpu)); | ||||
| 			cpu, topology_get_cpu_scale(cpu)); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| @ -185,7 +185,7 @@ init_cpu_capacity_callback(struct notifier_block *nb, | ||||
| 	cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus); | ||||
| 
 | ||||
| 	for_each_cpu(cpu, policy->related_cpus) { | ||||
| 		raw_capacity[cpu] = topology_get_cpu_scale(NULL, cpu) * | ||||
| 		raw_capacity[cpu] = topology_get_cpu_scale(cpu) * | ||||
| 				    policy->cpuinfo.max_freq / 1000UL; | ||||
| 		capacity_scale = max(raw_capacity[cpu], capacity_scale); | ||||
| 	} | ||||
|  | ||||
| @ -1038,7 +1038,7 @@ int hfi1_get_proc_affinity(int node) | ||||
| 	struct hfi1_affinity_node *entry; | ||||
| 	cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask; | ||||
| 	const struct cpumask *node_mask, | ||||
| 		*proc_mask = ¤t->cpus_allowed; | ||||
| 		*proc_mask = current->cpus_ptr; | ||||
| 	struct hfi1_affinity_node_list *affinity = &node_affinity; | ||||
| 	struct cpu_mask_set *set = &affinity->proc; | ||||
| 
 | ||||
| @ -1046,7 +1046,7 @@ int hfi1_get_proc_affinity(int node) | ||||
| 	 * check whether process/context affinity has already | ||||
| 	 * been set | ||||
| 	 */ | ||||
| 	if (cpumask_weight(proc_mask) == 1) { | ||||
| 	if (current->nr_cpus_allowed == 1) { | ||||
| 		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl", | ||||
| 			  current->pid, current->comm, | ||||
| 			  cpumask_pr_args(proc_mask)); | ||||
| @ -1057,7 +1057,7 @@ int hfi1_get_proc_affinity(int node) | ||||
| 		cpu = cpumask_first(proc_mask); | ||||
| 		cpumask_set_cpu(cpu, &set->used); | ||||
| 		goto done; | ||||
| 	} else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) { | ||||
| 	} else if (current->nr_cpus_allowed < cpumask_weight(&set->mask)) { | ||||
| 		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl", | ||||
| 			  current->pid, current->comm, | ||||
| 			  cpumask_pr_args(proc_mask)); | ||||
|  | ||||
| @ -869,14 +869,13 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd, | ||||
| { | ||||
| 	struct sdma_rht_node *rht_node; | ||||
| 	struct sdma_engine *sde = NULL; | ||||
| 	const struct cpumask *current_mask = ¤t->cpus_allowed; | ||||
| 	unsigned long cpu_id; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * To ensure that always the same sdma engine(s) will be | ||||
| 	 * selected make sure the process is pinned to this CPU only. | ||||
| 	 */ | ||||
| 	if (cpumask_weight(current_mask) != 1) | ||||
| 	if (current->nr_cpus_allowed != 1) | ||||
| 		goto out; | ||||
| 
 | ||||
| 	cpu_id = smp_processor_id(); | ||||
|  | ||||
| @ -1142,7 +1142,7 @@ static __poll_t qib_poll(struct file *fp, struct poll_table_struct *pt) | ||||
| static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd) | ||||
| { | ||||
| 	struct qib_filedata *fd = fp->private_data; | ||||
| 	const unsigned int weight = cpumask_weight(¤t->cpus_allowed); | ||||
| 	const unsigned int weight = current->nr_cpus_allowed; | ||||
| 	const struct cpumask *local_mask = cpumask_of_pcibus(dd->pcidev->bus); | ||||
| 	int local_cpu; | ||||
| 
 | ||||
| @ -1623,9 +1623,8 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo) | ||||
| 		ret = find_free_ctxt(i_minor - 1, fp, uinfo); | ||||
| 	else { | ||||
| 		int unit; | ||||
| 		const unsigned int cpu = cpumask_first(¤t->cpus_allowed); | ||||
| 		const unsigned int weight = | ||||
| 			cpumask_weight(¤t->cpus_allowed); | ||||
| 		const unsigned int cpu = cpumask_first(current->cpus_ptr); | ||||
| 		const unsigned int weight = current->nr_cpus_allowed; | ||||
| 
 | ||||
| 		if (weight == 1 && !test_bit(cpu, qib_cpulist)) | ||||
| 			if (!find_hca(cpu, &unit) && unit >= 0) | ||||
|  | ||||
| @ -381,9 +381,9 @@ static inline void task_context_switch_counts(struct seq_file *m, | ||||
| static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) | ||||
| { | ||||
| 	seq_printf(m, "Cpus_allowed:\t%*pb\n", | ||||
| 		   cpumask_pr_args(&task->cpus_allowed)); | ||||
| 		   cpumask_pr_args(task->cpus_ptr)); | ||||
| 	seq_printf(m, "Cpus_allowed_list:\t%*pbl\n", | ||||
| 		   cpumask_pr_args(&task->cpus_allowed)); | ||||
| 		   cpumask_pr_args(task->cpus_ptr)); | ||||
| } | ||||
| 
 | ||||
| static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm) | ||||
|  | ||||
| @ -18,7 +18,7 @@ DECLARE_PER_CPU(unsigned long, cpu_scale); | ||||
| 
 | ||||
| struct sched_domain; | ||||
| static inline | ||||
| unsigned long topology_get_cpu_scale(struct sched_domain *sd, int cpu) | ||||
| unsigned long topology_get_cpu_scale(int cpu) | ||||
| { | ||||
| 	return per_cpu(cpu_scale, cpu); | ||||
| } | ||||
|  | ||||
| @ -89,7 +89,7 @@ static inline unsigned long em_pd_energy(struct em_perf_domain *pd, | ||||
| 	 * like schedutil. | ||||
| 	 */ | ||||
| 	cpu = cpumask_first(to_cpumask(pd->cpus)); | ||||
| 	scale_cpu = arch_scale_cpu_capacity(NULL, cpu); | ||||
| 	scale_cpu = arch_scale_cpu_capacity(cpu); | ||||
| 	cs = &pd->table[pd->nr_cap_states - 1]; | ||||
| 	freq = map_util_freq(max_util, cs->frequency, scale_cpu); | ||||
| 
 | ||||
|  | ||||
| @ -220,4 +220,38 @@ int __order_base_2(unsigned long n) | ||||
| 		ilog2((n) - 1) + 1) :		\ | ||||
| 	__order_base_2(n)			\ | ||||
| ) | ||||
| 
 | ||||
| static inline __attribute__((const)) | ||||
| int __bits_per(unsigned long n) | ||||
| { | ||||
| 	if (n < 2) | ||||
| 		return 1; | ||||
| 	if (is_power_of_2(n)) | ||||
| 		return order_base_2(n) + 1; | ||||
| 	return order_base_2(n); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * bits_per - calculate the number of bits required for the argument | ||||
|  * @n: parameter | ||||
|  * | ||||
|  * This is constant-capable and can be used for compile time | ||||
|  * initializations, e.g bitfields. | ||||
|  * | ||||
|  * The first few values calculated by this routine: | ||||
|  * bf(0) = 1 | ||||
|  * bf(1) = 1 | ||||
|  * bf(2) = 2 | ||||
|  * bf(3) = 2 | ||||
|  * bf(4) = 3 | ||||
|  * ... and so on. | ||||
|  */ | ||||
| #define bits_per(n)				\ | ||||
| (						\ | ||||
| 	__builtin_constant_p(n) ? (		\ | ||||
| 		((n) == 0 || (n) == 1)		\ | ||||
| 			? 1 : ilog2(n) + 1	\ | ||||
| 	) :					\ | ||||
| 	__bits_per(n)				\ | ||||
| ) | ||||
| #endif /* _LINUX_LOG2_H */ | ||||
|  | ||||
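The bits_per() helper added above is what the utilization-clamping code further down uses to size its bitfields. As a sanity check, here is a minimal user-space sketch of the same computation (my own illustration, not kernel code; __builtin_clzl() stands in for the kernel's ilog2()):

/* User-space sketch of the bits_per() semantics; assumes GCC/clang. */
#include <assert.h>

static int sketch_bits_per(unsigned long n)
{
	if (n < 2)
		return 1;	/* bits_per(0) == bits_per(1) == 1 */
	/* ilog2(n) + 1: index of the most significant set bit, plus one */
	return 8 * (int)sizeof(unsigned long) - __builtin_clzl(n);
}

int main(void)
{
	assert(sketch_bits_per(0) == 1);
	assert(sketch_bits_per(2) == 2);
	assert(sketch_bits_per(3) == 2);
	assert(sketch_bits_per(4) == 3);
	/* Widths used by struct uclamp_se below, with the default config: */
	assert(sketch_bits_per(1024) == 11);	/* SCHED_CAPACITY_SCALE */
	assert(sketch_bits_per(5) == 3);	/* CONFIG_UCLAMP_BUCKETS_COUNT */
	return 0;
}

With those widths, each struct uclamp_se packs into 16 bits (11 + 3 + 1 + 1) for the default configuration.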
| @ -35,6 +35,7 @@ struct audit_context; | ||||
| struct backing_dev_info; | ||||
| struct bio_list; | ||||
| struct blk_plug; | ||||
| struct capture_control; | ||||
| struct cfs_rq; | ||||
| struct fs_struct; | ||||
| struct futex_pi_state; | ||||
| @ -47,8 +48,9 @@ struct pid_namespace; | ||||
| struct pipe_inode_info; | ||||
| struct rcu_node; | ||||
| struct reclaim_state; | ||||
| struct capture_control; | ||||
| struct robust_list_head; | ||||
| struct root_domain; | ||||
| struct rq; | ||||
| struct sched_attr; | ||||
| struct sched_param; | ||||
| struct seq_file; | ||||
| @ -281,6 +283,18 @@ struct vtime { | ||||
| 	u64			gtime; | ||||
| }; | ||||
| 
 | ||||
| /*
 | ||||
|  * Utilization clamp constraints. | ||||
|  * @UCLAMP_MIN:	Minimum utilization | ||||
|  * @UCLAMP_MAX:	Maximum utilization | ||||
|  * @UCLAMP_CNT:	Utilization clamp constraints count | ||||
|  */ | ||||
| enum uclamp_id { | ||||
| 	UCLAMP_MIN = 0, | ||||
| 	UCLAMP_MAX, | ||||
| 	UCLAMP_CNT | ||||
| }; | ||||
| 
 | ||||
| struct sched_info { | ||||
| #ifdef CONFIG_SCHED_INFO | ||||
| 	/* Cumulative counters: */ | ||||
| @ -312,6 +326,10 @@ struct sched_info { | ||||
| # define SCHED_FIXEDPOINT_SHIFT		10 | ||||
| # define SCHED_FIXEDPOINT_SCALE		(1L << SCHED_FIXEDPOINT_SHIFT) | ||||
| 
 | ||||
| /* Increase resolution of cpu_capacity calculations */ | ||||
| # define SCHED_CAPACITY_SHIFT		SCHED_FIXEDPOINT_SHIFT | ||||
| # define SCHED_CAPACITY_SCALE		(1L << SCHED_CAPACITY_SHIFT) | ||||
| 
 | ||||
| struct load_weight { | ||||
| 	unsigned long			weight; | ||||
| 	u32				inv_weight; | ||||
| @ -560,6 +578,41 @@ struct sched_dl_entity { | ||||
| 	struct hrtimer inactive_timer; | ||||
| }; | ||||
| 
 | ||||
| #ifdef CONFIG_UCLAMP_TASK | ||||
| /* Number of utilization clamp buckets (shorter alias) */ | ||||
| #define UCLAMP_BUCKETS CONFIG_UCLAMP_BUCKETS_COUNT | ||||
| 
 | ||||
| /*
 | ||||
|  * Utilization clamp for a scheduling entity | ||||
|  * @value:		clamp value "assigned" to a se | ||||
|  * @bucket_id:		bucket index corresponding to the "assigned" value | ||||
|  * @active:		the se is currently refcounted in a rq's bucket | ||||
|  * @user_defined:	the requested clamp value comes from user-space | ||||
|  * | ||||
|  * The bucket_id is the index of the clamp bucket matching the clamp value | ||||
|  * which is pre-computed and stored to avoid expensive integer divisions from | ||||
|  * the fast path. | ||||
|  * | ||||
|  * The active bit is set whenever a task has got an "effective" value assigned, | ||||
|  * which can be different from the clamp value "requested" from user-space. | ||||
|  * This allows to know a task is refcounted in the rq's bucket corresponding | ||||
|  * to the "effective" bucket_id. | ||||
|  * | ||||
|  * The user_defined bit is set whenever a task has got a task-specific clamp | ||||
|  * value requested from userspace, i.e. the system defaults apply to this task | ||||
|  * just as a restriction. This allows to relax default clamps when a less | ||||
|  * restrictive task-specific value has been requested, thus allowing to | ||||
|  * implement a "nice" semantic. For example, a task running with a 20% | ||||
|  * default boost can still drop its own boosting to 0%. | ||||
|  */ | ||||
| struct uclamp_se { | ||||
| 	unsigned int value		: bits_per(SCHED_CAPACITY_SCALE); | ||||
| 	unsigned int bucket_id		: bits_per(UCLAMP_BUCKETS); | ||||
| 	unsigned int active		: 1; | ||||
| 	unsigned int user_defined	: 1; | ||||
| }; | ||||
| #endif /* CONFIG_UCLAMP_TASK */ | ||||
| 
 | ||||
| union rcu_special { | ||||
| 	struct { | ||||
| 		u8			blocked; | ||||
| @ -640,6 +693,13 @@ struct task_struct { | ||||
| #endif | ||||
| 	struct sched_dl_entity		dl; | ||||
| 
 | ||||
| #ifdef CONFIG_UCLAMP_TASK | ||||
| 	/* Clamp values requested for a scheduling entity */ | ||||
| 	struct uclamp_se		uclamp_req[UCLAMP_CNT]; | ||||
| 	/* Effective clamp values used for a scheduling entity */ | ||||
| 	struct uclamp_se		uclamp[UCLAMP_CNT]; | ||||
| #endif | ||||
| 
 | ||||
| #ifdef CONFIG_PREEMPT_NOTIFIERS | ||||
| 	/* List of struct preempt_notifier: */ | ||||
| 	struct hlist_head		preempt_notifiers; | ||||
| @ -651,7 +711,8 @@ struct task_struct { | ||||
| 
 | ||||
| 	unsigned int			policy; | ||||
| 	int				nr_cpus_allowed; | ||||
| 	cpumask_t			cpus_allowed; | ||||
| 	const cpumask_t			*cpus_ptr; | ||||
| 	cpumask_t			cpus_mask; | ||||
| 
 | ||||
| #ifdef CONFIG_PREEMPT_RCU | ||||
| 	int				rcu_read_lock_nesting; | ||||
| @ -1399,7 +1460,7 @@ extern struct pid *cad_pid; | ||||
| #define PF_SWAPWRITE		0x00800000	/* Allowed to write to swap */ | ||||
| #define PF_MEMSTALL		0x01000000	/* Stalled due to lack of memory */ | ||||
| #define PF_UMH			0x02000000	/* I'm an Usermodehelper process */ | ||||
| #define PF_NO_SETAFFINITY	0x04000000	/* Userland is not allowed to meddle with cpus_allowed */ | ||||
| #define PF_NO_SETAFFINITY	0x04000000	/* Userland is not allowed to meddle with cpus_mask */ | ||||
| #define PF_MCE_EARLY		0x08000000      /* Early kill for mce process policy */ | ||||
| #define PF_MEMALLOC_NOCMA	0x10000000	/* All allocation request will have _GFP_MOVABLE cleared */ | ||||
| #define PF_FREEZER_SKIP		0x40000000	/* Freezer should not count it as freezable */ | ||||
| @ -1915,4 +1976,16 @@ static inline void rseq_syscall(struct pt_regs *regs) | ||||
| 
 | ||||
| #endif | ||||
| 
 | ||||
| const struct sched_avg *sched_trace_cfs_rq_avg(struct cfs_rq *cfs_rq); | ||||
| char *sched_trace_cfs_rq_path(struct cfs_rq *cfs_rq, char *str, int len); | ||||
| int sched_trace_cfs_rq_cpu(struct cfs_rq *cfs_rq); | ||||
| 
 | ||||
| const struct sched_avg *sched_trace_rq_avg_rt(struct rq *rq); | ||||
| const struct sched_avg *sched_trace_rq_avg_dl(struct rq *rq); | ||||
| const struct sched_avg *sched_trace_rq_avg_irq(struct rq *rq); | ||||
| 
 | ||||
| int sched_trace_rq_cpu(struct rq *rq); | ||||
| 
 | ||||
| const struct cpumask *sched_trace_rd_span(struct root_domain *rd); | ||||
| 
 | ||||
| #endif | ||||
|  | ||||
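To summarize the affinity rework visible in the hunks above and below: task_struct now carries a fixed cpus_mask plus a cpus_ptr that normally points at it (the -rt tree can temporarily repoint it around migrate_disable()). Below is a sketch of the resulting access pattern, distilled from the call-site conversions in this merge; the helper names are hypothetical, only the fields and set_cpus_allowed_ptr() are real:

#include <linux/cpumask.h>
#include <linux/sched.h>

/* Hypothetical helpers, for illustration only. */
static bool task_can_run_on(struct task_struct *p, int cpu)
{
	/* Readers go through the pointer, never through &p->cpus_allowed: */
	return cpumask_test_cpu(cpu, p->cpus_ptr);
}

static bool task_is_pinned(struct task_struct *p)
{
	/* The cached weight replaces cpumask_weight(&p->cpus_allowed): */
	return p->nr_cpus_allowed == 1;
}

static int pin_task(struct task_struct *p, const struct cpumask *new_mask)
{
	/*
	 * Writers keep using set_cpus_allowed_ptr(); via
	 * set_cpus_allowed_common() it now updates p->cpus_mask, the
	 * storage that p->cpus_ptr normally points at.
	 */
	return set_cpus_allowed_ptr(p, new_mask);
}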
| @ -6,14 +6,6 @@ | ||||
|  * This is the interface between the scheduler and nohz/dynticks: | ||||
|  */ | ||||
| 
 | ||||
| #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) | ||||
| extern void cpu_load_update_nohz_start(void); | ||||
| extern void cpu_load_update_nohz_stop(void); | ||||
| #else | ||||
| static inline void cpu_load_update_nohz_start(void) { } | ||||
| static inline void cpu_load_update_nohz_stop(void) { } | ||||
| #endif | ||||
| 
 | ||||
| #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) | ||||
| extern void nohz_balance_enter_idle(int cpu); | ||||
| extern int get_nohz_timer_target(void); | ||||
|  | ||||
| @ -56,6 +56,11 @@ int sched_proc_update_handler(struct ctl_table *table, int write, | ||||
| extern unsigned int sysctl_sched_rt_period; | ||||
| extern int sysctl_sched_rt_runtime; | ||||
| 
 | ||||
| #ifdef CONFIG_UCLAMP_TASK | ||||
| extern unsigned int sysctl_sched_uclamp_util_min; | ||||
| extern unsigned int sysctl_sched_uclamp_util_max; | ||||
| #endif | ||||
| 
 | ||||
| #ifdef CONFIG_CFS_BANDWIDTH | ||||
| extern unsigned int sysctl_sched_cfs_bandwidth_slice; | ||||
| #endif | ||||
| @ -75,6 +80,12 @@ extern int sched_rt_handler(struct ctl_table *table, int write, | ||||
| 		void __user *buffer, size_t *lenp, | ||||
| 		loff_t *ppos); | ||||
| 
 | ||||
| #ifdef CONFIG_UCLAMP_TASK | ||||
| extern int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, | ||||
| 				       void __user *buffer, size_t *lenp, | ||||
| 				       loff_t *ppos); | ||||
| #endif | ||||
| 
 | ||||
| extern int sysctl_numa_balancing(struct ctl_table *table, int write, | ||||
| 				 void __user *buffer, size_t *lenp, | ||||
| 				 loff_t *ppos); | ||||
|  | ||||
| @ -6,12 +6,6 @@ | ||||
| 
 | ||||
| #include <linux/sched/idle.h> | ||||
| 
 | ||||
| /*
 | ||||
|  * Increase resolution of cpu_capacity calculations | ||||
|  */ | ||||
| #define SCHED_CAPACITY_SHIFT	SCHED_FIXEDPOINT_SHIFT | ||||
| #define SCHED_CAPACITY_SCALE	(1L << SCHED_CAPACITY_SHIFT) | ||||
| 
 | ||||
| /*
 | ||||
|  * sched-domains (multiprocessor balancing) declarations: | ||||
|  */ | ||||
| @ -84,11 +78,6 @@ struct sched_domain { | ||||
| 	unsigned int busy_factor;	/* less balancing by factor if busy */ | ||||
| 	unsigned int imbalance_pct;	/* No balance until over watermark */ | ||||
| 	unsigned int cache_nice_tries;	/* Leave cache hot tasks for # tries */ | ||||
| 	unsigned int busy_idx; | ||||
| 	unsigned int idle_idx; | ||||
| 	unsigned int newidle_idx; | ||||
| 	unsigned int wake_idx; | ||||
| 	unsigned int forkexec_idx; | ||||
| 
 | ||||
| 	int nohz_idle;			/* NOHZ IDLE status */ | ||||
| 	int flags;			/* See SD_* */ | ||||
| @ -201,14 +190,6 @@ extern void set_sched_topology(struct sched_domain_topology_level *tl); | ||||
| # define SD_INIT_NAME(type) | ||||
| #endif | ||||
| 
 | ||||
| #ifndef arch_scale_cpu_capacity | ||||
| static __always_inline | ||||
| unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu) | ||||
| { | ||||
| 	return SCHED_CAPACITY_SCALE; | ||||
| } | ||||
| #endif | ||||
| 
 | ||||
| #else /* CONFIG_SMP */ | ||||
| 
 | ||||
| struct sched_domain_attr; | ||||
| @ -224,16 +205,16 @@ static inline bool cpus_share_cache(int this_cpu, int that_cpu) | ||||
| 	return true; | ||||
| } | ||||
| 
 | ||||
| #endif	/* !CONFIG_SMP */ | ||||
| 
 | ||||
| #ifndef arch_scale_cpu_capacity | ||||
| static __always_inline | ||||
| unsigned long arch_scale_cpu_capacity(void __always_unused *sd, int cpu) | ||||
| unsigned long arch_scale_cpu_capacity(int cpu) | ||||
| { | ||||
| 	return SCHED_CAPACITY_SCALE; | ||||
| } | ||||
| #endif | ||||
| 
 | ||||
| #endif	/* !CONFIG_SMP */ | ||||
| 
 | ||||
| static inline int task_node(const struct task_struct *p) | ||||
| { | ||||
| 	return cpu_to_node(task_cpu(p)); | ||||
|  | ||||
| @ -594,6 +594,37 @@ TRACE_EVENT(sched_wake_idle_without_ipi, | ||||
| 
 | ||||
| 	TP_printk("cpu=%d", __entry->cpu) | ||||
| ); | ||||
| 
 | ||||
| /*
 | ||||
|  * Following tracepoints are not exported in tracefs and provide hooking | ||||
|  * mechanisms only for testing and debugging purposes. | ||||
|  * | ||||
|  * Postfixed with _tp to make them easily identifiable in the code. | ||||
|  */ | ||||
| DECLARE_TRACE(pelt_cfs_tp, | ||||
| 	TP_PROTO(struct cfs_rq *cfs_rq), | ||||
| 	TP_ARGS(cfs_rq)); | ||||
| 
 | ||||
| DECLARE_TRACE(pelt_rt_tp, | ||||
| 	TP_PROTO(struct rq *rq), | ||||
| 	TP_ARGS(rq)); | ||||
| 
 | ||||
| DECLARE_TRACE(pelt_dl_tp, | ||||
| 	TP_PROTO(struct rq *rq), | ||||
| 	TP_ARGS(rq)); | ||||
| 
 | ||||
| DECLARE_TRACE(pelt_irq_tp, | ||||
| 	TP_PROTO(struct rq *rq), | ||||
| 	TP_ARGS(rq)); | ||||
| 
 | ||||
| DECLARE_TRACE(pelt_se_tp, | ||||
| 	TP_PROTO(struct sched_entity *se), | ||||
| 	TP_ARGS(se)); | ||||
| 
 | ||||
| DECLARE_TRACE(sched_overutilized_tp, | ||||
| 	TP_PROTO(struct root_domain *rd, bool overutilized), | ||||
| 	TP_ARGS(rd, overutilized)); | ||||
| 
 | ||||
| #endif /* _TRACE_SCHED_H */ | ||||
| 
 | ||||
| /* This part must be outside protection */ | ||||
|  | ||||
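These tracepoints are declared with DECLARE_TRACE() only, so they emit no trace event; they exist for modules (e.g. the LISA test suite mentioned in the merge message) to attach probes to. A hypothetical out-of-tree module sketch - not part of this merge - attaching to pelt_cfs_tp and using the new sched_trace_*() helpers:

#include <linux/module.h>
#include <linux/sched.h>
#include <trace/events/sched.h>

static void probe_pelt_cfs(void *data, struct cfs_rq *cfs_rq)
{
	char path[64];

	sched_trace_cfs_rq_path(cfs_rq, path, sizeof(path));
	pr_debug("cfs_rq %s cpu%d util_avg=%lu\n", path,
		 sched_trace_cfs_rq_cpu(cfs_rq),
		 sched_trace_cfs_rq_avg(cfs_rq)->util_avg);
}

static int __init pelt_probe_init(void)
{
	/* register_trace_pelt_cfs_tp() is generated by DECLARE_TRACE() above. */
	return register_trace_pelt_cfs_tp(probe_pelt_cfs, NULL);
}

static void __exit pelt_probe_exit(void)
{
	unregister_trace_pelt_cfs_tp(probe_pelt_cfs, NULL);
	tracepoint_synchronize_unregister();
}

module_init(pelt_probe_init);
module_exit(pelt_probe_exit);
MODULE_LICENSE("GPL");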
| @ -51,9 +51,21 @@ | ||||
| #define SCHED_FLAG_RESET_ON_FORK	0x01 | ||||
| #define SCHED_FLAG_RECLAIM		0x02 | ||||
| #define SCHED_FLAG_DL_OVERRUN		0x04 | ||||
| #define SCHED_FLAG_KEEP_POLICY		0x08 | ||||
| #define SCHED_FLAG_KEEP_PARAMS		0x10 | ||||
| #define SCHED_FLAG_UTIL_CLAMP_MIN	0x20 | ||||
| #define SCHED_FLAG_UTIL_CLAMP_MAX	0x40 | ||||
| 
 | ||||
| #define SCHED_FLAG_KEEP_ALL	(SCHED_FLAG_KEEP_POLICY | \ | ||||
| 				 SCHED_FLAG_KEEP_PARAMS) | ||||
| 
 | ||||
| #define SCHED_FLAG_UTIL_CLAMP	(SCHED_FLAG_UTIL_CLAMP_MIN | \ | ||||
| 				 SCHED_FLAG_UTIL_CLAMP_MAX) | ||||
| 
 | ||||
| #define SCHED_FLAG_ALL	(SCHED_FLAG_RESET_ON_FORK	| \ | ||||
| 			 SCHED_FLAG_RECLAIM		| \ | ||||
| 			 SCHED_FLAG_DL_OVERRUN) | ||||
| 			 SCHED_FLAG_DL_OVERRUN		| \ | ||||
| 			 SCHED_FLAG_KEEP_ALL		| \ | ||||
| 			 SCHED_FLAG_UTIL_CLAMP) | ||||
| 
 | ||||
| #endif /* _UAPI_LINUX_SCHED_H */ | ||||
|  | ||||
| @ -9,6 +9,7 @@ struct sched_param { | ||||
| }; | ||||
| 
 | ||||
| #define SCHED_ATTR_SIZE_VER0	48	/* sizeof first published struct */ | ||||
| #define SCHED_ATTR_SIZE_VER1	56	/* add: util_{min,max} */ | ||||
| 
 | ||||
| /*
 | ||||
|  * Extended scheduling parameters data structure. | ||||
| @ -21,8 +22,33 @@ struct sched_param { | ||||
|  * the tasks may be useful for a wide variety of application fields, e.g., | ||||
|  * multimedia, streaming, automation and control, and many others. | ||||
|  * | ||||
|  * This variant (sched_attr) is meant at describing a so-called | ||||
|  * sporadic time-constrained task. In such model a task is specified by: | ||||
|  * This variant (sched_attr) allows to define additional attributes to | ||||
|  * improve the scheduler knowledge about task requirements. | ||||
|  * | ||||
|  * Scheduling Class Attributes | ||||
|  * =========================== | ||||
|  * | ||||
|  * A subset of sched_attr attributes specifies the | ||||
|  * scheduling policy and relative POSIX attributes: | ||||
|  * | ||||
|  *  @size		size of the structure, for fwd/bwd compat. | ||||
|  * | ||||
|  *  @sched_policy	task's scheduling policy | ||||
|  *  @sched_nice		task's nice value      (SCHED_NORMAL/BATCH) | ||||
|  *  @sched_priority	task's static priority (SCHED_FIFO/RR) | ||||
|  * | ||||
|  * Certain more advanced scheduling features can be controlled by a | ||||
|  * predefined set of flags via the attribute: | ||||
|  * | ||||
|  *  @sched_flags	for customizing the scheduler behaviour | ||||
|  * | ||||
|  * Sporadic Time-Constrained Task Attributes | ||||
|  * ========================================= | ||||
|  * | ||||
|  * A subset of sched_attr attributes allows to describe a so-called | ||||
|  * sporadic time-constrained task. | ||||
|  * | ||||
|  * In such a model a task is specified by: | ||||
|  *  - the activation period or minimum instance inter-arrival time; | ||||
|  *  - the maximum (or average, depending on the actual scheduling | ||||
|  *    discipline) computation time of all instances, a.k.a. runtime; | ||||
| @ -34,14 +60,8 @@ struct sched_param { | ||||
|  * than the runtime and must be completed by time instant t equal to | ||||
|  * the instance activation time + the deadline. | ||||
|  * | ||||
|  * This is reflected by the actual fields of the sched_attr structure: | ||||
|  * This is reflected by the following fields of the sched_attr structure: | ||||
|  * | ||||
|  *  @size		size of the structure, for fwd/bwd compat. | ||||
|  * | ||||
|  *  @sched_policy	task's scheduling policy | ||||
|  *  @sched_flags	for customizing the scheduler behaviour | ||||
|  *  @sched_nice		task's nice value      (SCHED_NORMAL/BATCH) | ||||
|  *  @sched_priority	task's static priority (SCHED_FIFO/RR) | ||||
|  *  @sched_deadline	representative of the task's deadline | ||||
|  *  @sched_runtime	representative of the task's runtime | ||||
|  *  @sched_period	representative of the task's period | ||||
| @ -53,6 +73,29 @@ struct sched_param { | ||||
|  * As of now, the SCHED_DEADLINE policy (sched_dl scheduling class) is the | ||||
|  * only user of this new interface. More information about the algorithm | ||||
|  * available in the scheduling class file or in Documentation/. | ||||
|  * | ||||
|  * Task Utilization Attributes | ||||
|  * =========================== | ||||
|  * | ||||
|  * A subset of sched_attr attributes allows to specify the utilization | ||||
|  * expected for a task. These attributes allow to inform the scheduler about | ||||
|  * the utilization boundaries within which it should schedule the task. These | ||||
|  * boundaries are valuable hints to support scheduler decisions on both task | ||||
|  * placement and frequency selection. | ||||
|  * | ||||
|  *  @sched_util_min	represents the minimum utilization | ||||
|  *  @sched_util_max	represents the maximum utilization | ||||
|  * | ||||
|  * Utilization is a value in the range [0..SCHED_CAPACITY_SCALE]. It | ||||
|  * represents the percentage of CPU time used by a task when running at the | ||||
|  * maximum frequency on the highest capacity CPU of the system. For example, a | ||||
|  * 20% utilization task is a task running for 2ms every 10ms at maximum | ||||
|  * frequency. | ||||
|  * | ||||
|  * A task with a min utilization value bigger than 0 is more likely scheduled | ||||
|  * on a CPU with a capacity big enough to fit the specified value. | ||||
|  * A task with a max utilization value smaller than 1024 is more likely | ||||
|  * scheduled on a CPU with no more capacity than the specified value. | ||||
|  */ | ||||
| struct sched_attr { | ||||
| 	__u32 size; | ||||
| @ -70,6 +113,11 @@ struct sched_attr { | ||||
| 	__u64 sched_runtime; | ||||
| 	__u64 sched_deadline; | ||||
| 	__u64 sched_period; | ||||
| 
 | ||||
| 	/* Utilization hints */ | ||||
| 	__u32 sched_util_min; | ||||
| 	__u32 sched_util_max; | ||||
| 
 | ||||
| }; | ||||
| 
 | ||||
| #endif /* _UAPI_LINUX_SCHED_TYPES_H */ | ||||
|  | ||||
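The comment block above is the user-facing contract; below is a hedged user-space sketch of setting the new clamps with sched_setattr(). glibc has no wrapper for this syscall, so the structure and constants are re-declared locally (matching the UAPI definitions in this diff), and the clamp values are only examples:

/* User-space sketch; assumes a kernel built with CONFIG_UCLAMP_TASK=y. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

struct sched_attr {		/* local copy of the UAPI layout, VER1 = 56 bytes */
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;
	uint32_t sched_priority;
	uint64_t sched_runtime;
	uint64_t sched_deadline;
	uint64_t sched_period;
	uint32_t sched_util_min;
	uint32_t sched_util_max;
};

#define SCHED_FLAG_KEEP_POLICY		0x08
#define SCHED_FLAG_KEEP_PARAMS		0x10
#define SCHED_FLAG_UTIL_CLAMP_MIN	0x20
#define SCHED_FLAG_UTIL_CLAMP_MAX	0x40

int main(void)
{
	struct sched_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);	/* SCHED_ATTR_SIZE_VER1 */
	/* Keep the current policy/params, update only the clamps: */
	attr.sched_flags = SCHED_FLAG_KEEP_POLICY | SCHED_FLAG_KEEP_PARAMS |
			   SCHED_FLAG_UTIL_CLAMP_MIN | SCHED_FLAG_UTIL_CLAMP_MAX;
	attr.sched_util_min = 512;	/* boost: at least ~50% of max capacity */
	attr.sched_util_max = 1024;	/* no cap (SCHED_CAPACITY_SCALE) */

	if (syscall(SYS_sched_setattr, 0, &attr, 0)) {
		perror("sched_setattr");
		return 1;
	}
	return 0;
}

The kernel rejects sched_util_min > sched_util_max and values above SCHED_CAPACITY_SCALE; see uclamp_validate() in the kernel/sched/core.c hunk further down.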
init/Kconfig (53 lines added)
							| @ -677,6 +677,59 @@ config HAVE_UNSTABLE_SCHED_CLOCK | ||||
| config GENERIC_SCHED_CLOCK | ||||
| 	bool | ||||
| 
 | ||||
| menu "Scheduler features" | ||||
| 
 | ||||
| config UCLAMP_TASK | ||||
| 	bool "Enable utilization clamping for RT/FAIR tasks" | ||||
| 	depends on CPU_FREQ_GOV_SCHEDUTIL | ||||
| 	help | ||||
| 	  This feature enables the scheduler to track the clamped utilization | ||||
| 	  of each CPU based on RUNNABLE tasks scheduled on that CPU. | ||||
| 
 | ||||
| 	  With this option, the user can specify the min and max CPU | ||||
| 	  utilization allowed for RUNNABLE tasks. The max utilization defines | ||||
| 	  the maximum frequency a task should use while the min utilization | ||||
| 	  defines the minimum frequency it should use. | ||||
| 
 | ||||
| 	  Both min and max utilization clamp values are hints to the scheduler, | ||||
| 	  aiming at improving its frequency selection policy, but they do not | ||||
| 	  enforce or grant any specific bandwidth for tasks. | ||||
| 
 | ||||
| 	  If in doubt, say N. | ||||
| 
 | ||||
| config UCLAMP_BUCKETS_COUNT | ||||
| 	int "Number of supported utilization clamp buckets" | ||||
| 	range 5 20 | ||||
| 	default 5 | ||||
| 	depends on UCLAMP_TASK | ||||
| 	help | ||||
| 	  Defines the number of clamp buckets to use. The range of each bucket | ||||
| 	  will be SCHED_CAPACITY_SCALE/UCLAMP_BUCKETS_COUNT. The higher the | ||||
| 	  number of clamp buckets the finer their granularity and the higher | ||||
| 	  the precision of clamping aggregation and tracking at run-time. | ||||
| 
 | ||||
| 	  For example, with the minimum configuration value we will have 5 | ||||
| 	  clamp buckets tracking 20% utilization each. A 25% boosted tasks will | ||||
| 	  be refcounted in the [20..39]% bucket and will set the bucket clamp | ||||
| 	  effective value to 25%. | ||||
| 	  If a second 30% boosted task should be co-scheduled on the same CPU, | ||||
| 	  that task will be refcounted in the same bucket of the first task and | ||||
| 	  it will boost the bucket clamp effective value to 30%. | ||||
| 	  The clamp effective value of a bucket is reset to its nominal value | ||||
| 	  (20% in the example above) when there are no more tasks refcounted in | ||||
| 	  that bucket. | ||||
| 
 | ||||
| 	  An additional boost/capping margin can be added to some tasks. In the | ||||
| 	  example above the 25% task will be boosted to 30% until it exits the | ||||
| 	  CPU. If that should be considered not acceptable on certain systems, | ||||
| 	  it's always possible to reduce the margin by increasing the number of | ||||
| 	  clamp buckets to trade off used memory for run-time tracking | ||||
| 	  precision. | ||||
| 
 | ||||
| 	  If in doubt, use the default value. | ||||
| 
 | ||||
| endmenu | ||||
| 
 | ||||
| # | ||||
| # For architectures that want to enable the support for NUMA-affine scheduler | ||||
| # balancing logic: | ||||
|  | ||||
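The bucket arithmetic in the help text can be checked against the uclamp_bucket_id() logic added to kernel/sched/core.c further down; here is a small stand-alone sketch with the default configuration (assuming SCHED_CAPACITY_SCALE == 1024):

/* Stand-alone sketch of the bucket math; mirrors uclamp_bucket_id() below. */
#include <stdio.h>

#define SCHED_CAPACITY_SCALE	1024
#define UCLAMP_BUCKETS		5	/* CONFIG_UCLAMP_BUCKETS_COUNT default */
/* DIV_ROUND_CLOSEST(SCHED_CAPACITY_SCALE, UCLAMP_BUCKETS) */
#define UCLAMP_BUCKET_DELTA	((SCHED_CAPACITY_SCALE + UCLAMP_BUCKETS / 2) / UCLAMP_BUCKETS)

int main(void)
{
	/* A "25% boost" is a min clamp of roughly 256 out of 1024: */
	unsigned int clamp_value = 256;
	unsigned int bucket_id = clamp_value / UCLAMP_BUCKET_DELTA;

	printf("delta=%u value=%u -> bucket %u [%u..%u]\n",
	       UCLAMP_BUCKET_DELTA, clamp_value, bucket_id,
	       bucket_id * UCLAMP_BUCKET_DELTA,
	       (bucket_id + 1) * UCLAMP_BUCKET_DELTA - 1);
	/*
	 * Prints: delta=205 value=256 -> bucket 1 [205..409], i.e. the
	 * [20..39]% bucket described in the help text above.
	 */
	return 0;
}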
| @ -72,7 +72,8 @@ struct task_struct init_task | ||||
| 	.static_prio	= MAX_PRIO - 20, | ||||
| 	.normal_prio	= MAX_PRIO - 20, | ||||
| 	.policy		= SCHED_NORMAL, | ||||
| 	.cpus_allowed	= CPU_MASK_ALL, | ||||
| 	.cpus_ptr	= &init_task.cpus_mask, | ||||
| 	.cpus_mask	= CPU_MASK_ALL, | ||||
| 	.nr_cpus_allowed= NR_CPUS, | ||||
| 	.mm		= NULL, | ||||
| 	.active_mm	= &init_mm, | ||||
|  | ||||
| @ -2829,7 +2829,7 @@ static void cpuset_fork(struct task_struct *task) | ||||
| 	if (task_css_is_root(task, cpuset_cgrp_id)) | ||||
| 		return; | ||||
| 
 | ||||
| 	set_cpus_allowed_ptr(task, ¤t->cpus_allowed); | ||||
| 	set_cpus_allowed_ptr(task, current->cpus_ptr); | ||||
| 	task->mems_allowed = current->mems_allowed; | ||||
| } | ||||
| 
 | ||||
|  | ||||
| @ -898,6 +898,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) | ||||
| #ifdef CONFIG_STACKPROTECTOR | ||||
| 	tsk->stack_canary = get_random_canary(); | ||||
| #endif | ||||
| 	if (orig->cpus_ptr == &orig->cpus_mask) | ||||
| 		tsk->cpus_ptr = &tsk->cpus_mask; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * One for us, one for whoever does the "release_task()" (usually | ||||
|  | ||||
| @ -223,7 +223,7 @@ int em_register_perf_domain(cpumask_t *span, unsigned int nr_states, | ||||
| 		 * All CPUs of a domain must have the same micro-architecture | ||||
| 		 * since they all share the same table. | ||||
| 		 */ | ||||
| 		cap = arch_scale_cpu_capacity(NULL, cpu); | ||||
| 		cap = arch_scale_cpu_capacity(cpu); | ||||
| 		if (prev_cap && prev_cap != cap) { | ||||
| 			pr_err("CPUs of %*pbl must have the same capacity\n", | ||||
| 							cpumask_pr_args(span)); | ||||
|  | ||||
| @ -259,7 +259,6 @@ out: | ||||
| } | ||||
| #endif /* CONFIG_PROC_FS */ | ||||
| 
 | ||||
| #ifdef CONFIG_SCHED_DEBUG | ||||
| int autogroup_path(struct task_group *tg, char *buf, int buflen) | ||||
| { | ||||
| 	if (!task_group_is_autogroup(tg)) | ||||
| @ -267,4 +266,3 @@ int autogroup_path(struct task_group *tg, char *buf, int buflen) | ||||
| 
 | ||||
| 	return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id); | ||||
| } | ||||
| #endif | ||||
|  | ||||
| @ -23,6 +23,17 @@ | ||||
| #define CREATE_TRACE_POINTS | ||||
| #include <trace/events/sched.h> | ||||
| 
 | ||||
| /*
 | ||||
|  * Export tracepoints that act as a bare tracehook (ie: have no trace event | ||||
|  * associated with them) to allow external modules to probe them. | ||||
|  */ | ||||
| EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_cfs_tp); | ||||
| EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_rt_tp); | ||||
| EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_dl_tp); | ||||
| EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); | ||||
| EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_se_tp); | ||||
| EXPORT_TRACEPOINT_SYMBOL_GPL(sched_overutilized_tp); | ||||
| 
 | ||||
| DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); | ||||
| 
 | ||||
| #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_JUMP_LABEL) | ||||
| @ -761,6 +772,401 @@ static void set_load_weight(struct task_struct *p, bool update_load) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| #ifdef CONFIG_UCLAMP_TASK | ||||
| /* Max allowed minimum utilization */ | ||||
| unsigned int sysctl_sched_uclamp_util_min = SCHED_CAPACITY_SCALE; | ||||
| 
 | ||||
| /* Max allowed maximum utilization */ | ||||
| unsigned int sysctl_sched_uclamp_util_max = SCHED_CAPACITY_SCALE; | ||||
| 
 | ||||
| /* All clamps are required to be less or equal than these values */ | ||||
| static struct uclamp_se uclamp_default[UCLAMP_CNT]; | ||||
| 
 | ||||
| /* Integer rounded range for each bucket */ | ||||
| #define UCLAMP_BUCKET_DELTA DIV_ROUND_CLOSEST(SCHED_CAPACITY_SCALE, UCLAMP_BUCKETS) | ||||
| 
 | ||||
| #define for_each_clamp_id(clamp_id) \ | ||||
| 	for ((clamp_id) = 0; (clamp_id) < UCLAMP_CNT; (clamp_id)++) | ||||
| 
 | ||||
| static inline unsigned int uclamp_bucket_id(unsigned int clamp_value) | ||||
| { | ||||
| 	return clamp_value / UCLAMP_BUCKET_DELTA; | ||||
| } | ||||
| 
 | ||||
| static inline unsigned int uclamp_bucket_base_value(unsigned int clamp_value) | ||||
| { | ||||
| 	return UCLAMP_BUCKET_DELTA * uclamp_bucket_id(clamp_value); | ||||
| } | ||||
| 
 | ||||
| static inline unsigned int uclamp_none(int clamp_id) | ||||
| { | ||||
| 	if (clamp_id == UCLAMP_MIN) | ||||
| 		return 0; | ||||
| 	return SCHED_CAPACITY_SCALE; | ||||
| } | ||||
| 
 | ||||
| static inline void uclamp_se_set(struct uclamp_se *uc_se, | ||||
| 				 unsigned int value, bool user_defined) | ||||
| { | ||||
| 	uc_se->value = value; | ||||
| 	uc_se->bucket_id = uclamp_bucket_id(value); | ||||
| 	uc_se->user_defined = user_defined; | ||||
| } | ||||
| 
 | ||||
| static inline unsigned int | ||||
| uclamp_idle_value(struct rq *rq, unsigned int clamp_id, | ||||
| 		  unsigned int clamp_value) | ||||
| { | ||||
| 	/*
 | ||||
| 	 * Avoid blocked utilization pushing up the frequency when we go | ||||
| 	 * idle (which drops the max-clamp) by retaining the last known | ||||
| 	 * max-clamp. | ||||
| 	 */ | ||||
| 	if (clamp_id == UCLAMP_MAX) { | ||||
| 		rq->uclamp_flags |= UCLAMP_FLAG_IDLE; | ||||
| 		return clamp_value; | ||||
| 	} | ||||
| 
 | ||||
| 	return uclamp_none(UCLAMP_MIN); | ||||
| } | ||||
| 
 | ||||
| static inline void uclamp_idle_reset(struct rq *rq, unsigned int clamp_id, | ||||
| 				     unsigned int clamp_value) | ||||
| { | ||||
| 	/* Reset max-clamp retention only on idle exit */ | ||||
| 	if (!(rq->uclamp_flags & UCLAMP_FLAG_IDLE)) | ||||
| 		return; | ||||
| 
 | ||||
| 	WRITE_ONCE(rq->uclamp[clamp_id].value, clamp_value); | ||||
| } | ||||
| 
 | ||||
| static inline | ||||
| unsigned int uclamp_rq_max_value(struct rq *rq, unsigned int clamp_id, | ||||
| 				 unsigned int clamp_value) | ||||
| { | ||||
| 	struct uclamp_bucket *bucket = rq->uclamp[clamp_id].bucket; | ||||
| 	int bucket_id = UCLAMP_BUCKETS - 1; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Since both min and max clamps are max aggregated, find the | ||||
| 	 * top most bucket with tasks in. | ||||
| 	 */ | ||||
| 	for ( ; bucket_id >= 0; bucket_id--) { | ||||
| 		if (!bucket[bucket_id].tasks) | ||||
| 			continue; | ||||
| 		return bucket[bucket_id].value; | ||||
| 	} | ||||
| 
 | ||||
| 	/* No tasks -- default clamp values */ | ||||
| 	return uclamp_idle_value(rq, clamp_id, clamp_value); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * The effective clamp bucket index of a task depends on, by increasing | ||||
|  * priority: | ||||
|  * - the task specific clamp value, when explicitly requested from userspace | ||||
|  * - the system default clamp value, defined by the sysadmin | ||||
|  */ | ||||
| static inline struct uclamp_se | ||||
| uclamp_eff_get(struct task_struct *p, unsigned int clamp_id) | ||||
| { | ||||
| 	struct uclamp_se uc_req = p->uclamp_req[clamp_id]; | ||||
| 	struct uclamp_se uc_max = uclamp_default[clamp_id]; | ||||
| 
 | ||||
| 	/* System default restrictions always apply */ | ||||
| 	if (unlikely(uc_req.value > uc_max.value)) | ||||
| 		return uc_max; | ||||
| 
 | ||||
| 	return uc_req; | ||||
| } | ||||
| 
 | ||||
| unsigned int uclamp_eff_value(struct task_struct *p, unsigned int clamp_id) | ||||
| { | ||||
| 	struct uclamp_se uc_eff; | ||||
| 
 | ||||
| 	/* Task currently refcounted: use back-annotated (effective) value */ | ||||
| 	if (p->uclamp[clamp_id].active) | ||||
| 		return p->uclamp[clamp_id].value; | ||||
| 
 | ||||
| 	uc_eff = uclamp_eff_get(p, clamp_id); | ||||
| 
 | ||||
| 	return uc_eff.value; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * When a task is enqueued on a rq, the clamp bucket currently defined by the | ||||
|  * task's uclamp::bucket_id is refcounted on that rq. This also immediately | ||||
|  * updates the rq's clamp value if required. | ||||
|  * | ||||
|  * Tasks can have a task-specific value requested from user-space, track | ||||
|  * within each bucket the maximum value for tasks refcounted in it. | ||||
|  * This "local max aggregation" allows to track the exact "requested" value | ||||
|  * for each bucket when all its RUNNABLE tasks require the same clamp. | ||||
|  */ | ||||
| static inline void uclamp_rq_inc_id(struct rq *rq, struct task_struct *p, | ||||
| 				    unsigned int clamp_id) | ||||
| { | ||||
| 	struct uclamp_rq *uc_rq = &rq->uclamp[clamp_id]; | ||||
| 	struct uclamp_se *uc_se = &p->uclamp[clamp_id]; | ||||
| 	struct uclamp_bucket *bucket; | ||||
| 
 | ||||
| 	lockdep_assert_held(&rq->lock); | ||||
| 
 | ||||
| 	/* Update task effective clamp */ | ||||
| 	p->uclamp[clamp_id] = uclamp_eff_get(p, clamp_id); | ||||
| 
 | ||||
| 	bucket = &uc_rq->bucket[uc_se->bucket_id]; | ||||
| 	bucket->tasks++; | ||||
| 	uc_se->active = true; | ||||
| 
 | ||||
| 	uclamp_idle_reset(rq, clamp_id, uc_se->value); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Local max aggregation: rq buckets always track the max | ||||
| 	 * "requested" clamp value of its RUNNABLE tasks. | ||||
| 	 */ | ||||
| 	if (bucket->tasks == 1 || uc_se->value > bucket->value) | ||||
| 		bucket->value = uc_se->value; | ||||
| 
 | ||||
| 	if (uc_se->value > READ_ONCE(uc_rq->value)) | ||||
| 		WRITE_ONCE(uc_rq->value, uc_se->value); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * When a task is dequeued from a rq, the clamp bucket refcounted by the task | ||||
|  * is released. If this is the last task reference counting the rq's max | ||||
|  * active clamp value, then the rq's clamp value is updated. | ||||
|  * | ||||
|  * Both refcounted tasks and rq's cached clamp values are expected to be | ||||
|  * always valid. If it's detected they are not, as defensive programming, | ||||
|  * enforce the expected state and warn. | ||||
|  */ | ||||
| static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p, | ||||
| 				    unsigned int clamp_id) | ||||
| { | ||||
| 	struct uclamp_rq *uc_rq = &rq->uclamp[clamp_id]; | ||||
| 	struct uclamp_se *uc_se = &p->uclamp[clamp_id]; | ||||
| 	struct uclamp_bucket *bucket; | ||||
| 	unsigned int bkt_clamp; | ||||
| 	unsigned int rq_clamp; | ||||
| 
 | ||||
| 	lockdep_assert_held(&rq->lock); | ||||
| 
 | ||||
| 	bucket = &uc_rq->bucket[uc_se->bucket_id]; | ||||
| 	SCHED_WARN_ON(!bucket->tasks); | ||||
| 	if (likely(bucket->tasks)) | ||||
| 		bucket->tasks--; | ||||
| 	uc_se->active = false; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Keep "local max aggregation" simple and accept to (possibly) | ||||
| 	 * overboost some RUNNABLE tasks in the same bucket. | ||||
| 	 * The rq clamp bucket value is reset to its base value whenever | ||||
| 	 * there are no more RUNNABLE tasks refcounting it. | ||||
| 	 */ | ||||
| 	if (likely(bucket->tasks)) | ||||
| 		return; | ||||
| 
 | ||||
| 	rq_clamp = READ_ONCE(uc_rq->value); | ||||
| 	/*
 | ||||
| 	 * Defensive programming: this should never happen. If it happens, | ||||
| 	 * e.g. due to future modification, warn and fixup the expected value. | ||||
| 	 */ | ||||
| 	SCHED_WARN_ON(bucket->value > rq_clamp); | ||||
| 	if (bucket->value >= rq_clamp) { | ||||
| 		bkt_clamp = uclamp_rq_max_value(rq, clamp_id, uc_se->value); | ||||
| 		WRITE_ONCE(uc_rq->value, bkt_clamp); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) | ||||
| { | ||||
| 	unsigned int clamp_id; | ||||
| 
 | ||||
| 	if (unlikely(!p->sched_class->uclamp_enabled)) | ||||
| 		return; | ||||
| 
 | ||||
| 	for_each_clamp_id(clamp_id) | ||||
| 		uclamp_rq_inc_id(rq, p, clamp_id); | ||||
| 
 | ||||
| 	/* Reset clamp idle holding when there is one RUNNABLE task */ | ||||
| 	if (rq->uclamp_flags & UCLAMP_FLAG_IDLE) | ||||
| 		rq->uclamp_flags &= ~UCLAMP_FLAG_IDLE; | ||||
| } | ||||
| 
 | ||||
| static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) | ||||
| { | ||||
| 	unsigned int clamp_id; | ||||
| 
 | ||||
| 	if (unlikely(!p->sched_class->uclamp_enabled)) | ||||
| 		return; | ||||
| 
 | ||||
| 	for_each_clamp_id(clamp_id) | ||||
| 		uclamp_rq_dec_id(rq, p, clamp_id); | ||||
| } | ||||
| 
 | ||||
| int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, | ||||
| 				void __user *buffer, size_t *lenp, | ||||
| 				loff_t *ppos) | ||||
| { | ||||
| 	int old_min, old_max; | ||||
| 	static DEFINE_MUTEX(mutex); | ||||
| 	int result; | ||||
| 
 | ||||
| 	mutex_lock(&mutex); | ||||
| 	old_min = sysctl_sched_uclamp_util_min; | ||||
| 	old_max = sysctl_sched_uclamp_util_max; | ||||
| 
 | ||||
| 	result = proc_dointvec(table, write, buffer, lenp, ppos); | ||||
| 	if (result) | ||||
| 		goto undo; | ||||
| 	if (!write) | ||||
| 		goto done; | ||||
| 
 | ||||
| 	if (sysctl_sched_uclamp_util_min > sysctl_sched_uclamp_util_max || | ||||
| 	    sysctl_sched_uclamp_util_max > SCHED_CAPACITY_SCALE) { | ||||
| 		result = -EINVAL; | ||||
| 		goto undo; | ||||
| 	} | ||||
| 
 | ||||
| 	if (old_min != sysctl_sched_uclamp_util_min) { | ||||
| 		uclamp_se_set(&uclamp_default[UCLAMP_MIN], | ||||
| 			      sysctl_sched_uclamp_util_min, false); | ||||
| 	} | ||||
| 	if (old_max != sysctl_sched_uclamp_util_max) { | ||||
| 		uclamp_se_set(&uclamp_default[UCLAMP_MAX], | ||||
| 			      sysctl_sched_uclamp_util_max, false); | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Updating all the RUNNABLE task is expensive, keep it simple and do | ||||
| 	 * just a lazy update at each next enqueue time. | ||||
| 	 */ | ||||
| 	goto done; | ||||
| 
 | ||||
| undo: | ||||
| 	sysctl_sched_uclamp_util_min = old_min; | ||||
| 	sysctl_sched_uclamp_util_max = old_max; | ||||
| done: | ||||
| 	mutex_unlock(&mutex); | ||||
| 
 | ||||
| 	return result; | ||||
| } | ||||
| 
 | ||||
| static int uclamp_validate(struct task_struct *p, | ||||
| 			   const struct sched_attr *attr) | ||||
| { | ||||
| 	unsigned int lower_bound = p->uclamp_req[UCLAMP_MIN].value; | ||||
| 	unsigned int upper_bound = p->uclamp_req[UCLAMP_MAX].value; | ||||
| 
 | ||||
| 	if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) | ||||
| 		lower_bound = attr->sched_util_min; | ||||
| 	if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) | ||||
| 		upper_bound = attr->sched_util_max; | ||||
| 
 | ||||
| 	if (lower_bound > upper_bound) | ||||
| 		return -EINVAL; | ||||
| 	if (upper_bound > SCHED_CAPACITY_SCALE) | ||||
| 		return -EINVAL; | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static void __setscheduler_uclamp(struct task_struct *p, | ||||
| 				  const struct sched_attr *attr) | ||||
| { | ||||
| 	unsigned int clamp_id; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * On scheduling class change, reset to default clamps for tasks | ||||
| 	 * without a task-specific value. | ||||
| 	 */ | ||||
| 	for_each_clamp_id(clamp_id) { | ||||
| 		struct uclamp_se *uc_se = &p->uclamp_req[clamp_id]; | ||||
| 		unsigned int clamp_value = uclamp_none(clamp_id); | ||||
| 
 | ||||
| 		/* Keep using defined clamps across class changes */ | ||||
| 		if (uc_se->user_defined) | ||||
| 			continue; | ||||
| 
 | ||||
| 		/* By default, RT tasks always get 100% boost */ | ||||
| 		if (unlikely(rt_task(p) && clamp_id == UCLAMP_MIN)) | ||||
| 			clamp_value = uclamp_none(UCLAMP_MAX); | ||||
| 
 | ||||
| 		uclamp_se_set(uc_se, clamp_value, false); | ||||
| 	} | ||||
| 
 | ||||
| 	if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP))) | ||||
| 		return; | ||||
| 
 | ||||
| 	if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) { | ||||
| 		uclamp_se_set(&p->uclamp_req[UCLAMP_MIN], | ||||
| 			      attr->sched_util_min, true); | ||||
| 	} | ||||
| 
 | ||||
| 	if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) { | ||||
| 		uclamp_se_set(&p->uclamp_req[UCLAMP_MAX], | ||||
| 			      attr->sched_util_max, true); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static void uclamp_fork(struct task_struct *p) | ||||
| { | ||||
| 	unsigned int clamp_id; | ||||
| 
 | ||||
| 	for_each_clamp_id(clamp_id) | ||||
| 		p->uclamp[clamp_id].active = false; | ||||
| 
 | ||||
| 	if (likely(!p->sched_reset_on_fork)) | ||||
| 		return; | ||||
| 
 | ||||
| 	for_each_clamp_id(clamp_id) { | ||||
| 		unsigned int clamp_value = uclamp_none(clamp_id); | ||||
| 
 | ||||
| 		/* By default, RT tasks always get 100% boost */ | ||||
| 		if (unlikely(rt_task(p) && clamp_id == UCLAMP_MIN)) | ||||
| 			clamp_value = uclamp_none(UCLAMP_MAX); | ||||
| 
 | ||||
| 		uclamp_se_set(&p->uclamp_req[clamp_id], clamp_value, false); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static void __init init_uclamp(void) | ||||
| { | ||||
| 	struct uclamp_se uc_max = {}; | ||||
| 	unsigned int clamp_id; | ||||
| 	int cpu; | ||||
| 
 | ||||
| 	for_each_possible_cpu(cpu) { | ||||
| 		memset(&cpu_rq(cpu)->uclamp, 0, sizeof(struct uclamp_rq)); | ||||
| 		cpu_rq(cpu)->uclamp_flags = 0; | ||||
| 	} | ||||
| 
 | ||||
| 	for_each_clamp_id(clamp_id) { | ||||
| 		uclamp_se_set(&init_task.uclamp_req[clamp_id], | ||||
| 			      uclamp_none(clamp_id), false); | ||||
| 	} | ||||
| 
 | ||||
| 	/* System defaults allow max clamp values for both indexes */ | ||||
| 	uclamp_se_set(&uc_max, uclamp_none(UCLAMP_MAX), false); | ||||
| 	for_each_clamp_id(clamp_id) | ||||
| 		uclamp_default[clamp_id] = uc_max; | ||||
| } | ||||
| 
 | ||||
| #else /* CONFIG_UCLAMP_TASK */ | ||||
| static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) { } | ||||
| static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { } | ||||
| static inline int uclamp_validate(struct task_struct *p, | ||||
| 				  const struct sched_attr *attr) | ||||
| { | ||||
| 	return -EOPNOTSUPP; | ||||
| } | ||||
| static void __setscheduler_uclamp(struct task_struct *p, | ||||
| 				  const struct sched_attr *attr) { } | ||||
| static inline void uclamp_fork(struct task_struct *p) { } | ||||
| static inline void init_uclamp(void) { } | ||||
| #endif /* CONFIG_UCLAMP_TASK */ | ||||
| 
 | ||||
| static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags) | ||||
| { | ||||
| 	if (!(flags & ENQUEUE_NOCLOCK)) | ||||
| @ -771,6 +1177,7 @@ static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags) | ||||
| 		psi_enqueue(p, flags & ENQUEUE_WAKEUP); | ||||
| 	} | ||||
| 
 | ||||
| 	uclamp_rq_inc(rq, p); | ||||
| 	p->sched_class->enqueue_task(rq, p, flags); | ||||
| } | ||||
| 
 | ||||
| @ -784,6 +1191,7 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags) | ||||
| 		psi_dequeue(p, flags & DEQUEUE_SLEEP); | ||||
| 	} | ||||
| 
 | ||||
| 	uclamp_rq_dec(rq, p); | ||||
| 	p->sched_class->dequeue_task(rq, p, flags); | ||||
| } | ||||
| 
 | ||||
| @ -930,7 +1338,7 @@ static inline bool is_per_cpu_kthread(struct task_struct *p) | ||||
|  */ | ||||
| static inline bool is_cpu_allowed(struct task_struct *p, int cpu) | ||||
| { | ||||
| 	if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) | ||||
| 	if (!cpumask_test_cpu(cpu, p->cpus_ptr)) | ||||
| 		return false; | ||||
| 
 | ||||
| 	if (is_per_cpu_kthread(p)) | ||||
| @ -1025,7 +1433,7 @@ static int migration_cpu_stop(void *data) | ||||
| 	local_irq_disable(); | ||||
| 	/*
 | ||||
| 	 * We need to explicitly wake pending tasks before running | ||||
| 	 * __migrate_task() such that we will not miss enforcing cpus_allowed | ||||
| 	 * __migrate_task() such that we will not miss enforcing cpus_ptr | ||||
| 	 * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test. | ||||
| 	 */ | ||||
| 	sched_ttwu_pending(); | ||||
| @ -1056,7 +1464,7 @@ static int migration_cpu_stop(void *data) | ||||
|  */ | ||||
| void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) | ||||
| { | ||||
| 	cpumask_copy(&p->cpus_allowed, new_mask); | ||||
| 	cpumask_copy(&p->cpus_mask, new_mask); | ||||
| 	p->nr_cpus_allowed = cpumask_weight(new_mask); | ||||
| } | ||||
| 
 | ||||
| @ -1126,7 +1534,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, | ||||
| 		goto out; | ||||
| 	} | ||||
| 
 | ||||
| 	if (cpumask_equal(&p->cpus_allowed, new_mask)) | ||||
| 	if (cpumask_equal(p->cpus_ptr, new_mask)) | ||||
| 		goto out; | ||||
| 
 | ||||
| 	if (!cpumask_intersects(new_mask, cpu_valid_mask)) { | ||||
| @ -1286,10 +1694,10 @@ static int migrate_swap_stop(void *data) | ||||
| 	if (task_cpu(arg->src_task) != arg->src_cpu) | ||||
| 		goto unlock; | ||||
| 
 | ||||
| 	if (!cpumask_test_cpu(arg->dst_cpu, &arg->src_task->cpus_allowed)) | ||||
| 	if (!cpumask_test_cpu(arg->dst_cpu, arg->src_task->cpus_ptr)) | ||||
| 		goto unlock; | ||||
| 
 | ||||
| 	if (!cpumask_test_cpu(arg->src_cpu, &arg->dst_task->cpus_allowed)) | ||||
| 	if (!cpumask_test_cpu(arg->src_cpu, arg->dst_task->cpus_ptr)) | ||||
| 		goto unlock; | ||||
| 
 | ||||
| 	__migrate_swap_task(arg->src_task, arg->dst_cpu); | ||||
| @ -1331,10 +1739,10 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p, | ||||
| 	if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu)) | ||||
| 		goto out; | ||||
| 
 | ||||
| 	if (!cpumask_test_cpu(arg.dst_cpu, &arg.src_task->cpus_allowed)) | ||||
| 	if (!cpumask_test_cpu(arg.dst_cpu, arg.src_task->cpus_ptr)) | ||||
| 		goto out; | ||||
| 
 | ||||
| 	if (!cpumask_test_cpu(arg.src_cpu, &arg.dst_task->cpus_allowed)) | ||||
| 	if (!cpumask_test_cpu(arg.src_cpu, arg.dst_task->cpus_ptr)) | ||||
| 		goto out; | ||||
| 
 | ||||
| 	trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu); | ||||
| @ -1479,7 +1887,7 @@ void kick_process(struct task_struct *p) | ||||
| EXPORT_SYMBOL_GPL(kick_process); | ||||
| 
 | ||||
| /*
 | ||||
|  * ->cpus_allowed is protected by both rq->lock and p->pi_lock | ||||
|  * ->cpus_ptr is protected by both rq->lock and p->pi_lock | ||||
|  * | ||||
|  * A few notes on cpu_active vs cpu_online: | ||||
|  * | ||||
| @ -1519,14 +1927,14 @@ static int select_fallback_rq(int cpu, struct task_struct *p) | ||||
| 		for_each_cpu(dest_cpu, nodemask) { | ||||
| 			if (!cpu_active(dest_cpu)) | ||||
| 				continue; | ||||
| 			if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) | ||||
| 			if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) | ||||
| 				return dest_cpu; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	for (;;) { | ||||
| 		/* Any allowed, online CPU? */ | ||||
| 		for_each_cpu(dest_cpu, &p->cpus_allowed) { | ||||
| 		for_each_cpu(dest_cpu, p->cpus_ptr) { | ||||
| 			if (!is_cpu_allowed(p, dest_cpu)) | ||||
| 				continue; | ||||
| 
 | ||||
| @ -1570,7 +1978,7 @@ out: | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable. | ||||
|  * The caller (fork, wakeup) owns p->pi_lock, ->cpus_ptr is stable. | ||||
|  */ | ||||
| static inline | ||||
| int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) | ||||
| @ -1580,11 +1988,11 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) | ||||
| 	if (p->nr_cpus_allowed > 1) | ||||
| 		cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags); | ||||
| 	else | ||||
| 		cpu = cpumask_any(&p->cpus_allowed); | ||||
| 		cpu = cpumask_any(p->cpus_ptr); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * In order not to call set_task_cpu() on a blocking task we need | ||||
| 	 * to rely on ttwu() to place the task on a valid ->cpus_allowed | ||||
| 	 * to rely on ttwu() to place the task on a valid ->cpus_ptr | ||||
| 	 * CPU. | ||||
| 	 * | ||||
| 	 * Since this is common to all placement strategies, this lives here. | ||||
| @ -1991,6 +2399,29 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) | ||||
| 	unsigned long flags; | ||||
| 	int cpu, success = 0; | ||||
| 
 | ||||
| 	if (p == current) { | ||||
| 		/*
 | ||||
| 		 * We're waking current, this means 'p->on_rq' and 'task_cpu(p) | ||||
| 		 * == smp_processor_id()'. Together this means we can special | ||||
| 		 * case the whole 'p->on_rq && ttwu_remote()' case below | ||||
| 		 * without taking any locks. | ||||
| 		 * | ||||
| 		 * In particular: | ||||
| 		 *  - we rely on Program-Order guarantees for all the ordering, | ||||
| 		 *  - we're serialized against set_special_state() by virtue of | ||||
| 		 *    it disabling IRQs (this allows not taking ->pi_lock). | ||||
| 		 */ | ||||
| 		if (!(p->state & state)) | ||||
| 			return false; | ||||
| 
 | ||||
| 		success = 1; | ||||
| 		cpu = task_cpu(p); | ||||
| 		trace_sched_waking(p); | ||||
| 		p->state = TASK_RUNNING; | ||||
| 		trace_sched_wakeup(p); | ||||
| 		goto out; | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If we are going to wake up a thread waiting for CONDITION we | ||||
| 	 * need to ensure that CONDITION=1 done by the caller can not be | ||||
| @ -2000,7 +2431,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) | ||||
| 	raw_spin_lock_irqsave(&p->pi_lock, flags); | ||||
| 	smp_mb__after_spinlock(); | ||||
| 	if (!(p->state & state)) | ||||
| 		goto out; | ||||
| 		goto unlock; | ||||
| 
 | ||||
| 	trace_sched_waking(p); | ||||
| 
 | ||||
| @ -2030,7 +2461,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) | ||||
| 	 */ | ||||
| 	smp_rmb(); | ||||
| 	if (p->on_rq && ttwu_remote(p, wake_flags)) | ||||
| 		goto stat; | ||||
| 		goto unlock; | ||||
| 
 | ||||
| #ifdef CONFIG_SMP | ||||
| 	/*
 | ||||
| @ -2090,10 +2521,11 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) | ||||
| #endif /* CONFIG_SMP */ | ||||
| 
 | ||||
| 	ttwu_queue(p, cpu, wake_flags); | ||||
| stat: | ||||
| 	ttwu_stat(p, cpu, wake_flags); | ||||
| out: | ||||
| unlock: | ||||
| 	raw_spin_unlock_irqrestore(&p->pi_lock, flags); | ||||
| out: | ||||
| 	if (success) | ||||
| 		ttwu_stat(p, cpu, wake_flags); | ||||
| 
 | ||||
| 	return success; | ||||
| } | ||||
| @ -2300,6 +2732,8 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) | ||||
| 	 */ | ||||
| 	p->prio = current->normal_prio; | ||||
| 
 | ||||
| 	uclamp_fork(p); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Revert to default priority/policy on fork if requested. | ||||
| 	 */ | ||||
| @ -2395,7 +2829,7 @@ void wake_up_new_task(struct task_struct *p) | ||||
| #ifdef CONFIG_SMP | ||||
| 	/*
 | ||||
| 	 * Fork balancing, do it here and not earlier because: | ||||
| 	 *  - cpus_allowed can change in the fork path | ||||
| 	 *  - cpus_ptr can change in the fork path | ||||
| 	 *  - any previously selected CPU might disappear through hotplug | ||||
| 	 * | ||||
| 	 * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, | ||||
| @ -3033,7 +3467,6 @@ void scheduler_tick(void) | ||||
| 
 | ||||
| 	update_rq_clock(rq); | ||||
| 	curr->sched_class->task_tick(rq, curr, 0); | ||||
| 	cpu_load_update_active(rq); | ||||
| 	calc_global_load_tick(rq); | ||||
| 	psi_task_tick(rq); | ||||
| 
 | ||||
| @ -4071,6 +4504,13 @@ static void __setscheduler_params(struct task_struct *p, | ||||
| static void __setscheduler(struct rq *rq, struct task_struct *p, | ||||
| 			   const struct sched_attr *attr, bool keep_boost) | ||||
| { | ||||
| 	/*
 | ||||
| 	 * If params can't change scheduling class changes aren't allowed | ||||
| 	 * either. | ||||
| 	 */ | ||||
| 	if (attr->sched_flags & SCHED_FLAG_KEEP_PARAMS) | ||||
| 		return; | ||||
| 
 | ||||
| 	__setscheduler_params(p, attr); | ||||
| 
 | ||||
| 	/*
 | ||||
| @ -4208,6 +4648,13 @@ recheck: | ||||
| 			return retval; | ||||
| 	} | ||||
| 
 | ||||
| 	/* Update task specific "requested" clamps */ | ||||
| 	if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) { | ||||
| 		retval = uclamp_validate(p, attr); | ||||
| 		if (retval) | ||||
| 			return retval; | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Make sure no PI-waiters arrive (or leave) while we are | ||||
| 	 * changing the priority of the task: | ||||
| @ -4237,6 +4684,8 @@ recheck: | ||||
| 			goto change; | ||||
| 		if (dl_policy(policy) && dl_param_changed(p, attr)) | ||||
| 			goto change; | ||||
| 		if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) | ||||
| 			goto change; | ||||
| 
 | ||||
| 		p->sched_reset_on_fork = reset_on_fork; | ||||
| 		task_rq_unlock(rq, p, &rf); | ||||
| @ -4267,7 +4716,7 @@ change: | ||||
| 			 * the entire root_domain to become SCHED_DEADLINE. We | ||||
| 			 * will also fail if there's no bandwidth available. | ||||
| 			 */ | ||||
| 			if (!cpumask_subset(span, &p->cpus_allowed) || | ||||
| 			if (!cpumask_subset(span, p->cpus_ptr) || | ||||
| 			    rq->rd->dl_bw.bw == 0) { | ||||
| 				task_rq_unlock(rq, p, &rf); | ||||
| 				return -EPERM; | ||||
| @ -4317,7 +4766,9 @@ change: | ||||
| 		put_prev_task(rq, p); | ||||
| 
 | ||||
| 	prev_class = p->sched_class; | ||||
| 
 | ||||
| 	__setscheduler(rq, p, attr, pi); | ||||
| 	__setscheduler_uclamp(p, attr); | ||||
| 
 | ||||
| 	if (queued) { | ||||
| 		/*
 | ||||
| @ -4493,6 +4944,10 @@ static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *a | ||||
| 	if (ret) | ||||
| 		return -EFAULT; | ||||
| 
 | ||||
| 	if ((attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) && | ||||
| 	    size < SCHED_ATTR_SIZE_VER1) | ||||
| 		return -EINVAL; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * XXX: Do we want to be lenient like existing syscalls; or do we want | ||||
| 	 * to be strict and return an error on out-of-bounds values? | ||||
| @ -4556,14 +5011,21 @@ SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, | ||||
| 
 | ||||
| 	if ((int)attr.sched_policy < 0) | ||||
| 		return -EINVAL; | ||||
| 	if (attr.sched_flags & SCHED_FLAG_KEEP_POLICY) | ||||
| 		attr.sched_policy = SETPARAM_POLICY; | ||||
| 
 | ||||
| 	rcu_read_lock(); | ||||
| 	retval = -ESRCH; | ||||
| 	p = find_process_by_pid(pid); | ||||
| 	if (p != NULL) | ||||
| 		retval = sched_setattr(p, &attr); | ||||
| 	if (likely(p)) | ||||
| 		get_task_struct(p); | ||||
| 	rcu_read_unlock(); | ||||
| 
 | ||||
| 	if (likely(p)) { | ||||
| 		retval = sched_setattr(p, &attr); | ||||
| 		put_task_struct(p); | ||||
| 	} | ||||
| 
 | ||||
| 	return retval; | ||||
| } | ||||
| 
 | ||||
| @ -4714,6 +5176,11 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, | ||||
| 	else | ||||
| 		attr.sched_nice = task_nice(p); | ||||
| 
 | ||||
| #ifdef CONFIG_UCLAMP_TASK | ||||
| 	attr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value; | ||||
| 	attr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value; | ||||
| #endif | ||||
| 
 | ||||
| 	rcu_read_unlock(); | ||||
| 
 | ||||
| 	retval = sched_read_attr(uattr, &attr, size); | ||||
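The hunks above extend sched_setattr()/sched_getattr() with the utilization-clamp fields and the SCHED_FLAG_KEEP_POLICY handling. As a rough userspace sketch of requesting per-task clamps: the struct layout and flag values below are assumed to mirror the uapi additions of this series and are redefined locally for illustration, not copied from the authoritative headers.

```c
/* Hedged sketch: request util clamps for the calling task while keeping its
 * current policy and parameters. Constants and struct layout are assumptions
 * matching the uapi additions of this series, redefined here for clarity. */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

struct sched_attr_v1 {                  /* assumed SCHED_ATTR_SIZE_VER1 layout */
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;
	uint32_t sched_priority;
	uint64_t sched_runtime;
	uint64_t sched_deadline;
	uint64_t sched_period;
	uint32_t sched_util_min;        /* new in VER1 */
	uint32_t sched_util_max;        /* new in VER1 */
};

#define SCHED_FLAG_KEEP_POLICY          0x08    /* assumed uapi values */
#define SCHED_FLAG_KEEP_PARAMS          0x10
#define SCHED_FLAG_UTIL_CLAMP_MIN       0x20
#define SCHED_FLAG_UTIL_CLAMP_MAX       0x40

int main(void)
{
	struct sched_attr_v1 attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.sched_flags = SCHED_FLAG_KEEP_POLICY | SCHED_FLAG_KEEP_PARAMS |
			   SCHED_FLAG_UTIL_CLAMP_MIN | SCHED_FLAG_UTIL_CLAMP_MAX;
	attr.sched_util_min = 512;      /* boost: run at least at ~50% capacity */
	attr.sched_util_max = 1024;     /* no cap */

	/* pid 0 means the calling task; the last argument must be 0 */
	if (syscall(__NR_sched_setattr, 0, &attr, 0))
		perror("sched_setattr");
	return 0;
}
```

Note that, per the validation hunk above, attr.size must be at least SCHED_ATTR_SIZE_VER1 whenever a UTIL_CLAMP flag is set, and KEEP_POLICY now lets the call reuse the task's current policy.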
| @ -4866,7 +5333,7 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) | ||||
| 		goto out_unlock; | ||||
| 
 | ||||
| 	raw_spin_lock_irqsave(&p->pi_lock, flags); | ||||
| 	cpumask_and(mask, &p->cpus_allowed, cpu_active_mask); | ||||
| 	cpumask_and(mask, &p->cpus_mask, cpu_active_mask); | ||||
| 	raw_spin_unlock_irqrestore(&p->pi_lock, flags); | ||||
| 
 | ||||
| out_unlock: | ||||
| @ -5123,7 +5590,7 @@ long __sched io_schedule_timeout(long timeout) | ||||
| } | ||||
| EXPORT_SYMBOL(io_schedule_timeout); | ||||
| 
 | ||||
| void io_schedule(void) | ||||
| void __sched io_schedule(void) | ||||
| { | ||||
| 	int token; | ||||
| 
 | ||||
| @ -5443,7 +5910,7 @@ int task_can_attach(struct task_struct *p, | ||||
| 	 * allowed nodes is unnecessary.  Thus, cpusets are not | ||||
| 	 * applicable for such threads.  This prevents checking for | ||||
| 	 * success of set_cpus_allowed_ptr() on all attached tasks | ||||
| 	 * before cpus_allowed may be changed. | ||||
| 	 * before cpus_mask may be changed. | ||||
| 	 */ | ||||
| 	if (p->flags & PF_NO_SETAFFINITY) { | ||||
| 		ret = -EINVAL; | ||||
| @ -5470,7 +5937,7 @@ int migrate_task_to(struct task_struct *p, int target_cpu) | ||||
| 	if (curr_cpu == target_cpu) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	if (!cpumask_test_cpu(target_cpu, &p->cpus_allowed)) | ||||
| 	if (!cpumask_test_cpu(target_cpu, p->cpus_ptr)) | ||||
| 		return -EINVAL; | ||||
| 
 | ||||
| 	/* TODO: This is not properly updating schedstats */ | ||||
| @ -5608,7 +6075,7 @@ static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf) | ||||
| 		put_prev_task(rq, next); | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * Rules for changing task_struct::cpus_allowed are holding | ||||
| 		 * Rules for changing task_struct::cpus_mask are holding | ||||
| 		 * both pi_lock and rq->lock, such that holding either | ||||
| 		 * stabilizes the mask. | ||||
| 		 * | ||||
| @ -5902,8 +6369,8 @@ DECLARE_PER_CPU(cpumask_var_t, select_idle_mask); | ||||
| 
 | ||||
| void __init sched_init(void) | ||||
| { | ||||
| 	int i, j; | ||||
| 	unsigned long alloc_size = 0, ptr; | ||||
| 	int i; | ||||
| 
 | ||||
| 	wait_bit_init(); | ||||
| 
 | ||||
| @ -6005,10 +6472,6 @@ void __init sched_init(void) | ||||
| #ifdef CONFIG_RT_GROUP_SCHED | ||||
| 		init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL); | ||||
| #endif | ||||
| 
 | ||||
| 		for (j = 0; j < CPU_LOAD_IDX_MAX; j++) | ||||
| 			rq->cpu_load[j] = 0; | ||||
| 
 | ||||
| #ifdef CONFIG_SMP | ||||
| 		rq->sd = NULL; | ||||
| 		rq->rd = NULL; | ||||
| @ -6063,6 +6526,8 @@ void __init sched_init(void) | ||||
| 
 | ||||
| 	psi_init(); | ||||
| 
 | ||||
| 	init_uclamp(); | ||||
| 
 | ||||
| 	scheduler_running = 1; | ||||
| } | ||||
| 
 | ||||
|  | ||||
| @ -120,14 +120,14 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p, | ||||
| 	const struct sched_dl_entity *dl_se = &p->dl; | ||||
| 
 | ||||
| 	if (later_mask && | ||||
| 	    cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) { | ||||
| 	    cpumask_and(later_mask, cp->free_cpus, p->cpus_ptr)) { | ||||
| 		return 1; | ||||
| 	} else { | ||||
| 		int best_cpu = cpudl_maximum(cp); | ||||
| 
 | ||||
| 		WARN_ON(best_cpu != -1 && !cpu_present(best_cpu)); | ||||
| 
 | ||||
| 		if (cpumask_test_cpu(best_cpu, &p->cpus_allowed) && | ||||
| 		if (cpumask_test_cpu(best_cpu, p->cpus_ptr) && | ||||
| 		    dl_time_before(dl_se->deadline, cp->elements[0].dl)) { | ||||
| 			if (later_mask) | ||||
| 				cpumask_set_cpu(best_cpu, later_mask); | ||||
|  | ||||
| @ -196,14 +196,17 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, | ||||
|  * based on the task model parameters and gives the minimal utilization | ||||
|  * required to meet deadlines. | ||||
|  */ | ||||
| unsigned long schedutil_freq_util(int cpu, unsigned long util_cfs, | ||||
| 				  unsigned long max, enum schedutil_type type) | ||||
| unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs, | ||||
| 				 unsigned long max, enum schedutil_type type, | ||||
| 				 struct task_struct *p) | ||||
| { | ||||
| 	unsigned long dl_util, util, irq; | ||||
| 	struct rq *rq = cpu_rq(cpu); | ||||
| 
 | ||||
| 	if (type == FREQUENCY_UTIL && rt_rq_is_runnable(&rq->rt)) | ||||
| 	if (!IS_BUILTIN(CONFIG_UCLAMP_TASK) && | ||||
| 	    type == FREQUENCY_UTIL && rt_rq_is_runnable(&rq->rt)) { | ||||
| 		return max; | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Early check to see if IRQ/steal time saturates the CPU, can be | ||||
| @ -219,9 +222,16 @@ unsigned long schedutil_freq_util(int cpu, unsigned long util_cfs, | ||||
| 	 * CFS tasks and we use the same metric to track the effective | ||||
| 	 * utilization (PELT windows are synchronized) we can directly add them | ||||
| 	 * to obtain the CPU's actual utilization. | ||||
| 	 * | ||||
| 	 * CFS and RT utilization can be boosted or capped, depending on | ||||
| 	 * utilization clamp constraints requested by currently RUNNABLE | ||||
| 	 * tasks. | ||||
| 	 * When there are no CFS RUNNABLE tasks, clamps are released and | ||||
| 	 * frequency will be gracefully reduced with the utilization decay. | ||||
| 	 */ | ||||
| 	util = util_cfs; | ||||
| 	util += cpu_util_rt(rq); | ||||
| 	util = util_cfs + cpu_util_rt(rq); | ||||
| 	if (type == FREQUENCY_UTIL) | ||||
| 		util = uclamp_util_with(rq, util, p); | ||||
| 
 | ||||
| 	dl_util = cpu_util_dl(rq); | ||||
| 
 | ||||
| @ -276,12 +286,12 @@ static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) | ||||
| { | ||||
| 	struct rq *rq = cpu_rq(sg_cpu->cpu); | ||||
| 	unsigned long util = cpu_util_cfs(rq); | ||||
| 	unsigned long max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu); | ||||
| 	unsigned long max = arch_scale_cpu_capacity(sg_cpu->cpu); | ||||
| 
 | ||||
| 	sg_cpu->max = max; | ||||
| 	sg_cpu->bw_dl = cpu_bw_dl(rq); | ||||
| 
 | ||||
| 	return schedutil_freq_util(sg_cpu->cpu, util, max, FREQUENCY_UTIL); | ||||
| 	return schedutil_cpu_util(sg_cpu->cpu, util, max, FREQUENCY_UTIL, NULL); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  | ||||
| @ -94,11 +94,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p, | ||||
| 		if (skip) | ||||
| 			continue; | ||||
| 
 | ||||
| 		if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) | ||||
| 		if (cpumask_any_and(p->cpus_ptr, vec->mask) >= nr_cpu_ids) | ||||
| 			continue; | ||||
| 
 | ||||
| 		if (lowest_mask) { | ||||
| 			cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask); | ||||
| 			cpumask_and(lowest_mask, p->cpus_ptr, vec->mask); | ||||
| 
 | ||||
| 			/*
 | ||||
| 			 * We have to ensure that we have at least one bit | ||||
|  | ||||
| @ -538,7 +538,7 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p | ||||
| 		 * If we cannot preempt any rq, fall back to pick any | ||||
| 		 * online CPU: | ||||
| 		 */ | ||||
| 		cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed); | ||||
| 		cpu = cpumask_any_and(cpu_active_mask, p->cpus_ptr); | ||||
| 		if (cpu >= nr_cpu_ids) { | ||||
| 			/*
 | ||||
| 			 * Failed to find any suitable CPU. | ||||
| @ -1195,7 +1195,7 @@ static void update_curr_dl(struct rq *rq) | ||||
| 						 &curr->dl); | ||||
| 	} else { | ||||
| 		unsigned long scale_freq = arch_scale_freq_capacity(cpu); | ||||
| 		unsigned long scale_cpu = arch_scale_cpu_capacity(NULL, cpu); | ||||
| 		unsigned long scale_cpu = arch_scale_cpu_capacity(cpu); | ||||
| 
 | ||||
| 		scaled_delta_exec = cap_scale(delta_exec, scale_freq); | ||||
| 		scaled_delta_exec = cap_scale(scaled_delta_exec, scale_cpu); | ||||
| @ -1824,7 +1824,7 @@ static void set_curr_task_dl(struct rq *rq) | ||||
| static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu) | ||||
| { | ||||
| 	if (!task_running(rq, p) && | ||||
| 	    cpumask_test_cpu(cpu, &p->cpus_allowed)) | ||||
| 	    cpumask_test_cpu(cpu, p->cpus_ptr)) | ||||
| 		return 1; | ||||
| 	return 0; | ||||
| } | ||||
| @ -1974,7 +1974,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq) | ||||
| 		/* Retry if something changed. */ | ||||
| 		if (double_lock_balance(rq, later_rq)) { | ||||
| 			if (unlikely(task_rq(task) != rq || | ||||
| 				     !cpumask_test_cpu(later_rq->cpu, &task->cpus_allowed) || | ||||
| 				     !cpumask_test_cpu(later_rq->cpu, task->cpus_ptr) || | ||||
| 				     task_running(rq, task) || | ||||
| 				     !dl_task(task) || | ||||
| 				     !task_on_rq_queued(task))) { | ||||
|  | ||||
| @ -233,49 +233,35 @@ static void sd_free_ctl_entry(struct ctl_table **tablep) | ||||
| 	*tablep = NULL; | ||||
| } | ||||
| 
 | ||||
| static int min_load_idx = 0; | ||||
| static int max_load_idx = CPU_LOAD_IDX_MAX-1; | ||||
| 
 | ||||
| static void | ||||
| set_table_entry(struct ctl_table *entry, | ||||
| 		const char *procname, void *data, int maxlen, | ||||
| 		umode_t mode, proc_handler *proc_handler, | ||||
| 		bool load_idx) | ||||
| 		umode_t mode, proc_handler *proc_handler) | ||||
| { | ||||
| 	entry->procname = procname; | ||||
| 	entry->data = data; | ||||
| 	entry->maxlen = maxlen; | ||||
| 	entry->mode = mode; | ||||
| 	entry->proc_handler = proc_handler; | ||||
| 
 | ||||
| 	if (load_idx) { | ||||
| 		entry->extra1 = &min_load_idx; | ||||
| 		entry->extra2 = &max_load_idx; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static struct ctl_table * | ||||
| sd_alloc_ctl_domain_table(struct sched_domain *sd) | ||||
| { | ||||
| 	struct ctl_table *table = sd_alloc_ctl_entry(14); | ||||
| 	struct ctl_table *table = sd_alloc_ctl_entry(9); | ||||
| 
 | ||||
| 	if (table == NULL) | ||||
| 		return NULL; | ||||
| 
 | ||||
| 	set_table_entry(&table[0] , "min_interval",	   &sd->min_interval,	     sizeof(long), 0644, proc_doulongvec_minmax, false); | ||||
| 	set_table_entry(&table[1] , "max_interval",	   &sd->max_interval,	     sizeof(long), 0644, proc_doulongvec_minmax, false); | ||||
| 	set_table_entry(&table[2] , "busy_idx",		   &sd->busy_idx,	     sizeof(int) , 0644, proc_dointvec_minmax,   true ); | ||||
| 	set_table_entry(&table[3] , "idle_idx",		   &sd->idle_idx,	     sizeof(int) , 0644, proc_dointvec_minmax,   true ); | ||||
| 	set_table_entry(&table[4] , "newidle_idx",	   &sd->newidle_idx,	     sizeof(int) , 0644, proc_dointvec_minmax,   true ); | ||||
| 	set_table_entry(&table[5] , "wake_idx",		   &sd->wake_idx,	     sizeof(int) , 0644, proc_dointvec_minmax,   true ); | ||||
| 	set_table_entry(&table[6] , "forkexec_idx",	   &sd->forkexec_idx,	     sizeof(int) , 0644, proc_dointvec_minmax,   true ); | ||||
| 	set_table_entry(&table[7] , "busy_factor",	   &sd->busy_factor,	     sizeof(int) , 0644, proc_dointvec_minmax,   false); | ||||
| 	set_table_entry(&table[8] , "imbalance_pct",	   &sd->imbalance_pct,	     sizeof(int) , 0644, proc_dointvec_minmax,   false); | ||||
| 	set_table_entry(&table[9] , "cache_nice_tries",	   &sd->cache_nice_tries,    sizeof(int) , 0644, proc_dointvec_minmax,   false); | ||||
| 	set_table_entry(&table[10], "flags",		   &sd->flags,		     sizeof(int) , 0644, proc_dointvec_minmax,   false); | ||||
| 	set_table_entry(&table[11], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax, false); | ||||
| 	set_table_entry(&table[12], "name",		   sd->name,		CORENAME_MAX_SIZE, 0444, proc_dostring,		 false); | ||||
| 	/* &table[13] is terminator */ | ||||
| 	set_table_entry(&table[0], "min_interval",	  &sd->min_interval,	    sizeof(long), 0644, proc_doulongvec_minmax); | ||||
| 	set_table_entry(&table[1], "max_interval",	  &sd->max_interval,	    sizeof(long), 0644, proc_doulongvec_minmax); | ||||
| 	set_table_entry(&table[2], "busy_factor",	  &sd->busy_factor,	    sizeof(int),  0644, proc_dointvec_minmax); | ||||
| 	set_table_entry(&table[3], "imbalance_pct",	  &sd->imbalance_pct,	    sizeof(int),  0644, proc_dointvec_minmax); | ||||
| 	set_table_entry(&table[4], "cache_nice_tries",	  &sd->cache_nice_tries,    sizeof(int),  0644, proc_dointvec_minmax); | ||||
| 	set_table_entry(&table[5], "flags",		  &sd->flags,		    sizeof(int),  0644, proc_dointvec_minmax); | ||||
| 	set_table_entry(&table[6], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax); | ||||
| 	set_table_entry(&table[7], "name",		  sd->name,	       CORENAME_MAX_SIZE, 0444, proc_dostring); | ||||
| 	/* &table[8] is terminator */ | ||||
| 
 | ||||
| 	return table; | ||||
| } | ||||
| @ -653,8 +639,6 @@ do {									\ | ||||
| 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x)) | ||||
| 
 | ||||
| 	P(nr_running); | ||||
| 	SEQ_printf(m, "  .%-30s: %lu\n", "load", | ||||
| 		   rq->load.weight); | ||||
| 	P(nr_switches); | ||||
| 	P(nr_load_updates); | ||||
| 	P(nr_uninterruptible); | ||||
| @ -662,11 +646,6 @@ do {									\ | ||||
| 	SEQ_printf(m, "  .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr))); | ||||
| 	PN(clock); | ||||
| 	PN(clock_task); | ||||
| 	P(cpu_load[0]); | ||||
| 	P(cpu_load[1]); | ||||
| 	P(cpu_load[2]); | ||||
| 	P(cpu_load[3]); | ||||
| 	P(cpu_load[4]); | ||||
| #undef P | ||||
| #undef PN | ||||
| 
 | ||||
|  | ||||
										
											
File diff suppressed because it is too large
							| @ -39,7 +39,6 @@ SCHED_FEAT(WAKEUP_PREEMPTION, true) | ||||
| 
 | ||||
| SCHED_FEAT(HRTICK, false) | ||||
| SCHED_FEAT(DOUBLE_TICK, false) | ||||
| SCHED_FEAT(LB_BIAS, false) | ||||
| 
 | ||||
| /*
 | ||||
|  * Decrement CPU capacity based on time not spent running tasks | ||||
|  | ||||
| @ -28,6 +28,8 @@ | ||||
| #include "sched.h" | ||||
| #include "pelt.h" | ||||
| 
 | ||||
| #include <trace/events/sched.h> | ||||
| 
 | ||||
| /*
 | ||||
|  * Approximate: | ||||
|  *   val * y^n,    where y^32 ~= 0.5 (~1 scheduling period) | ||||
| @ -265,6 +267,7 @@ int __update_load_avg_blocked_se(u64 now, struct sched_entity *se) | ||||
| { | ||||
| 	if (___update_load_sum(now, &se->avg, 0, 0, 0)) { | ||||
| 		___update_load_avg(&se->avg, se_weight(se), se_runnable(se)); | ||||
| 		trace_pelt_se_tp(se); | ||||
| 		return 1; | ||||
| 	} | ||||
| 
 | ||||
| @ -278,6 +281,7 @@ int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se | ||||
| 
 | ||||
| 		___update_load_avg(&se->avg, se_weight(se), se_runnable(se)); | ||||
| 		cfs_se_util_change(&se->avg); | ||||
| 		trace_pelt_se_tp(se); | ||||
| 		return 1; | ||||
| 	} | ||||
| 
 | ||||
| @ -292,6 +296,7 @@ int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq) | ||||
| 				cfs_rq->curr != NULL)) { | ||||
| 
 | ||||
| 		___update_load_avg(&cfs_rq->avg, 1, 1); | ||||
| 		trace_pelt_cfs_tp(cfs_rq); | ||||
| 		return 1; | ||||
| 	} | ||||
| 
 | ||||
| @ -317,6 +322,7 @@ int update_rt_rq_load_avg(u64 now, struct rq *rq, int running) | ||||
| 				running)) { | ||||
| 
 | ||||
| 		___update_load_avg(&rq->avg_rt, 1, 1); | ||||
| 		trace_pelt_rt_tp(rq); | ||||
| 		return 1; | ||||
| 	} | ||||
| 
 | ||||
| @ -340,6 +346,7 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running) | ||||
| 				running)) { | ||||
| 
 | ||||
| 		___update_load_avg(&rq->avg_dl, 1, 1); | ||||
| 		trace_pelt_dl_tp(rq); | ||||
| 		return 1; | ||||
| 	} | ||||
| 
 | ||||
| @ -366,7 +373,7 @@ int update_irq_load_avg(struct rq *rq, u64 running) | ||||
| 	 * reflect the real amount of computation | ||||
| 	 */ | ||||
| 	running = cap_scale(running, arch_scale_freq_capacity(cpu_of(rq))); | ||||
| 	running = cap_scale(running, arch_scale_cpu_capacity(NULL, cpu_of(rq))); | ||||
| 	running = cap_scale(running, arch_scale_cpu_capacity(cpu_of(rq))); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * We know the time that has been used by interrupt since last update | ||||
| @ -388,8 +395,10 @@ int update_irq_load_avg(struct rq *rq, u64 running) | ||||
| 				1, | ||||
| 				1); | ||||
| 
 | ||||
| 	if (ret) | ||||
| 	if (ret) { | ||||
| 		___update_load_avg(&rq->avg_irq, 1, 1); | ||||
| 		trace_pelt_irq_tp(rq); | ||||
| 	} | ||||
| 
 | ||||
| 	return ret; | ||||
| } | ||||
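The trace_pelt_*_tp() calls added above are bare tracepoints rather than full trace events, so a consumer normally hooks them from a module probe. A minimal sketch follows; it assumes the tracepoints are declared via DECLARE_TRACE() in <trace/events/sched.h> and exported for module use elsewhere in this series, and that the probe signature matches the single-pointer TP_PROTO.

```c
// SPDX-License-Identifier: GPL-2.0
/* Hypothetical out-of-tree module: count pelt_cfs_tp firings.
 * Assumes the tracepoint is exported to modules and declared with
 * DECLARE_TRACE() in <trace/events/sched.h> (both are assumptions). */
#include <linux/module.h>
#include <linux/printk.h>
#include <linux/atomic.h>
#include <trace/events/sched.h>

struct cfs_rq;                          /* opaque here; members are not touched */

static atomic64_t pelt_cfs_events = ATOMIC64_INIT(0);

/* Probe signature: a void *data cookie followed by the TP_PROTO arguments. */
static void probe_pelt_cfs(void *data, struct cfs_rq *cfs_rq)
{
	atomic64_inc(&pelt_cfs_events);
}

static int __init pelt_tp_demo_init(void)
{
	return register_trace_pelt_cfs_tp(probe_pelt_cfs, NULL);
}

static void __exit pelt_tp_demo_exit(void)
{
	unregister_trace_pelt_cfs_tp(probe_pelt_cfs, NULL);
	tracepoint_synchronize_unregister();
	pr_info("pelt_cfs_tp fired %lld times\n",
		(long long)atomic64_read(&pelt_cfs_events));
}

module_init(pelt_tp_demo_init);
module_exit(pelt_tp_demo_exit);
MODULE_LICENSE("GPL");
```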
|  | ||||
| @ -79,7 +79,7 @@ static inline void update_rq_clock_pelt(struct rq *rq, s64 delta) | ||||
| 	 * Scale the elapsed time to reflect the real amount of | ||||
| 	 * computation | ||||
| 	 */ | ||||
| 	delta = cap_scale(delta, arch_scale_cpu_capacity(NULL, cpu_of(rq))); | ||||
| 	delta = cap_scale(delta, arch_scale_cpu_capacity(cpu_of(rq))); | ||||
| 	delta = cap_scale(delta, arch_scale_freq_capacity(cpu_of(rq))); | ||||
| 
 | ||||
| 	rq->clock_pelt += delta; | ||||
|  | ||||
| @ -1614,7 +1614,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) | ||||
| static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) | ||||
| { | ||||
| 	if (!task_running(rq, p) && | ||||
| 	    cpumask_test_cpu(cpu, &p->cpus_allowed)) | ||||
| 	    cpumask_test_cpu(cpu, p->cpus_ptr)) | ||||
| 		return 1; | ||||
| 
 | ||||
| 	return 0; | ||||
| @ -1751,7 +1751,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) | ||||
| 			 * Also make sure that it wasn't scheduled on its rq. | ||||
| 			 */ | ||||
| 			if (unlikely(task_rq(task) != rq || | ||||
| 				     !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_allowed) || | ||||
| 				     !cpumask_test_cpu(lowest_rq->cpu, task->cpus_ptr) || | ||||
| 				     task_running(rq, task) || | ||||
| 				     !rt_task(task) || | ||||
| 				     !task_on_rq_queued(task))) { | ||||
| @ -2400,6 +2400,10 @@ const struct sched_class rt_sched_class = { | ||||
| 	.switched_to		= switched_to_rt, | ||||
| 
 | ||||
| 	.update_curr		= update_curr_rt, | ||||
| 
 | ||||
| #ifdef CONFIG_UCLAMP_TASK | ||||
| 	.uclamp_enabled		= 1, | ||||
| #endif | ||||
| }; | ||||
| 
 | ||||
| #ifdef CONFIG_RT_GROUP_SCHED | ||||
|  | ||||
| @ -1,7 +1,7 @@ | ||||
| /* SPDX-License-Identifier: GPL-2.0 */ | ||||
| /* Generated by Documentation/scheduler/sched-pelt; do not modify. */ | ||||
| 
 | ||||
| static const u32 runnable_avg_yN_inv[] = { | ||||
| static const u32 runnable_avg_yN_inv[] __maybe_unused = { | ||||
| 	0xffffffff, 0xfa83b2da, 0xf5257d14, 0xefe4b99a, 0xeac0c6e6, 0xe5b906e6, | ||||
| 	0xe0ccdeeb, 0xdbfbb796, 0xd744fcc9, 0xd2a81d91, 0xce248c14, 0xc9b9bd85, | ||||
| 	0xc5672a10, 0xc12c4cc9, 0xbd08a39e, 0xb8fbaf46, 0xb504f333, 0xb123f581, | ||||
|  | ||||
| @ -96,12 +96,6 @@ extern atomic_long_t calc_load_tasks; | ||||
| extern void calc_global_load_tick(struct rq *this_rq); | ||||
| extern long calc_load_fold_active(struct rq *this_rq, long adjust); | ||||
| 
 | ||||
| #ifdef CONFIG_SMP | ||||
| extern void cpu_load_update_active(struct rq *this_rq); | ||||
| #else | ||||
| static inline void cpu_load_update_active(struct rq *this_rq) { } | ||||
| #endif | ||||
| 
 | ||||
| /*
 | ||||
|  * Helpers for converting nanosecond timing to jiffy resolution | ||||
|  */ | ||||
| @ -344,8 +338,10 @@ struct cfs_bandwidth { | ||||
| 	u64			runtime_expires; | ||||
| 	int			expires_seq; | ||||
| 
 | ||||
| 	short			idle; | ||||
| 	short			period_active; | ||||
| 	u8			idle; | ||||
| 	u8			period_active; | ||||
| 	u8			distribute_running; | ||||
| 	u8			slack_started; | ||||
| 	struct hrtimer		period_timer; | ||||
| 	struct hrtimer		slack_timer; | ||||
| 	struct list_head	throttled_cfs_rq; | ||||
| @ -354,8 +350,6 @@ struct cfs_bandwidth { | ||||
| 	int			nr_periods; | ||||
| 	int			nr_throttled; | ||||
| 	u64			throttled_time; | ||||
| 
 | ||||
| 	bool                    distribute_running; | ||||
| #endif | ||||
| }; | ||||
| 
 | ||||
| @ -797,6 +791,48 @@ extern void rto_push_irq_work_func(struct irq_work *work); | ||||
| #endif | ||||
| #endif /* CONFIG_SMP */ | ||||
| 
 | ||||
| #ifdef CONFIG_UCLAMP_TASK | ||||
| /*
 | ||||
|  * struct uclamp_bucket - Utilization clamp bucket | ||||
|  * @value: utilization clamp value for tasks on this clamp bucket | ||||
|  * @tasks: number of RUNNABLE tasks on this clamp bucket | ||||
|  * | ||||
|  * Keep track of how many tasks are RUNNABLE for a given utilization | ||||
|  * clamp value. | ||||
|  */ | ||||
| struct uclamp_bucket { | ||||
| 	unsigned long value : bits_per(SCHED_CAPACITY_SCALE); | ||||
| 	unsigned long tasks : BITS_PER_LONG - bits_per(SCHED_CAPACITY_SCALE); | ||||
| }; | ||||
| 
 | ||||
| /*
 | ||||
|  * struct uclamp_rq - rq's utilization clamp | ||||
|  * @value: currently active clamp values for a rq | ||||
|  * @bucket: utilization clamp buckets affecting a rq | ||||
|  * | ||||
|  * Keep track of RUNNABLE tasks on a rq to aggregate their clamp values. | ||||
|  * A clamp value is affecting a rq when there is at least one task RUNNABLE | ||||
|  * (or actually running) with that value. | ||||
|  * | ||||
|  * There are up to UCLAMP_CNT possible different clamp values, currently there | ||||
|  * are only two: minimum utilization and maximum utilization. | ||||
|  * | ||||
|  * All utilization clamping values are MAX aggregated, since: | ||||
|  * - for util_min: we want to run the CPU at least at the max of the minimum | ||||
|  *   utilization required by its currently RUNNABLE tasks. | ||||
|  * - for util_max: we want to allow the CPU to run up to the max of the | ||||
|  *   maximum utilization allowed by its currently RUNNABLE tasks. | ||||
|  * | ||||
|  * Since on each system we expect only a limited number of different | ||||
|  * utilization clamp values (UCLAMP_BUCKETS), use a simple array to track | ||||
|  * the metrics required to compute all the per-rq utilization clamp values. | ||||
|  */ | ||||
| struct uclamp_rq { | ||||
| 	unsigned int value; | ||||
| 	struct uclamp_bucket bucket[UCLAMP_BUCKETS]; | ||||
| }; | ||||
| #endif /* CONFIG_UCLAMP_TASK */ | ||||
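As a back-of-the-envelope check on the bucket packing above (a standalone illustration, not kernel code): with SCHED_CAPACITY_SCALE = 1024, the 'value' field needs bits_per(1024) = 11 bits to hold 0..1024, leaving BITS_PER_LONG - 11 = 53 bits for the RUNNABLE-task refcount on a 64-bit kernel.

```c
/* Standalone sketch of the bitfield sizing (assumptions: 64-bit kernel,
 * SCHED_CAPACITY_SCALE == 1024; bits_per() re-implemented here for clarity). */
#include <assert.h>
#include <stdio.h>

#define SCHED_CAPACITY_SCALE	1024
#define BITS_PER_LONG		64

/* Smallest number of bits able to represent every value in 0..n,
 * mirroring the kernel's bits_per() semantics. */
static unsigned int bits_per(unsigned long n)
{
	unsigned int bits = 1;

	while (n >>= 1)
		bits++;
	return bits;
}

int main(void)
{
	unsigned int value_bits = bits_per(SCHED_CAPACITY_SCALE);
	unsigned int tasks_bits = BITS_PER_LONG - value_bits;

	assert(value_bits == 11 && tasks_bits == 53);
	printf("value: %u bits, tasks: %u bits\n", value_bits, tasks_bits);
	return 0;
}
```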
| 
 | ||||
| /*
 | ||||
|  * This is the main, per-CPU runqueue data structure. | ||||
|  * | ||||
| @ -818,8 +854,6 @@ struct rq { | ||||
| 	unsigned int		nr_preferred_running; | ||||
| 	unsigned int		numa_migrate_on; | ||||
| #endif | ||||
| 	#define CPU_LOAD_IDX_MAX 5 | ||||
| 	unsigned long		cpu_load[CPU_LOAD_IDX_MAX]; | ||||
| #ifdef CONFIG_NO_HZ_COMMON | ||||
| #ifdef CONFIG_SMP | ||||
| 	unsigned long		last_load_update_tick; | ||||
| @ -830,11 +864,16 @@ struct rq { | ||||
| 	atomic_t nohz_flags; | ||||
| #endif /* CONFIG_NO_HZ_COMMON */ | ||||
| 
 | ||||
| 	/* capture load from *all* tasks on this CPU: */ | ||||
| 	struct load_weight	load; | ||||
| 	unsigned long		nr_load_updates; | ||||
| 	u64			nr_switches; | ||||
| 
 | ||||
| #ifdef CONFIG_UCLAMP_TASK | ||||
| 	/* Utilization clamp values based on CPU's RUNNABLE tasks */ | ||||
| 	struct uclamp_rq	uclamp[UCLAMP_CNT] ____cacheline_aligned; | ||||
| 	unsigned int		uclamp_flags; | ||||
| #define UCLAMP_FLAG_IDLE 0x01 | ||||
| #endif | ||||
| 
 | ||||
| 	struct cfs_rq		cfs; | ||||
| 	struct rt_rq		rt; | ||||
| 	struct dl_rq		dl; | ||||
| @ -1649,6 +1688,10 @@ extern const u32		sched_prio_to_wmult[40]; | ||||
| struct sched_class { | ||||
| 	const struct sched_class *next; | ||||
| 
 | ||||
| #ifdef CONFIG_UCLAMP_TASK | ||||
| 	int uclamp_enabled; | ||||
| #endif | ||||
| 
 | ||||
| 	void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags); | ||||
| 	void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags); | ||||
| 	void (*yield_task)   (struct rq *rq); | ||||
| @ -2222,6 +2265,48 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) | ||||
| static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} | ||||
| #endif /* CONFIG_CPU_FREQ */ | ||||
| 
 | ||||
| #ifdef CONFIG_UCLAMP_TASK | ||||
| unsigned int uclamp_eff_value(struct task_struct *p, unsigned int clamp_id); | ||||
| 
 | ||||
| static __always_inline | ||||
| unsigned int uclamp_util_with(struct rq *rq, unsigned int util, | ||||
| 			      struct task_struct *p) | ||||
| { | ||||
| 	unsigned int min_util = READ_ONCE(rq->uclamp[UCLAMP_MIN].value); | ||||
| 	unsigned int max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value); | ||||
| 
 | ||||
| 	if (p) { | ||||
| 		min_util = max(min_util, uclamp_eff_value(p, UCLAMP_MIN)); | ||||
| 		max_util = max(max_util, uclamp_eff_value(p, UCLAMP_MAX)); | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Since CPU's {min,max}_util clamps are MAX aggregated considering | ||||
| 	 * RUNNABLE tasks with _different_ clamps, we can end up with an | ||||
| 	 * inversion. Fix it now when the clamps are applied. | ||||
| 	 */ | ||||
| 	if (unlikely(min_util >= max_util)) | ||||
| 		return min_util; | ||||
| 
 | ||||
| 	return clamp(util, min_util, max_util); | ||||
| } | ||||
| 
 | ||||
| static inline unsigned int uclamp_util(struct rq *rq, unsigned int util) | ||||
| { | ||||
| 	return uclamp_util_with(rq, util, NULL); | ||||
| } | ||||
| #else /* CONFIG_UCLAMP_TASK */ | ||||
| static inline unsigned int uclamp_util_with(struct rq *rq, unsigned int util, | ||||
| 					    struct task_struct *p) | ||||
| { | ||||
| 	return util; | ||||
| } | ||||
| static inline unsigned int uclamp_util(struct rq *rq, unsigned int util) | ||||
| { | ||||
| 	return util; | ||||
| } | ||||
| #endif /* CONFIG_UCLAMP_TASK */ | ||||
| 
 | ||||
| #ifdef arch_scale_freq_capacity | ||||
| # ifndef arch_scale_freq_invariant | ||||
| #  define arch_scale_freq_invariant()	true | ||||
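The uclamp_util_with() helper above composes the rq-wide clamps (themselves MAX-aggregated over RUNNABLE tasks) with an optional task's own effective clamps and then restricts the utilization to that range. A standalone sketch with made-up numbers, purely illustrative of that composition:

```c
/* Illustration only: mimic the MAX aggregation of per-task clamps and the
 * final clamping of a raw utilization value (all numbers are invented). */
#include <stdio.h>

static unsigned int clamp_uint(unsigned int v, unsigned int lo, unsigned int hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

int main(void)
{
	/* Two RUNNABLE tasks with different requested clamps. */
	unsigned int task_min[] = { 200, 512 };
	unsigned int task_max[] = { 1024, 768 };
	unsigned int rq_min = 0, rq_max = 0;
	unsigned int util = 300;                /* raw CPU utilization */

	for (int i = 0; i < 2; i++) {
		if (task_min[i] > rq_min)
			rq_min = task_min[i];
		if (task_max[i] > rq_max)
			rq_max = task_max[i];
	}

	/* rq_min = 512, rq_max = 1024: frequency selection sees util = 512. */
	printf("clamped util = %u\n", clamp_uint(util, rq_min, rq_max));
	return 0;
}
```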
| @ -2237,7 +2322,6 @@ static inline unsigned long capacity_orig_of(int cpu) | ||||
| } | ||||
| #endif | ||||
| 
 | ||||
| #ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL | ||||
| /**
 | ||||
|  * enum schedutil_type - CPU utilization type | ||||
|  * @FREQUENCY_UTIL:	Utilization used to select frequency | ||||
| @ -2253,15 +2337,11 @@ enum schedutil_type { | ||||
| 	ENERGY_UTIL, | ||||
| }; | ||||
| 
 | ||||
| unsigned long schedutil_freq_util(int cpu, unsigned long util_cfs, | ||||
| 				  unsigned long max, enum schedutil_type type); | ||||
| #ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL | ||||
| 
 | ||||
| static inline unsigned long schedutil_energy_util(int cpu, unsigned long cfs) | ||||
| { | ||||
| 	unsigned long max = arch_scale_cpu_capacity(NULL, cpu); | ||||
| 
 | ||||
| 	return schedutil_freq_util(cpu, cfs, max, ENERGY_UTIL); | ||||
| } | ||||
| unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs, | ||||
| 				 unsigned long max, enum schedutil_type type, | ||||
| 				 struct task_struct *p); | ||||
| 
 | ||||
| static inline unsigned long cpu_bw_dl(struct rq *rq) | ||||
| { | ||||
| @ -2290,11 +2370,13 @@ static inline unsigned long cpu_util_rt(struct rq *rq) | ||||
| 	return READ_ONCE(rq->avg_rt.util_avg); | ||||
| } | ||||
| #else /* CONFIG_CPU_FREQ_GOV_SCHEDUTIL */ | ||||
| static inline unsigned long schedutil_energy_util(int cpu, unsigned long cfs) | ||||
| static inline unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs, | ||||
| 				 unsigned long max, enum schedutil_type type, | ||||
| 				 struct task_struct *p) | ||||
| { | ||||
| 	return cfs; | ||||
| 	return 0; | ||||
| } | ||||
| #endif | ||||
| #endif /* CONFIG_CPU_FREQ_GOV_SCHEDUTIL */ | ||||
| 
 | ||||
| #ifdef CONFIG_HAVE_SCHED_AVG_IRQ | ||||
| static inline unsigned long cpu_util_irq(struct rq *rq) | ||||
|  | ||||
| @ -1344,11 +1344,6 @@ sd_init(struct sched_domain_topology_level *tl, | ||||
| 		.imbalance_pct		= 125, | ||||
| 
 | ||||
| 		.cache_nice_tries	= 0, | ||||
| 		.busy_idx		= 0, | ||||
| 		.idle_idx		= 0, | ||||
| 		.newidle_idx		= 0, | ||||
| 		.wake_idx		= 0, | ||||
| 		.forkexec_idx		= 0, | ||||
| 
 | ||||
| 		.flags			= 1*SD_LOAD_BALANCE | ||||
| 					| 1*SD_BALANCE_NEWIDLE | ||||
| @ -1400,13 +1395,10 @@ sd_init(struct sched_domain_topology_level *tl, | ||||
| 	} else if (sd->flags & SD_SHARE_PKG_RESOURCES) { | ||||
| 		sd->imbalance_pct = 117; | ||||
| 		sd->cache_nice_tries = 1; | ||||
| 		sd->busy_idx = 2; | ||||
| 
 | ||||
| #ifdef CONFIG_NUMA | ||||
| 	} else if (sd->flags & SD_NUMA) { | ||||
| 		sd->cache_nice_tries = 2; | ||||
| 		sd->busy_idx = 3; | ||||
| 		sd->idle_idx = 2; | ||||
| 
 | ||||
| 		sd->flags &= ~SD_PREFER_SIBLING; | ||||
| 		sd->flags |= SD_SERIALIZE; | ||||
| @ -1419,8 +1411,6 @@ sd_init(struct sched_domain_topology_level *tl, | ||||
| #endif | ||||
| 	} else { | ||||
| 		sd->cache_nice_tries = 1; | ||||
| 		sd->busy_idx = 2; | ||||
| 		sd->idle_idx = 1; | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| @ -1884,10 +1874,10 @@ static struct sched_domain_topology_level | ||||
| 	unsigned long cap; | ||||
| 
 | ||||
| 	/* Is there any asymmetry? */ | ||||
| 	cap = arch_scale_cpu_capacity(NULL, cpumask_first(cpu_map)); | ||||
| 	cap = arch_scale_cpu_capacity(cpumask_first(cpu_map)); | ||||
| 
 | ||||
| 	for_each_cpu(i, cpu_map) { | ||||
| 		if (arch_scale_cpu_capacity(NULL, i) != cap) { | ||||
| 		if (arch_scale_cpu_capacity(i) != cap) { | ||||
| 			asym = true; | ||||
| 			break; | ||||
| 		} | ||||
| @ -1902,7 +1892,7 @@ static struct sched_domain_topology_level | ||||
| 	 * to everyone. | ||||
| 	 */ | ||||
| 	for_each_cpu(i, cpu_map) { | ||||
| 		unsigned long max_capacity = arch_scale_cpu_capacity(NULL, i); | ||||
| 		unsigned long max_capacity = arch_scale_cpu_capacity(i); | ||||
| 		int tl_id = 0; | ||||
| 
 | ||||
| 		for_each_sd_topology(tl) { | ||||
| @ -1912,7 +1902,7 @@ static struct sched_domain_topology_level | ||||
| 			for_each_cpu_and(j, tl->mask(i), cpu_map) { | ||||
| 				unsigned long capacity; | ||||
| 
 | ||||
| 				capacity = arch_scale_cpu_capacity(NULL, j); | ||||
| 				capacity = arch_scale_cpu_capacity(j); | ||||
| 
 | ||||
| 				if (capacity <= max_capacity) | ||||
| 					continue; | ||||
|  | ||||
| @ -118,16 +118,12 @@ static void __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int | ||||
| 	bookmark.func = NULL; | ||||
| 	INIT_LIST_HEAD(&bookmark.entry); | ||||
| 
 | ||||
| 	spin_lock_irqsave(&wq_head->lock, flags); | ||||
| 	nr_exclusive = __wake_up_common(wq_head, mode, nr_exclusive, wake_flags, key, &bookmark); | ||||
| 	spin_unlock_irqrestore(&wq_head->lock, flags); | ||||
| 
 | ||||
| 	while (bookmark.flags & WQ_FLAG_BOOKMARK) { | ||||
| 	do { | ||||
| 		spin_lock_irqsave(&wq_head->lock, flags); | ||||
| 		nr_exclusive = __wake_up_common(wq_head, mode, nr_exclusive, | ||||
| 						wake_flags, key, &bookmark); | ||||
| 		spin_unlock_irqrestore(&wq_head->lock, flags); | ||||
| 	} | ||||
| 	} while (bookmark.flags & WQ_FLAG_BOOKMARK); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  | ||||
| @ -452,6 +452,22 @@ static struct ctl_table kern_table[] = { | ||||
| 		.mode		= 0644, | ||||
| 		.proc_handler	= sched_rr_handler, | ||||
| 	}, | ||||
| #ifdef CONFIG_UCLAMP_TASK | ||||
| 	{ | ||||
| 		.procname	= "sched_util_clamp_min", | ||||
| 		.data		= &sysctl_sched_uclamp_util_min, | ||||
| 		.maxlen		= sizeof(unsigned int), | ||||
| 		.mode		= 0644, | ||||
| 		.proc_handler	= sysctl_sched_uclamp_handler, | ||||
| 	}, | ||||
| 	{ | ||||
| 		.procname	= "sched_util_clamp_max", | ||||
| 		.data		= &sysctl_sched_uclamp_util_max, | ||||
| 		.maxlen		= sizeof(unsigned int), | ||||
| 		.mode		= 0644, | ||||
| 		.proc_handler	= sysctl_sched_uclamp_handler, | ||||
| 	}, | ||||
| #endif | ||||
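These entries surface the system-wide default clamps under /proc/sys/kernel/ via the usual procname mapping. A minimal userspace read of the current values, as an illustration only:

```c
/* Print the system-default utilization clamps exposed by the sysctls above.
 * Paths follow the standard /proc/sys/kernel/<procname> mapping. */
#include <stdio.h>

static void print_sysctl(const char *path)
{
	unsigned int val;
	FILE *f = fopen(path, "r");

	if (f && fscanf(f, "%u", &val) == 1)
		printf("%s = %u\n", path, val);
	else
		printf("%s: not available\n", path);
	if (f)
		fclose(f);
}

int main(void)
{
	print_sysctl("/proc/sys/kernel/sched_util_clamp_min");
	print_sysctl("/proc/sys/kernel/sched_util_clamp_max");
	return 0;
}
```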
| #ifdef CONFIG_SCHED_AUTOGROUP | ||||
| 	{ | ||||
| 		.procname	= "sched_autogroup_enabled", | ||||
|  | ||||
| @ -782,7 +782,6 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu) | ||||
| 	 */ | ||||
| 	if (!ts->tick_stopped) { | ||||
| 		calc_load_nohz_start(); | ||||
| 		cpu_load_update_nohz_start(); | ||||
| 		quiet_vmstat(); | ||||
| 
 | ||||
| 		ts->last_tick = hrtimer_get_expires(&ts->sched_timer); | ||||
| @ -829,7 +828,6 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) | ||||
| { | ||||
| 	/* Update jiffies first */ | ||||
| 	tick_do_update_jiffies64(now); | ||||
| 	cpu_load_update_nohz_stop(); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Clear the timer idle flag, so we avoid IPIs on remote queueing and | ||||
|  | ||||
| @ -277,7 +277,7 @@ static void move_to_next_cpu(void) | ||||
| 	 * of this thread, than stop migrating for the duration | ||||
| 	 * of the current test. | ||||
| 	 */ | ||||
| 	if (!cpumask_equal(current_mask, ¤t->cpus_allowed)) | ||||
| 	if (!cpumask_equal(current_mask, current->cpus_ptr)) | ||||
| 		goto disable; | ||||
| 
 | ||||
| 	get_online_cpus(); | ||||
|  | ||||
| @ -23,7 +23,7 @@ unsigned int check_preemption_disabled(const char *what1, const char *what2) | ||||
| 	 * Kernel threads bound to a single CPU can safely use | ||||
| 	 * smp_processor_id(): | ||||
| 	 */ | ||||
| 	if (cpumask_equal(¤t->cpus_allowed, cpumask_of(this_cpu))) | ||||
| 	if (cpumask_equal(current->cpus_ptr, cpumask_of(this_cpu))) | ||||
| 		goto out; | ||||
| 
 | ||||
| 	/*
 | ||||
|  | ||||
| @ -34,7 +34,7 @@ static void simple_thread_func(int cnt) | ||||
| 
 | ||||
| 	/* Silly tracepoints */ | ||||
| 	trace_foo_bar("hello", cnt, array, random_strings[len], | ||||
| 		      ¤t->cpus_allowed); | ||||
| 		      current->cpus_ptr); | ||||
| 
 | ||||
| 	trace_foo_with_template_simple("HELLO", cnt); | ||||
| 
 | ||||
|  | ||||