Merge branch 'x86-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 AVX512 status update from Ingo Molnar: "This adds a new ABI that the main scheduler probably doesn't want to deal with but HPC job schedulers might want to use: the AVX512_elapsed_ms field in the new /proc/<pid>/arch_status task status file, which allows the user-space job scheduler to cluster such tasks, to avoid turbo frequency drops" * 'x86-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: Documentation/filesystems/proc.txt: Add arch_status file x86/process: Add AVX-512 usage elapsed time to /proc/pid/arch_status proc: Add /proc/<pid>/arch_status
This commit is contained in:
commit
3431a940bb
@ -45,6 +45,7 @@ Table of Contents
|
||||
3.9 /proc/<pid>/map_files - Information about memory mapped files
|
||||
3.10 /proc/<pid>/timerslack_ns - Task timerslack value
|
||||
3.11 /proc/<pid>/patch_state - Livepatch patch operation state
|
||||
3.12 /proc/<pid>/arch_status - Task architecture specific status
|
||||
|
||||
4 Configuring procfs
|
||||
4.1 Mount options
|
||||
@ -1948,6 +1949,45 @@ patched. If the patch is being enabled, then the task has already been
|
||||
patched. If the patch is being disabled, then the task hasn't been
|
||||
unpatched yet.
|
||||
|
||||
3.12 /proc/<pid>/arch_status - task architecture specific status
|
||||
-------------------------------------------------------------------
|
||||
When CONFIG_PROC_PID_ARCH_STATUS is enabled, this file displays the
|
||||
architecture specific status of the task.
|
||||
|
||||
Example
|
||||
-------
|
||||
$ cat /proc/6753/arch_status
|
||||
AVX512_elapsed_ms: 8
|
||||
|
||||
Description
|
||||
-----------
|
||||
|
||||
x86 specific entries:
|
||||
---------------------
|
||||
AVX512_elapsed_ms:
|
||||
------------------
|
||||
If AVX512 is supported on the machine, this entry shows the milliseconds
|
||||
elapsed since the last time AVX512 usage was recorded. The recording
|
||||
happens on a best effort basis when a task is scheduled out. This means
|
||||
that the value depends on two factors:
|
||||
|
||||
1) The time which the task spent on the CPU without being scheduled
|
||||
out. With CPU isolation and a single runnable task this can take
|
||||
several seconds.
|
||||
|
||||
2) The time since the task was scheduled out last. Depending on the
|
||||
reason for being scheduled out (time slice exhausted, syscall ...)
|
||||
this can be an arbitrarily long time.
|
||||
|
||||
As a consequence the value cannot be considered precise and authoritative
|
||||
information. The application which uses this information has to be aware
|
||||
of the overall scenario on the system in order to determine whether a
|
||||
task is a real AVX512 user or not. Precise information can be obtained
|
||||
with performance counters.
|
||||
|
||||
A special value of '-1' indicates that no AVX512 usage was recorded, thus
|
||||
the task is unlikely to be an AVX512 user. Depending on the workload and
|
||||
the scheduling scenario, it could also be a false negative (see above).
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
Configuring procfs
|
||||
|
@ -220,6 +220,7 @@ config X86
|
||||
select USER_STACKTRACE_SUPPORT
|
||||
select VIRT_TO_BUS
|
||||
select X86_FEATURE_NAMES if PROC_FS
|
||||
select PROC_PID_ARCH_STATUS if PROC_FS
|
||||
|
||||
config INSTRUCTION_DECODER
|
||||
def_bool y
|
||||
|
@ -8,6 +8,8 @@
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/mman.h>
|
||||
#include <linux/pkeys.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/proc_fs.h>
|
||||
|
||||
#include <asm/fpu/api.h>
|
||||
#include <asm/fpu/internal.h>
|
||||
@ -1231,3 +1233,48 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROC_PID_ARCH_STATUS
|
||||
/*
|
||||
* Report the amount of time elapsed in millisecond since last AVX512
|
||||
* use in the task.
|
||||
*/
|
||||
static void avx512_status(struct seq_file *m, struct task_struct *task)
|
||||
{
|
||||
unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp);
|
||||
long delta;
|
||||
|
||||
if (!timestamp) {
|
||||
/*
|
||||
* Report -1 if no AVX512 usage
|
||||
*/
|
||||
delta = -1;
|
||||
} else {
|
||||
delta = (long)(jiffies - timestamp);
|
||||
/*
|
||||
* Cap to LONG_MAX if time difference > LONG_MAX
|
||||
*/
|
||||
if (delta < 0)
|
||||
delta = LONG_MAX;
|
||||
delta = jiffies_to_msecs(delta);
|
||||
}
|
||||
|
||||
seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta);
|
||||
seq_putc(m, '\n');
|
||||
}
|
||||
|
||||
/*
|
||||
* Report architecture specific information
|
||||
*/
|
||||
int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
|
||||
struct pid *pid, struct task_struct *task)
|
||||
{
|
||||
/*
|
||||
* Report AVX512 state if the processor and build option supported.
|
||||
*/
|
||||
if (cpu_feature_enabled(X86_FEATURE_AVX512F))
|
||||
avx512_status(m, task);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif /* CONFIG_PROC_PID_ARCH_STATUS */
|
||||
|
@ -98,3 +98,7 @@ config PROC_CHILDREN
|
||||
|
||||
Say Y if you are running any user-space software which benefits from
|
||||
this interface. For example, rkt is such a piece of software.
|
||||
|
||||
config PROC_PID_ARCH_STATUS
|
||||
def_bool n
|
||||
depends on PROC_FS
|
||||
|
@ -3061,6 +3061,9 @@ static const struct pid_entry tgid_base_stuff[] = {
|
||||
#ifdef CONFIG_STACKLEAK_METRICS
|
||||
ONE("stack_depth", S_IRUGO, proc_stack_depth),
|
||||
#endif
|
||||
#ifdef CONFIG_PROC_PID_ARCH_STATUS
|
||||
ONE("arch_status", S_IRUGO, proc_pid_arch_status),
|
||||
#endif
|
||||
};
|
||||
|
||||
static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
|
||||
@ -3448,6 +3451,9 @@ static const struct pid_entry tid_base_stuff[] = {
|
||||
#ifdef CONFIG_LIVEPATCH
|
||||
ONE("patch_state", S_IRUSR, proc_pid_patch_state),
|
||||
#endif
|
||||
#ifdef CONFIG_PROC_PID_ARCH_STATUS
|
||||
ONE("arch_status", S_IRUGO, proc_pid_arch_status),
|
||||
#endif
|
||||
};
|
||||
|
||||
static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
|
||||
|
@ -75,6 +75,15 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo
|
||||
void *data);
|
||||
extern struct pid *tgid_pidfd_to_pid(const struct file *file);
|
||||
|
||||
#ifdef CONFIG_PROC_PID_ARCH_STATUS
|
||||
/*
|
||||
* The architecture which selects CONFIG_PROC_PID_ARCH_STATUS must
|
||||
* provide a definition of proc_pid_arch_status().
|
||||
*/
|
||||
int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
|
||||
struct pid *pid, struct task_struct *task);
|
||||
#endif /* CONFIG_PROC_PID_ARCH_STATUS */
|
||||
|
||||
#else /* CONFIG_PROC_FS */
|
||||
|
||||
static inline void proc_root_init(void)
|
||||
|
Loading…
Reference in New Issue
Block a user