Merge branch 'x86-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 AVX512 status update from Ingo Molnar: "This adds a new ABI that the main scheduler probably doesn't want to deal with but HPC job schedulers might want to use: the AVX512_elapsed_ms field in the new /proc/<pid>/arch_status task status file, which allows the user-space job scheduler to cluster such tasks, to avoid turbo frequency drops" * 'x86-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: Documentation/filesystems/proc.txt: Add arch_status file x86/process: Add AVX-512 usage elapsed time to /proc/pid/arch_status proc: Add /proc/<pid>/arch_status
This commit is contained in:
commit
3431a940bb
@ -45,6 +45,7 @@ Table of Contents
|
|||||||
3.9 /proc/<pid>/map_files - Information about memory mapped files
|
3.9 /proc/<pid>/map_files - Information about memory mapped files
|
||||||
3.10 /proc/<pid>/timerslack_ns - Task timerslack value
|
3.10 /proc/<pid>/timerslack_ns - Task timerslack value
|
||||||
3.11 /proc/<pid>/patch_state - Livepatch patch operation state
|
3.11 /proc/<pid>/patch_state - Livepatch patch operation state
|
||||||
|
3.12 /proc/<pid>/arch_status - Task architecture specific information
|
||||||
|
|
||||||
4 Configuring procfs
|
4 Configuring procfs
|
||||||
4.1 Mount options
|
4.1 Mount options
|
||||||
@ -1948,6 +1949,45 @@ patched. If the patch is being enabled, then the task has already been
|
|||||||
patched. If the patch is being disabled, then the task hasn't been
|
patched. If the patch is being disabled, then the task hasn't been
|
||||||
unpatched yet.
|
unpatched yet.
|
||||||
|
|
||||||
|
3.12 /proc/<pid>/arch_status - task architecture specific status
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
When CONFIG_PROC_PID_ARCH_STATUS is enabled, this file displays the
|
||||||
|
architecture specific status of the task.
|
||||||
|
|
||||||
|
Example
|
||||||
|
-------
|
||||||
|
$ cat /proc/6753/arch_status
|
||||||
|
AVX512_elapsed_ms: 8
|
||||||
|
|
||||||
|
Description
|
||||||
|
-----------
|
||||||
|
|
||||||
|
x86 specific entries:
|
||||||
|
---------------------
|
||||||
|
AVX512_elapsed_ms:
|
||||||
|
------------------
|
||||||
|
If AVX512 is supported on the machine, this entry shows the milliseconds
|
||||||
|
elapsed since the last time AVX512 usage was recorded. The recording
|
||||||
|
happens on a best effort basis when a task is scheduled out. This means
|
||||||
|
that the value depends on two factors:
|
||||||
|
|
||||||
|
1) The time which the task spent on the CPU without being scheduled
|
||||||
|
out. With CPU isolation and a single runnable task this can take
|
||||||
|
several seconds.
|
||||||
|
|
||||||
|
2) The time since the task was scheduled out last. Depending on the
|
||||||
|
reason for being scheduled out (time slice exhausted, syscall ...)
|
||||||
|
this can be an arbitrarily long time.
|
||||||
|
|
||||||
|
As a consequence the value cannot be considered precise and authoritative
|
||||||
|
information. The application which uses this information has to be aware
|
||||||
|
of the overall scenario on the system in order to determine whether a
|
||||||
|
task is a real AVX512 user or not. Precise information can be obtained
|
||||||
|
with performance counters.
|
||||||
|
|
||||||
|
A special value of '-1' indicates that no AVX512 usage was recorded, thus
|
||||||
|
the task is unlikely to be an AVX512 user. However, depending on the workload
|
||||||
|
and the scheduling scenario, it could also be a false negative as mentioned above.
|
||||||
|
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
Configuring procfs
|
Configuring procfs
|
||||||
|
@ -220,6 +220,7 @@ config X86
|
|||||||
select USER_STACKTRACE_SUPPORT
|
select USER_STACKTRACE_SUPPORT
|
||||||
select VIRT_TO_BUS
|
select VIRT_TO_BUS
|
||||||
select X86_FEATURE_NAMES if PROC_FS
|
select X86_FEATURE_NAMES if PROC_FS
|
||||||
|
select PROC_PID_ARCH_STATUS if PROC_FS
|
||||||
|
|
||||||
config INSTRUCTION_DECODER
|
config INSTRUCTION_DECODER
|
||||||
def_bool y
|
def_bool y
|
||||||
|
@ -8,6 +8,8 @@
|
|||||||
#include <linux/cpu.h>
|
#include <linux/cpu.h>
|
||||||
#include <linux/mman.h>
|
#include <linux/mman.h>
|
||||||
#include <linux/pkeys.h>
|
#include <linux/pkeys.h>
|
||||||
|
#include <linux/seq_file.h>
|
||||||
|
#include <linux/proc_fs.h>
|
||||||
|
|
||||||
#include <asm/fpu/api.h>
|
#include <asm/fpu/api.h>
|
||||||
#include <asm/fpu/internal.h>
|
#include <asm/fpu/internal.h>
|
||||||
@ -1231,3 +1233,48 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
|
|||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_PROC_PID_ARCH_STATUS
|
||||||
|
/*
|
||||||
|
* Report the amount of time elapsed in millisecond since last AVX512
|
||||||
|
* use in the task.
|
||||||
|
*/
|
||||||
|
static void avx512_status(struct seq_file *m, struct task_struct *task)
|
||||||
|
{
|
||||||
|
unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp);
|
||||||
|
long delta;
|
||||||
|
|
||||||
|
if (!timestamp) {
|
||||||
|
/*
|
||||||
|
* Report -1 if no AVX512 usage
|
||||||
|
*/
|
||||||
|
delta = -1;
|
||||||
|
} else {
|
||||||
|
delta = (long)(jiffies - timestamp);
|
||||||
|
/*
|
||||||
|
* Cap to LONG_MAX if time difference > LONG_MAX
|
||||||
|
*/
|
||||||
|
if (delta < 0)
|
||||||
|
delta = LONG_MAX;
|
||||||
|
delta = jiffies_to_msecs(delta);
|
||||||
|
}
|
||||||
|
|
||||||
|
seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta);
|
||||||
|
seq_putc(m, '\n');
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Report architecture specific information
|
||||||
|
*/
|
||||||
|
int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
|
||||||
|
struct pid *pid, struct task_struct *task)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Report AVX512 state if the processor and build option supported.
|
||||||
|
*/
|
||||||
|
if (cpu_feature_enabled(X86_FEATURE_AVX512F))
|
||||||
|
avx512_status(m, task);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif /* CONFIG_PROC_PID_ARCH_STATUS */
|
||||||
|
@ -98,3 +98,7 @@ config PROC_CHILDREN
|
|||||||
|
|
||||||
Say Y if you are running any user-space software which takes benefit from
|
Say Y if you are running any user-space software which takes benefit from
|
||||||
this interface. For example, rkt is such a piece of software.
|
this interface. For example, rkt is such a piece of software.
|
||||||
|
|
||||||
|
config PROC_PID_ARCH_STATUS
|
||||||
|
def_bool n
|
||||||
|
depends on PROC_FS
|
||||||
|
@ -3061,6 +3061,9 @@ static const struct pid_entry tgid_base_stuff[] = {
|
|||||||
#ifdef CONFIG_STACKLEAK_METRICS
|
#ifdef CONFIG_STACKLEAK_METRICS
|
||||||
ONE("stack_depth", S_IRUGO, proc_stack_depth),
|
ONE("stack_depth", S_IRUGO, proc_stack_depth),
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef CONFIG_PROC_PID_ARCH_STATUS
|
||||||
|
ONE("arch_status", S_IRUGO, proc_pid_arch_status),
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
|
static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
|
||||||
@ -3448,6 +3451,9 @@ static const struct pid_entry tid_base_stuff[] = {
|
|||||||
#ifdef CONFIG_LIVEPATCH
|
#ifdef CONFIG_LIVEPATCH
|
||||||
ONE("patch_state", S_IRUSR, proc_pid_patch_state),
|
ONE("patch_state", S_IRUSR, proc_pid_patch_state),
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef CONFIG_PROC_PID_ARCH_STATUS
|
||||||
|
ONE("arch_status", S_IRUGO, proc_pid_arch_status),
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
|
static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
|
||||||
|
@ -75,6 +75,15 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo
|
|||||||
void *data);
|
void *data);
|
||||||
extern struct pid *tgid_pidfd_to_pid(const struct file *file);
|
extern struct pid *tgid_pidfd_to_pid(const struct file *file);
|
||||||
|
|
||||||
|
#ifdef CONFIG_PROC_PID_ARCH_STATUS
|
||||||
|
/*
|
||||||
|
* The architecture which selects CONFIG_PROC_PID_ARCH_STATUS must
|
||||||
|
* provide a definition of proc_pid_arch_status().
|
||||||
|
*/
|
||||||
|
int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
|
||||||
|
struct pid *pid, struct task_struct *task);
|
||||||
|
#endif /* CONFIG_PROC_PID_ARCH_STATUS */
|
||||||
|
|
||||||
#else /* CONFIG_PROC_FS */
|
#else /* CONFIG_PROC_FS */
|
||||||
|
|
||||||
static inline void proc_root_init(void)
|
static inline void proc_root_init(void)
|
||||||
|
Loading…
Reference in New Issue
Block a user