linux/arch/powerpc/include/asm/paca.h
Paul Mackerras 7672691a08 powerpc/powernv: Provide a way to force a core into SMT4 mode
POWER9 processors up to and including "Nimbus" v2.2 have hardware
bugs relating to transactional memory and thread reconfiguration.
One of these bugs has a workaround which is to get the core into
SMT4 state temporarily.  This workaround is only needed when
running bare-metal.

This patch provides a function which gets the core into SMT4 mode
by preventing threads from going to a stop state, and waking up
those which are already in a stop state.  Once at least 3 threads
are not in a stop state, the core will be in SMT4 and we can
continue.

To do this, we add a "dont_stop" flag to the paca to tell the
thread not to go into a stop state.  If this flag is set,
power9_idle_stop() just returns immediately with a return value
of 0.  The pnv_power9_force_smt4_catch() function does the following:

1. Set the dont_stop flag for each thread in the core, except
   ourselves (in fact we use an atomic_inc() in case more than
   one thread is calling this function concurrently).
2. See how many threads are awake, indicated by their
   requested_psscr field in the paca being 0.  If this is at
   least 3, skip to step 5.
3. Send a doorbell interrupt to each thread that was seen as
   being in a stop state in step 2.
4. Until at least 3 threads are awake, scan the threads to which
   we sent a doorbell interrupt and check if they are awake now.

This relies on the following properties:

- Once dont_stop is non-zero, requested_psccr can't go from zero to
  non-zero, except transiently (and without the thread doing stop).
- requested_psscr being zero guarantees that the thread isn't in
  a state-losing stop state where thread reconfiguration could occur.
- Doing stop with a PSSCR value of 0 won't be a state-losing stop
  and thus won't allow thread reconfiguration.
- Once threads_per_core/2 + 1 (i.e. 3) threads are awake, the core
  must be in SMT4 mode, since SMT modes are powers of 2.

This does add a sync to power9_idle_stop(), which is necessary to
provide the correct ordering between setting requested_psscr and
checking dont_stop.  The overhead of the sync should be unnoticeable
compared to the latency of going into and out of a stop state.

Because some objected to incurring this extra latency on systems where
the XER[SO] bug is not relevant, I have put the test in
power9_idle_stop inside a feature section.  This means that
pnv_power9_force_smt4_catch() WILL NOT WORK correctly on systems
without the CPU_FTR_P9_TM_XER_SO_BUG feature bit set, and will
probably hang the system.

In order to cater for uses where the caller has an operation that
has to be done while the core is in SMT4, the core continues to be
kept in SMT4 after pnv_power9_force_smt4_catch() function returns,
until the pnv_power9_force_smt4_release() function is called.
It undoes the effect of step 1 above and allows the other threads
to go into a stop state.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-03-24 00:39:11 +11:00

265 lines
8.0 KiB
C

/*
* This control block defines the PACA which defines the processor
* specific data for each logical processor on the system.
* There are some pointers defined that are utilized by PLIC.
*
* C 2001 PPC 64 Team, IBM Corp
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#ifndef _ASM_POWERPC_PACA_H
#define _ASM_POWERPC_PACA_H
#ifdef __KERNEL__
#ifdef CONFIG_PPC64
#include <linux/string.h>
#include <asm/types.h>
#include <asm/lppaca.h>
#include <asm/mmu.h>
#include <asm/page.h>
#ifdef CONFIG_PPC_BOOK3E
#include <asm/exception-64e.h>
#else
#include <asm/exception-64s.h>
#endif
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
#include <asm/kvm_book3s_asm.h>
#endif
#include <asm/accounting.h>
#include <asm/hmi.h>
#include <asm/cpuidle.h>
#include <asm/atomic.h>
register struct paca_struct *local_paca asm("r13");
#if defined(CONFIG_DEBUG_PREEMPT) && defined(CONFIG_SMP)
extern unsigned int debug_smp_processor_id(void); /* from linux/smp.h */
/*
* Add standard checks that preemption cannot occur when using get_paca():
* otherwise the paca_struct it points to may be the wrong one just after.
*/
#define get_paca() ((void) debug_smp_processor_id(), local_paca)
#else
#define get_paca() local_paca
#endif
#define get_lppaca() (get_paca()->lppaca_ptr)
#define get_slb_shadow() (get_paca()->slb_shadow_ptr)
struct task_struct;
/*
* Defines the layout of the paca.
*
* This structure is not directly accessed by firmware or the service
* processor.
*/
struct paca_struct {
#ifdef CONFIG_PPC_BOOK3S
/*
* Because hw_cpu_id, unlike other paca fields, is accessed
* routinely from other CPUs (from the IRQ code), we stick to
* read-only (after boot) fields in the first cacheline to
* avoid cacheline bouncing.
*/
struct lppaca *lppaca_ptr; /* Pointer to LpPaca for PLIC */
#endif /* CONFIG_PPC_BOOK3S */
/*
* MAGIC: the spinlock functions in arch/powerpc/lib/locks.c
* load lock_token and paca_index with a single lwz
* instruction. They must travel together and be properly
* aligned.
*/
#ifdef __BIG_ENDIAN__
u16 lock_token; /* Constant 0x8000, used in locks */
u16 paca_index; /* Logical processor number */
#else
u16 paca_index; /* Logical processor number */
u16 lock_token; /* Constant 0x8000, used in locks */
#endif
u64 kernel_toc; /* Kernel TOC address */
u64 kernelbase; /* Base address of kernel */
u64 kernel_msr; /* MSR while running in kernel */
void *emergency_sp; /* pointer to emergency stack */
u64 data_offset; /* per cpu data offset */
s16 hw_cpu_id; /* Physical processor number */
u8 cpu_start; /* At startup, processor spins until */
/* this becomes non-zero. */
u8 kexec_state; /* set when kexec down has irqs off */
#ifdef CONFIG_PPC_BOOK3S_64
struct slb_shadow *slb_shadow_ptr;
struct dtl_entry *dispatch_log;
struct dtl_entry *dispatch_log_end;
#endif
u64 dscr_default; /* per-CPU default DSCR */
#ifdef CONFIG_PPC_BOOK3S_64
/*
* Now, starting in cacheline 2, the exception save areas
*/
/* used for most interrupts/exceptions */
u64 exgen[EX_SIZE] __attribute__((aligned(0x80)));
u64 exslb[EX_SIZE]; /* used for SLB/segment table misses
* on the linear mapping */
/* SLB related definitions */
u16 vmalloc_sllp;
u16 slb_cache_ptr;
u32 slb_cache[SLB_CACHE_ENTRIES];
#endif /* CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_PPC_BOOK3E
u64 exgen[8] __aligned(0x40);
/* Keep pgd in the same cacheline as the start of extlb */
pgd_t *pgd __aligned(0x40); /* Current PGD */
pgd_t *kernel_pgd; /* Kernel PGD */
/* Shared by all threads of a core -- points to tcd of first thread */
struct tlb_core_data *tcd_ptr;
/*
* We can have up to 3 levels of reentrancy in the TLB miss handler,
* in each of four exception levels (normal, crit, mcheck, debug).
*/
u64 extlb[12][EX_TLB_SIZE / sizeof(u64)];
u64 exmc[8]; /* used for machine checks */
u64 excrit[8]; /* used for crit interrupts */
u64 exdbg[8]; /* used for debug interrupts */
/* Kernel stack pointers for use by special exceptions */
void *mc_kstack;
void *crit_kstack;
void *dbg_kstack;
struct tlb_core_data tcd;
#endif /* CONFIG_PPC_BOOK3E */
#ifdef CONFIG_PPC_BOOK3S
mm_context_id_t mm_ctx_id;
#ifdef CONFIG_PPC_MM_SLICES
u64 mm_ctx_low_slices_psize;
unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE];
unsigned long mm_ctx_slb_addr_limit;
#else
u16 mm_ctx_user_psize;
u16 mm_ctx_sllp;
#endif
#endif
/*
* then miscellaneous read-write fields
*/
struct task_struct *__current; /* Pointer to current */
u64 kstack; /* Saved Kernel stack addr */
u64 stab_rr; /* stab/slb round-robin counter */
u64 saved_r1; /* r1 save for RTAS calls or PM */
u64 saved_msr; /* MSR saved here by enter_rtas */
u16 trap_save; /* Used when bad stack is encountered */
u8 irq_soft_mask; /* mask for irq soft masking */
u8 irq_happened; /* irq happened while soft-disabled */
u8 io_sync; /* writel() needs spin_unlock sync */
u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */
u8 nap_state_lost; /* NV GPR values lost in power7_idle */
u64 sprg_vdso; /* Saved user-visible sprg */
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
u64 tm_scratch; /* TM scratch area for reclaim */
#endif
#ifdef CONFIG_PPC_POWERNV
/* Per-core mask tracking idle threads and a lock bit-[L][TTTTTTTT] */
u32 *core_idle_state_ptr;
u8 thread_idle_state; /* PNV_THREAD_RUNNING/NAP/SLEEP */
/* Mask to indicate thread id in core */
u8 thread_mask;
/* Mask to denote subcore sibling threads */
u8 subcore_sibling_mask;
/* Flag to request this thread not to stop */
atomic_t dont_stop;
/*
* Pointer to an array which contains pointer
* to the sibling threads' paca.
*/
struct paca_struct **thread_sibling_pacas;
/* The PSSCR value that the kernel requested before going to stop */
u64 requested_psscr;
/*
* Save area for additional SPRs that need to be
* saved/restored during cpuidle stop.
*/
struct stop_sprs stop_sprs;
#endif
#ifdef CONFIG_PPC_BOOK3S_64
/* Non-maskable exceptions that are not performance critical */
u64 exnmi[EX_SIZE]; /* used for system reset (nmi) */
u64 exmc[EX_SIZE]; /* used for machine checks */
#endif
#ifdef CONFIG_PPC_BOOK3S_64
/* Exclusive stacks for system reset and machine check exception. */
void *nmi_emergency_sp;
void *mc_emergency_sp;
u16 in_nmi; /* In nmi handler */
/*
* Flag to check whether we are in machine check early handler
* and already using emergency stack.
*/
u16 in_mce;
u8 hmi_event_available; /* HMI event is available */
u8 hmi_p9_special_emu; /* HMI P9 special emulation */
#endif
/* Stuff for accurate time accounting */
struct cpu_accounting_data accounting;
u64 dtl_ridx; /* read index in dispatch log */
struct dtl_entry *dtl_curr; /* pointer corresponding to dtl_ridx */
#ifdef CONFIG_KVM_BOOK3S_HANDLER
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
/* We use this to store guest state in */
struct kvmppc_book3s_shadow_vcpu shadow_vcpu;
#endif
struct kvmppc_host_state kvm_hstate;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
* Bitmap for sibling subcore status. See kvm/book3s_hv_ras.c for
* more details
*/
struct sibling_subcore_state *sibling_subcore_state;
#endif
#endif
#ifdef CONFIG_PPC_BOOK3S_64
/*
* rfi fallback flush must be in its own cacheline to prevent
* other paca data leaking into the L1d
*/
u64 exrfi[EX_SIZE] __aligned(0x80);
void *rfi_flush_fallback_area;
u64 l1d_flush_size;
#endif
};
extern void copy_mm_to_paca(struct mm_struct *mm);
extern struct paca_struct *paca;
extern void initialise_paca(struct paca_struct *new_paca, int cpu);
extern void setup_paca(struct paca_struct *new_paca);
extern void allocate_pacas(void);
extern void free_unused_pacas(void);
#else /* CONFIG_PPC64 */
static inline void allocate_pacas(void) { };
static inline void free_unused_pacas(void) { };
#endif /* CONFIG_PPC64 */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_PACA_H */