Reimplement Book3S idle code in C, moving POWER7/8/9 implementation specific HV idle code to the powernv platform code. Book3S assembly stubs are kept in common code and used only to save the stack frame and non-volatile GPRs before executing architected idle instructions, and restoring the stack and reloading GPRs then returning to C after waking from idle. The complex logic dealing with threads and subcores, locking, SPRs, HMIs, timebase resync, etc., is all done in C which makes it more maintainable. This is not a strict translation to C code, there are some significant differences: - Idle wakeup no longer uses the ->cpu_restore call to reinit SPRs, but saves and restores them itself. - The optimisation where EC=ESL=0 idle modes did not have to save GPRs or change MSR is restored, because it's now simple to do. ESL=1 sleeps that do not lose GPRs can use this optimization too. - KVM secondary entry and cede is now more of a call/return style rather than branchy. nap_state_lost is not required because KVM always returns via NVGPR restoring path. - KVM secondary wakeup from offline sequence is moved entirely into the offline wakeup, which avoids a hwsync in the normal idle wakeup path. Performance measured with context switch ping-pong on different threads or cores, is possibly improved a small amount, 1-3% depending on stop state and core vs thread test for shallow states. Deep states it's in the noise compared with other latencies. KVM improvements: - Idle sleepers now always return to caller rather than branch out to KVM first. - This allows optimisations like very fast return to caller when no state has been lost. - KVM no longer requires nap_state_lost because it controls NVGPR save/restore itself on the way in and out. - The heavy idle wakeup KVM request check can be moved out of the normal host idle code and into the not-performance-critical offline code. - KVM nap code now returns from where it is called, which makes the flow a bit easier to follow. 
Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com> Signed-off-by: Nicholas Piggin <npiggin@gmail.com> [mpe: Squash the KVM changes in] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
		
			
				
	
	
		
			106 lines
		
	
	
		
			3.3 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			106 lines
		
	
	
		
			3.3 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /* SPDX-License-Identifier: GPL-2.0 */
 | |
| #ifndef _ASM_POWERPC_CPUIDLE_H
 | |
| #define _ASM_POWERPC_CPUIDLE_H
 | |
| 
 | |
| #ifdef CONFIG_PPC_POWERNV
 | |
| /*
 | |
|  * Thread state used in powernv idle state management.
 | |
|  *
 | |
|  * Four distinct values: one for a running thread, and one for each of the
 | |
|  * nap, sleep and winkle idle states named by the constants below.
 | |
|  */
 | |
| #define PNV_THREAD_RUNNING              0
 | |
| #define PNV_THREAD_NAP                  1
 | |
| #define PNV_THREAD_SLEEP                2
 | |
| #define PNV_THREAD_WINKLE               3
 | |
| 
 | |
| /*
 | |
|  * Core state used in powernv idle for POWER8.
 | |
|  *
 | |
|  * The lock bit synchronizes updates to the state, as well as parts of the
 | |
|  * sleep/wake code (see kernel/idle_book3s.S).
 | |
|  *
 | |
|  * Bottom 8 bits track the idle state of each thread. Bit is cleared before
 | |
|  * the thread executes an idle instruction (nap/sleep/winkle).
 | |
|  *
 | |
|  * Then there is winkle tracking. A core does not lose complete state
 | |
|  * until every thread is in winkle. So the winkle count field counts the
 | |
|  * number of threads in winkle (small window of false positives is okay
 | |
|  * around the sleep/wake, so long as there are no false negatives).
 | |
|  *
 | |
|  * When the winkle count reaches 8 (the COUNT_ALL_BIT becomes set), then
 | |
|  * the THREAD_WINKLE_BITS are set, which indicate which threads have not
 | |
|  * yet woken from the winkle state.
 | |
|  */
 | |
| /* Lock bit of the core idle state: bit number, and the matching mask. */
 | |
| #define NR_PNV_CORE_IDLE_LOCK_BIT		28
 | |
| #define PNV_CORE_IDLE_LOCK_BIT			(1ULL << NR_PNV_CORE_IDLE_LOCK_BIT)
 | |
| 
 | |
| /*
 | |
|  * Winkle count field, bits 16-19 (PNV_CORE_IDLE_WINKLE_COUNT_BITS).
 | |
|  * PNV_CORE_IDLE_WINKLE_COUNT is the value of a count of one, i.e.
 | |
|  * 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT.
 | |
|  */
 | |
| #define PNV_CORE_IDLE_WINKLE_COUNT_SHIFT	16
 | |
| #define PNV_CORE_IDLE_WINKLE_COUNT		0x00010000
 | |
| #define PNV_CORE_IDLE_WINKLE_COUNT_BITS		0x000F0000
 | |
| /* Thread winkle bits, bits 8-15: shift amount and mask. */
 | |
| #define PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT	8
 | |
| #define PNV_CORE_IDLE_THREAD_WINKLE_BITS	0x0000FF00
 | |
| 
 | |
| /* Mask of the bottom 8 bits, which track the idle state of each thread. */
 | |
| #define PNV_CORE_IDLE_THREAD_BITS       	0x000000FF
 | |
| 
 | |
| /*
 | |
|  * ============================ NOTE =================================
 | |
|  * The older firmware populates only the RL field in the psscr_val and
 | |
|  * sets the psscr_mask to 0xf. On such a firmware, the kernel sets the
 | |
|  * remaining PSSCR fields to default values as follows:
 | |
|  *
 | |
|  * - ESL and EC bits are set to 1. So wakeup from any stop state will be
 | |
|  *   at vector 0x100.
 | |
|  *
 | |
|  * - MTL and PSLL are set to the maximum allowed value as per the ISA,
 | |
|  *    i.e. 15.
 | |
|  *
 | |
|  * - The Transition Rate, TR is set to the Maximum value 3.
 | |
|  */
 | |
| /*
 | |
|  * Default values for the PSSCR fields other than RL: ESL and EC set, and
 | |
|  * every bit of the PSLL, TR and MTL fields set (see the NOTE above).
 | |
|  */
 | |
| #define PSSCR_HV_DEFAULT_VAL    (PSSCR_ESL | PSSCR_EC |		    \
 | |
| 				PSSCR_PSLL_MASK | PSSCR_TR_MASK |   \
 | |
| 				PSSCR_MTL_MASK)
 | |
| 
 | |
| /* Mask covering the fields defaulted above plus the RL field. */
 | |
| #define PSSCR_HV_DEFAULT_MASK   (PSSCR_ESL | PSSCR_EC |		    \
 | |
| 				PSSCR_PSLL_MASK | PSSCR_TR_MASK |   \
 | |
| 				PSSCR_MTL_MASK | PSSCR_RL_MASK)
 | |
| /* Shift amounts used by GET_PSSCR_EC() and GET_PSSCR_ESL() below. */
 | |
| #define PSSCR_EC_SHIFT    20
 | |
| #define PSSCR_ESL_SHIFT   21
 | |
| /*
 | |
|  * Extract the EC / ESL field of PSSCR value x: mask with PSSCR_EC / PSSCR_ESL
 | |
|  * and shift right by the matching _SHIFT.
 | |
|  * NOTE(review): the result is 0 or 1 only if PSSCR_EC and PSSCR_ESL (defined
 | |
|  * elsewhere) are the single bits at those shift positions - confirm.
 | |
|  */
 | |
| #define GET_PSSCR_EC(x)   (((x) & PSSCR_EC) >> PSSCR_EC_SHIFT)
 | |
| #define GET_PSSCR_ESL(x)  (((x) & PSSCR_ESL) >> PSSCR_ESL_SHIFT)
 | |
| /* Extract the RL field of PSSCR value x (masked only, not shifted). */
 | |
| #define GET_PSSCR_RL(x)   ((x) & PSSCR_RL_MASK)
 | |
| 
 | |
| /*
 | |
|  * Error codes understood by report_invalid_psscr_val().
 | |
|  * NOTE(review): presumably returned by validate_psscr_val_mask() - confirm
 | |
|  * against its definition.
 | |
|  */
 | |
| #define ERR_EC_ESL_MISMATCH		-1
 | |
| #define ERR_DEEP_STATE_ESL_MISMATCH	-2
 | |
| 
 | |
| #ifndef __ASSEMBLY__
 | |
| 
 | |
| /* Size in bytes of the name buffer in struct pnv_idle_states_t. */
 | |
| #define PNV_IDLE_NAME_LEN    16
 | |
| /*
 | |
|  * Description of one powernv idle state.
 | |
|  *
 | |
|  * NOTE(review): the per-field notes below are inferred from the field names
 | |
|  * and the PSSCR NOTE earlier in this header; the code that fills these in is
 | |
|  * not visible here - confirm against it.
 | |
|  */
 | |
| struct pnv_idle_states_t {
 | |
| 	char name[PNV_IDLE_NAME_LEN];	/* state name; NUL termination not shown here */
 | |
| 	u32 latency_ns;			/* presumably latency, in nanoseconds */
 | |
| 	u32 residency_ns;		/* presumably residency, in nanoseconds */
 | |
| 	u64 psscr_val;			/* PSSCR value for this state */
 | |
| 	u64 psscr_mask;			/* mask paired with psscr_val */
 | |
| 	u32 flags;			/* meaning of the bits is defined elsewhere */
 | |
| 	bool valid;			/* presumably false for unusable entries */
 | |
| };
 | |
| 
 | |
| /*
 | |
|  * Idle state table and its companion counter; both are defined elsewhere.
 | |
|  * NOTE(review): presumably nr_pnv_idle_states is the number of entries that
 | |
|  * pnv_idle_states points to - confirm.
 | |
|  */
 | |
| extern struct pnv_idle_states_t *pnv_idle_states;
 | |
| extern int nr_pnv_idle_states;
 | |
| 
 | |
| /* Implemented elsewhere; semantics of the return value are not shown here. */
 | |
| unsigned long pnv_cpu_offline(unsigned int cpu);
 | |
| /*
 | |
|  * Implemented elsewhere. Takes psscr_val and psscr_mask by pointer.
 | |
|  * NOTE(review): presumably returns 0 or one of the ERR_* codes above, which
 | |
|  * report_invalid_psscr_val() can then log - confirm.
 | |
|  */
 | |
| int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags);
 | |
| /*
 | |
|  * report_invalid_psscr_val - warn about an invalid PSSCR value
 | |
|  * @psscr_val: the PSSCR value, printed as 16 hex digits
 | |
|  * @err: ERR_EC_ESL_MISMATCH or ERR_DEEP_STATE_ESL_MISMATCH
 | |
|  *
 | |
|  * Emits one pr_warn() line describing the mismatch. Any other @err value
 | |
|  * produces no output.
 | |
|  */
 | |
| static inline void report_invalid_psscr_val(u64 psscr_val, int err)
 | |
| {
 | |
| 	switch (err) {
 | |
| 	case ERR_EC_ESL_MISMATCH:
 | |
| 		/* Terminate with a newline so the record is complete. */
 | |
| 		pr_warn("Invalid psscr 0x%016llx : ESL,EC bits unequal\n",
 | |
| 			psscr_val);
 | |
| 		break;
 | |
| 	case ERR_DEEP_STATE_ESL_MISMATCH:
 | |
| 		pr_warn("Invalid psscr 0x%016llx : ESL cleared for deep stop-state\n",
 | |
| 			psscr_val);
 | |
| 		break;
 | |
| 	default:
 | |
| 		/* No message is defined for other codes. */
 | |
| 		break;
 | |
| 	}
 | |
| }
 | |
| #endif
 | |
| 
 | |
| #endif
 | |
| 
 | |
| #endif
 |