2005-04-16 22:20:36 +00:00
|
|
|
/*
|
2005-11-09 02:38:01 +00:00
|
|
|
* This control block defines the PACA which defines the processor
|
|
|
|
* specific data for each logical processor on the system.
|
2005-04-16 22:20:36 +00:00
|
|
|
* There are some pointers defined that are utilized by PLIC.
|
|
|
|
*
|
|
|
|
* C 2001 PPC 64 Team, IBM Corp
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
* as published by the Free Software Foundation; either version
|
|
|
|
* 2 of the License, or (at your option) any later version.
|
2005-11-09 02:38:01 +00:00
|
|
|
*/
|
|
|
|
#ifndef _ASM_POWERPC_PACA_H
|
|
|
|
#define _ASM_POWERPC_PACA_H
|
2005-12-16 21:43:46 +00:00
|
|
|
#ifdef __KERNEL__
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2009-07-23 23:15:42 +00:00
|
|
|
#include <asm/types.h>
|
|
|
|
#include <asm/lppaca.h>
|
|
|
|
#include <asm/mmu.h>
|
|
|
|
#include <asm/page.h>
|
|
|
|
#include <asm/exception-64e.h>
|
2005-04-16 22:20:36 +00:00
|
|
|
|
|
|
|
/*
 * Global register variable: local_paca is bound to r13, so reading it
 * reads that register rather than a memory location.
 */
register struct paca_struct *local_paca asm("r13");
|
2006-10-31 18:44:54 +00:00
|
|
|
|
|
|
|
/*
 * get_paca() yields local_paca.  When both CONFIG_DEBUG_PREEMPT and
 * CONFIG_SMP are defined it first calls debug_smp_processor_id() and
 * discards the result (the (void) cast plus comma operator), so the call
 * is made purely for its checking side effect.
 */
#if defined(CONFIG_DEBUG_PREEMPT) && defined(CONFIG_SMP)
|
|
|
|
extern unsigned int debug_smp_processor_id(void); /* from linux/smp.h */
|
|
|
|
/*
|
|
|
|
* Add standard checks that preemption cannot occur when using get_paca():
|
|
|
|
* otherwise the paca_struct it points to may be the wrong one just after.
|
|
|
|
*/
|
|
|
|
#define get_paca() ((void) debug_smp_processor_id(), local_paca)
|
|
|
|
#else
|
2005-04-16 22:20:36 +00:00
|
|
|
/* No preemption check in this configuration: expands to local_paca alone. */
#define get_paca() local_paca
|
2006-10-31 18:44:54 +00:00
|
|
|
#endif
|
|
|
|
|
2006-01-12 23:26:42 +00:00
|
|
|
/*
 * Shorthand for the lppaca_ptr field of the paca returned by get_paca().
 * That field is only declared under CONFIG_PPC_BOOK3S, so the macro can
 * only be expanded when that option is set.
 */
#define get_lppaca() (get_paca()->lppaca_ptr)
|
2006-08-07 06:19:19 +00:00
|
|
|
/*
 * Shorthand for the slb_shadow_ptr field of the paca returned by get_paca().
 * That field is only declared under CONFIG_PPC_STD_MMU_64, so the macro can
 * only be expanded when that option is set.
 */
#define get_slb_shadow() (get_paca()->slb_shadow_ptr)
|
2005-04-16 22:20:36 +00:00
|
|
|
|
|
|
|
/*
 * Forward declaration only: this header stores a pointer to a task_struct
 * (the __current field of struct paca_struct) and never needs its layout.
 */
struct task_struct;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Defines the layout of the paca.
|
|
|
|
*
|
|
|
|
* This structure is not directly accessed by firmware or the service
|
2008-04-10 06:43:47 +00:00
|
|
|
* processor.
|
2005-04-16 22:20:36 +00:00
|
|
|
*/
|
|
|
|
struct paca_struct {
|
2009-06-02 21:17:41 +00:00
|
|
|
#ifdef CONFIG_PPC_BOOK3S
|
2005-04-16 22:20:36 +00:00
|
|
|
/*
|
|
|
|
* Because hw_cpu_id, unlike other paca fields, is accessed
|
|
|
|
* routinely from other CPUs (from the IRQ code), we stick to
|
|
|
|
* read-only (after boot) fields in the first cacheline to
|
|
|
|
* avoid cacheline bouncing.
|
|
|
|
*/
|
|
|
|
|
|
|
|
struct lppaca *lppaca_ptr; /* Pointer to LpPaca for PLIC */
|
2009-06-02 21:17:41 +00:00
|
|
|
#endif /* CONFIG_PPC_BOOK3S */
|
2005-04-16 22:20:36 +00:00
|
|
|
/*
|
2006-01-23 16:58:20 +00:00
|
|
|
* MAGIC: the spinlock functions in arch/powerpc/lib/locks.c
|
2005-04-16 22:20:36 +00:00
|
|
|
* load lock_token and paca_index with a single lwz
|
|
|
|
* instruction. They must travel together and be properly
|
|
|
|
* aligned.
|
|
|
|
*/
|
|
|
|
u16 lock_token; /* Constant 0x8000, used in locks */
|
|
|
|
u16 paca_index; /* Logical processor number */
|
|
|
|
|
|
|
|
u64 kernel_toc; /* Kernel TOC address */
|
2008-08-30 01:40:24 +00:00
|
|
|
u64 kernelbase; /* Base address of kernel */
|
|
|
|
u64 kernel_msr; /* MSR while running in kernel */
|
2009-06-02 21:17:41 +00:00
|
|
|
#ifdef CONFIG_PPC_STD_MMU_64
|
2005-04-16 22:20:36 +00:00
|
|
|
u64 stab_real; /* Absolute address of segment table */
|
|
|
|
u64 stab_addr; /* Virtual address of segment table */
|
2009-06-02 21:17:41 +00:00
|
|
|
#endif /* CONFIG_PPC_STD_MMU_64 */
|
2005-04-16 22:20:36 +00:00
|
|
|
void *emergency_sp; /* pointer to emergency stack */
|
[PATCH] powerpc/64: per cpu data optimisations
The current ppc64 per cpu data implementation is quite slow. eg:
lhz 11,18(13) /* smp_processor_id() */
ld 9,.LC63-.LCTOC1(30) /* per_cpu__variable_name */
ld 8,.LC61-.LCTOC1(30) /* __per_cpu_offset */
sldi 11,11,3 /* form index into __per_cpu_offset */
mr 10,9
ldx 9,11,8 /* __per_cpu_offset[smp_processor_id()] */
ldx 0,10,9 /* load per cpu data */
5 loads for something that is supposed to be fast, pretty awful. One
reason for the large number of loads is that we have to synthesize 2
64bit constants (per_cpu__variable_name and __per_cpu_offset).
By putting __per_cpu_offset into the paca we can avoid the 2 loads
associated with it:
ld 11,56(13) /* paca->data_offset */
ld 9,.LC59-.LCTOC1(30) /* per_cpu__variable_name */
ldx 0,9,11 /* load per cpu data */
Longer term we should be able to do even better than 3 loads.
If per_cpu__variable_name wasn't a 64bit constant and paca->data_offset
was in a register we could cut it down to one load. A suggestion from
Rusty is to use gcc's __thread extension here. In order to do this we
would need to free up r13 (the __thread register and where the paca
currently is). So far I've had a few unsuccessful attempts at doing that :)
The patch also allocates per cpu memory node local on NUMA machines.
This patch from Rusty has been sitting in my queue _forever_ but stalled
when I hit the compiler bug. Sorry about that.
Finally I also only allocate per cpu data for possible cpus, which comes
straight out of the x86-64 port. On a pseries kernel (with NR_CPUS == 128)
and 4 possible cpus we see some nice gains:
total used free shared buffers cached
Mem: 4012228 212860 3799368 0 0 162424
total used free shared buffers cached
Mem: 4016200 212984 3803216 0 0 162424
A saving of 3.75MB. Quite nice for smaller machines. Note: we now have
to be careful of per cpu users that touch data for !possible cpus.
At this stage it might be worth making the NUMA and possible cpu
optimisations generic, but per cpu init is done so early we have to be
careful that all architectures have their possible map setup correctly.
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-01-11 02:16:44 +00:00
|
|
|
u64 data_offset; /* per cpu data offset */
|
2005-04-16 22:20:36 +00:00
|
|
|
s16 hw_cpu_id; /* Physical processor number */
|
|
|
|
u8 cpu_start; /* At startup, processor spins until */
|
|
|
|
/* this becomes non-zero. */
|
2009-06-02 21:17:41 +00:00
|
|
|
#ifdef CONFIG_PPC_STD_MMU_64
|
2007-03-16 06:47:07 +00:00
|
|
|
struct slb_shadow *slb_shadow_ptr;
|
2005-04-16 22:20:36 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Now, starting in cacheline 2, the exception save areas
|
|
|
|
*/
|
2005-11-07 00:06:55 +00:00
|
|
|
/* used for most interrupts/exceptions */
|
|
|
|
u64 exgen[10] __attribute__((aligned(0x80)));
|
|
|
|
u64 exmc[10]; /* used for machine checks */
|
|
|
|
u64 exslb[10]; /* used for SLB/segment table misses
|
|
|
|
* on the linear mapping */
|
2009-06-02 21:17:41 +00:00
|
|
|
/* SLB related definitions */
|
2006-06-15 00:45:18 +00:00
|
|
|
u16 vmalloc_sllp;
|
2005-04-16 22:20:36 +00:00
|
|
|
u16 slb_cache_ptr;
|
2007-05-08 06:27:27 +00:00
|
|
|
u16 slb_cache[SLB_CACHE_ENTRIES];
|
2009-06-02 21:17:41 +00:00
|
|
|
#endif /* CONFIG_PPC_STD_MMU_64 */
|
|
|
|
|
2009-07-23 23:15:42 +00:00
|
|
|
#ifdef CONFIG_PPC_BOOK3E
|
|
|
|
pgd_t *pgd; /* Current PGD */
|
|
|
|
pgd_t *kernel_pgd; /* Kernel PGD */
|
|
|
|
u64 exgen[8] __attribute__((aligned(0x80)));
|
|
|
|
u64 extlb[EX_TLB_SIZE*3] __attribute__((aligned(0x80)));
|
|
|
|
u64 exmc[8]; /* used for machine checks */
|
|
|
|
u64 excrit[8]; /* used for crit interrupts */
|
|
|
|
u64 exdbg[8]; /* used for debug interrupts */
|
|
|
|
|
|
|
|
/* Kernel stack pointers for use by special exceptions */
|
|
|
|
void *mc_kstack;
|
|
|
|
void *crit_kstack;
|
|
|
|
void *dbg_kstack;
|
|
|
|
#endif /* CONFIG_PPC_BOOK3E */
|
|
|
|
|
2009-06-02 21:17:41 +00:00
|
|
|
mm_context_t context;
|
2005-04-16 22:20:36 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* then miscellaneous read-write fields
|
|
|
|
*/
|
|
|
|
struct task_struct *__current; /* Pointer to current */
|
|
|
|
u64 kstack; /* Saved Kernel stack addr */
|
|
|
|
u64 stab_rr; /* stab/slb round-robin counter */
|
|
|
|
u64 saved_r1; /* r1 save for RTAS calls */
|
|
|
|
u64 saved_msr; /* MSR saved here by enter_rtas */
|
2007-04-23 15:11:55 +00:00
|
|
|
u16 trap_save; /* Used when bad stack is encountered */
|
[POWERPC] Lazy interrupt disabling for 64-bit machines
This implements a lazy strategy for disabling interrupts. This means
that local_irq_disable() et al. just clear the 'interrupts are
enabled' flag in the paca. If an interrupt comes along, the interrupt
entry code notices that interrupts are supposed to be disabled, and
clears the EE bit in SRR1, clears the 'interrupts are hard-enabled'
flag in the paca, and returns. This means that interrupts only
actually get disabled in the processor when an interrupt comes along.
When interrupts are enabled by local_irq_enable() et al., the code
sets the interrupts-enabled flag in the paca, and then checks whether
interrupts got hard-disabled. If so, it also sets the EE bit in the
MSR to hard-enable the interrupts.
This has the potential to improve performance, and also makes it
easier to make a kernel that can boot on iSeries and on other 64-bit
machines, since this lazy-disable strategy is very similar to the
soft-disable strategy that iSeries already uses.
This version renames paca->proc_enabled to paca->soft_enabled, and
changes a couple of soft-disables in the kexec code to hard-disables,
which should fix the crash that Michael Ellerman saw. This doesn't
yet use a reserved CR field for the soft_enabled and hard_enabled
flags. This applies on top of Stephen Rothwell's patches to make it
possible to build a combined iSeries/other kernel.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-10-04 06:47:49 +00:00
|
|
|
u8 soft_enabled; /* irq soft-enable flag */
|
|
|
|
u8 hard_enabled; /* set if irqs are enabled in MSR */
|
2006-09-13 12:08:26 +00:00
|
|
|
u8 io_sync; /* writel() needs spin_unlock sync */
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has outgrown its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in all, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 10:02:48 +00:00
|
|
|
u8 perf_event_pending; /* PM interrupt while soft-disabled */
|
powerpc: Implement accurate task and CPU time accounting
This implements accurate task and cpu time accounting for 64-bit
powerpc kernels. Instead of accounting a whole jiffy of time to a
task on a timer interrupt because that task happened to be running at
the time, we now account time in units of timebase ticks according to
the actual time spent by the task in user mode and kernel mode. We
also count the time spent processing hardware and software interrupts
accurately. This is conditional on CONFIG_VIRT_CPU_ACCOUNTING. If
that is not set, we do tick-based approximate accounting as before.
To get this accurate information, we read either the PURR (processor
utilization of resources register) on POWER5 machines, or the timebase
on other machines, on:
* each entry to the kernel from usermode
* each exit to usermode
* transitions between process context, hard irq context and soft irq
context in kernel mode
* context switches.
On POWER5 systems with shared-processor logical partitioning we also
read both the PURR and the timebase at each timer interrupt and
context switch in order to determine how much time has been taken by
the hypervisor to run other partitions ("steal" time). Unfortunately,
since we need values of the PURR on both threads at the same time to
accurately calculate the steal time, and since we can only calculate
steal time on a per-core basis, the apportioning of the steal time
between idle time (time which we ceded to the hypervisor in the idle
loop) and actual stolen time is somewhat approximate at the moment.
This is all based quite heavily on what s390 does, and it uses the
generic interfaces that were added by the s390 developers,
i.e. account_system_time(), account_user_time(), etc.
This patch doesn't add any new interfaces between the kernel and
userspace, and doesn't change the units in which time is reported to
userspace by things such as /proc/stat, /proc/<pid>/stat, getrusage(),
times(), etc. Internally the various task and cpu times are stored in
timebase units, but they are converted to USER_HZ units (1/100th of a
second) when reported to userspace. Some precision is therefore lost
but there should not be any accumulating error, since the internal
accumulation is at full precision.
Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-02-23 23:06:59 +00:00
|
|
|
|
|
|
|
/* Stuff for accurate time accounting */
|
|
|
|
u64 user_time; /* accumulated usermode TB ticks */
|
|
|
|
u64 system_time; /* accumulated system TB ticks */
|
|
|
|
u64 startpurr; /* PURR/TB value snapshot */
|
2007-10-18 10:06:37 +00:00
|
|
|
u64 startspurr; /* SPURR value snapshot */
|
2005-04-16 22:20:36 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Array of paca_struct entries, defined elsewhere.
 * NOTE(review): presumably indexed by logical processor number (compare the
 * paca_index field) -- confirm against the definition.
 */
extern struct paca_struct paca[];
|
2008-04-24 03:43:49 +00:00
|
|
|
/*
 * Defined elsewhere; takes no arguments and returns nothing.
 * NOTE(review): by its name this sets up the paca[] entries -- confirm
 * against the definition.
 */
extern void initialise_pacas(void);
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2005-12-16 21:43:46 +00:00
|
|
|
#endif /* __KERNEL__ */
|
2005-11-09 02:38:01 +00:00
|
|
|
#endif /* _ASM_POWERPC_PACA_H */
|