mirror of
https://github.com/torvalds/linux.git
synced 2024-12-14 15:13:52 +00:00
x86, UV: Improve BAU performance and error recovery
- increase performance of the interrupt handler - release timed-out software acknowledge resources - recover from continuous-busy status due to a hardware issue - add a 'throttle' to keep a uvhub from sending more than a specified number of broadcasts concurrently (work around the hardware issue) - provide a 'nobau' boot command line option - rename 'pnode' and 'node' to 'uvhub' (the 'node' terminology is ambiguous) - add some new statistics about the scope of broadcasts, retries, the hardware issue and the 'throttle' - split off new function uv_bau_retry_msg() from uv_bau_process_message() per community coding style feedback. - simplify the argument list to uv_bau_process_message(), per community coding style feedback. Signed-off-by: Cliff Wickman <cpw@sgi.com> Cc: linux-mm@kvack.org Cc: Jack Steiner <steiner@sgi.com> Cc: Russ Anderson <rja@sgi.com> Cc: Mike Travis <travis@sgi.com> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Thomas Gleixner <tglx@linutronix.de> LKML-Reference: <E1O25Z4-0004Ur-PB@eag09.americas.sgi.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
parent
2acebe9ecb
commit
b8f7fb13d2
@ -27,13 +27,14 @@
|
||||
* set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on.
|
||||
*
|
||||
* We will use 31 sets, one for sending BAU messages from each of the 32
|
||||
* cpu's on the node.
|
||||
* cpu's on the uvhub.
|
||||
*
|
||||
* TLB shootdown will use the first of the 8 descriptors of each set.
|
||||
* Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set).
|
||||
*/
|
||||
|
||||
#define UV_ITEMS_PER_DESCRIPTOR 8
|
||||
#define MAX_BAU_CONCURRENT 3
|
||||
#define UV_CPUS_PER_ACT_STATUS 32
|
||||
#define UV_ACT_STATUS_MASK 0x3
|
||||
#define UV_ACT_STATUS_SIZE 2
|
||||
@ -45,6 +46,9 @@
|
||||
#define UV_PAYLOADQ_PNODE_SHIFT 49
|
||||
#define UV_PTC_BASENAME "sgi_uv/ptc_statistics"
|
||||
#define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask))
|
||||
#define UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT 15
|
||||
#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT 16
|
||||
#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD 0x000000000bUL
|
||||
|
||||
/*
|
||||
* bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1
|
||||
@ -55,15 +59,29 @@
|
||||
#define DESC_STATUS_SOURCE_TIMEOUT 3
|
||||
|
||||
/*
|
||||
* source side thresholds at which message retries print a warning
|
||||
* source side threshholds at which message retries print a warning
|
||||
*/
|
||||
#define SOURCE_TIMEOUT_LIMIT 20
|
||||
#define DESTINATION_TIMEOUT_LIMIT 20
|
||||
|
||||
/*
|
||||
* misc. delays, in microseconds
|
||||
*/
|
||||
#define THROTTLE_DELAY 10
|
||||
#define TIMEOUT_DELAY 10
|
||||
#define BIOS_TO 1000
|
||||
/* BIOS is assumed to set the destination timeout to 1003520 nanoseconds */
|
||||
|
||||
/*
|
||||
* threshholds at which to use IPI to free resources
|
||||
*/
|
||||
#define PLUGSB4RESET 100
|
||||
#define TIMEOUTSB4RESET 100
|
||||
|
||||
/*
|
||||
* number of entries in the destination side payload queue
|
||||
*/
|
||||
#define DEST_Q_SIZE 17
|
||||
#define DEST_Q_SIZE 20
|
||||
/*
|
||||
* number of destination side software ack resources
|
||||
*/
|
||||
@ -72,9 +90,10 @@
|
||||
/*
|
||||
* completion statuses for sending a TLB flush message
|
||||
*/
|
||||
#define FLUSH_RETRY 1
|
||||
#define FLUSH_GIVEUP 2
|
||||
#define FLUSH_COMPLETE 3
|
||||
#define FLUSH_RETRY_PLUGGED 1
|
||||
#define FLUSH_RETRY_TIMEOUT 2
|
||||
#define FLUSH_GIVEUP 3
|
||||
#define FLUSH_COMPLETE 4
|
||||
|
||||
/*
|
||||
* Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor)
|
||||
@ -86,14 +105,14 @@
|
||||
* 'base_dest_nodeid' field of the header corresponds to the
|
||||
* destination nodeID associated with that specified bit.
|
||||
*/
|
||||
struct bau_target_nodemask {
|
||||
unsigned long bits[BITS_TO_LONGS(256)];
|
||||
struct bau_target_uvhubmask {
|
||||
unsigned long bits[BITS_TO_LONGS(UV_DISTRIBUTION_SIZE)];
|
||||
};
|
||||
|
||||
/*
|
||||
* mask of cpu's on a node
|
||||
* mask of cpu's on a uvhub
|
||||
* (during initialization we need to check that unsigned long has
|
||||
* enough bits for max. cpu's per node)
|
||||
* enough bits for max. cpu's per uvhub)
|
||||
*/
|
||||
struct bau_local_cpumask {
|
||||
unsigned long bits;
|
||||
@ -135,8 +154,8 @@ struct bau_msg_payload {
|
||||
struct bau_msg_header {
|
||||
unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */
|
||||
/* bits 5:0 */
|
||||
unsigned int base_dest_nodeid:15; /* nasid>>1 (pnode) of */
|
||||
/* bits 20:6 */ /* first bit in node_map */
|
||||
unsigned int base_dest_nodeid:15; /* nasid (pnode<<1) of */
|
||||
/* bits 20:6 */ /* first bit in uvhub map */
|
||||
unsigned int command:8; /* message type */
|
||||
/* bits 28:21 */
|
||||
/* 0x38: SN3net EndPoint Message */
|
||||
@ -146,26 +165,38 @@ struct bau_msg_header {
|
||||
unsigned int rsvd_2:9; /* must be zero */
|
||||
/* bits 40:32 */
|
||||
/* Suppl_A is 56-41 */
|
||||
unsigned int payload_2a:8;/* becomes byte 16 of msg */
|
||||
/* bits 48:41 */ /* not currently using */
|
||||
unsigned int payload_2b:8;/* becomes byte 17 of msg */
|
||||
/* bits 56:49 */ /* not currently using */
|
||||
unsigned int sequence:16;/* message sequence number */
|
||||
/* bits 56:41 */ /* becomes bytes 16-17 of msg */
|
||||
/* Address field (96:57) is never used as an
|
||||
address (these are address bits 42:3) */
|
||||
|
||||
unsigned int rsvd_3:1; /* must be zero */
|
||||
/* bit 57 */
|
||||
/* address bits 27:4 are payload */
|
||||
/* these 24 bits become bytes 12-14 of msg */
|
||||
/* these next 24 (58-81) bits become bytes 12-14 of msg */
|
||||
|
||||
/* bits 65:58 land in byte 12 */
|
||||
unsigned int replied_to:1;/* sent as 0 by the source to byte 12 */
|
||||
/* bit 58 */
|
||||
unsigned int msg_type:3; /* software type of the message*/
|
||||
/* bits 61:59 */
|
||||
unsigned int canceled:1; /* message canceled, resource to be freed*/
|
||||
/* bit 62 */
|
||||
unsigned int payload_1a:1;/* not currently used */
|
||||
/* bit 63 */
|
||||
unsigned int payload_1b:2;/* not currently used */
|
||||
/* bits 65:64 */
|
||||
|
||||
unsigned int payload_1a:5;/* not currently used */
|
||||
/* bits 63:59 */
|
||||
unsigned int payload_1b:8;/* not currently used */
|
||||
/* bits 71:64 */
|
||||
unsigned int payload_1c:8;/* not currently used */
|
||||
/* bits 79:72 */
|
||||
unsigned int payload_1d:2;/* not currently used */
|
||||
/* bits 73:66 land in byte 13 */
|
||||
unsigned int payload_1ca:6;/* not currently used */
|
||||
/* bits 71:66 */
|
||||
unsigned int payload_1c:2;/* not currently used */
|
||||
/* bits 73:72 */
|
||||
|
||||
/* bits 81:74 land in byte 14 */
|
||||
unsigned int payload_1d:6;/* not currently used */
|
||||
/* bits 79:74 */
|
||||
unsigned int payload_1e:2;/* not currently used */
|
||||
/* bits 81:80 */
|
||||
|
||||
unsigned int rsvd_4:7; /* must be zero */
|
||||
@ -178,7 +209,7 @@ struct bau_msg_header {
|
||||
/* bits 95:90 */
|
||||
unsigned int rsvd_6:5; /* must be zero */
|
||||
/* bits 100:96 */
|
||||
unsigned int int_both:1;/* if 1, interrupt both sockets on the blade */
|
||||
unsigned int int_both:1;/* if 1, interrupt both sockets on the uvhub */
|
||||
/* bit 101*/
|
||||
unsigned int fairness:3;/* usually zero */
|
||||
/* bits 104:102 */
|
||||
@ -191,13 +222,18 @@ struct bau_msg_header {
|
||||
/* bits 127:107 */
|
||||
};
|
||||
|
||||
/* see msg_type: */
|
||||
#define MSG_NOOP 0
|
||||
#define MSG_REGULAR 1
|
||||
#define MSG_RETRY 2
|
||||
|
||||
/*
|
||||
* The activation descriptor:
|
||||
* The format of the message to send, plus all accompanying control
|
||||
* Should be 64 bytes
|
||||
*/
|
||||
struct bau_desc {
|
||||
struct bau_target_nodemask distribution;
|
||||
struct bau_target_uvhubmask distribution;
|
||||
/*
|
||||
* message template, consisting of header and payload:
|
||||
*/
|
||||
@ -237,19 +273,25 @@ struct bau_payload_queue_entry {
|
||||
unsigned short acknowledge_count; /* filled in by destination */
|
||||
/* 16 bits, bytes 10-11 */
|
||||
|
||||
unsigned short replied_to:1; /* sent as 0 by the source */
|
||||
/* 1 bit */
|
||||
unsigned short unused1:7; /* not currently using */
|
||||
/* 7 bits: byte 12) */
|
||||
/* these next 3 bytes come from bits 58-81 of the message header */
|
||||
unsigned short replied_to:1; /* sent as 0 by the source */
|
||||
unsigned short msg_type:3; /* software message type */
|
||||
unsigned short canceled:1; /* sent as 0 by the source */
|
||||
unsigned short unused1:3; /* not currently using */
|
||||
/* byte 12 */
|
||||
|
||||
unsigned char unused2[2]; /* not currently using */
|
||||
/* bytes 13-14 */
|
||||
unsigned char unused2a; /* not currently using */
|
||||
/* byte 13 */
|
||||
unsigned char unused2; /* not currently using */
|
||||
/* byte 14 */
|
||||
|
||||
unsigned char sw_ack_vector; /* filled in by the hardware */
|
||||
/* byte 15 (bits 127:120) */
|
||||
|
||||
unsigned char unused4[3]; /* not currently using bytes 17-19 */
|
||||
/* bytes 17-19 */
|
||||
unsigned short sequence; /* message sequence number */
|
||||
/* bytes 16-17 */
|
||||
unsigned char unused4[2]; /* not currently using bytes 18-19 */
|
||||
/* bytes 18-19 */
|
||||
|
||||
int number_of_cpus; /* filled in at destination */
|
||||
/* 32 bits, bytes 20-23 (aligned) */
|
||||
@ -259,63 +301,93 @@ struct bau_payload_queue_entry {
|
||||
};
|
||||
|
||||
/*
|
||||
* one for every slot in the destination payload queue
|
||||
*/
|
||||
struct bau_msg_status {
|
||||
struct bau_local_cpumask seen_by; /* map of cpu's */
|
||||
};
|
||||
|
||||
/*
|
||||
* one for every slot in the destination software ack resources
|
||||
*/
|
||||
struct bau_sw_ack_status {
|
||||
struct bau_payload_queue_entry *msg; /* associated message */
|
||||
int watcher; /* cpu monitoring, or -1 */
|
||||
};
|
||||
|
||||
/*
|
||||
* one on every node and per-cpu; to locate the software tables
|
||||
* one per-cpu; to locate the software tables
|
||||
*/
|
||||
struct bau_control {
|
||||
struct bau_desc *descriptor_base;
|
||||
struct bau_payload_queue_entry *bau_msg_head;
|
||||
struct bau_payload_queue_entry *va_queue_first;
|
||||
struct bau_payload_queue_entry *va_queue_last;
|
||||
struct bau_msg_status *msg_statuses;
|
||||
int *watching; /* pointer to array */
|
||||
struct bau_payload_queue_entry *bau_msg_head;
|
||||
struct bau_control *uvhub_master;
|
||||
struct bau_control *socket_master;
|
||||
unsigned long timeout_interval;
|
||||
atomic_t active_descriptor_count;
|
||||
int max_concurrent;
|
||||
int max_concurrent_constant;
|
||||
int retry_message_scans;
|
||||
int plugged_tries;
|
||||
int timeout_tries;
|
||||
int ipi_attempts;
|
||||
int conseccompletes;
|
||||
short cpu;
|
||||
short uvhub_cpu;
|
||||
short uvhub;
|
||||
short cpus_in_socket;
|
||||
short cpus_in_uvhub;
|
||||
unsigned short message_number;
|
||||
unsigned short uvhub_quiesce;
|
||||
short socket_acknowledge_count[DEST_Q_SIZE];
|
||||
cycles_t send_message;
|
||||
spinlock_t masks_lock;
|
||||
spinlock_t uvhub_lock;
|
||||
spinlock_t queue_lock;
|
||||
};
|
||||
|
||||
/*
|
||||
* This structure is allocated per_cpu for UV TLB shootdown statistics.
|
||||
*/
|
||||
struct ptc_stats {
|
||||
unsigned long ptc_i; /* number of IPI-style flushes */
|
||||
unsigned long requestor; /* number of nodes this cpu sent to */
|
||||
unsigned long requestee; /* times cpu was remotely requested */
|
||||
unsigned long alltlb; /* times all tlb's on this cpu were flushed */
|
||||
unsigned long onetlb; /* times just one tlb on this cpu was flushed */
|
||||
unsigned long s_retry; /* retries on source side timeouts */
|
||||
unsigned long d_retry; /* retries on destination side timeouts */
|
||||
unsigned long sflush; /* cycles spent in uv_flush_tlb_others */
|
||||
unsigned long dflush; /* cycles spent on destination side */
|
||||
unsigned long retriesok; /* successes on retries */
|
||||
unsigned long nomsg; /* interrupts with no message */
|
||||
unsigned long multmsg; /* interrupts with multiple messages */
|
||||
unsigned long ntargeted;/* nodes targeted */
|
||||
/* sender statistics */
|
||||
unsigned long s_giveup; /* number of fall backs to IPI-style flushes */
|
||||
unsigned long s_requestor; /* number of shootdown requests */
|
||||
unsigned long s_stimeout; /* source side timeouts */
|
||||
unsigned long s_dtimeout; /* destination side timeouts */
|
||||
unsigned long s_time; /* time spent in sending side */
|
||||
unsigned long s_retriesok; /* successful retries */
|
||||
unsigned long s_ntargcpu; /* number of cpus targeted */
|
||||
unsigned long s_ntarguvhub; /* number of uvhubs targeted */
|
||||
unsigned long s_ntarguvhub16; /* number of times >= 16 target hubs */
|
||||
unsigned long s_ntarguvhub8; /* number of times >= 8 target hubs */
|
||||
unsigned long s_ntarguvhub4; /* number of times >= 4 target hubs */
|
||||
unsigned long s_ntarguvhub2; /* number of times >= 2 target hubs */
|
||||
unsigned long s_ntarguvhub1; /* number of times == 1 target hub */
|
||||
unsigned long s_resets_plug; /* ipi-style resets from plug state */
|
||||
unsigned long s_resets_timeout; /* ipi-style resets from timeouts */
|
||||
unsigned long s_busy; /* status stayed busy past s/w timer */
|
||||
unsigned long s_throttles; /* waits in throttle */
|
||||
unsigned long s_retry_messages; /* retry broadcasts */
|
||||
/* destination statistics */
|
||||
unsigned long d_alltlb; /* times all tlb's on this cpu were flushed */
|
||||
unsigned long d_onetlb; /* times just one tlb on this cpu was flushed */
|
||||
unsigned long d_multmsg; /* interrupts with multiple messages */
|
||||
unsigned long d_nomsg; /* interrupts with no message */
|
||||
unsigned long d_time; /* time spent on destination side */
|
||||
unsigned long d_requestee; /* number of messages processed */
|
||||
unsigned long d_retries; /* number of retry messages processed */
|
||||
unsigned long d_canceled; /* number of messages canceled by retries */
|
||||
unsigned long d_nocanceled; /* retries that found nothing to cancel */
|
||||
unsigned long d_resets; /* number of ipi-style requests processed */
|
||||
unsigned long d_rcanceled; /* number of messages canceled by resets */
|
||||
};
|
||||
|
||||
static inline int bau_node_isset(int node, struct bau_target_nodemask *dstp)
|
||||
static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp)
|
||||
{
|
||||
return constant_test_bit(node, &dstp->bits[0]);
|
||||
return constant_test_bit(uvhub, &dstp->bits[0]);
|
||||
}
|
||||
static inline void bau_node_set(int node, struct bau_target_nodemask *dstp)
|
||||
static inline void bau_uvhub_set(int uvhub, struct bau_target_uvhubmask *dstp)
|
||||
{
|
||||
__set_bit(node, &dstp->bits[0]);
|
||||
__set_bit(uvhub, &dstp->bits[0]);
|
||||
}
|
||||
static inline void bau_nodes_clear(struct bau_target_nodemask *dstp, int nbits)
|
||||
static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp,
|
||||
int nbits)
|
||||
{
|
||||
bitmap_zero(&dstp->bits[0], nbits);
|
||||
}
|
||||
static inline int bau_uvhub_weight(struct bau_target_uvhubmask *dstp)
|
||||
{
|
||||
return bitmap_weight((unsigned long *)&dstp->bits[0],
|
||||
UV_DISTRIBUTION_SIZE);
|
||||
}
|
||||
|
||||
static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits)
|
||||
{
|
||||
@ -328,4 +400,35 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits)
|
||||
extern void uv_bau_message_intr1(void);
|
||||
extern void uv_bau_timeout_intr1(void);
|
||||
|
||||
struct atomic_short {
|
||||
short counter;
|
||||
};
|
||||
|
||||
/**
|
||||
* atomic_read_short - read a short atomic variable
|
||||
* @v: pointer of type atomic_short
|
||||
*
|
||||
* Atomically reads the value of @v.
|
||||
*/
|
||||
static inline int atomic_read_short(const struct atomic_short *v)
|
||||
{
|
||||
return v->counter;
|
||||
}
|
||||
|
||||
/**
|
||||
* atomic_add_short_return - add and return a short int
|
||||
* @i: short value to add
|
||||
* @v: pointer of type atomic_short
|
||||
*
|
||||
* Atomically adds @i to @v and returns @i + @v
|
||||
*/
|
||||
static inline int atomic_add_short_return(short i, struct atomic_short *v)
|
||||
{
|
||||
short __i = i;
|
||||
asm volatile(LOCK_PREFIX "xaddw %0, %1"
|
||||
: "+r" (i), "+m" (v->counter)
|
||||
: : "memory");
|
||||
return i + __i;
|
||||
}
|
||||
|
||||
#endif /* _ASM_X86_UV_UV_BAU_H */
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user