x86, UV: Improve BAU performance and error recovery

- increase performance of the interrupt handler

- release timed-out software acknowledge resources

- recover from continuous-busy status due to a hardware issue

- add a 'throttle' to keep a uvhub from sending more than a
  specified number of broadcasts concurrently (work around the hardware issue)

- provide a 'nobau' boot command line option

- rename 'pnode' and 'node' to 'uvhub' (the 'node' terminology
  is ambiguous)

- add some new statistics about the scope of broadcasts, retries, the
  hardware issue and the 'throttle'

- split off new function uv_bau_retry_msg() from
  uv_bau_process_message() per community coding style feedback.

- simplify the argument list to uv_bau_process_message(), per
  community coding style feedback.

Signed-off-by: Cliff Wickman <cpw@sgi.com>
Cc: linux-mm@kvack.org
Cc: Jack Steiner <steiner@sgi.com>
Cc: Russ Anderson <rja@sgi.com>
Cc: Mike Travis <travis@sgi.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <E1O25Z4-0004Ur-PB@eag09.americas.sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Cliff Wickman 2010-04-14 11:35:46 -05:00 committed by Ingo Molnar
parent 2acebe9ecb
commit b8f7fb13d2
2 changed files with 1084 additions and 451 deletions

View File

@ -27,13 +27,14 @@
* set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on.
*
* We will use 31 sets, one for sending BAU messages from each of the 32
* cpu's on the node.
* cpu's on the uvhub.
*
* TLB shootdown will use the first of the 8 descriptors of each set.
* Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set).
*/
#define UV_ITEMS_PER_DESCRIPTOR 8
#define MAX_BAU_CONCURRENT 3
#define UV_CPUS_PER_ACT_STATUS 32
#define UV_ACT_STATUS_MASK 0x3
#define UV_ACT_STATUS_SIZE 2
@ -45,6 +46,9 @@
#define UV_PAYLOADQ_PNODE_SHIFT 49
#define UV_PTC_BASENAME "sgi_uv/ptc_statistics"
#define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask))
#define UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT 15
#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT 16
#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD 0x000000000bUL
/*
* bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1
@ -55,15 +59,29 @@
#define DESC_STATUS_SOURCE_TIMEOUT 3
/*
* source side thresholds at which message retries print a warning
* source side threshholds at which message retries print a warning
*/
#define SOURCE_TIMEOUT_LIMIT 20
#define DESTINATION_TIMEOUT_LIMIT 20
/*
* misc. delays, in microseconds
*/
#define THROTTLE_DELAY 10
#define TIMEOUT_DELAY 10
#define BIOS_TO 1000
/* BIOS is assumed to set the destination timeout to 1003520 nanoseconds */
/*
* threshholds at which to use IPI to free resources
*/
#define PLUGSB4RESET 100
#define TIMEOUTSB4RESET 100
/*
* number of entries in the destination side payload queue
*/
#define DEST_Q_SIZE 17
#define DEST_Q_SIZE 20
/*
* number of destination side software ack resources
*/
@ -72,9 +90,10 @@
/*
* completion statuses for sending a TLB flush message
*/
#define FLUSH_RETRY 1
#define FLUSH_GIVEUP 2
#define FLUSH_COMPLETE 3
#define FLUSH_RETRY_PLUGGED 1
#define FLUSH_RETRY_TIMEOUT 2
#define FLUSH_GIVEUP 3
#define FLUSH_COMPLETE 4
/*
* Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor)
@ -86,14 +105,14 @@
* 'base_dest_nodeid' field of the header corresponds to the
* destination nodeID associated with that specified bit.
*/
struct bau_target_nodemask {
unsigned long bits[BITS_TO_LONGS(256)];
struct bau_target_uvhubmask {
unsigned long bits[BITS_TO_LONGS(UV_DISTRIBUTION_SIZE)];
};
/*
* mask of cpu's on a node
* mask of cpu's on a uvhub
* (during initialization we need to check that unsigned long has
* enough bits for max. cpu's per node)
* enough bits for max. cpu's per uvhub)
*/
struct bau_local_cpumask {
unsigned long bits;
@ -135,8 +154,8 @@ struct bau_msg_payload {
struct bau_msg_header {
unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */
/* bits 5:0 */
unsigned int base_dest_nodeid:15; /* nasid>>1 (pnode) of */
/* bits 20:6 */ /* first bit in node_map */
unsigned int base_dest_nodeid:15; /* nasid (pnode<<1) of */
/* bits 20:6 */ /* first bit in uvhub map */
unsigned int command:8; /* message type */
/* bits 28:21 */
/* 0x38: SN3net EndPoint Message */
@ -146,26 +165,38 @@ struct bau_msg_header {
unsigned int rsvd_2:9; /* must be zero */
/* bits 40:32 */
/* Suppl_A is 56-41 */
unsigned int payload_2a:8;/* becomes byte 16 of msg */
/* bits 48:41 */ /* not currently using */
unsigned int payload_2b:8;/* becomes byte 17 of msg */
/* bits 56:49 */ /* not currently using */
unsigned int sequence:16;/* message sequence number */
/* bits 56:41 */ /* becomes bytes 16-17 of msg */
/* Address field (96:57) is never used as an
address (these are address bits 42:3) */
unsigned int rsvd_3:1; /* must be zero */
/* bit 57 */
/* address bits 27:4 are payload */
/* these 24 bits become bytes 12-14 of msg */
/* these next 24 (58-81) bits become bytes 12-14 of msg */
/* bits 65:58 land in byte 12 */
unsigned int replied_to:1;/* sent as 0 by the source to byte 12 */
/* bit 58 */
unsigned int msg_type:3; /* software type of the message*/
/* bits 61:59 */
unsigned int canceled:1; /* message canceled, resource to be freed*/
/* bit 62 */
unsigned int payload_1a:1;/* not currently used */
/* bit 63 */
unsigned int payload_1b:2;/* not currently used */
/* bits 65:64 */
unsigned int payload_1a:5;/* not currently used */
/* bits 63:59 */
unsigned int payload_1b:8;/* not currently used */
/* bits 71:64 */
unsigned int payload_1c:8;/* not currently used */
/* bits 79:72 */
unsigned int payload_1d:2;/* not currently used */
/* bits 73:66 land in byte 13 */
unsigned int payload_1ca:6;/* not currently used */
/* bits 71:66 */
unsigned int payload_1c:2;/* not currently used */
/* bits 73:72 */
/* bits 81:74 land in byte 14 */
unsigned int payload_1d:6;/* not currently used */
/* bits 79:74 */
unsigned int payload_1e:2;/* not currently used */
/* bits 81:80 */
unsigned int rsvd_4:7; /* must be zero */
@ -178,7 +209,7 @@ struct bau_msg_header {
/* bits 95:90 */
unsigned int rsvd_6:5; /* must be zero */
/* bits 100:96 */
unsigned int int_both:1;/* if 1, interrupt both sockets on the blade */
unsigned int int_both:1;/* if 1, interrupt both sockets on the uvhub */
/* bit 101*/
unsigned int fairness:3;/* usually zero */
/* bits 104:102 */
@ -191,13 +222,18 @@ struct bau_msg_header {
/* bits 127:107 */
};
/* see msg_type: */
#define MSG_NOOP 0
#define MSG_REGULAR 1
#define MSG_RETRY 2
/*
* The activation descriptor:
* The format of the message to send, plus all accompanying control
* Should be 64 bytes
*/
struct bau_desc {
struct bau_target_nodemask distribution;
struct bau_target_uvhubmask distribution;
/*
* message template, consisting of header and payload:
*/
@ -237,19 +273,25 @@ struct bau_payload_queue_entry {
unsigned short acknowledge_count; /* filled in by destination */
/* 16 bits, bytes 10-11 */
unsigned short replied_to:1; /* sent as 0 by the source */
/* 1 bit */
unsigned short unused1:7; /* not currently using */
/* 7 bits: byte 12) */
/* these next 3 bytes come from bits 58-81 of the message header */
unsigned short replied_to:1; /* sent as 0 by the source */
unsigned short msg_type:3; /* software message type */
unsigned short canceled:1; /* sent as 0 by the source */
unsigned short unused1:3; /* not currently using */
/* byte 12 */
unsigned char unused2[2]; /* not currently using */
/* bytes 13-14 */
unsigned char unused2a; /* not currently using */
/* byte 13 */
unsigned char unused2; /* not currently using */
/* byte 14 */
unsigned char sw_ack_vector; /* filled in by the hardware */
/* byte 15 (bits 127:120) */
unsigned char unused4[3]; /* not currently using bytes 17-19 */
/* bytes 17-19 */
unsigned short sequence; /* message sequence number */
/* bytes 16-17 */
unsigned char unused4[2]; /* not currently using bytes 18-19 */
/* bytes 18-19 */
int number_of_cpus; /* filled in at destination */
/* 32 bits, bytes 20-23 (aligned) */
@ -259,63 +301,93 @@ struct bau_payload_queue_entry {
};
/*
* one for every slot in the destination payload queue
*/
struct bau_msg_status {
struct bau_local_cpumask seen_by; /* map of cpu's */
};
/*
* one for every slot in the destination software ack resources
*/
struct bau_sw_ack_status {
struct bau_payload_queue_entry *msg; /* associated message */
int watcher; /* cpu monitoring, or -1 */
};
/*
* one on every node and per-cpu; to locate the software tables
* one per-cpu; to locate the software tables
*/
struct bau_control {
struct bau_desc *descriptor_base;
struct bau_payload_queue_entry *bau_msg_head;
struct bau_payload_queue_entry *va_queue_first;
struct bau_payload_queue_entry *va_queue_last;
struct bau_msg_status *msg_statuses;
int *watching; /* pointer to array */
struct bau_payload_queue_entry *bau_msg_head;
struct bau_control *uvhub_master;
struct bau_control *socket_master;
unsigned long timeout_interval;
atomic_t active_descriptor_count;
int max_concurrent;
int max_concurrent_constant;
int retry_message_scans;
int plugged_tries;
int timeout_tries;
int ipi_attempts;
int conseccompletes;
short cpu;
short uvhub_cpu;
short uvhub;
short cpus_in_socket;
short cpus_in_uvhub;
unsigned short message_number;
unsigned short uvhub_quiesce;
short socket_acknowledge_count[DEST_Q_SIZE];
cycles_t send_message;
spinlock_t masks_lock;
spinlock_t uvhub_lock;
spinlock_t queue_lock;
};
/*
* This structure is allocated per_cpu for UV TLB shootdown statistics.
*/
struct ptc_stats {
unsigned long ptc_i; /* number of IPI-style flushes */
unsigned long requestor; /* number of nodes this cpu sent to */
unsigned long requestee; /* times cpu was remotely requested */
unsigned long alltlb; /* times all tlb's on this cpu were flushed */
unsigned long onetlb; /* times just one tlb on this cpu was flushed */
unsigned long s_retry; /* retries on source side timeouts */
unsigned long d_retry; /* retries on destination side timeouts */
unsigned long sflush; /* cycles spent in uv_flush_tlb_others */
unsigned long dflush; /* cycles spent on destination side */
unsigned long retriesok; /* successes on retries */
unsigned long nomsg; /* interrupts with no message */
unsigned long multmsg; /* interrupts with multiple messages */
unsigned long ntargeted;/* nodes targeted */
/* sender statistics */
unsigned long s_giveup; /* number of fall backs to IPI-style flushes */
unsigned long s_requestor; /* number of shootdown requests */
unsigned long s_stimeout; /* source side timeouts */
unsigned long s_dtimeout; /* destination side timeouts */
unsigned long s_time; /* time spent in sending side */
unsigned long s_retriesok; /* successful retries */
unsigned long s_ntargcpu; /* number of cpus targeted */
unsigned long s_ntarguvhub; /* number of uvhubs targeted */
unsigned long s_ntarguvhub16; /* number of times >= 16 target hubs */
unsigned long s_ntarguvhub8; /* number of times >= 8 target hubs */
unsigned long s_ntarguvhub4; /* number of times >= 4 target hubs */
unsigned long s_ntarguvhub2; /* number of times >= 2 target hubs */
unsigned long s_ntarguvhub1; /* number of times == 1 target hub */
unsigned long s_resets_plug; /* ipi-style resets from plug state */
unsigned long s_resets_timeout; /* ipi-style resets from timeouts */
unsigned long s_busy; /* status stayed busy past s/w timer */
unsigned long s_throttles; /* waits in throttle */
unsigned long s_retry_messages; /* retry broadcasts */
/* destination statistics */
unsigned long d_alltlb; /* times all tlb's on this cpu were flushed */
unsigned long d_onetlb; /* times just one tlb on this cpu was flushed */
unsigned long d_multmsg; /* interrupts with multiple messages */
unsigned long d_nomsg; /* interrupts with no message */
unsigned long d_time; /* time spent on destination side */
unsigned long d_requestee; /* number of messages processed */
unsigned long d_retries; /* number of retry messages processed */
unsigned long d_canceled; /* number of messages canceled by retries */
unsigned long d_nocanceled; /* retries that found nothing to cancel */
unsigned long d_resets; /* number of ipi-style requests processed */
unsigned long d_rcanceled; /* number of messages canceled by resets */
};
static inline int bau_node_isset(int node, struct bau_target_nodemask *dstp)
static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp)
{
return constant_test_bit(node, &dstp->bits[0]);
return constant_test_bit(uvhub, &dstp->bits[0]);
}
static inline void bau_node_set(int node, struct bau_target_nodemask *dstp)
static inline void bau_uvhub_set(int uvhub, struct bau_target_uvhubmask *dstp)
{
__set_bit(node, &dstp->bits[0]);
__set_bit(uvhub, &dstp->bits[0]);
}
static inline void bau_nodes_clear(struct bau_target_nodemask *dstp, int nbits)
static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp,
int nbits)
{
bitmap_zero(&dstp->bits[0], nbits);
}
static inline int bau_uvhub_weight(struct bau_target_uvhubmask *dstp)
{
return bitmap_weight((unsigned long *)&dstp->bits[0],
UV_DISTRIBUTION_SIZE);
}
static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits)
{
@ -328,4 +400,35 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits)
extern void uv_bau_message_intr1(void);
extern void uv_bau_timeout_intr1(void);
struct atomic_short {
short counter;
};
/**
* atomic_read_short - read a short atomic variable
* @v: pointer of type atomic_short
*
* Atomically reads the value of @v.
*/
static inline int atomic_read_short(const struct atomic_short *v)
{
return v->counter;
}
/**
* atomic_add_short_return - add and return a short int
* @i: short value to add
* @v: pointer of type atomic_short
*
* Atomically adds @i to @v and returns @i + @v
*/
static inline int atomic_add_short_return(short i, struct atomic_short *v)
{
short __i = i;
asm volatile(LOCK_PREFIX "xaddw %0, %1"
: "+r" (i), "+m" (v->counter)
: : "memory");
return i + __i;
}
#endif /* _ASM_X86_UV_UV_BAU_H */

File diff suppressed because it is too large Load Diff