linux/arch/ia64/sn/kernel/xpc_partition.c
Dean Nelson a607c38971 [IA64-SGI] get XPC to cleanly disengage from remote memory references
When XPC is being shutdown (i.e., rmmod, reboot) it doesn't ensure that
other partitions with whom it was connected have completely disengaged
from any attempt at cross-partition memory references. This can lead to
MCAs in any of these other partitions when the partition is reset.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
2005-09-06 16:15:38 -07:00

1191 lines
30 KiB
C

/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved.
*/
/*
* Cross Partition Communication (XPC) partition support.
*
* This is the part of XPC that detects the presence/absence of
* other partitions. It provides a heartbeat and monitors the
* heartbeats of other partitions.
*
*/
#include <linux/kernel.h>
#include <linux/stddef.h>
#include <linux/jiffies.h>
#include <linux/sysctl.h>
#include <linux/cache.h>
#include <linux/mmzone.h>
#include <linux/nodemask.h>
#include <asm/uncached.h>
#include <asm/sn/bte.h>
#include <asm/sn/intr.h>
#include <asm/sn/sn_sal.h>
#include <asm/sn/nodepda.h>
#include <asm/sn/addrs.h>
#include "xpc.h"
/*
 * XPC is exiting flag. The loops in xpc_check_remote_hb(),
 * xpc_identify_act_IRQ_sender() and xpc_discovery() poll it and stop early
 * once it is non-zero.
 */
int xpc_exiting;
/*
 * SH_IPI_ACCESS shub register value(s) on startup. Saved by
 * xpc_allow_IPI_ops() before it opens the registers up, and written back
 * by xpc_restrict_IPI_ops().
 */
static u64 xpc_sh1_IPI_access;
static u64 xpc_sh2_IPI_access0;
static u64 xpc_sh2_IPI_access1;
static u64 xpc_sh2_IPI_access2;
static u64 xpc_sh2_IPI_access3;
/*
 * Original protection values for each node, indexed by compact node id.
 * Only filled in (and later restored) on Shub 1.1 systems.
 */
u64 xpc_prot_vec[MAX_COMPACT_NODES];
/* this partition's reserved page */
struct xpc_rsvd_page *xpc_rsvd_page;
/* this partition's XPC variables (within the reserved page) */
struct xpc_vars *xpc_vars;
/* this partition's per-partition XPC variables, placed right after xpc_vars */
struct xpc_vars_part *xpc_vars_part;
/*
 * For performance reasons, each entry of xpc_partitions[] is cacheline
 * aligned. And xpc_partitions[] is padded with an additional entry at the
 * end so that the last legitimate entry doesn't share its cacheline with
 * another variable.
 */
struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];
/*
 * Generic buffer used to store a local copy of a remote partition's
 * reserved page or XPC variables. It is shared by xpc_rsvd_page_init(),
 * xpc_check_remote_hb() and xpc_identify_act_IRQ_req().
 *
 * xpc_discovery() does NOT use this buffer: it runs only once, as a
 * separate thread that is very likely going to be processing in parallel
 * with receiving interrupts, so it allocates a private buffer instead.
 */
char ____cacheline_aligned
xpc_remote_copy_buffer[XPC_RSVD_PAGE_ALIGNED_SIZE];
/*
 * Look up the physical address of the reserved page belonging to the
 * partition that owns the given nasid.
 *
 * nasid    - nasid used to seed the SAL lookup
 * buf      - scratch buffer (virtual address) SAL may ask us to fill
 * buf_size - size of that scratch buffer in bytes
 *
 * SAL is called repeatedly for as long as it answers SALRET_MORE_PASSES;
 * between passes the data SAL pointed us at is BTE-copied into buf.
 *
 * Returns the reserved page's physical address, or 0 on any error.
 */
static u64
xpc_get_rsvd_page_pa(int nasid, u64 buf, u64 buf_size)
{
	u64 sal_cookie = 0;
	u64 sal_len = 0;
	u64 pa = nasid;		/* SAL expects the nasid as the seed value */
	s64 sal_status;
	bte_result_t copy_res;

	for (;;) {
		sal_status = sn_partition_reserved_page_pa(buf, &sal_cookie,
							   &pa, &sal_len);

		dev_dbg(xpc_part, "SAL returned with status=%li, cookie="
			"0x%016lx, address=0x%016lx, len=0x%016lx\n",
			sal_status, sal_cookie, pa, sal_len);

		/* anything other than "call me again" ends the conversation */
		if (sal_status != SALRET_MORE_PASSES)
			break;

		/* SAL wants more data than our scratch buffer can hold */
		if (sal_len > buf_size) {
			dev_err(xpc_part, "len (=0x%016lx) > buf_size\n",
				sal_len);
			sal_status = SALRET_ERROR;
			break;
		}

		copy_res = xp_bte_copy(pa, ia64_tpa(buf), buf_size,
				       (BTE_NOTIFY | BTE_WACQUIRE), NULL);
		if (copy_res != BTE_SUCCESS) {
			dev_dbg(xpc_part, "xp_bte_copy failed %i\n", copy_res);
			sal_status = SALRET_ERROR;
			break;
		}
	}

	/* only a clean SALRET_OK leaves a usable address behind */
	pa = (sal_status == SALRET_OK) ? pa : 0;

	dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", pa);
	return pa;
}
/*
 * Fill the partition reserved page with the information needed by
 * other partitions to discover we are alive and establish initial
 * communications.
 *
 * Returns a pointer to the local reserved page, or NULL if the page
 * can't be located, belongs to a different partid, or the AMO page
 * can't be allocated/validated/unprotected.
 */
struct xpc_rsvd_page *
xpc_rsvd_page_init(void)
{
	struct xpc_rsvd_page *rp;
	AMO_t *amos_page;
	u64 rp_pa, next_cl, nasid_array = 0;
	unsigned long uncached_page;
	int i, ret;

	/* get the local reserved page's address */
	rp_pa = xpc_get_rsvd_page_pa(cnodeid_to_nasid(0),
					(u64) xpc_remote_copy_buffer,
					XPC_RSVD_PAGE_ALIGNED_SIZE);
	if (rp_pa == 0) {
		dev_err(xpc_part, "SAL failed to locate the reserved page\n");
		return NULL;
	}
	rp = (struct xpc_rsvd_page *) __va(rp_pa);

	if (rp->partid != sn_partition_id) {
		dev_err(xpc_part, "the reserved page's partid of %d should be "
			"%d\n", rp->partid, sn_partition_id);
		return NULL;
	}

	rp->version = XPC_RP_VERSION;

	/*
	 * Place the XPC variables on the cache line following the
	 * reserved page structure.
	 */
	next_cl = (u64) rp + XPC_RSVD_PAGE_ALIGNED_SIZE;
	xpc_vars = (struct xpc_vars *) next_cl;

	/*
	 * Before clearing xpc_vars, see if a page of AMOs had been previously
	 * allocated. If not we'll need to allocate one and set permissions
	 * so that cross-partition AMOs are allowed.
	 *
	 * The allocated AMO page needs MCA reporting to remain disabled after
	 * XPC has unloaded. To make this work, we keep a copy of the pointer
	 * to this page (i.e., amos_page) in the struct xpc_vars structure,
	 * which is pointed to by the reserved page, and re-use that saved copy
	 * on subsequent loads of XPC. This AMO page is never freed, and its
	 * memory protections are never restricted.
	 */
	if ((amos_page = xpc_vars->amos_page) == NULL) {
		/*
		 * Test the allocator's raw return value. TO_AMO() rewrites
		 * the address into AMO space, so checking the converted
		 * pointer against NULL would never catch a failed allocation.
		 */
		uncached_page = uncached_alloc_page(0);
		if (uncached_page == 0) {
			dev_err(xpc_part, "can't allocate page of AMOs\n");
			return NULL;
		}
		amos_page = (AMO_t *) TO_AMO(uncached_page);

		/*
		 * Open up AMO-R/W to cpu. This is done for Shub 1.1 systems
		 * when xpc_allow_IPI_ops() is called via xpc_hb_init().
		 */
		if (!enable_shub_wars_1_1()) {
			ret = sn_change_memprotect(ia64_tpa((u64) amos_page),
					PAGE_SIZE, SN_MEMPROT_ACCESS_CLASS_1,
					&nasid_array);
			if (ret != 0) {
				dev_err(xpc_part, "can't change memory "
					"protections\n");
				uncached_free_page(__IA64_UNCACHED_OFFSET |
						TO_PHYS((u64) amos_page));
				return NULL;
			}
		}
	} else if (!IS_AMO_ADDRESS((u64) amos_page)) {
		/*
		 * EFI's XPBOOT can also set amos_page in the reserved page,
		 * but it happens to leave it as an uncached physical address
		 * and we need it to be an uncached virtual, so we'll have to
		 * convert it.
		 */
		if (!IS_AMO_PHYS_ADDRESS((u64) amos_page)) {
			dev_err(xpc_part, "previously used amos_page address "
				"is bad = 0x%p\n", (void *) amos_page);
			return NULL;
		}
		amos_page = (AMO_t *) TO_AMO((u64) amos_page);
	}

	/* amos_page was captured above, so xpc_vars can now be wiped */
	memset(xpc_vars, 0, sizeof(struct xpc_vars));

	/*
	 * Place the XPC per partition specific variables on the cache line
	 * following the XPC variables structure.
	 */
	next_cl += XPC_VARS_ALIGNED_SIZE;
	memset((u64 *) next_cl, 0, sizeof(struct xpc_vars_part) *
							XP_MAX_PARTITIONS);
	xpc_vars_part = (struct xpc_vars_part *) next_cl;
	xpc_vars->vars_part_pa = __pa(next_cl);

	xpc_vars->version = XPC_V_VERSION;
	xpc_vars->act_nasid = cpuid_to_nasid(0);
	xpc_vars->act_phys_cpuid = cpu_physical_id(0);
	xpc_vars->amos_page = amos_page;  /* save for next load of XPC */

	/* initialize the activate IRQ related AMO variables */
	for (i = 0; i < XP_NASID_MASK_WORDS; i++) {
		(void) xpc_IPI_init(XPC_ACTIVATE_IRQ_AMOS + i);
	}

	/* initialize the engaged remote partitions related AMO variables */
	(void) xpc_IPI_init(XPC_ENGAGED_PARTITIONS_AMO);
	(void) xpc_IPI_init(XPC_DISENGAGE_REQUEST_AMO);

	/* export AMO page's physical address to other partitions */
	xpc_vars->amos_page_pa = ia64_tpa((u64) xpc_vars->amos_page);

	/* timestamp of when reserved page was initialized */
	rp->stamp = CURRENT_TIME;

	/*
	 * This signifies to the remote partition that our reserved
	 * page is initialized. It is deliberately the last store.
	 */
	rp->vars_pa = __pa(xpc_vars);

	return rp;
}
/*
 * Open up the hub protection registers so that IPI operations (and, on
 * Shub 1.1 systems, AMO operations) are allowed on every online node.
 *
 * The startup values are read from the local hub and stashed in file-scope
 * variables first, so that xpc_restrict_IPI_ops() can put them back.
 */
void
xpc_allow_IPI_ops(void)
{
	int cnode;
	int nasid;

	/* TODO: Change SH_IPI_ACCESS code to use SAL call once it is available. */

	if (!is_shub2()) {
		/* Shub 1: a single IPI access register */
		xpc_sh1_IPI_access =
			(u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH1_IPI_ACCESS));

		for_each_online_node(cnode) {
			nasid = cnodeid_to_nasid(cnode);

			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
			      -1UL);

			/*
			 * Since the BIST collides with memory operations on
			 * SHUB 1.1 sn_change_memprotect() cannot be used, so
			 * the directory protection vectors are opened by hand.
			 */
			if (!enable_shub_wars_1_1())
				continue;

			/* remember the old vector, then open up everything */
			xpc_prot_vec[cnode] = (u64) HUB_L((u64 *)
				GLOBAL_MMR_ADDR(nasid,
						SH1_MD_DQLP_MMR_DIR_PRIVEC0));
			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
						SH1_MD_DQLP_MMR_DIR_PRIVEC0),
			      -1UL);
			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
						SH1_MD_DQRP_MMR_DIR_PRIVEC0),
			      -1UL);
		}
		return;
	}

	/* Shub 2: four IPI access registers */
	xpc_sh2_IPI_access0 =
		(u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
	xpc_sh2_IPI_access1 =
		(u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS1));
	xpc_sh2_IPI_access2 =
		(u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS2));
	xpc_sh2_IPI_access3 =
		(u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS3));

	for_each_online_node(cnode) {
		nasid = cnodeid_to_nasid(cnode);

		HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0), -1UL);
		HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1), -1UL);
		HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2), -1UL);
		HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3), -1UL);
	}
}
/*
 * Put the hub protection registers of every online node back to the values
 * saved by xpc_allow_IPI_ops(), disallowing IPI operations (and AMO
 * operations on Shub 1.1 systems) again.
 */
void
xpc_restrict_IPI_ops(void)
{
	int cnode;
	int nasid;

	/* TODO: Change SH_IPI_ACCESS code to use SAL call once it is available. */

	if (!is_shub2()) {
		for_each_online_node(cnode) {
			nasid = cnodeid_to_nasid(cnode);

			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
			      xpc_sh1_IPI_access);

			if (!enable_shub_wars_1_1())
				continue;

			/* both directory vectors get the one saved value */
			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
						SH1_MD_DQLP_MMR_DIR_PRIVEC0),
			      xpc_prot_vec[cnode]);
			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
						SH1_MD_DQRP_MMR_DIR_PRIVEC0),
			      xpc_prot_vec[cnode]);
		}
		return;
	}

	for_each_online_node(cnode) {
		nasid = cnodeid_to_nasid(cnode);

		HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
		      xpc_sh2_IPI_access0);
		HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
		      xpc_sh2_IPI_access1);
		HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
		      xpc_sh2_IPI_access2);
		HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
		      xpc_sh2_IPI_access3);
	}
}
/*
* At periodic intervals, scan through all active partitions and ensure
* their heartbeat is still active. If not, the partition is deactivated.
*/
void
xpc_check_remote_hb(void)
{
struct xpc_vars *remote_vars;
struct xpc_partition *part;
partid_t partid;
bte_result_t bres;
remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer;
for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
if (xpc_exiting) {
break;
}
if (partid == sn_partition_id) {
continue;
}
part = &xpc_partitions[partid];
if (part->act_state == XPC_P_INACTIVE ||
part->act_state == XPC_P_DEACTIVATING) {
continue;
}
/* pull the remote_hb cache line */
bres = xp_bte_copy(part->remote_vars_pa,
ia64_tpa((u64) remote_vars),
XPC_VARS_ALIGNED_SIZE,
(BTE_NOTIFY | BTE_WACQUIRE), NULL);
if (bres != BTE_SUCCESS) {
XPC_DEACTIVATE_PARTITION(part,
xpc_map_bte_errors(bres));
continue;
}
dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat"
" = %ld, kdb_status = %ld, HB_mask = 0x%lx\n", partid,
remote_vars->heartbeat, part->last_heartbeat,
remote_vars->kdb_status,
remote_vars->heartbeating_to_mask);
if (((remote_vars->heartbeat == part->last_heartbeat) &&
(remote_vars->kdb_status == 0)) ||
!xpc_hb_allowed(sn_partition_id, remote_vars)) {
XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat);
continue;
}
part->last_heartbeat = remote_vars->heartbeat;
}
}
/*
 * Fetch a copy of the reserved page of the partition owning the given nasid.
 *
 * remote_rp must point to a buffer that is cacheline aligned for BTE copies
 * and is assumed to be XPC_RSVD_PAGE_ALIGNED_SIZE bytes long. On return
 * *remote_rp_pa holds the page's physical address (0 if it wasn't found).
 *
 * If discovered_nasids is non-NULL, the remote partition's nasid mask is
 * OR'ed into it (this happens before the partid/version checks below).
 *
 * Returns xpcSuccess, or a reason code describing why the page is unusable.
 */
static enum xpc_retval
xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
		  struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa)
{
	int copy_res;
	int word;

	/* get the reserved page's physical address */
	*remote_rp_pa = xpc_get_rsvd_page_pa(nasid, (u64) remote_rp,
					     XPC_RSVD_PAGE_ALIGNED_SIZE);
	if (*remote_rp_pa == 0)
		return xpcNoRsvdPageAddr;

	/* pull over the reserved page structure */
	copy_res = xp_bte_copy(*remote_rp_pa, ia64_tpa((u64) remote_rp),
			       XPC_RSVD_PAGE_ALIGNED_SIZE,
			       (BTE_NOTIFY | BTE_WACQUIRE), NULL);
	if (copy_res != BTE_SUCCESS)
		return xpc_map_bte_errors(copy_res);

	/* record every nasid that belongs to this remote partition */
	if (discovered_nasids != NULL) {
		for (word = 0; word < XP_NASID_MASK_WORDS; word++)
			discovered_nasids[word] |= remote_rp->part_nasids[word];
	}

	/* the partid must be valid ... */
	if (remote_rp->partid <= 0 || remote_rp->partid >= XP_MAX_PARTITIONS)
		return xpcInvalidPartid;

	/* ... and must belong to another partition */
	if (remote_rp->partid == sn_partition_id)
		return xpcLocalPartid;

	/* only the major version number has to match ours */
	if (XPC_VERSION_MAJOR(remote_rp->version) !=
	    XPC_VERSION_MAJOR(XPC_RP_VERSION))
		return xpcBadVersion;

	return xpcSuccess;
}
/*
 * Fetch a copy of a remote partition's XPC variables.
 *
 * remote_vars must point to a buffer that is cacheline aligned for BTE
 * copies and is assumed to be XPC_VARS_ALIGNED_SIZE bytes long.
 *
 * Returns xpcVarsNotSet if remote_vars_pa is 0, the mapped BTE error if the
 * copy fails, xpcBadVersion on a major version mismatch, else xpcSuccess.
 */
static enum xpc_retval
xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
{
	int copy_res;

	/* a zero address means the remote side hasn't published them yet */
	if (remote_vars_pa == 0)
		return xpcVarsNotSet;

	/* pull over the cross partition variables */
	copy_res = xp_bte_copy(remote_vars_pa, ia64_tpa((u64) remote_vars),
			       XPC_VARS_ALIGNED_SIZE,
			       (BTE_NOTIFY | BTE_WACQUIRE), NULL);
	if (copy_res != BTE_SUCCESS)
		return xpc_map_bte_errors(copy_res);

	if (XPC_VERSION_MAJOR(remote_vars->version) ==
	    XPC_VERSION_MAJOR(XPC_V_VERSION))
		return xpcSuccess;

	return xpcBadVersion;
}
/*
 * Update the remote partition's info.
 *
 * Copies the reserved page version, stamp and address, plus the fields of
 * interest from the remote XPC variables, into the local partition
 * structure. Each value is echoed through dev_dbg() as it is stored.
 */
static void
xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version,
				struct timespec *remote_rp_stamp, u64 remote_rp_pa,
				u64 remote_vars_pa, struct xpc_vars *remote_vars)
{
	part->remote_rp_version = remote_rp_version;
	/*
	 * The version is a u8, which is promoted to int when passed through
	 * varargs, so it must be printed with %x rather than a long format.
	 */
	dev_dbg(xpc_part, " remote_rp_version = 0x%x\n",
		remote_rp_version);

	part->remote_rp_stamp = *remote_rp_stamp;
	dev_dbg(xpc_part, " remote_rp_stamp (tv_sec = 0x%lx tv_nsec = 0x%lx\n",
		part->remote_rp_stamp.tv_sec, part->remote_rp_stamp.tv_nsec);

	part->remote_rp_pa = remote_rp_pa;
	dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);

	part->remote_vars_pa = remote_vars_pa;
	dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n",
		part->remote_vars_pa);

	part->last_heartbeat = remote_vars->heartbeat;
	dev_dbg(xpc_part, " last_heartbeat = 0x%016lx\n",
		part->last_heartbeat);

	part->remote_vars_part_pa = remote_vars->vars_part_pa;
	dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n",
		part->remote_vars_part_pa);

	part->remote_act_nasid = remote_vars->act_nasid;
	dev_dbg(xpc_part, " remote_act_nasid = 0x%x\n",
		part->remote_act_nasid);

	part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
	dev_dbg(xpc_part, " remote_act_phys_cpuid = 0x%x\n",
		part->remote_act_phys_cpuid);

	part->remote_amos_page_pa = remote_vars->amos_page_pa;
	dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n",
		part->remote_amos_page_pa);

	part->remote_vars_version = remote_vars->version;
	dev_dbg(xpc_part, " remote_vars_version = 0x%x\n",
		part->remote_vars_version);
}
/*
 * Prior code has determined the nasid which generated an IPI. Inspect
 * that nasid to determine if its partition needs to be activated or
 * deactivated.
 *
 * A partition is considered "awaiting activation" if our partition
 * flags indicate it is not active and it has a heartbeat. A
 * partition is considered "awaiting deactivation" if our partition
 * flags indicate it is active but it has no heartbeat or it is not
 * sending its heartbeat to us.
 *
 * To determine the heartbeat, the remote nasid must have a properly
 * initialized reserved page.
 *
 * Both the reserved page and the XPC variables are pulled into
 * xpc_remote_copy_buffer, one after the other.
 */
static void
xpc_identify_act_IRQ_req(int nasid)
{
struct xpc_rsvd_page *remote_rp;
struct xpc_vars *remote_vars;
u64 remote_rp_pa;
u64 remote_vars_pa;
int remote_rp_version;
int reactivate = 0;
int stamp_diff;
struct timespec remote_rp_stamp = { 0, 0 };
partid_t partid;
struct xpc_partition *part;
enum xpc_retval ret;
/* pull over the reserved page structure */
remote_rp = (struct xpc_rsvd_page *) xpc_remote_copy_buffer;
ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
if (ret != xpcSuccess) {
dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
"which sent interrupt, reason=%d\n", nasid, ret);
return;
}
/*
 * remote_vars (below) points at the same buffer as remote_rp, so every
 * reserved page field that is needed later is copied into a local
 * before the buffer is overwritten with the XPC variables.
 */
remote_vars_pa = remote_rp->vars_pa;
remote_rp_version = remote_rp->version;
/* the stamp stays { 0, 0 } if the remote version doesn't supply one */
if (XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
remote_rp_stamp = remote_rp->stamp;
}
/* partid was range checked (and is not ours) by xpc_get_remote_rp() */
partid = remote_rp->partid;
part = &xpc_partitions[partid];
/* pull over the cross partition variables */
remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer;
ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
if (ret != xpcSuccess) {
dev_warn(xpc_part, "unable to get XPC variables from nasid %d, "
"which sent interrupt, reason=%d\n", nasid, ret);
XPC_DEACTIVATE_PARTITION(part, ret);
return;
}
part->act_IRQ_rcvd++;
dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = "
"%ld:0x%lx\n", (int) nasid, (int) partid, part->act_IRQ_rcvd,
remote_vars->heartbeat, remote_vars->heartbeating_to_mask);
/*
 * Case 1: the partition is disengaged and inactive on our side. Record
 * its current info and activate it, unless it is still waiting on us
 * to disengage.
 */
if (xpc_partition_disengaged(part) &&
part->act_state == XPC_P_INACTIVE) {
xpc_update_partition_info(part, remote_rp_version,
&remote_rp_stamp, remote_rp_pa,
remote_vars_pa, remote_vars);
if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
if (xpc_partition_disengage_requested(1UL << partid)) {
/*
* Other side is waiting on us to disengage,
* even though we already have.
*/
return;
}
} else {
/* other side doesn't support disengage requests */
xpc_clear_partition_disengage_request(1UL << partid);
}
xpc_activate_partition(part);
return;
}
/*
 * All remaining cases compare the freshly pulled versions against the
 * ones recorded earlier for this partition, so those must be set.
 */
DBUG_ON(part->remote_rp_version == 0);
DBUG_ON(part->remote_vars_version == 0);
/*
 * Case 2: the previously recorded reserved page version has no stamp
 * support.
 */
if (!XPC_SUPPORTS_RP_STAMP(part->remote_rp_version)) {
DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(part->
remote_vars_version));
if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
version));
/* see if the other side rebooted */
if (part->remote_amos_page_pa ==
remote_vars->amos_page_pa &&
xpc_hb_allowed(sn_partition_id,
remote_vars)) {
/* doesn't look that way, so ignore the IPI */
return;
}
}
/*
* Other side rebooted and previous XPC didn't support the
* disengage request, so we don't need to do anything special.
*/
xpc_update_partition_info(part, remote_rp_version,
&remote_rp_stamp, remote_rp_pa,
remote_vars_pa, remote_vars);
part->reactivate_nasid = nasid;
XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
return;
}
/*
 * Case 3: the previously recorded version has stamp support (and is
 * asserted to support disengage requests as well).
 */
DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version));
if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
/*
* Other side rebooted and previous XPC did support the
* disengage request, but the new one doesn't.
*/
xpc_clear_partition_engaged(1UL << partid);
xpc_clear_partition_disengage_request(1UL << partid);
xpc_update_partition_info(part, remote_rp_version,
&remote_rp_stamp, remote_rp_pa,
remote_vars_pa, remote_vars);
reactivate = 1;
} else {
DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
/* a changed stamp means the reserved page was re-initialized */
stamp_diff = xpc_compare_stamps(&part->remote_rp_stamp,
&remote_rp_stamp);
if (stamp_diff != 0) {
DBUG_ON(stamp_diff >= 0);
/*
* Other side rebooted and the previous XPC did support
* the disengage request, as does the new one.
*/
DBUG_ON(xpc_partition_engaged(1UL << partid));
DBUG_ON(xpc_partition_disengage_requested(1UL <<
partid));
xpc_update_partition_info(part, remote_rp_version,
&remote_rp_stamp, remote_rp_pa,
remote_vars_pa, remote_vars);
reactivate = 1;
}
}
if (!xpc_partition_disengaged(part)) {
/* still waiting on other side to disengage from us */
return;
}
/*
 * Either the other side rebooted (deactivate with the reactivating
 * reason, remembering which nasid to reactivate through), or it is
 * asking us to disengage because it is going down.
 */
if (reactivate) {
part->reactivate_nasid = nasid;
XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
} else if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version) &&
xpc_partition_disengage_requested(1UL << partid)) {
XPC_DEACTIVATE_PARTITION(part, xpcOtherGoingDown);
}
}
/*
 * Scan the activation AMO variables and handle every bit that is set.
 * Each set bit identifies a nasid that sent us a partition activation or
 * deactivation request; each one is handed to xpc_identify_act_IRQ_req().
 *
 * Returns the number of IRQs detected.
 */
int
xpc_identify_act_IRQ_sender(void)
{
	struct xpc_rsvd_page *local_rp = (struct xpc_rsvd_page *) xpc_rsvd_page;
	AMO_t *amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS;
	u64 pending;		/* senders flagged in the current word */
	u64 sender;		/* remote nasid */
	int w, b;
	int n_found = 0;

	/* walk the AMO words, giving up as soon as XPC starts exiting */
	for (w = 0; w < XP_NASID_MASK_WORDS && !xpc_exiting; w++) {

		pending = xpc_IPI_receive(&amos[w]);
		if (pending == 0) {
			/* no IRQs from nasids in this variable */
			continue;
		}

		dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", w, pending);

		/*
		 * If this nasid has been added to the machine since
		 * our partition was reset, this will retain the
		 * remote nasid in our reserved pages machine mask.
		 * This is used in the event of module reload.
		 */
		local_rp->mach_nasids[w] |= pending;

		/* locate the nasid(s) which sent interrupts */
		for (b = 0; b < (8 * sizeof(u64)); b++) {
			if (!(pending & (1UL << b)))
				continue;

			n_found++;
			sender = XPC_NASID_FROM_W_B(w, b);
			dev_dbg(xpc_part, "interrupt from nasid %ld\n",
				sender);
			xpc_identify_act_IRQ_req(sender);
		}
	}

	return n_found;
}
/*
 * See if the other side has responded to a partition disengage request
 * from us.
 *
 * Returns non-zero if the partition is (or is now assumed to be)
 * disengaged from us, 0 if we are still waiting on it.
 *
 * If a disengage request was outstanding and is now complete -- either
 * because the other side responded or because the timelimit expired --
 * the timeout is cleared, the timer is cancelled (unless we are running in
 * interrupt context), the channel manager is woken if the partition isn't
 * inactive yet, and our disengage request is cancelled when the remote
 * side supports disengage requests.
 */
int
xpc_partition_disengaged(struct xpc_partition *part)
{
	partid_t partid = XPC_PARTID(part);
	int disengaged;

	disengaged = (xpc_partition_engaged(1UL << partid) == 0);
	if (part->disengage_request_timeout) {
		if (!disengaged) {
			/*
			 * time_before() compares correctly when jiffies wraps
			 * around; a plain '<' does not.
			 */
			if (time_before(jiffies,
					part->disengage_request_timeout)) {
				/* timelimit hasn't been reached yet */
				return 0;
			}

			/*
			 * Other side hasn't responded to our disengage
			 * request in a timely fashion, so assume it's dead.
			 */
			xpc_clear_partition_engaged(1UL << partid);
			disengaged = 1;
		}
		part->disengage_request_timeout = 0;

		/* cancel the timer function, provided it's not us */
		if (!in_interrupt()) {
			del_singleshot_timer_sync(&part->
						  disengage_request_timer);
		}

		DBUG_ON(part->act_state != XPC_P_DEACTIVATING &&
			part->act_state != XPC_P_INACTIVE);
		if (part->act_state != XPC_P_INACTIVE) {
			xpc_wakeup_channel_mgr(part);
		}

		if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
			xpc_cancel_partition_disengage_request(part);
		}
	}
	return disengaged;
}
/*
* Mark specified partition as active.
*/
enum xpc_retval
xpc_mark_partition_active(struct xpc_partition *part)
{
unsigned long irq_flags;
enum xpc_retval ret;
dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));
spin_lock_irqsave(&part->act_lock, irq_flags);
if (part->act_state == XPC_P_ACTIVATING) {
part->act_state = XPC_P_ACTIVE;
ret = xpcSuccess;
} else {
DBUG_ON(part->reason == xpcSuccess);
ret = part->reason;
}
spin_unlock_irqrestore(&part->act_lock, irq_flags);
return ret;
}
/*
 * Notify XPC that the partition is down.
 *
 * line   - source line of the caller, recorded together with the reason
 * part   - partition to bring down
 * reason - why the partition is being deactivated
 *
 * If the partition is already inactive only the reason is recorded (and,
 * for xpcReactivating, a reactivate IPI is sent to ourselves). If it is
 * already deactivating, the reason is only replaced when the old reason was
 * xpcUnloading and the new one isn't, or when the new one is
 * xpcReactivating. Otherwise the partition is moved to DEACTIVATING, a
 * time-limited disengage request is issued when the remote side supports
 * it, and the going-down handling is started.
 */
void
xpc_deactivate_partition(const int line, struct xpc_partition *part,
				enum xpc_retval reason)
{
	unsigned long irq_flags;

	spin_lock_irqsave(&part->act_lock, irq_flags);

	if (part->act_state == XPC_P_INACTIVE) {
		XPC_SET_REASON(part, reason, line);
		spin_unlock_irqrestore(&part->act_lock, irq_flags);
		if (reason == xpcReactivating) {
			/* we interrupt ourselves to reactivate partition */
			xpc_IPI_send_reactivate(part);
		}
		return;
	}
	if (part->act_state == XPC_P_DEACTIVATING) {
		if ((part->reason == xpcUnloading && reason != xpcUnloading) ||
					reason == xpcReactivating) {
			XPC_SET_REASON(part, reason, line);
		}
		spin_unlock_irqrestore(&part->act_lock, irq_flags);
		return;
	}

	part->act_state = XPC_P_DEACTIVATING;
	XPC_SET_REASON(part, reason, line);

	spin_unlock_irqrestore(&part->act_lock, irq_flags);

	if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
		xpc_request_partition_disengage(part);
		xpc_IPI_send_disengage(part);

		/* set a timelimit on the disengage request */
		part->disengage_request_timeout = jiffies +
					(XPC_DISENGAGE_REQUEST_TIMELIMIT * HZ);
		part->disengage_request_timer.expires =
					part->disengage_request_timeout;
		add_timer(&part->disengage_request_timer);
	}

	/*
	 * No local 'partid' exists in this function, so derive it from the
	 * partition pointer; otherwise this fails to build with debugging
	 * output enabled.
	 */
	dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
		XPC_PARTID(part), reason);

	xpc_partition_going_down(part, reason);
}
/*
 * Put the given partition into the INACTIVE state and forget the physical
 * address of its reserved page.
 */
void
xpc_mark_partition_inactive(struct xpc_partition *part)
{
	unsigned long flags;

	dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
		XPC_PARTID(part));

	/* the state change itself happens under act_lock */
	spin_lock_irqsave(&part->act_lock, flags);
	part->act_state = XPC_P_INACTIVE;
	spin_unlock_irqrestore(&part->act_lock, flags);

	/* a zero remote_rp_pa makes xpc_initiate_partid_to_nasids() bail out */
	part->remote_rp_pa = 0;
}
/*
 * SAL has provided a partition and machine mask. The partition mask
 * contains a bit for each even nasid in our partition. The machine
 * mask contains a bit for each even nasid in the entire machine.
 *
 * Using those two bit arrays, we can determine which nasids are
 * known in the machine. Each should also have a reserved page
 * initialized if they are available for partitioning.
 *
 * Every even nasid of every region is examined; for each remote partition
 * found inactive, an activate interrupt is sent. The function returns
 * silently if its working buffers can't be allocated.
 */
void
xpc_discovery(void)
{
void *remote_rp_base;
struct xpc_rsvd_page *remote_rp;
struct xpc_vars *remote_vars;
u64 remote_rp_pa;
u64 remote_vars_pa;
int region;
int max_regions;
int nasid;
struct xpc_rsvd_page *rp;
partid_t partid;
struct xpc_partition *part;
u64 *discovered_nasids;
enum xpc_retval ret;
/* private copy buffer; remote_rp_base is the pointer that gets freed */
remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RSVD_PAGE_ALIGNED_SIZE,
GFP_KERNEL, &remote_rp_base);
if (remote_rp == NULL) {
return;
}
/* the XPC variables are later pulled into the very same buffer */
remote_vars = (struct xpc_vars *) remote_rp;
/* mask of nasids belonging to partitions we have already looked at */
discovered_nasids = kmalloc(sizeof(u64) * XP_NASID_MASK_WORDS,
GFP_KERNEL);
if (discovered_nasids == NULL) {
kfree(remote_rp_base);
return;
}
memset(discovered_nasids, 0, sizeof(u64) * XP_NASID_MASK_WORDS);
rp = (struct xpc_rsvd_page *) xpc_rsvd_page;
/*
* The term 'region' in this context refers to the minimum number of
* nodes that can comprise an access protection grouping. The access
* protection is in regards to memory, IOI and IPI.
*/
//>>> move the next two #defines into either include/asm-ia64/sn/arch.h or
//>>> include/asm-ia64/sn/addrs.h
#define SH1_MAX_REGIONS 64
#define SH2_MAX_REGIONS 256
max_regions = is_shub2() ? SH2_MAX_REGIONS : SH1_MAX_REGIONS;
for (region = 0; region < max_regions; region++) {
if ((volatile int) xpc_exiting) {
break;
}
dev_dbg(xpc_part, "searching region %d\n", region);
/* step through the even nasids that make up this region */
for (nasid = (region * sn_region_size * 2);
nasid < ((region + 1) * sn_region_size * 2);
nasid += 2) {
if ((volatile int) xpc_exiting) {
break;
}
dev_dbg(xpc_part, "checking nasid %d\n", nasid);
/* 'break' here abandons the remainder of the current region */
if (XPC_NASID_IN_ARRAY(nasid, rp->part_nasids)) {
dev_dbg(xpc_part, "PROM indicates Nasid %d is "
"part of the local partition; skipping "
"region\n", nasid);
break;
}
if (!(XPC_NASID_IN_ARRAY(nasid, rp->mach_nasids))) {
dev_dbg(xpc_part, "PROM indicates Nasid %d was "
"not on Numa-Link network at reset\n",
nasid);
continue;
}
if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) {
dev_dbg(xpc_part, "Nasid %d is part of a "
"partition which was previously "
"discovered\n", nasid);
continue;
}
/* pull over the reserved page structure */
ret = xpc_get_remote_rp(nasid, discovered_nasids,
remote_rp, &remote_rp_pa);
if (ret != xpcSuccess) {
dev_dbg(xpc_part, "unable to get reserved page "
"from nasid %d, reason=%d\n", nasid,
ret);
/* the nasid turned out to be ours: skip the rest of the region */
if (ret == xpcLocalPartid) {
break;
}
continue;
}
/*
 * Save what is needed from the reserved page before the shared
 * buffer is overwritten with the XPC variables.
 */
remote_vars_pa = remote_rp->vars_pa;
partid = remote_rp->partid;
part = &xpc_partitions[partid];
/* pull over the cross partition variables */
ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
if (ret != xpcSuccess) {
dev_dbg(xpc_part, "unable to get XPC variables "
"from nasid %d, reason=%d\n", nasid,
ret);
XPC_DEACTIVATE_PARTITION(part, ret);
continue;
}
if (part->act_state != XPC_P_INACTIVE) {
dev_dbg(xpc_part, "partition %d on nasid %d is "
"already activating\n", partid, nasid);
break;
}
/*
* Register the remote partition's AMOs with SAL so it
* can handle and cleanup errors within that address
* range should the remote partition go down. We don't
* unregister this range because it is difficult to
* tell when outstanding writes to the remote partition
* are finished and thus when it is thus safe to
* unregister. This should not result in wasted space
* in the SAL xp_addr_region table because we should
* get the same page for remote_act_amos_pa after
* module reloads and system reboots.
*/
if (sn_register_xp_addr_region(
remote_vars->amos_page_pa,
PAGE_SIZE, 1) < 0) {
dev_dbg(xpc_part, "partition %d failed to "
"register xp_addr region 0x%016lx\n",
partid, remote_vars->amos_page_pa);
XPC_SET_REASON(part, xpcPhysAddrRegFailed,
__LINE__);
break;
}
/*
* The remote nasid is valid and available.
* Send an interrupt to that nasid to notify
* it that we are ready to begin activation.
*/
dev_dbg(xpc_part, "sending an interrupt to AMO 0x%lx, "
"nasid %d, phys_cpuid 0x%x\n",
remote_vars->amos_page_pa,
remote_vars->act_nasid,
remote_vars->act_phys_cpuid);
/*
 * When the remote side supports disengage requests, start from a
 * clean slate: mark it disengaged and cancel any request of ours.
 */
if (XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
version)) {
part->remote_amos_page_pa =
remote_vars->amos_page_pa;
xpc_mark_partition_disengaged(part);
xpc_cancel_partition_disengage_request(part);
}
xpc_IPI_send_activate(remote_vars);
}
}
kfree(discovered_nasids);
kfree(remote_rp_base);
}
/*
 * Given a partid, get the nasids owned by that partition from the
 * remote partition's reserved page.
 *
 * partid     - partition whose nasid mask is wanted
 * nasid_mask - destination buffer; L1_CACHE_ALIGN(XP_NASID_MASK_BYTES)
 *              bytes are BTE-copied into it
 *
 * Returns xpcPartitionDown if no reserved page address is recorded for the
 * partition, otherwise the mapped result of the BTE copy.
 */
enum xpc_retval
xpc_initiate_partid_to_nasids(partid_t partid, void *nasid_mask)
{
	struct xpc_partition *part;
	u64 part_nasid_pa;
	int bte_res;

	part = &xpc_partitions[partid];
	if (part->remote_rp_pa == 0) {
		return xpcPartitionDown;
	}

	/* physical address of the part_nasids array inside the remote page */
	part_nasid_pa = part->remote_rp_pa +
		(u64) offsetof(struct xpc_rsvd_page, part_nasids);

	bte_res = xp_bte_copy(part_nasid_pa, ia64_tpa((u64) nasid_mask),
			L1_CACHE_ALIGN(XP_NASID_MASK_BYTES),
			(BTE_NOTIFY | BTE_WACQUIRE), NULL);

	return xpc_map_bte_errors(bte_res);
}