/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright(c) 2020 Intel Corporation. */

#ifndef __CXL_H__
#define __CXL_H__

#include <linux/libnvdimm.h>
#include <linux/bitfield.h>
#include <linux/notifier.h>
#include <linux/bitops.h>
#include <linux/log2.h>
#include <linux/node.h>
#include <linux/io.h>

extern const struct nvdimm_security_ops *cxl_security_ops;

/**
 * DOC: cxl objects
 *
 * The CXL core objects like ports, decoders, and regions are shared
 * between the subsystem drivers cxl_acpi, cxl_pci, and core drivers
 * (port-driver, region-driver, nvdimm object-drivers... etc).
 */

/* CXL 2.0 8.2.4 CXL Component Register Layout and Definition */
#define CXL_COMPONENT_REG_BLOCK_SIZE SZ_64K

/* CXL 2.0 8.2.5 CXL.cache and CXL.mem Registers */
#define CXL_CM_OFFSET 0x1000
#define CXL_CM_CAP_HDR_OFFSET 0x0
#define CXL_CM_CAP_HDR_ID_MASK GENMASK(15, 0)
#define CM_CAP_HDR_CAP_ID 1
#define CXL_CM_CAP_HDR_VERSION_MASK GENMASK(19, 16)
#define CM_CAP_HDR_CAP_VERSION 1
#define CXL_CM_CAP_HDR_CACHE_MEM_VERSION_MASK GENMASK(23, 20)
#define CM_CAP_HDR_CACHE_MEM_VERSION 1
#define CXL_CM_CAP_HDR_ARRAY_SIZE_MASK GENMASK(31, 24)
#define CXL_CM_CAP_PTR_MASK GENMASK(31, 20)

#define CXL_CM_CAP_CAP_ID_RAS 0x2
#define CXL_CM_CAP_CAP_ID_HDM 0x5
#define CXL_CM_CAP_CAP_HDM_VERSION 1

/* HDM decoders CXL 2.0 8.2.5.12 CXL HDM Decoder Capability Structure */
#define CXL_HDM_DECODER_CAP_OFFSET 0x0
#define CXL_HDM_DECODER_COUNT_MASK GENMASK(3, 0)
#define CXL_HDM_DECODER_TARGET_COUNT_MASK GENMASK(7, 4)
#define CXL_HDM_DECODER_INTERLEAVE_11_8 BIT(8)
#define CXL_HDM_DECODER_INTERLEAVE_14_12 BIT(9)
/*
 * Per CXL r3.1 8.2.4.20.1, bits 11 and 12 advertise the capability to
 * establish interleaving in 3, 6, 12 and 16 ways; a target lacking
 * these capabilities cannot be attached to a region using such
 * interleave ways.
 */
#define CXL_HDM_DECODER_INTERLEAVE_3_6_12_WAY BIT(11)
#define CXL_HDM_DECODER_INTERLEAVE_16_WAY BIT(12)
#define CXL_HDM_DECODER_CTRL_OFFSET 0x4
#define CXL_HDM_DECODER_ENABLE BIT(1)
#define CXL_HDM_DECODER0_BASE_LOW_OFFSET(i) (0x20 * (i) + 0x10)
#define CXL_HDM_DECODER0_BASE_HIGH_OFFSET(i) (0x20 * (i) + 0x14)
#define CXL_HDM_DECODER0_SIZE_LOW_OFFSET(i) (0x20 * (i) + 0x18)
#define CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(i) (0x20 * (i) + 0x1c)
#define CXL_HDM_DECODER0_CTRL_OFFSET(i) (0x20 * (i) + 0x20)
#define CXL_HDM_DECODER0_CTRL_IG_MASK GENMASK(3, 0)
#define CXL_HDM_DECODER0_CTRL_IW_MASK GENMASK(7, 4)
#define CXL_HDM_DECODER0_CTRL_LOCK BIT(8)
#define CXL_HDM_DECODER0_CTRL_COMMIT BIT(9)
#define CXL_HDM_DECODER0_CTRL_COMMITTED BIT(10)
#define CXL_HDM_DECODER0_CTRL_COMMIT_ERROR BIT(11)
#define CXL_HDM_DECODER0_CTRL_HOSTONLY BIT(12)
#define CXL_HDM_DECODER0_TL_LOW(i) (0x20 * (i) + 0x24)
#define CXL_HDM_DECODER0_TL_HIGH(i) (0x20 * (i) + 0x28)
#define CXL_HDM_DECODER0_SKIP_LOW(i) CXL_HDM_DECODER0_TL_LOW(i)
#define CXL_HDM_DECODER0_SKIP_HIGH(i) CXL_HDM_DECODER0_TL_HIGH(i)

/* HDM decoder control register constants CXL 3.0 8.2.5.19.7 */
#define CXL_DECODER_MIN_GRANULARITY 256
#define CXL_DECODER_MAX_ENCODED_IG 6

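/*
 * Illustrative sketch (hypothetical helper, not part of the upstream
 * header; the real commit flow lives in the core HDM code): set the
 * commit bit for decoder @id given a mapped HDM register block, per
 * CXL 2.0 8.2.5.12.7.
 */
static inline void cxl_example_decoder_commit(void __iomem *hdm, int id)
{
        u32 ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(id));

        ctrl |= CXL_HDM_DECODER0_CTRL_COMMIT;
        writel(ctrl, hdm + CXL_HDM_DECODER0_CTRL_OFFSET(id));
}
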
static inline int cxl_hdm_decoder_count(u32 cap_hdr)
{
        int val = FIELD_GET(CXL_HDM_DECODER_COUNT_MASK, cap_hdr);

        /* Decoder Count is encoded: 0 => 1 decoder, otherwise val * 2 */
        return val ? val * 2 : 1;
}

/* Encode defined in CXL 2.0 8.2.5.12.7 HDM Decoder Control Register */
static inline int eig_to_granularity(u16 eig, unsigned int *granularity)
{
        if (eig > CXL_DECODER_MAX_ENCODED_IG)
                return -EINVAL;
        *granularity = CXL_DECODER_MIN_GRANULARITY << eig;
        return 0;
}

/* Encode defined in CXL ECN "3, 6, 12 and 16-way memory Interleaving" */
static inline int eiw_to_ways(u8 eiw, unsigned int *ways)
{
        switch (eiw) {
        case 0 ... 4:
                /* power-of-2 interleave: 1, 2, 4, 8, or 16 ways */
                *ways = 1 << eiw;
                break;
        case 8 ... 10:
                /* 3-way multiples: 3, 6, or 12 ways */
                *ways = 3 << (eiw - 8);
                break;
        default:
                return -EINVAL;
        }

        return 0;
}

static inline int granularity_to_eig(int granularity, u16 *eig)
{
        if (granularity > SZ_16K || granularity < CXL_DECODER_MIN_GRANULARITY ||
            !is_power_of_2(granularity))
                return -EINVAL;
        *eig = ilog2(granularity) - 8;
        return 0;
}

static inline int ways_to_eiw(unsigned int ways, u8 *eiw)
{
        if (ways > 16)
                return -EINVAL;
        if (is_power_of_2(ways)) {
                *eiw = ilog2(ways);
                return 0;
        }
        if (ways % 3)
                return -EINVAL;
        ways /= 3;
        if (!is_power_of_2(ways))
                return -EINVAL;
        *eiw = ilog2(ways) + 8;
        return 0;
}
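
/*
 * Illustrative sketch (hypothetical helper, not part of the upstream
 * header): round-trip a 6-way, 1KiB-granularity geometry through the
 * helpers above.
 */
static inline void cxl_example_geometry_roundtrip(void)
{
        unsigned int ways, granularity;
        u16 eig;
        u8 eiw;

        ways_to_eiw(6, &eiw);                   /* eiw == 9 */
        granularity_to_eig(SZ_1K, &eig);        /* eig == 2 */
        eiw_to_ways(eiw, &ways);                /* ways == 6 */
        eig_to_granularity(eig, &granularity);  /* granularity == 1024 */
}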

/* RAS Registers CXL 2.0 8.2.5.9 CXL RAS Capability Structure */
#define CXL_RAS_UNCORRECTABLE_STATUS_OFFSET 0x0
#define CXL_RAS_UNCORRECTABLE_STATUS_MASK (GENMASK(16, 14) | GENMASK(11, 0))
#define CXL_RAS_UNCORRECTABLE_MASK_OFFSET 0x4
#define CXL_RAS_UNCORRECTABLE_MASK_MASK (GENMASK(16, 14) | GENMASK(11, 0))
#define CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK BIT(8)
#define CXL_RAS_UNCORRECTABLE_SEVERITY_OFFSET 0x8
#define CXL_RAS_UNCORRECTABLE_SEVERITY_MASK (GENMASK(16, 14) | GENMASK(11, 0))
#define CXL_RAS_CORRECTABLE_STATUS_OFFSET 0xC
#define CXL_RAS_CORRECTABLE_STATUS_MASK GENMASK(6, 0)
#define CXL_RAS_CORRECTABLE_MASK_OFFSET 0x10
#define CXL_RAS_CORRECTABLE_MASK_MASK GENMASK(6, 0)
#define CXL_RAS_CAP_CONTROL_OFFSET 0x14
#define CXL_RAS_CAP_CONTROL_FE_MASK GENMASK(5, 0)
#define CXL_RAS_HEADER_LOG_OFFSET 0x18
#define CXL_RAS_CAPABILITY_LENGTH 0x58
#define CXL_HEADERLOG_SIZE SZ_512
#define CXL_HEADERLOG_SIZE_U32 (SZ_512 / sizeof(u32))

/* CXL 2.0 8.2.8.1 Device Capabilities Array Register */
#define CXLDEV_CAP_ARRAY_OFFSET 0x0
#define CXLDEV_CAP_ARRAY_CAP_ID 0
#define CXLDEV_CAP_ARRAY_ID_MASK GENMASK_ULL(15, 0)
#define CXLDEV_CAP_ARRAY_COUNT_MASK GENMASK_ULL(47, 32)
/* CXL 2.0 8.2.8.2 CXL Device Capability Header Register */
#define CXLDEV_CAP_HDR_CAP_ID_MASK GENMASK(15, 0)
/* CXL 2.0 8.2.8.2.1 CXL Device Capabilities */
#define CXLDEV_CAP_CAP_ID_DEVICE_STATUS 0x1
#define CXLDEV_CAP_CAP_ID_PRIMARY_MAILBOX 0x2
#define CXLDEV_CAP_CAP_ID_SECONDARY_MAILBOX 0x3
#define CXLDEV_CAP_CAP_ID_MEMDEV 0x4000

/* CXL 3.0 8.2.8.3.1 Event Status Register */
#define CXLDEV_DEV_EVENT_STATUS_OFFSET 0x00
#define CXLDEV_EVENT_STATUS_INFO BIT(0)
#define CXLDEV_EVENT_STATUS_WARN BIT(1)
#define CXLDEV_EVENT_STATUS_FAIL BIT(2)
#define CXLDEV_EVENT_STATUS_FATAL BIT(3)

#define CXLDEV_EVENT_STATUS_ALL (CXLDEV_EVENT_STATUS_INFO |     \
                                 CXLDEV_EVENT_STATUS_WARN |     \
                                 CXLDEV_EVENT_STATUS_FAIL |     \
                                 CXLDEV_EVENT_STATUS_FATAL)
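
/*
 * Illustrative sketch (hypothetical helper, not part of the upstream
 * header): test for pending events given a mapped Device Status
 * register block base; assumes a 32-bit read of the status register.
 */
static inline bool cxl_example_event_pending(void __iomem *status)
{
        u32 sts = readl(status + CXLDEV_DEV_EVENT_STATUS_OFFSET);

        return sts & CXLDEV_EVENT_STATUS_ALL;
}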

/* CXL rev 3.0 section 8.2.9.2.4; Table 8-52 */
#define CXLDEV_EVENT_INT_MODE_MASK GENMASK(1, 0)
#define CXLDEV_EVENT_INT_MSGNUM_MASK GENMASK(7, 4)

/* CXL 2.0 8.2.8.4 Mailbox Registers */
#define CXLDEV_MBOX_CAPS_OFFSET 0x00
#define CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK GENMASK(4, 0)
#define CXLDEV_MBOX_CAP_BG_CMD_IRQ BIT(6)
#define CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK GENMASK(10, 7)
#define CXLDEV_MBOX_CTRL_OFFSET 0x04
#define CXLDEV_MBOX_CTRL_DOORBELL BIT(0)
#define CXLDEV_MBOX_CTRL_BG_CMD_IRQ BIT(2)
#define CXLDEV_MBOX_CMD_OFFSET 0x08
#define CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK GENMASK_ULL(15, 0)
#define CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK GENMASK_ULL(36, 16)
#define CXLDEV_MBOX_STATUS_OFFSET 0x10
#define CXLDEV_MBOX_STATUS_BG_CMD BIT(0)
#define CXLDEV_MBOX_STATUS_RET_CODE_MASK GENMASK_ULL(47, 32)
#define CXLDEV_MBOX_BG_CMD_STATUS_OFFSET 0x18
#define CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK GENMASK_ULL(15, 0)
#define CXLDEV_MBOX_BG_CMD_COMMAND_PCT_MASK GENMASK_ULL(22, 16)
#define CXLDEV_MBOX_BG_CMD_COMMAND_RC_MASK GENMASK_ULL(47, 32)
#define CXLDEV_MBOX_BG_CMD_COMMAND_VENDOR_MASK GENMASK_ULL(63, 48)
#define CXLDEV_MBOX_PAYLOAD_OFFSET 0x20
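
/*
 * Illustrative sketch (hypothetical helper, not part of the upstream
 * header; the real submission path lives in the cxl_pci driver):
 * compose a command register and ring the doorbell per CXL 2.0 8.2.8.4.
 */
static inline void cxl_example_mbox_submit(void __iomem *mbox, u16 opcode,
                                           size_t size_in)
{
        u64 cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK, opcode);

        if (size_in)
                cmd_reg |= FIELD_PREP(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK,
                                      size_in);
        writeq(cmd_reg, mbox + CXLDEV_MBOX_CMD_OFFSET);
        writel(CXLDEV_MBOX_CTRL_DOORBELL, mbox + CXLDEV_MBOX_CTRL_OFFSET);
}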

/*
 * Using struct_group() allows for per register-block-type helper routines,
 * without requiring block-type agnostic code to include the prefix.
 */
struct cxl_regs {
        /*
         * Common set of CXL Component register block base pointers
         * @hdm_decoder: CXL 2.0 8.2.5.12 CXL HDM Decoder Capability Structure
         * @ras: CXL 2.0 8.2.5.9 CXL RAS Capability Structure
         */
        struct_group_tagged(cxl_component_regs, component,
                void __iomem *hdm_decoder;
                void __iomem *ras;
        );
        /*
         * Common set of CXL Device register block base pointers
         * @status: CXL 2.0 8.2.8.3 Device Status Registers
         * @mbox: CXL 2.0 8.2.8.4 Mailbox Registers
         * @memdev: CXL 2.0 8.2.8.5 Memory Device Registers
         */
        struct_group_tagged(cxl_device_regs, device_regs,
                void __iomem *status, *mbox, *memdev;
        );

        struct_group_tagged(cxl_pmu_regs, pmu_regs,
                void __iomem *pmu;
        );

        /*
         * RCH downstream port specific RAS register
         * @dport_aer: CXL 3.0 8.2.1.1 RCH Downstream Port RCRB
         */
        struct_group_tagged(cxl_rch_regs, rch_regs,
                void __iomem *dport_aer;
        );
};
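
/*
 * Illustrative sketch (hypothetical helper, not part of the upstream
 * header): block-type agnostic code can reference the anonymous member
 * directly, equivalent to regs->component.hdm_decoder.
 */
static inline u32 cxl_example_read_hdm_cap(struct cxl_regs *regs)
{
        return readl(regs->hdm_decoder + CXL_HDM_DECODER_CAP_OFFSET);
}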

struct cxl_reg_map {
        bool valid;
        int id;
        unsigned long offset;
        unsigned long size;
};

struct cxl_component_reg_map {
        struct cxl_reg_map hdm_decoder;
        struct cxl_reg_map ras;
};

struct cxl_device_reg_map {
        struct cxl_reg_map status;
        struct cxl_reg_map mbox;
        struct cxl_reg_map memdev;
};

struct cxl_pmu_reg_map {
        struct cxl_reg_map pmu;
};

/**
 * struct cxl_register_map - DVSEC harvested register block mapping parameters
 * @host: device for devm operations and logging
 * @base: virtual base of the register-block-BAR + @block_offset
 * @resource: physical resource base of the register block
 * @max_size: maximum mapping size to perform register search
 * @reg_type: see enum cxl_regloc_type
 * @component_map: cxl_reg_map for component registers
 * @device_map: cxl_reg_maps for device registers
 * @pmu_map: cxl_reg_maps for CXL Performance Monitoring Units
 */
struct cxl_register_map {
        struct device *host;
        void __iomem *base;
        resource_size_t resource;
        resource_size_t max_size;
        u8 reg_type;
        union {
                struct cxl_component_reg_map component_map;
                struct cxl_device_reg_map device_map;
                struct cxl_pmu_reg_map pmu_map;
        };
};

void cxl_probe_component_regs(struct device *dev, void __iomem *base,
                              struct cxl_component_reg_map *map);
void cxl_probe_device_regs(struct device *dev, void __iomem *base,
                           struct cxl_device_reg_map *map);
int cxl_map_component_regs(const struct cxl_register_map *map,
                           struct cxl_component_regs *regs,
                           unsigned long map_mask);
int cxl_map_device_regs(const struct cxl_register_map *map,
                        struct cxl_device_regs *regs);
int cxl_map_pmu_regs(struct cxl_register_map *map, struct cxl_pmu_regs *regs);

enum cxl_regloc_type;
int cxl_count_regblock(struct pci_dev *pdev, enum cxl_regloc_type type);
int cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_type type,
                               struct cxl_register_map *map, int index);
int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
                      struct cxl_register_map *map);
int cxl_setup_regs(struct cxl_register_map *map);
struct cxl_dport;
resource_size_t cxl_rcd_component_reg_phys(struct device *dev,
                                           struct cxl_dport *dport);

#define CXL_RESOURCE_NONE ((resource_size_t) -1)
#define CXL_TARGET_STRLEN 20

/*
 * cxl_decoder flags that define the type of memory / devices this
 * decoder supports as well as configuration lock status. See "CXL 2.0
 * 8.2.5.12.7 CXL HDM Decoder 0 Control Register" for details.
 * Additionally indicate whether decoder settings were autodetected or
 * user customized.
 */
#define CXL_DECODER_F_RAM BIT(0)
#define CXL_DECODER_F_PMEM BIT(1)
#define CXL_DECODER_F_TYPE2 BIT(2)
#define CXL_DECODER_F_TYPE3 BIT(3)
#define CXL_DECODER_F_LOCK BIT(4)
#define CXL_DECODER_F_ENABLE BIT(5)
#define CXL_DECODER_F_MASK GENMASK(5, 0)

enum cxl_decoder_type {
        CXL_DECODER_DEVMEM = 2,
        CXL_DECODER_HOSTONLYMEM = 3,
};

/*
 * Current specification goes up to 8, double that seems a reasonable
 * software max for the foreseeable future
 */
#define CXL_DECODER_MAX_INTERLEAVE 16

#define CXL_QOS_CLASS_INVALID -1

/**
 * struct cxl_decoder - Common CXL HDM Decoder Attributes
 * @dev: this decoder's device
 * @id: kernel device name id
 * @hpa_range: Host physical address range mapped by this decoder
 * @interleave_ways: number of cxl_dports in this decode
 * @interleave_granularity: data stride per dport
 * @target_type: accelerator vs expander (type2 vs type3) selector
 * @region: currently assigned region for this decoder
 * @flags: memory type capabilities and locking
 * @commit: device/decoder-type specific callback to commit settings to hw
 * @reset: device/decoder-type specific callback to reset hw settings
 */
struct cxl_decoder {
        struct device dev;
        int id;
        struct range hpa_range;
        int interleave_ways;
        int interleave_granularity;
        enum cxl_decoder_type target_type;
        struct cxl_region *region;
        unsigned long flags;
        int (*commit)(struct cxl_decoder *cxld);
        void (*reset)(struct cxl_decoder *cxld);
};

/*
 * CXL_DECODER_DEAD prevents endpoints from being reattached to regions
 * while cxld_unregister() is running
 */
enum cxl_decoder_mode {
        CXL_DECODER_NONE,
        CXL_DECODER_RAM,
        CXL_DECODER_PMEM,
        CXL_DECODER_MIXED,
        CXL_DECODER_DEAD,
};

static inline const char *cxl_decoder_mode_name(enum cxl_decoder_mode mode)
{
        static const char * const names[] = {
                [CXL_DECODER_NONE] = "none",
                [CXL_DECODER_RAM] = "ram",
                [CXL_DECODER_PMEM] = "pmem",
                [CXL_DECODER_MIXED] = "mixed",
        };

        if (mode >= CXL_DECODER_NONE && mode <= CXL_DECODER_MIXED)
                return names[mode];
        return "mixed";
}

/*
 * Track whether this decoder is reserved for region autodiscovery, or
 * free for userspace provisioning.
 */
enum cxl_decoder_state {
        CXL_DECODER_STATE_MANUAL,
        CXL_DECODER_STATE_AUTO,
};

/**
 * struct cxl_endpoint_decoder - Endpoint / SPA to DPA decoder
 * @cxld: base cxl_decoder object
 * @dpa_res: actively claimed DPA span of this decoder
 * @skip: offset into @dpa_res where @cxld.hpa_range maps
 * @mode: which memory type / access-mode-partition this decoder targets
 * @state: autodiscovery state
 * @pos: interleave position in @cxld.region
 */
struct cxl_endpoint_decoder {
        struct cxl_decoder cxld;
        struct resource *dpa_res;
        resource_size_t skip;
        enum cxl_decoder_mode mode;
        enum cxl_decoder_state state;
        int pos;
};

/**
 * struct cxl_switch_decoder - Switch specific CXL HDM Decoder
 * @cxld: base cxl_decoder object
 * @nr_targets: number of elements in @target
 * @target: active ordered target list in current decoder configuration
 *
 * The 'switch' decoder type represents the decoder instances of cxl_port's that
 * route from the root of a CXL memory decode topology to the endpoints. They
 * come in two flavors, root-level decoders, statically defined by platform
 * firmware, and mid-level decoders, where interleave-granularity,
 * interleave-width, and the target list are mutable.
 */
struct cxl_switch_decoder {
        struct cxl_decoder cxld;
        int nr_targets;
        struct cxl_dport *target[];
};

struct cxl_root_decoder;
typedef u64 (*cxl_hpa_to_spa_fn)(struct cxl_root_decoder *cxlrd, u64 hpa);

/**
 * struct cxl_root_decoder - Static platform CXL address decoder
 * @res: host / parent resource for region allocations
 * @region_id: region id for next region provisioning event
 * @hpa_to_spa: translate CXL host-physical-address to Platform system-physical-address
 * @platform_data: platform specific configuration data
 * @range_lock: sync region autodiscovery by address range
 * @qos_class: QoS performance class cookie
 * @cxlsd: base cxl switch decoder
 */
struct cxl_root_decoder {
        struct resource *res;
        atomic_t region_id;
        cxl_hpa_to_spa_fn hpa_to_spa;
        void *platform_data;
        struct mutex range_lock;
        int qos_class;
        struct cxl_switch_decoder cxlsd;
};

/*
 * enum cxl_config_state - State machine for region configuration
 * @CXL_CONFIG_IDLE: Any sysfs attribute can be written freely
 * @CXL_CONFIG_INTERLEAVE_ACTIVE: region size has been set, no more
 * changes to interleave_ways or interleave_granularity
 * @CXL_CONFIG_ACTIVE: All targets have been added, the region is now
 * active
 * @CXL_CONFIG_RESET_PENDING: see commit_store()
 * @CXL_CONFIG_COMMIT: Soft-config has been committed to hardware
 */
enum cxl_config_state {
        CXL_CONFIG_IDLE,
        CXL_CONFIG_INTERLEAVE_ACTIVE,
        CXL_CONFIG_ACTIVE,
        CXL_CONFIG_RESET_PENDING,
        CXL_CONFIG_COMMIT,
};

/**
 * struct cxl_region_params - region settings
 * @state: allow the driver to lockdown further parameter changes
 * @uuid: unique id for persistent regions
 * @interleave_ways: number of endpoints in the region
 * @interleave_granularity: capacity each endpoint contributes to a stripe
 * @res: allocated iomem capacity for this region
 * @targets: active ordered targets in current decoder configuration
 * @nr_targets: number of targets
 *
 * State transitions are protected by the cxl_region_rwsem
 */
struct cxl_region_params {
        enum cxl_config_state state;
        uuid_t uuid;
        int interleave_ways;
        int interleave_granularity;
        struct resource *res;
        struct cxl_endpoint_decoder *targets[CXL_DECODER_MAX_INTERLEAVE];
        int nr_targets;
};

/*
 * Indicate whether this region has been assembled by autodetection or
 * userspace assembly. Prevent endpoint decoders outside of automatic
 * detection from being added to the region.
 */
#define CXL_REGION_F_AUTO 0

/*
 * Require that a committed region successfully complete a teardown once
 * any of its associated decoders have been torn down. This maintains
 * the commit state for the region since there are committed decoders,
 * but blocks cxl_region_probe().
 */
#define CXL_REGION_F_NEEDS_RESET 1

/**
 * struct cxl_region - CXL region
 * @dev: This region's device
 * @id: This region's id. Id is globally unique across all regions
 * @mode: Endpoint decoder allocation / access mode
 * @type: Endpoint decoder target type
 * @cxl_nvb: nvdimm bridge for coordinating @cxlr_pmem setup / shutdown
 * @cxlr_pmem: (for pmem regions) cached copy of the region's cxl_pmem_region
 * @flags: Region state flags
 * @params: active + config params for the region
 * @coord: QoS access coordinates for the region
 * @memory_notifier: notifier for setting the access coordinates to node
 * @adist_notifier: notifier for calculating the abstract distance of node
 */
struct cxl_region {
        struct device dev;
        int id;
        enum cxl_decoder_mode mode;
        enum cxl_decoder_type type;
        struct cxl_nvdimm_bridge *cxl_nvb;
        struct cxl_pmem_region *cxlr_pmem;
        unsigned long flags;
        struct cxl_region_params params;
        struct access_coordinate coord[ACCESS_COORDINATE_MAX];
        struct notifier_block memory_notifier;
        struct notifier_block adist_notifier;
};
|
|
|
|
|
2021-06-15 23:18:17 +00:00
|
|
|
struct cxl_nvdimm_bridge {
|
2021-09-14 19:08:40 +00:00
|
|
|
int id;
|
2021-06-15 23:18:17 +00:00
|
|
|
struct device dev;
|
|
|
|
struct cxl_port *port;
|
|
|
|
struct nvdimm_bus *nvdimm_bus;
|
|
|
|
struct nvdimm_bus_descriptor nd_desc;
|
|
|
|
};
|
|
|
|
|
2022-12-01 22:03:19 +00:00
|
|
|
#define CXL_DEV_ID_LEN 19
|
|
|
|
|
2021-06-15 23:36:31 +00:00
|
|
|
struct cxl_nvdimm {
|
|
|
|
struct device dev;
|
|
|
|
struct cxl_memdev *cxlmd;
|
2022-12-01 22:03:19 +00:00
|
|
|
u8 dev_id[CXL_DEV_ID_LEN]; /* for nvdimm, string of 'serial' */
|
2022-01-11 16:06:40 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
struct cxl_pmem_region_mapping {
|
|
|
|
struct cxl_memdev *cxlmd;
|
|
|
|
struct cxl_nvdimm *cxl_nvd;
|
|
|
|
u64 start;
|
|
|
|
u64 size;
|
|
|
|
int position;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct cxl_pmem_region {
|
|
|
|
struct device dev;
|
|
|
|
struct cxl_region *cxlr;
|
|
|
|
struct nd_region *nd_region;
|
|
|
|
struct range hpa_range;
|
|
|
|
int nr_mappings;
|
|
|
|
struct cxl_pmem_region_mapping mapping[];
|
2021-06-15 23:36:31 +00:00
|
|
|
};
|
|
|
|
|
2023-02-10 09:07:19 +00:00
|
|
|
struct cxl_dax_region {
|
|
|
|
struct device dev;
|
|
|
|
struct cxl_region *cxlr;
|
|
|
|
struct range hpa_range;
|
|
|
|
};
|
|
|
|
|
2021-06-09 16:01:35 +00:00
|
|
|
/**
|
|
|
|
* struct cxl_port - logical collection of upstream port devices and
|
|
|
|
* downstream port devices to construct a CXL memory
|
|
|
|
* decode hierarchy.
|
|
|
|
* @dev: this port's device
|
2023-06-22 20:55:01 +00:00
|
|
|
* @uport_dev: PCI or platform device implementing the upstream port capability
|
2022-06-01 19:49:32 +00:00
|
|
|
* @host_bridge: Shortcut to the platform attach point for this port
|
2021-06-09 16:01:35 +00:00
|
|
|
* @id: id for port device-name
|
2021-06-09 16:01:46 +00:00
|
|
|
* @dports: cxl_dport instances referenced by decoders
|
2022-02-04 15:08:40 +00:00
|
|
|
* @endpoints: cxl_ep instances, endpoints that are a descendant of this port
|
2022-06-07 17:56:10 +00:00
|
|
|
* @regions: cxl_region_ref instances, regions mapped by this port
|
2022-05-27 17:57:01 +00:00
|
|
|
* @parent_dport: dport that points to this port in the parent
|
2021-06-09 16:43:29 +00:00
|
|
|
* @decoder_ida: allocator for decoder ids
|
2023-10-18 17:16:57 +00:00
|
|
|
* @reg_map: component and ras register mapping parameters
|
2022-11-04 00:30:54 +00:00
|
|
|
* @nr_dports: number of entries in @dports
|
2022-05-24 19:04:58 +00:00
|
|
|
* @hdm_end: track last allocated HDM decoder instance for allocation ordering
|
2022-06-09 05:56:37 +00:00
|
|
|
* @commit_end: cursor to track highest committed decoder for commit ordering
|
2022-02-04 15:08:40 +00:00
|
|
|
* @dead: last ep has been removed, force port re-creation
|
2022-01-24 00:29:53 +00:00
|
|
|
* @depth: How deep this port is relative to the root; depth 0 is the root.
|
2022-07-19 20:52:49 +00:00
|
|
|
* @cdat: Cached CDAT data
|
|
|
|
* @cdat_available: Should a CDAT attribute be available in sysfs
|
2023-12-21 22:03:39 +00:00
|
|
|
* @pci_latency: Upstream latency in picoseconds
|
2021-06-09 16:01:35 +00:00
|
|
|
*/
|
|
|
|
struct cxl_port {
|
|
|
|
struct device dev;
|
2023-06-22 20:55:01 +00:00
|
|
|
struct device *uport_dev;
|
2022-06-01 19:49:32 +00:00
|
|
|
struct device *host_bridge;
|
2021-06-09 16:01:35 +00:00
|
|
|
int id;
|
2022-05-28 03:51:19 +00:00
|
|
|
struct xarray dports;
|
2022-05-27 17:58:26 +00:00
|
|
|
struct xarray endpoints;
|
2022-06-07 17:56:10 +00:00
|
|
|
struct xarray regions;
|
2022-05-27 17:57:01 +00:00
|
|
|
struct cxl_dport *parent_dport;
|
2021-06-09 16:43:29 +00:00
|
|
|
struct ida decoder_ida;
|
2023-10-18 17:16:57 +00:00
|
|
|
struct cxl_register_map reg_map;
|
2022-11-04 00:30:54 +00:00
|
|
|
int nr_dports;
|
2022-05-24 19:04:58 +00:00
|
|
|
int hdm_end;
|
2022-06-09 05:56:37 +00:00
|
|
|
int commit_end;
|
2022-02-04 15:08:40 +00:00
|
|
|
bool dead;
|
2022-01-24 00:29:53 +00:00
|
|
|
unsigned int depth;
|
2022-07-19 20:52:49 +00:00
|
|
|
struct cxl_cdat {
|
|
|
|
void *table;
|
|
|
|
size_t length;
|
|
|
|
} cdat;
|
|
|
|
bool cdat_available;
|
2023-12-21 22:03:39 +00:00
|
|
|
long pci_latency;
|
2021-06-09 16:01:35 +00:00
|
|
|
};
|
|
|
|
|
2023-12-21 22:03:32 +00:00
|
|
|
/**
|
|
|
|
* struct cxl_root - logical collection of root cxl_port items
|
|
|
|
*
|
|
|
|
* @port: cxl_port member
|
|
|
|
* @ops: cxl root operations
|
|
|
|
*/
|
|
|
|
struct cxl_root {
|
|
|
|
struct cxl_port port;
|
|
|
|
const struct cxl_root_ops *ops;
|
|
|
|
};
|
|
|
|
|
|
|
|
static inline struct cxl_root *
|
|
|
|
to_cxl_root(const struct cxl_port *port)
|
|
|
|
{
|
|
|
|
return container_of(port, struct cxl_root, port);
|
|
|
|
}
|
|
|
|
|
2024-01-05 22:07:40 +00:00
|
|
|
struct cxl_root_ops {
|
|
|
|
int (*qos_class)(struct cxl_root *cxl_root,
|
|
|
|
struct access_coordinate *coord, int entries,
|
|
|
|
int *qos_class);
|
|
|
|
};
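/*
 * Hedged usage sketch (not part of this interface): resolving a port's
 * qos_class via its root; @coord and @qos_class are caller-provided and
 * the single-entry case is shown.
 *
 *	struct cxl_root *cxl_root;
 *	int rc = -EOPNOTSUPP;
 *
 *	cxl_root = find_cxl_root(port);
 *	if (!cxl_root)
 *		return -ENXIO;
 *	if (cxl_root->ops && cxl_root->ops->qos_class)
 *		rc = cxl_root->ops->qos_class(cxl_root, &coord, 1, &qos_class);
 *	put_cxl_root(cxl_root);
 */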
|
|
|
|
|
2022-05-28 03:51:19 +00:00
|
|
|
static inline struct cxl_dport *
|
|
|
|
cxl_find_dport_by_dev(struct cxl_port *port, const struct device *dport_dev)
|
|
|
|
{
|
|
|
|
return xa_load(&port->dports, (unsigned long)dport_dev);
|
|
|
|
}
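/*
 * Hedged sketch of the insertion side this lookup pairs with: @dports is
 * keyed by the dport_dev pointer value, which is why xa_load() above can
 * resolve a dport directly from its device.
 *
 *	rc = xa_insert(&port->dports, (unsigned long)dport->dport_dev,
 *		       dport, GFP_KERNEL);
 */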
|
|
|
|
|
2023-06-22 20:54:59 +00:00
|
|
|
struct cxl_rcrb_info {
|
|
|
|
resource_size_t base;
|
|
|
|
u16 aer_cap;
|
|
|
|
};
|
|
|
|
|
2021-06-09 16:01:46 +00:00
|
|
|
/**
|
|
|
|
* struct cxl_dport - CXL downstream port
|
2023-06-22 20:55:00 +00:00
|
|
|
* @dport_dev: PCI bridge or firmware device representing the downstream link
|
2023-10-18 17:16:57 +00:00
|
|
|
* @reg_map: component and ras register mapping parameters
|
2021-06-09 16:01:46 +00:00
|
|
|
* @port_id: unique hardware identifier for dport in decoder target list
|
2023-06-22 20:54:59 +00:00
|
|
|
* @rcrb: Data about the Root Complex Register Block layout
|
2022-12-03 08:40:29 +00:00
|
|
|
* @rch: Indicate whether this dport was enumerated in RCH or VH mode
|
2021-06-09 16:01:46 +00:00
|
|
|
* @port: reference to cxl_port that contains this downstream port
|
2023-10-18 17:17:07 +00:00
|
|
|
* @regs: Dport parsed register blocks
|
2024-04-03 15:47:15 +00:00
|
|
|
* @coord: access coordinates (bandwidth and latency performance attributes)
|
2023-12-21 22:03:39 +00:00
|
|
|
* @link_latency: calculated PCIe downstream latency
|
2021-06-09 16:01:46 +00:00
|
|
|
*/
|
|
|
|
struct cxl_dport {
|
2023-06-22 20:55:00 +00:00
|
|
|
struct device *dport_dev;
|
2023-10-18 17:16:57 +00:00
|
|
|
struct cxl_register_map reg_map;
|
2021-06-09 16:01:46 +00:00
|
|
|
int port_id;
|
2023-06-22 20:54:59 +00:00
|
|
|
struct cxl_rcrb_info rcrb;
|
2022-12-03 08:40:29 +00:00
|
|
|
bool rch;
|
2021-06-09 16:01:46 +00:00
|
|
|
struct cxl_port *port;
|
2023-10-18 17:17:07 +00:00
|
|
|
struct cxl_regs regs;
|
2024-04-03 15:47:15 +00:00
|
|
|
struct access_coordinate coord[ACCESS_COORDINATE_MAX];
|
2023-12-21 22:03:39 +00:00
|
|
|
long link_latency;
|
2021-06-09 16:01:46 +00:00
|
|
|
};
|
|
|
|
|
2022-02-04 15:08:40 +00:00
|
|
|
/**
|
|
|
|
* struct cxl_ep - track an endpoint's interest in a port
|
|
|
|
* @ep: device that hosts a generic CXL endpoint (expander or accelerator)
|
2022-05-27 07:56:59 +00:00
|
|
|
* @dport: which dport routes to this endpoint on @port
|
2022-06-07 17:35:39 +00:00
|
|
|
* @next: cxl switch port across the link attached to @dport, NULL if
|
|
|
|
* attached to an endpoint
|
2022-02-04 15:08:40 +00:00
|
|
|
*/
|
|
|
|
struct cxl_ep {
|
|
|
|
struct device *ep;
|
2022-05-27 07:56:59 +00:00
|
|
|
struct cxl_dport *dport;
|
2022-06-07 17:35:39 +00:00
|
|
|
struct cxl_port *next;
|
2022-02-04 15:08:40 +00:00
|
|
|
};
|
|
|
|
|
2022-06-07 17:56:10 +00:00
|
|
|
/**
|
|
|
|
* struct cxl_region_ref - track a region's interest in a port
|
|
|
|
* @port: point in topology to install this reference
|
|
|
|
* @decoder: decoder assigned for @region in @port
|
|
|
|
* @region: region for this reference
|
|
|
|
* @endpoints: cxl_ep references for region members beneath @port
|
2022-06-06 22:18:31 +00:00
|
|
|
* @nr_targets_set: track how many targets have been programmed during setup
|
2022-06-07 17:56:10 +00:00
|
|
|
* @nr_eps: number of endpoints beneath @port
|
|
|
|
* @nr_targets: number of distinct targets needed to reach @nr_eps
|
|
|
|
*/
|
|
|
|
struct cxl_region_ref {
|
|
|
|
struct cxl_port *port;
|
|
|
|
struct cxl_decoder *decoder;
|
|
|
|
struct cxl_region *region;
|
|
|
|
struct xarray endpoints;
|
2022-06-06 22:18:31 +00:00
|
|
|
int nr_targets_set;
|
2022-06-07 17:56:10 +00:00
|
|
|
int nr_eps;
|
|
|
|
int nr_targets;
|
|
|
|
};
|
|
|
|
|
2022-01-31 21:33:13 +00:00
|
|
|
/*
|
|
|
|
* The platform firmware device hosting the root is also the top of the
|
|
|
|
* CXL port topology. All other CXL ports have another CXL port as their
|
2023-06-22 20:55:01 +00:00
|
|
|
* parent and their ->uport_dev / host device is out-of-line of the port
|
2022-01-31 21:33:13 +00:00
|
|
|
* ancestry.
|
|
|
|
*/
|
|
|
|
static inline bool is_cxl_root(struct cxl_port *port)
|
|
|
|
{
|
2023-06-22 20:55:01 +00:00
|
|
|
return port->uport_dev == port->dev.parent;
|
2022-01-31 21:33:13 +00:00
|
|
|
}
|
|
|
|
|
2023-10-16 17:57:48 +00:00
|
|
|
int cxl_num_decoders_committed(struct cxl_port *port);
|
2023-01-11 11:30:17 +00:00
|
|
|
bool is_cxl_port(const struct device *dev);
|
|
|
|
struct cxl_port *to_cxl_port(const struct device *dev);
|
cxl/port: Fix use-after-free, permit out-of-order decoder shutdown
In support of investigating an initialization failure report [1],
cxl_test was updated to register mock memory-devices after the mock
root-port/bus device had been registered. That led to cxl_test crashing
with a use-after-free bug with the following signature:
cxl_port_attach_region: cxl region3: cxl_host_bridge.0:port3 decoder3.0 add: mem0:decoder7.0 @ 0 next: cxl_switch_uport.0 nr_eps: 1 nr_targets: 1
cxl_port_attach_region: cxl region3: cxl_host_bridge.0:port3 decoder3.0 add: mem4:decoder14.0 @ 1 next: cxl_switch_uport.0 nr_eps: 2 nr_targets: 1
cxl_port_setup_targets: cxl region3: cxl_switch_uport.0:port6 target[0] = cxl_switch_dport.0 for mem0:decoder7.0 @ 0
1) cxl_port_setup_targets: cxl region3: cxl_switch_uport.0:port6 target[1] = cxl_switch_dport.4 for mem4:decoder14.0 @ 1
[..]
cxld_unregister: cxl decoder14.0:
cxl_region_decode_reset: cxl_region region3:
mock_decoder_reset: cxl_port port3: decoder3.0 reset
2) mock_decoder_reset: cxl_port port3: decoder3.0: out of order reset, expected decoder3.1
cxl_endpoint_decoder_release: cxl decoder14.0:
[..]
cxld_unregister: cxl decoder7.0:
3) cxl_region_decode_reset: cxl_region region3:
Oops: general protection fault, probably for non-canonical address 0x6b6b6b6b6b6b6bc3: 0000 [#1] PREEMPT SMP PTI
[..]
RIP: 0010:to_cxl_port+0x8/0x60 [cxl_core]
[..]
Call Trace:
<TASK>
cxl_region_decode_reset+0x69/0x190 [cxl_core]
cxl_region_detach+0xe8/0x210 [cxl_core]
cxl_decoder_kill_region+0x27/0x40 [cxl_core]
cxld_unregister+0x5d/0x60 [cxl_core]
At 1) a region has been established with 2 endpoint decoders (7.0 and
14.0). Those endpoints share a common switch-decoder in the topology
(3.0). At teardown, 2), decoder14.0 is the first to be removed and hits
the "out of order reset case" in the switch decoder. The effect though
is that region3 cleanup is aborted leaving it in-tact and
referencing decoder14.0. At 3) the second attempt to teardown region3
trips over the stale decoder14.0 object which has long since been
deleted.
The fix here is to recognize that the CXL specification places no
mandate on in-order shutdown of switch-decoders, the driver enforces
in-order allocation, and hardware enforces in-order commit. So, rather
than fail and leave objects dangling, always remove them.
In support of making cxl_region_decode_reset() always succeed,
cxl_region_invalidate_memregion() failures are turned into warnings.
Crashing the kernel is ok there since system integrity is at risk if
caches cannot be managed around physical address mutation events like
CXL region destruction.
A new device_for_each_child_reverse_from() is added to cleanup
port->commit_end after all dependent decoders have been disabled. In
other words if decoders are allocated 0->1->2 and disabled 1->2->0 then
port->commit_end only decrements from 2 after 2 has been disabled, and
it decrements all the way to zero since 1 was disabled previously.
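A hedged sketch of that reverse walk (commit_reap() stands in for the real
reap callback, prototype per device_for_each_child_reverse_from()):

	static int commit_reap(struct device *dev, const void *data)
	{
		/* sketch: skip non-decoders and still-committed decoders,
		 * otherwise decrement port->commit_end */
		return 0;
	}

	device_for_each_child_reverse_from(&port->dev, &cxld->dev, NULL,
					   commit_reap);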
Link: http://lore.kernel.org/20241004212504.1246-1-gourry@gourry.net [1]
Cc: stable@vger.kernel.org
Fixes: 176baefb2eb5 ("cxl/hdm: Commit decoder state to hardware")
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Alison Schofield <alison.schofield@intel.com>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Zijun Hu <quic_zijuhu@quicinc.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Link: https://patch.msgid.link/172964782781.81806.17902885593105284330.stgit@dwillia2-xfh.jf.intel.com
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
2024-10-23 01:43:49 +00:00
|
|
|
void cxl_port_commit_reap(struct cxl_decoder *cxld);
|
2022-02-01 02:10:04 +00:00
|
|
|
struct pci_bus;
|
2023-06-22 20:55:01 +00:00
|
|
|
int devm_cxl_register_pci_bus(struct device *host, struct device *uport_dev,
|
2022-01-31 16:44:52 +00:00
|
|
|
struct pci_bus *bus);
|
|
|
|
struct pci_bus *cxl_port_to_pci_bus(struct cxl_port *port);
|
2023-06-22 20:55:01 +00:00
|
|
|
struct cxl_port *devm_cxl_add_port(struct device *host,
|
|
|
|
struct device *uport_dev,
|
2021-06-09 16:01:35 +00:00
|
|
|
resource_size_t component_reg_phys,
|
2022-05-27 17:57:01 +00:00
|
|
|
struct cxl_dport *parent_dport);
|
2023-12-21 22:03:32 +00:00
|
|
|
struct cxl_root *devm_cxl_add_root(struct device *host,
|
|
|
|
const struct cxl_root_ops *ops);
|
2024-01-05 22:07:40 +00:00
|
|
|
struct cxl_root *find_cxl_root(struct cxl_port *port);
|
2024-01-05 22:07:34 +00:00
|
|
|
void put_cxl_root(struct cxl_root *cxl_root);
|
|
|
|
DEFINE_FREE(put_cxl_root, struct cxl_root *, if (_T) put_cxl_root(_T))
|
|
|
|
|
2024-08-30 01:31:36 +00:00
|
|
|
DEFINE_FREE(put_cxl_port, struct cxl_port *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev))
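/*
 * Hedged usage sketch for the guards above (scope-based cleanup from
 * linux/cleanup.h): the reference is dropped on every return path once
 * the variable goes out of scope.
 *
 *	struct cxl_root *cxl_root __free(put_cxl_root) = find_cxl_root(port);
 *
 *	if (!cxl_root)
 *		return -ENXIO;
 *	(use cxl_root; put_cxl_root() runs automatically on scope exit)
 */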
|
2022-02-04 15:08:40 +00:00
|
|
|
int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd);
|
2022-12-01 21:33:48 +00:00
|
|
|
void cxl_bus_rescan(void);
|
|
|
|
void cxl_bus_drain(void);
|
2023-06-22 20:55:09 +00:00
|
|
|
struct cxl_port *cxl_pci_find_port(struct pci_dev *pdev,
|
|
|
|
struct cxl_dport **dport);
|
2022-05-27 17:57:01 +00:00
|
|
|
struct cxl_port *cxl_mem_find_port(struct cxl_memdev *cxlmd,
|
|
|
|
struct cxl_dport **dport);
|
2022-02-04 15:18:31 +00:00
|
|
|
bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd);
|
2022-02-04 15:08:40 +00:00
|
|
|
|
2022-02-01 21:23:14 +00:00
|
|
|
struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port,
|
2022-02-01 02:10:04 +00:00
|
|
|
struct device *dport, int port_id,
|
|
|
|
resource_size_t component_reg_phys);
|
2022-12-03 08:40:29 +00:00
|
|
|
struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
|
|
|
|
struct device *dport_dev, int port_id,
|
|
|
|
resource_size_t rcrb);
|
2022-02-04 15:08:40 +00:00
|
|
|
|
2023-10-27 22:08:06 +00:00
|
|
|
#ifdef CONFIG_PCIEAER_CXL
|
|
|
|
void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport);
|
2024-08-30 06:13:06 +00:00
|
|
|
void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host);
|
2023-10-27 22:08:06 +00:00
|
|
|
#else
|
2024-08-30 06:13:06 +00:00
|
|
|
static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport,
|
|
|
|
struct device *host) { }
|
2023-10-27 22:08:06 +00:00
|
|
|
#endif
|
|
|
|
|
2021-06-09 16:43:29 +00:00
|
|
|
struct cxl_decoder *to_cxl_decoder(struct device *dev);
|
2022-07-13 01:38:26 +00:00
|
|
|
struct cxl_root_decoder *to_cxl_root_decoder(struct device *dev);
|
2023-02-10 09:06:45 +00:00
|
|
|
struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev);
|
2022-05-21 23:24:14 +00:00
|
|
|
struct cxl_endpoint_decoder *to_cxl_endpoint_decoder(struct device *dev);
|
2021-06-15 23:18:17 +00:00
|
|
|
bool is_root_decoder(struct device *dev);
|
2023-02-10 09:06:45 +00:00
|
|
|
bool is_switch_decoder(struct device *dev);
|
2022-03-04 21:36:45 +00:00
|
|
|
bool is_endpoint_decoder(struct device *dev);
|
2022-07-13 01:38:26 +00:00
|
|
|
struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
|
2024-07-03 05:29:52 +00:00
|
|
|
unsigned int nr_targets);
|
2022-05-19 00:52:23 +00:00
|
|
|
struct cxl_switch_decoder *cxl_switch_decoder_alloc(struct cxl_port *port,
|
|
|
|
unsigned int nr_targets);
|
2021-09-21 19:22:16 +00:00
|
|
|
int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map);
|
2022-05-21 23:24:14 +00:00
|
|
|
struct cxl_endpoint_decoder *cxl_endpoint_decoder_alloc(struct cxl_port *port);
|
2022-02-01 20:24:30 +00:00
|
|
|
int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map);
|
2021-09-21 19:22:16 +00:00
|
|
|
int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld);
|
2024-04-05 22:05:50 +00:00
|
|
|
static inline int cxl_root_decoder_autoremove(struct device *host,
|
|
|
|
struct cxl_root_decoder *cxlrd)
|
|
|
|
{
|
|
|
|
return cxl_decoder_autoremove(host, &cxlrd->cxlsd.cxld);
|
|
|
|
}
|
2022-02-04 15:18:31 +00:00
|
|
|
int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint);
|
|
|
|
|
2023-02-14 19:41:13 +00:00
|
|
|
/**
|
|
|
|
* struct cxl_endpoint_dvsec_info - Cached DVSEC info
|
2023-04-03 21:33:48 +00:00
|
|
|
* @mem_enabled: cached value of mem_enabled in the DVSEC at init time
|
2023-02-14 19:41:13 +00:00
|
|
|
* @ranges: Number of active HDM ranges this device uses.
|
2023-04-03 21:33:48 +00:00
|
|
|
* @port: endpoint port associated with this info instance
|
2023-02-14 19:41:13 +00:00
|
|
|
* @dvsec_range: cached attributes of the ranges in the DVSEC, PCIE_DEVICE
|
|
|
|
*/
|
|
|
|
struct cxl_endpoint_dvsec_info {
|
|
|
|
bool mem_enabled;
|
|
|
|
int ranges;
|
2023-04-03 21:33:48 +00:00
|
|
|
struct cxl_port *port;
|
2023-02-14 19:41:13 +00:00
|
|
|
struct range dvsec_range[2];
|
|
|
|
};
|
|
|
|
|
2022-02-01 20:24:30 +00:00
|
|
|
struct cxl_hdm;
|
2023-02-14 19:41:30 +00:00
|
|
|
struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
|
|
|
|
struct cxl_endpoint_dvsec_info *info);
|
2023-02-14 19:41:24 +00:00
|
|
|
int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
|
|
|
|
struct cxl_endpoint_dvsec_info *info);
|
2022-02-01 21:23:14 +00:00
|
|
|
int devm_cxl_add_passthrough_decoder(struct cxl_port *port);
|
2024-08-28 08:42:29 +00:00
|
|
|
int cxl_dvsec_rr_decode(struct device *dev, struct cxl_port *port,
|
2023-02-14 19:41:13 +00:00
|
|
|
struct cxl_endpoint_dvsec_info *info);
|
2021-06-09 16:43:29 +00:00
|
|
|
|
2021-06-08 17:28:34 +00:00
|
|
|
bool is_cxl_region(struct device *dev);
|
|
|
|
|
2021-02-17 04:09:52 +00:00
|
|
|
extern struct bus_type cxl_bus_type;
|
2021-06-15 23:18:11 +00:00
|
|
|
|
|
|
|
struct cxl_driver {
|
|
|
|
const char *name;
|
|
|
|
int (*probe)(struct device *dev);
|
|
|
|
void (*remove)(struct device *dev);
|
|
|
|
struct device_driver drv;
|
|
|
|
int id;
|
|
|
|
};
|
|
|
|
|
2024-07-01 12:07:37 +00:00
|
|
|
#define to_cxl_drv(__drv) container_of_const(__drv, struct cxl_driver, drv)
|
2021-06-15 23:18:11 +00:00
|
|
|
|
|
|
|
int __cxl_driver_register(struct cxl_driver *cxl_drv, struct module *owner,
|
|
|
|
const char *modname);
|
|
|
|
#define cxl_driver_register(x) __cxl_driver_register(x, THIS_MODULE, KBUILD_MODNAME)
|
|
|
|
void cxl_driver_unregister(struct cxl_driver *cxl_drv);
|
|
|
|
|
2022-01-24 00:29:15 +00:00
|
|
|
#define module_cxl_driver(__cxl_driver) \
|
|
|
|
module_driver(__cxl_driver, cxl_driver_register, cxl_driver_unregister)
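/*
 * Hedged sketch of a minimal cxl_driver client (names are illustrative):
 *
 *	static int my_probe(struct device *dev)
 *	{
 *		return 0;
 *	}
 *
 *	static struct cxl_driver my_driver = {
 *		.name = "my_cxl_driver",
 *		.probe = my_probe,
 *		.id = CXL_DEVICE_PORT,
 *	};
 *	module_cxl_driver(my_driver);
 */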
|
|
|
|
|
2021-06-15 23:36:31 +00:00
|
|
|
#define CXL_DEVICE_NVDIMM_BRIDGE 1
|
|
|
|
#define CXL_DEVICE_NVDIMM 2
|
2022-02-01 21:07:51 +00:00
|
|
|
#define CXL_DEVICE_PORT 3
|
|
|
|
#define CXL_DEVICE_ROOT 4
|
2022-02-04 15:18:31 +00:00
|
|
|
#define CXL_DEVICE_MEMORY_EXPANDER 5
|
2021-06-15 21:00:40 +00:00
|
|
|
#define CXL_DEVICE_REGION 6
|
2022-01-11 16:06:40 +00:00
|
|
|
#define CXL_DEVICE_PMEM_REGION 7
|
2023-02-10 09:07:19 +00:00
|
|
|
#define CXL_DEVICE_DAX_REGION 8
|
2023-05-26 09:58:22 +00:00
|
|
|
#define CXL_DEVICE_PMU 9
|
2021-06-15 23:18:17 +00:00
|
|
|
|
2021-06-15 23:18:11 +00:00
|
|
|
#define MODULE_ALIAS_CXL(type) MODULE_ALIAS("cxl:t" __stringify(type) "*")
|
|
|
|
#define CXL_MODALIAS_FMT "cxl:t%d"
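/*
 * For illustration: MODULE_ALIAS_CXL(CXL_DEVICE_NVDIMM) expands to
 * MODULE_ALIAS("cxl:t2*"), matching uevents formatted with
 * CXL_MODALIAS_FMT ("cxl:t%d").
 */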
|
|
|
|
|
2021-06-15 23:18:17 +00:00
|
|
|
struct cxl_nvdimm_bridge *to_cxl_nvdimm_bridge(struct device *dev);
|
|
|
|
struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host,
|
|
|
|
struct cxl_port *port);
|
2021-06-15 23:36:31 +00:00
|
|
|
struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev);
|
|
|
|
bool is_cxl_nvdimm(struct device *dev);
|
2021-11-11 18:19:05 +00:00
|
|
|
bool is_cxl_nvdimm_bridge(struct device *dev);
|
2024-06-12 06:44:23 +00:00
|
|
|
int devm_cxl_add_nvdimm(struct cxl_port *parent_port, struct cxl_memdev *cxlmd);
|
|
|
|
struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_port *port);
|
2022-01-11 16:06:40 +00:00
|
|
|
|
|
|
|
#ifdef CONFIG_CXL_REGION
|
|
|
|
bool is_cxl_pmem_region(struct device *dev);
|
|
|
|
struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev);
|
2023-02-11 01:31:17 +00:00
|
|
|
int cxl_add_to_region(struct cxl_port *root,
|
|
|
|
struct cxl_endpoint_decoder *cxled);
|
2023-02-10 09:07:19 +00:00
|
|
|
struct cxl_dax_region *to_cxl_dax_region(struct device *dev);
|
2022-01-11 16:06:40 +00:00
|
|
|
#else
|
|
|
|
static inline bool is_cxl_pmem_region(struct device *dev)
|
|
|
|
{
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
static inline struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev)
|
|
|
|
{
|
|
|
|
return NULL;
|
|
|
|
}
|
2023-02-11 01:31:17 +00:00
|
|
|
static inline int cxl_add_to_region(struct cxl_port *root,
|
|
|
|
struct cxl_endpoint_decoder *cxled)
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
2023-02-10 09:07:19 +00:00
|
|
|
static inline struct cxl_dax_region *to_cxl_dax_region(struct device *dev)
|
|
|
|
{
|
|
|
|
return NULL;
|
|
|
|
}
|
2022-01-11 16:06:40 +00:00
|
|
|
#endif
|
2021-09-14 19:14:22 +00:00
|
|
|
|
2023-12-21 22:03:13 +00:00
|
|
|
void cxl_endpoint_parse_cdat(struct cxl_port *port);
|
2023-12-21 22:03:26 +00:00
|
|
|
void cxl_switch_parse_cdat(struct cxl_port *port);
|
2023-12-21 22:03:13 +00:00
|
|
|
|
2023-12-21 22:03:58 +00:00
|
|
|
int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
|
|
|
|
struct access_coordinate *coord);
|
2024-03-08 21:59:28 +00:00
|
|
|
void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
|
|
|
|
struct cxl_endpoint_decoder *cxled);
|
2024-09-04 00:11:51 +00:00
|
|
|
void cxl_region_shared_upstream_bandwidth_update(struct cxl_region *cxlr);
|
2023-12-21 22:03:58 +00:00
|
|
|
|
2024-02-06 19:03:39 +00:00
|
|
|
void cxl_memdev_update_perf(struct cxl_memdev *cxlmd);
|
|
|
|
|
2024-03-08 21:59:24 +00:00
|
|
|
void cxl_coordinates_combine(struct access_coordinate *out,
|
|
|
|
struct access_coordinate *c1,
|
|
|
|
struct access_coordinate *c2);
|
|
|
|
|
2024-05-02 16:57:34 +00:00
|
|
|
bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port);
|
|
|
|
|
2021-09-14 19:14:22 +00:00
|
|
|
/*
|
|
|
|
* Unit test builds override this to __weak; find the 'strong' version
|
|
|
|
* of these symbols in tools/testing/cxl/.
|
|
|
|
*/
|
|
|
|
#ifndef __mock
|
|
|
|
#define __mock static
|
|
|
|
#endif
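/*
 * Hedged illustration with a hypothetical helper: in a production build
 * __mock resolves to 'static'; in a cxl_test build it is redefined to
 * __weak so the mock in tools/testing/cxl/ overrides this definition.
 *
 *	__mock int example_helper(struct cxl_port *port)
 *	{
 *		return 0;
 *	}
 */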
|
2022-01-31 19:50:09 +00:00
|
|
|
|
2021-02-17 04:09:51 +00:00
|
|
|
#endif /* __CXL_H__ */
|