From 675e979db473d08be346a3190c5f0db095a57153 Mon Sep 17 00:00:00 2001 From: "Fabio M. De Francesco" Date: Fri, 7 Jun 2024 16:43:58 +0200 Subject: [PATCH 01/15] cxl/events: Use a common struct for DRAM and General Media events cxl_event_common was an unfortunate naming choice and caused confusion with the existing Common Event Record. Furthermore, its fields didn't map all the common information between DRAM and General Media Events. Remove cxl_event_common and introduce cxl_event_media_hdr to record common information between DRAM and General Media events. cxl_event_media_hdr, which is embedded in both cxl_event_gen_media and cxl_event_dram, leverages the commonalities between the two events to simplify their respective handling. Suggested-by: Dan Williams Reviewed-by: Alison Schofield Reviewed-by: Dan Williams Reviewed-by: Ira Weiny Signed-off-by: Fabio M. De Francesco Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240607144423.48681-1-fabio.m.de.francesco@linux.intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/mbox.c | 2 +- drivers/cxl/core/trace.h | 32 ++++++++++----------- include/linux/cxl-event.h | 45 +++++++++++++----------------- tools/testing/cxl/test/mem.c | 54 +++++++++++++++++++----------------- 4 files changed, 65 insertions(+), 68 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 2626f3fff201..a08f050cc1ca 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -875,7 +875,7 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd, guard(rwsem_read)(&cxl_region_rwsem); guard(rwsem_read)(&cxl_dpa_rwsem); - dpa = le64_to_cpu(evt->common.phys_addr) & CXL_DPA_MASK; + dpa = le64_to_cpu(evt->media_hdr.phys_addr) & CXL_DPA_MASK; cxlr = cxl_dpa_to_region(cxlmd, dpa); if (cxlr) hpa = cxl_trace_hpa(cxlr, cxlmd, dpa); diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index ee5cd4eb2f16..6d8b71d8f6c4 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -340,23 +340,23 @@ TRACE_EVENT(cxl_general_media, ), TP_fast_assign( - CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr); + CXL_EVT_TP_fast_assign(cxlmd, log, rec->media_hdr.hdr); __entry->hdr_uuid = CXL_EVENT_GEN_MEDIA_UUID; /* General Media */ - __entry->dpa = le64_to_cpu(rec->phys_addr); + __entry->dpa = le64_to_cpu(rec->media_hdr.phys_addr); __entry->dpa_flags = __entry->dpa & CXL_DPA_FLAGS_MASK; /* Mask after flags have been parsed */ __entry->dpa &= CXL_DPA_MASK; - __entry->descriptor = rec->descriptor; - __entry->type = rec->type; - __entry->transaction_type = rec->transaction_type; - __entry->channel = rec->channel; - __entry->rank = rec->rank; + __entry->descriptor = rec->media_hdr.descriptor; + __entry->type = rec->media_hdr.type; + __entry->transaction_type = rec->media_hdr.transaction_type; + __entry->channel = rec->media_hdr.channel; + __entry->rank = rec->media_hdr.rank; __entry->device = get_unaligned_le24(rec->device); memcpy(__entry->comp_id, &rec->component_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE); - __entry->validity_flags = get_unaligned_le16(&rec->validity_flags); + __entry->validity_flags = get_unaligned_le16(&rec->media_hdr.validity_flags); __entry->hpa = hpa; if (cxlr) { __assign_str(region_name); @@ -440,19 +440,19 @@ TRACE_EVENT(cxl_dram, ), TP_fast_assign( - CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr); + CXL_EVT_TP_fast_assign(cxlmd, log, rec->media_hdr.hdr); __entry->hdr_uuid = CXL_EVENT_DRAM_UUID; /* DRAM */ - __entry->dpa = le64_to_cpu(rec->phys_addr); + __entry->dpa = le64_to_cpu(rec->media_hdr.phys_addr); 
__entry->dpa_flags = __entry->dpa & CXL_DPA_FLAGS_MASK; __entry->dpa &= CXL_DPA_MASK; - __entry->descriptor = rec->descriptor; - __entry->type = rec->type; - __entry->transaction_type = rec->transaction_type; - __entry->validity_flags = get_unaligned_le16(rec->validity_flags); - __entry->channel = rec->channel; - __entry->rank = rec->rank; + __entry->descriptor = rec->media_hdr.descriptor; + __entry->type = rec->media_hdr.type; + __entry->transaction_type = rec->media_hdr.transaction_type; + __entry->validity_flags = get_unaligned_le16(rec->media_hdr.validity_flags); + __entry->channel = rec->media_hdr.channel; + __entry->rank = rec->media_hdr.rank; __entry->nibble_mask = get_unaligned_le24(rec->nibble_mask); __entry->bank_group = rec->bank_group; __entry->bank = rec->bank; diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h index 60b25020281f..0bea1afbd747 100644 --- a/include/linux/cxl-event.h +++ b/include/linux/cxl-event.h @@ -21,6 +21,21 @@ struct cxl_event_record_hdr { u8 reserved[15]; } __packed; +struct cxl_event_media_hdr { + struct cxl_event_record_hdr hdr; + __le64 phys_addr; + u8 descriptor; + u8 type; + u8 transaction_type; + /* + * The meaning of Validity Flags from bit 2 is + * different across DRAM and General Media records + */ + u8 validity_flags[2]; + u8 channel; + u8 rank; +} __packed; + #define CXL_EVENT_RECORD_DATA_LENGTH 0x50 struct cxl_event_generic { struct cxl_event_record_hdr hdr; @@ -33,14 +48,7 @@ struct cxl_event_generic { */ #define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10 struct cxl_event_gen_media { - struct cxl_event_record_hdr hdr; - __le64 phys_addr; - u8 descriptor; - u8 type; - u8 transaction_type; - u8 validity_flags[2]; - u8 channel; - u8 rank; + struct cxl_event_media_hdr media_hdr; u8 device[3]; u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE]; u8 reserved[46]; @@ -52,14 +60,7 @@ struct cxl_event_gen_media { */ #define CXL_EVENT_DER_CORRECTION_MASK_SIZE 0x20 struct cxl_event_dram { - struct cxl_event_record_hdr hdr; - __le64 phys_addr; - u8 descriptor; - u8 type; - u8 transaction_type; - u8 validity_flags[2]; - u8 channel; - u8 rank; + struct cxl_event_media_hdr media_hdr; u8 nibble_mask[3]; u8 bank_group; u8 bank; @@ -95,21 +96,13 @@ struct cxl_event_mem_module { u8 reserved[0x3d]; } __packed; -/* - * General Media or DRAM Event Common Fields - * - provides common access to phys_addr - */ -struct cxl_event_common { - struct cxl_event_record_hdr hdr; - __le64 phys_addr; -} __packed; - union cxl_event { struct cxl_event_generic generic; struct cxl_event_gen_media gen_media; struct cxl_event_dram dram; struct cxl_event_mem_module mem_module; - struct cxl_event_common common; + /* dram & gen_media event header */ + struct cxl_event_media_hdr media_hdr; } __packed; /* diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index eaf091a3d331..94de2cf60f4f 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -385,19 +385,21 @@ struct cxl_test_gen_media { struct cxl_test_gen_media gen_media = { .id = CXL_EVENT_GEN_MEDIA_UUID, .rec = { - .hdr = { - .length = sizeof(struct cxl_test_gen_media), - .flags[0] = CXL_EVENT_RECORD_FLAG_PERMANENT, - /* .handle = Set dynamically */ - .related_handle = cpu_to_le16(0), + .media_hdr = { + .hdr = { + .length = sizeof(struct cxl_test_gen_media), + .flags[0] = CXL_EVENT_RECORD_FLAG_PERMANENT, + /* .handle = Set dynamically */ + .related_handle = cpu_to_le16(0), + }, + .phys_addr = cpu_to_le64(0x2000), + .descriptor = CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT, + .type = 
CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR, + .transaction_type = CXL_GMER_TRANS_HOST_WRITE, + /* .validity_flags = */ + .channel = 1, + .rank = 30, }, - .phys_addr = cpu_to_le64(0x2000), - .descriptor = CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT, - .type = CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR, - .transaction_type = CXL_GMER_TRANS_HOST_WRITE, - /* .validity_flags = */ - .channel = 1, - .rank = 30 }, }; @@ -409,18 +411,20 @@ struct cxl_test_dram { struct cxl_test_dram dram = { .id = CXL_EVENT_DRAM_UUID, .rec = { - .hdr = { - .length = sizeof(struct cxl_test_dram), - .flags[0] = CXL_EVENT_RECORD_FLAG_PERF_DEGRADED, - /* .handle = Set dynamically */ - .related_handle = cpu_to_le16(0), + .media_hdr = { + .hdr = { + .length = sizeof(struct cxl_test_dram), + .flags[0] = CXL_EVENT_RECORD_FLAG_PERF_DEGRADED, + /* .handle = Set dynamically */ + .related_handle = cpu_to_le16(0), + }, + .phys_addr = cpu_to_le64(0x8000), + .descriptor = CXL_GMER_EVT_DESC_THRESHOLD_EVENT, + .type = CXL_GMER_MEM_EVT_TYPE_INV_ADDR, + .transaction_type = CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB, + /* .validity_flags = */ + .channel = 1, }, - .phys_addr = cpu_to_le64(0x8000), - .descriptor = CXL_GMER_EVT_DESC_THRESHOLD_EVENT, - .type = CXL_GMER_MEM_EVT_TYPE_INV_ADDR, - .transaction_type = CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB, - /* .validity_flags = */ - .channel = 1, .bank_group = 5, .bank = 2, .column = {0xDE, 0xAD}, @@ -474,11 +478,11 @@ static int mock_set_timestamp(struct cxl_dev_state *cxlds, static void cxl_mock_add_event_logs(struct mock_event_store *mes) { put_unaligned_le16(CXL_GMER_VALID_CHANNEL | CXL_GMER_VALID_RANK, - &gen_media.rec.validity_flags); + &gen_media.rec.media_hdr.validity_flags); put_unaligned_le16(CXL_DER_VALID_CHANNEL | CXL_DER_VALID_BANK_GROUP | CXL_DER_VALID_BANK | CXL_DER_VALID_COLUMN, - &dram.rec.validity_flags); + &dram.rec.media_hdr.validity_flags); mes_add_event(mes, CXL_EVENT_TYPE_INFO, &maint_needed); mes_add_event(mes, CXL_EVENT_TYPE_INFO, From a0caa19711ceb54c34368f66a746844fb03fde6c Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Fri, 7 Jun 2024 06:57:15 -0700 Subject: [PATCH 02/15] cxl: add missing MODULE_DESCRIPTION() macros make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/cxl/core/cxl_core.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/cxl/cxl_pci.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/cxl/cxl_mem.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/cxl/cxl_acpi.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/cxl/cxl_pmem.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/cxl/cxl_port.o Add the missing invocations of the MODULE_DESCRIPTION() macro. 
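For illustration, each module now carries a one-line description next to its existing module tags; the cxl_acpi case from the hunk below ends up as:

    MODULE_DESCRIPTION("CXL ACPI: Platform Support");
    MODULE_LICENSE("GPL v2");
    MODULE_IMPORT_NS(CXL);
    MODULE_IMPORT_NS(ACPI);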
Signed-off-by: Jeff Johnson Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20240607-md-drivers-cxl-v2-1-0c61d95ee7a7@quicinc.com Signed-off-by: Dave Jiang --- drivers/cxl/acpi.c | 1 + drivers/cxl/core/port.c | 1 + drivers/cxl/mem.c | 1 + drivers/cxl/pci.c | 1 + drivers/cxl/pmem.c | 1 + drivers/cxl/port.c | 1 + 6 files changed, 6 insertions(+) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 571069863c62..e51315ea4a6a 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -921,6 +921,7 @@ static void __exit cxl_acpi_exit(void) /* load before dax_hmem sees 'Soft Reserved' CXL ranges */ subsys_initcall(cxl_acpi_init); module_exit(cxl_acpi_exit); +MODULE_DESCRIPTION("CXL ACPI: Platform Support"); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(CXL); MODULE_IMPORT_NS(ACPI); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 887ed6e358fb..e31c5fcd9bf8 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -2356,5 +2356,6 @@ static void cxl_core_exit(void) subsys_initcall(cxl_core_init); module_exit(cxl_core_exit); +MODULE_DESCRIPTION("CXL: Core Compute Express Link support"); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(CXL); diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 0c79d9ce877c..1afb0e78082b 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -252,6 +252,7 @@ static struct cxl_driver cxl_mem_driver = { module_cxl_driver(cxl_mem_driver); +MODULE_DESCRIPTION("CXL: Memory Expansion"); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(CXL); MODULE_ALIAS_CXL(CXL_DEVICE_MEMORY_EXPANDER); diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index e53646e9f2fb..4be35dc22202 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -1066,5 +1066,6 @@ static void __exit cxl_pci_driver_exit(void) module_init(cxl_pci_driver_init); module_exit(cxl_pci_driver_exit); +MODULE_DESCRIPTION("CXL: PCI manageability"); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(CXL); diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 2ecdaee63021..4ef93da22335 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -453,6 +453,7 @@ static __exit void cxl_pmem_exit(void) cxl_driver_unregister(&cxl_nvdimm_bridge_driver); } +MODULE_DESCRIPTION("CXL PMEM: Persistent Memory Support"); MODULE_LICENSE("GPL v2"); module_init(cxl_pmem_init); module_exit(cxl_pmem_exit); diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index 97c21566677a..d7d5d982ce69 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -209,6 +209,7 @@ static struct cxl_driver cxl_port_driver = { }; module_cxl_driver(cxl_port_driver); +MODULE_DESCRIPTION("CXL: Port enumeration and services"); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(CXL); MODULE_ALIAS_CXL(CXL_DEVICE_PORT); From a3483ee7e6a7f2d12b5950246f4e0ef94f4a5df0 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Tue, 18 Jun 2024 16:46:37 +0800 Subject: [PATCH 03/15] cxl/region: Fix a race condition in memory hotplug notifier In the memory hotplug notifier function of the CXL region, cxl_region_perf_attrs_callback(), the node ID is obtained by checking the host address range of the region. However, the address range information is not available when the region is registered in devm_cxl_add_region(). Additionally, this information may be removed or added under the protection of cxl_region_rwsem during runtime. If the memory notifier is called for nodes other than that backed by the region, a race condition may occur, potentially leading to a NULL dereference or an invalid address range. 
The race condition is addressed by checking the availability of the address range information under the protection of cxl_region_rwsem. To enhance code readability and use guard(), the relevant code has been moved into a newly added function: cxl_region_nid(). Fixes: 067353a46d8c ("cxl/region: Add memory hotplug notifier for cxl region") Signed-off-by: Huang, Ying Cc: Dan Williams Cc: Alison Schofield Cc: Andrew Morton Cc: Jonathan Cameron Cc: Dave Jiang Cc: Bharata B Rao Cc: Alistair Popple Cc: Aneesh Kumar K.V Cc: Davidlohr Bueso Cc: Vishal Verma Cc: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20240618084639.1419629-2-ying.huang@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 3c2b6144be23..51aeef2c012c 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2304,14 +2304,25 @@ static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid) return true; } +static int cxl_region_nid(struct cxl_region *cxlr) +{ + struct cxl_region_params *p = &cxlr->params; + struct cxl_endpoint_decoder *cxled; + struct cxl_decoder *cxld; + + guard(rwsem_read)(&cxl_region_rwsem); + cxled = p->targets[0]; + if (!cxled) + return NUMA_NO_NODE; + cxld = &cxled->cxld; + return phys_to_target_node(cxld->hpa_range.start); +} + static int cxl_region_perf_attrs_callback(struct notifier_block *nb, unsigned long action, void *arg) { struct cxl_region *cxlr = container_of(nb, struct cxl_region, memory_notifier); - struct cxl_region_params *p = &cxlr->params; - struct cxl_endpoint_decoder *cxled = p->targets[0]; - struct cxl_decoder *cxld = &cxled->cxld; struct memory_notify *mnb = arg; int nid = mnb->status_change_nid; int region_nid; @@ -2319,7 +2330,7 @@ static int cxl_region_perf_attrs_callback(struct notifier_block *nb, if (nid == NUMA_NO_NODE || action != MEM_ONLINE) return NOTIFY_DONE; - region_nid = phys_to_target_node(cxld->hpa_range.start); + region_nid = cxl_region_nid(cxlr); if (nid != region_nid) return NOTIFY_DONE; From 643e8e3e65290fdfe507bb23fa524c77e1345af3 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Tue, 18 Jun 2024 16:46:38 +0800 Subject: [PATCH 04/15] cxl/region: Support to calculate memory tier abstract distance An abstract distance value must be assigned by the driver that makes the memory available to the system. It reflects relative performance and is used to place memory nodes backed by CXL regions in the appropriate memory tiers allowing promotion/demotion within the existing memory tiering mechanism. The abstract distance is calculated based on the memory access latency and bandwidth of CXL regions. 
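For reference, the notifier added below converts the region's CPU access coordinates into an abstract distance. Condensed from the hunk that follows (locals trimmed and comments added here for readability):

    static int cxl_region_calculate_adistance(struct notifier_block *nb,
                                              unsigned long nid, void *data)
    {
            struct cxl_region *cxlr = container_of(nb, struct cxl_region,
                                                   adist_notifier);
            int *adist = data;

            /* Only answer for the node backed by this region */
            if (nid != cxl_region_nid(cxlr))
                    return NOTIFY_OK;

            /* Derive the abstract distance from CPU-relative latency/bandwidth */
            if (mt_perf_to_adistance(&cxlr->coord[ACCESS_COORDINATE_CPU], adist))
                    return NOTIFY_OK;

            return NOTIFY_STOP;
    }

The notifier is registered with register_mt_adistance_algorithm() at region creation and unregistered in unregister_region().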
Signed-off-by: Huang, Ying Acked-by: Dan Williams Cc: Alison Schofield Cc: Andrew Morton Cc: Jonathan Cameron Cc: Dave Jiang Cc: Bharata B Rao Cc: Alistair Popple Cc: Aneesh Kumar K.V Cc: Davidlohr Bueso Cc: Vishal Verma Cc: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20240618084639.1419629-3-ying.huang@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 27 +++++++++++++++++++++++++++ drivers/cxl/cxl.h | 2 ++ 2 files changed, 29 insertions(+) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 51aeef2c012c..dc15ceba7ab7 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include "core.h" @@ -2228,6 +2229,7 @@ static void unregister_region(void *_cxlr) int i; unregister_memory_notifier(&cxlr->memory_notifier); + unregister_mt_adistance_algorithm(&cxlr->adist_notifier); device_del(&cxlr->dev); /* @@ -2340,6 +2342,27 @@ static int cxl_region_perf_attrs_callback(struct notifier_block *nb, return NOTIFY_OK; } +static int cxl_region_calculate_adistance(struct notifier_block *nb, + unsigned long nid, void *data) +{ + struct cxl_region *cxlr = container_of(nb, struct cxl_region, + adist_notifier); + struct access_coordinate *perf; + int *adist = data; + int region_nid; + + region_nid = cxl_region_nid(cxlr); + if (nid != region_nid) + return NOTIFY_OK; + + perf = &cxlr->coord[ACCESS_COORDINATE_CPU]; + + if (mt_perf_to_adistance(perf, adist)) + return NOTIFY_OK; + + return NOTIFY_STOP; +} + /** * devm_cxl_add_region - Adds a region to a decoder * @cxlrd: root decoder @@ -2382,6 +2405,10 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd, cxlr->memory_notifier.priority = CXL_CALLBACK_PRI; register_memory_notifier(&cxlr->memory_notifier); + cxlr->adist_notifier.notifier_call = cxl_region_calculate_adistance; + cxlr->adist_notifier.priority = 100; + register_mt_adistance_algorithm(&cxlr->adist_notifier); + rc = devm_add_action_or_reset(port->uport_dev, unregister_region, cxlr); if (rc) return ERR_PTR(rc); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 603c0120cff8..f46252373159 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -522,6 +522,7 @@ struct cxl_region_params { * @params: active + config params for the region * @coord: QoS access coordinates for the region * @memory_notifier: notifier for setting the access coordinates to node + * @adist_notifier: notifier for calculating the abstract distance of node */ struct cxl_region { struct device dev; @@ -534,6 +535,7 @@ struct cxl_region { struct cxl_region_params params; struct access_coordinate coord[ACCESS_COORDINATE_MAX]; struct notifier_block memory_notifier; + struct notifier_block adist_notifier; }; struct cxl_nvdimm_bridge { From f3d70720e92c24c22e0e63c2588636edba33eb1a Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Tue, 18 Jun 2024 16:46:39 +0800 Subject: [PATCH 05/15] cxl/region: Simplify cxl_region_nid() The node ID of the region can be gotten via resource start address directly. This simplifies the implementation of cxl_region_nid(). 
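With that, the helper applied by the hunk below reduces to:

    static int cxl_region_nid(struct cxl_region *cxlr)
    {
            struct cxl_region_params *p = &cxlr->params;
            struct resource *res;

            guard(rwsem_read)(&cxl_region_rwsem);
            res = p->res;
            if (!res)
                    return NUMA_NO_NODE;
            return phys_to_target_node(res->start);
    }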
Signed-off-by: Huang Ying Suggested-by: Alison Schofield Cc: Dan Williams Cc: Andrew Morton Cc: Jonathan Cameron Cc: Dave Jiang Cc: Bharata B Rao Cc: Alistair Popple Cc: Aneesh Kumar K.V Cc: Davidlohr Bueso Cc: Vishal Verma Cc: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20240618084639.1419629-4-ying.huang@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index dc15ceba7ab7..32473fddce03 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2309,15 +2309,13 @@ static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid) static int cxl_region_nid(struct cxl_region *cxlr) { struct cxl_region_params *p = &cxlr->params; - struct cxl_endpoint_decoder *cxled; - struct cxl_decoder *cxld; + struct resource *res; guard(rwsem_read)(&cxl_region_rwsem); - cxled = p->targets[0]; - if (!cxled) + res = p->res; + if (!res) return NUMA_NO_NODE; - cxld = &cxled->cxld; - return phys_to_target_node(cxld->hpa_range.start); + return phys_to_target_node(res->start); } static int cxl_region_perf_attrs_callback(struct notifier_block *nb, From 86588139b84386a47108b024d703c3c24ddbbec8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 3 Jul 2024 18:01:35 -0700 Subject: [PATCH 06/15] Documentation: CXL Maturity Map Provide a survey of the work-in-progress maturity (implementation status) of various aspects of the CXL subsystem. Clarify that in addition to ongoing upkeep relative to specification updates, there are some long running themes in the driver that respond to the discovery of new corner cases (bugs) and new use cases (feature extensions). The primary audience is distribution maintainers, but it also serves as a guide for kernel developers to understand what aspects of the CXL subsystem need more help. It is a landing page to document ongoing progress, and a guide to discern exposure to work-in-progress features. Reviewed-by: Adam Manzanares Signed-off-by: Dan Williams Reviewed-by: Jonathan Cameron Reviewed-by: Ira Weiny Reviewed-by: Alison Schofield Link: https://patch.msgid.link/172005486862.2048248.6668794717827294862.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dave Jiang --- Documentation/driver-api/cxl/index.rst | 2 + Documentation/driver-api/cxl/maturity-map.rst | 202 ++++++++++++++++++ MAINTAINERS | 1 + 3 files changed, 205 insertions(+) create mode 100644 Documentation/driver-api/cxl/maturity-map.rst diff --git a/Documentation/driver-api/cxl/index.rst b/Documentation/driver-api/cxl/index.rst index 036e49553542..12b82725d322 100644 --- a/Documentation/driver-api/cxl/index.rst +++ b/Documentation/driver-api/cxl/index.rst @@ -9,4 +9,6 @@ Compute Express Link memory-devices + maturity-map + .. only:: subproject and html diff --git a/Documentation/driver-api/cxl/maturity-map.rst b/Documentation/driver-api/cxl/maturity-map.rst new file mode 100644 index 000000000000..df8e2ac2a320 --- /dev/null +++ b/Documentation/driver-api/cxl/maturity-map.rst @@ -0,0 +1,202 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: + +=========================================== +Compute Express Link Subsystem Maturity Map +=========================================== + +The Linux CXL subsystem tracks the dynamic `CXL specification +`_ that +continues to respond to new use cases with new features, capability +updates and fixes. At any given point some aspects of the subsystem are +more mature than others. 
While the periodic pull requests summarize the +`work being incorporated each merge window +`_, +those do not always convey progress relative to a starting point and a +future end goal. + +What follows is a coarse breakdown of the subsystem's major +responsibilities along with a maturity score. The expectation is that +the change-history of this document provides an overview summary of the +subsystem maturation over time. + +The maturity scores are: + +- [3] Mature: Work in this area is complete and no changes on the horizon. + Note that this score can regress from one kernel release to the next + based on new test results or end user reports. + +- [2] Stabilizing: Major functionality operational, common cases are + mature, but known corner cases are still a work in progress. + +- [1] Initial: Capability that has exited the Proof of Concept phase, but + may still have significant gaps to close and fixes to apply as real + world testing occurs. + +- [0] Known gap: Feature is on a medium to long term horizon to + implement. If the specification has a feature that does not even have + a '0' score in this document, there is a good chance that no one in + the linux-cxl@vger.kernel.org community has started to look at it. + +- X: Out of scope for kernel enabling, or kernel enabling not required + +Feature and Capabilities +======================== + +Enumeration / Provisioning +-------------------------- +All of the fundamental enumeration and object model of the subsystem is +in place, but there are several corner cases that are pending closure. + + +* [2] CXL Window Enumeration + + * [0] :ref:`Extended-linear memory-side cache <extended-linear>` + * [0] Low Memory-hole + * [0] Hetero-interleave + +* [2] Switch Enumeration + + * [0] CXL register enumeration link-up dependency + +* [2] HDM Decoder Configuration + + * [0] Decoder target and granularity constraints + +* [2] Performance enumeration + + * [3] Endpoint CDAT + * [3] Switch CDAT + * [1] CDAT to Core-mm integration + + * [1] x86 + * [0] Arm64 + * [0] All other arch. + + * [0] Shared link + +* [2] Hotplug + (see CXL Window Enumeration) + + * [0] Handle Soft Reserved conflicts + +* [0] :ref:`RCH link status <rch-link-status>` +* [0] Fabrics / G-FAM (chapter 7) +* [0] Global Access Endpoint + + +RAS +--- +In many ways CXL can be seen as a standardization of what would normally +be handled by custom EDAC drivers. The open development here is +mainly caused by the enumeration corner cases above.
+ +* [3] Component events (OS) +* [2] Component events (FFM) +* [1] Endpoint protocol errors (OS) +* [1] Endpoint protocol errors (FFM) +* [0] Switch protocol errors (OS) +* [1] Switch protocol errors (FFM) +* [2] DPA->HPA Address translation + + * [1] XOR Interleave translation + (see CXL Window Enumeration) + +* [1] Memory Failure coordination +* [0] Scrub control +* [2] ACPI error injection EINJ + + * [0] EINJ v2 + * [X] Compliance DOE + +* [2] Native error injection +* [3] RCH error handling +* [1] VH error handling +* [0] PPR +* [0] Sparing +* [0] Device built-in test + + +Mailbox commands +---------------- + +* [3] Firmware update +* [3] Health / Alerts +* [1] :ref:`Background commands <background-commands>` +* [3] Sanitization +* [3] Security commands +* [3] RAW Command Debug Passthrough +* [0] CEL-only-validation Passthrough +* [0] Switch CCI +* [3] Timestamp +* [1] PMEM labels +* [0] PMEM GPF / Dirty Shutdown +* [0] Scan Media + +PMU +--- +* [1] Type 3 PMU +* [0] Switch USP/DSP, Root Port + +Security +-------- + +* [X] CXL Trusted Execution Environment Security Protocol (TSP) +* [X] CXL IDE (subsumed by TSP) + +Memory-pooling +-------------- + +* [1] Hotplug of LDs (via PCI hotplug) +* [0] Dynamic Capacity Device (DCD) Support + +Multi-host sharing +------------------ + +* [0] Hardware coherent shared memory +* [0] Software managed coherency shared memory + +Multi-host memory +----------------- + +* [0] Dynamic Capacity Device Support +* [0] Sharing + +Accelerator +----------- + +* [0] Accelerator memory enumeration HDM-D (CXL 1.1/2.0 Type-2) +* [0] Accelerator memory enumeration HDM-DB (CXL 3.0 Type-2) +* [0] CXL.cache 68b (CXL 2.0) +* [0] CXL.cache 256b Cache IDs (CXL 3.0) + +User Flow Support +----------------- + +* [0] HPA->DPA Address translation (need xormaps export solution) + +Details +======= + +.. _extended-linear: + +* **Extended-linear memory-side cache**: An HMAT proposal to enumerate the presence of a + memory-side cache where the cache capacity extends the SRAT address + range capacity. `See the ECN + `_ + for more details. + +.. _rch-link-status: + +* **RCH Link Status**: RCH (Restricted CXL Host) topologies end up + hiding some standard registers like PCIe Link Status / Capabilities in + the CXL RCRB (Root Complex Register Block). + +.. _background-commands: + +* **Background commands**: The CXL background command mechanism is + awkward as the single slot is monopolized potentially indefinitely by + various commands. A `cancel on conflict + `_ + facility is needed so that the kernel can ensure forward progress + of priority commands. diff --git a/MAINTAINERS b/MAINTAINERS index 3c4fdf74a3f9..ce047ab7e977 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5485,6 +5485,7 @@ M: Ira Weiny M: Dan Williams L: linux-cxl@vger.kernel.org S: Maintained +F: Documentation/driver-api/cxl F: drivers/cxl/ F: include/linux/einj-cxl.h F: include/linux/cxl-event.h From 8ecef8e01a08c7e3e4ffc8f08d9f9663984f334b Mon Sep 17 00:00:00 2001 From: peng guo Date: Wed, 10 Jul 2024 10:31:12 +0800 Subject: [PATCH 07/15] cxl/core: Fix incorrect vendor debug UUID define When a user sends a mbox command whose opcode is CXL_MBOX_OP_CLEAR_LOG and whose in_payload is the normal vendor debug log UUID defined by the CXL specification, cxl_payload_from_user_allowed() unexpectedly returns false. The mbox command operation fails and the kernel log prints: Clear Log: input payload not allowed.
All CXL devices that support a debug log shall support the Vendor Debug Log so that the log can be accessed through a common host driver for any device. All versions of the CXL specification define the same Log Identifier value for it: 5e1819d9-11a9-400c-811f-d60719403d86 (refer to CXL spec r3.1 Table 8-71). Fix the definition value of DEFINE_CXL_VENDOR_DEBUG_UUID to match the CXL specification. Fixes: 472b1ce6e9d6 ("cxl/mem: Enable commands via CEL") Signed-off-by: peng guo Reviewed-by: Alison Schofield Link: https://patch.msgid.link/20240710023112.8063-1-engguopeng@buaa.edu.cn Signed-off-by: Dave Jiang --- drivers/cxl/cxlmem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 19aba81cdf13..f55141c6d64e 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -562,7 +562,7 @@ enum cxl_opcode { 0x3b, 0x3f, 0x17) #define DEFINE_CXL_VENDOR_DEBUG_UUID \ - UUID_INIT(0xe1819d9, 0x11a9, 0x400c, 0x81, 0x1f, 0xd6, 0x07, 0x19, \ + UUID_INIT(0x5e1819d9, 0x11a9, 0x400c, 0x81, 0x1f, 0xd6, 0x07, 0x19, \ 0x40, 0x3d, 0x86) struct cxl_mbox_get_supported_logs { From bebfbbaffccfb69126c5a6a1d41cd868b6419370 Mon Sep 17 00:00:00 2001 From: "Fabio M. De Francesco" Date: Fri, 28 Jun 2024 19:48:07 +0200 Subject: [PATCH 08/15] cxl/acpi: Warn on mixed CXL VH and RCH/RCD Hierarchy Each Host Bridge instance has a corresponding CXL Host Bridge Structure (CHBS) ACPI table that identifies its capabilities. CHBS tables can be of two types (CXL 3.1 Table 9-21): the PCIe Root Complex Register Block (RCRB) and the CXL Host Bridge Component Registers (CHBCR). If a Host Bridge is attached to a device that is operating in Restricted CXL Device Mode (RCD), BIOS publishes an RCRB with the base address of registers that describe its capabilities (CXL 3.1 sec. 9.11). Instead, the new (CXL 2.0+) Component registers can only be accessed by means of a base address published with a CHBCR (CXL 3.1 sec. 9.12). If an eRCD (a device that forces the host bridge into CXL 1.1 Restricted CXL Host mode) is attached to a CXL 2.0+ Host Bridge, the current CXL specification does not define a mechanism for finding CXL-2.0-only root-port component registers like HDM decoders and the Extended Security capability. An algorithm to locate a CHBCR associated with an RCRB would be too invasive to land without some concrete motivation. Therefore, just print a message to report the unsupported configuration. Count how many different CHBS "Version" types are detected by cxl_get_chbs_iter(). Then make cxl_get_chbs() print a warning if that count is greater than 1. Tested-by: Alison Schofield Signed-off-by: Fabio M.
De Francesco Reviewed-by: Davidlohr Bueso Reviewed-by: Alison Schofield Link: https://patch.msgid.link/20240628175535.272472-1-fabio.m.de.francesco@linux.intel.com Signed-off-by: Dave Jiang --- drivers/cxl/acpi.c | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index e51315ea4a6a..574918a9ae3a 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -482,6 +482,8 @@ struct cxl_chbs_context { unsigned long long uid; resource_size_t base; u32 cxl_version; + int nr_versions; + u32 saved_version; }; static int cxl_get_chbs_iter(union acpi_subtable_headers *header, void *arg, @@ -490,22 +492,31 @@ static int cxl_get_chbs_iter(union acpi_subtable_headers *header, void *arg, struct cxl_chbs_context *ctx = arg; struct acpi_cedt_chbs *chbs; - if (ctx->base != CXL_RESOURCE_NONE) - return 0; - chbs = (struct acpi_cedt_chbs *) header; - if (ctx->uid != chbs->uid) - return 0; - - ctx->cxl_version = chbs->cxl_version; - if (!chbs->base) - return 0; - if (chbs->cxl_version == ACPI_CEDT_CHBS_VERSION_CXL11 && chbs->length != CXL_RCRB_SIZE) return 0; + if (!chbs->base) + return 0; + + if (ctx->saved_version != chbs->cxl_version) { + /* + * cxl_version cannot be overwritten before the next two + * checks, then use saved_version + */ + ctx->saved_version = chbs->cxl_version; + ctx->nr_versions++; + } + + if (ctx->base != CXL_RESOURCE_NONE) + return 0; + + if (ctx->uid != chbs->uid) + return 0; + + ctx->cxl_version = chbs->cxl_version; ctx->base = chbs->base; return 0; @@ -529,10 +540,19 @@ static int cxl_get_chbs(struct device *dev, struct acpi_device *hb, .uid = uid, .base = CXL_RESOURCE_NONE, .cxl_version = UINT_MAX, + .saved_version = UINT_MAX, }; acpi_table_parse_cedt(ACPI_CEDT_TYPE_CHBS, cxl_get_chbs_iter, ctx); + if (ctx->nr_versions > 1) { + /* + * Disclaim eRCD support given some component register may + * only be found via CHBCR + */ + dev_info(dev, "Unsupported platform config, mixed Virtual Host and Restricted CXL Host hierarchy."); + } + return 0; } From 591209c79844c1ecbee79e6b5a019e5b61eab8d3 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Sat, 6 Jul 2024 18:53:43 -0700 Subject: [PATCH 09/15] cxl/memdev: Replace ENXIO with EBUSY for inject poison limit reached The CXL driver provides a debugfs interface offering users the ability to inject and clear poison to a memdev. Once a user has injected up to the devices limit further injection requests fail with ENXIO until a clear poison is issued. Users may not have device specs in hand or may want to intentionally hit the limit and then clear. Replace the usual ENXIO return status with EBUSY so users can recognize this failure. Signed-off-by: Alison Schofield Tested-by: Xingtao Yao Reviewed-by: Dan Williams Reviewed-by: Davidlohr Bueso Link: https://patch.msgid.link/825bd4c67fb55a4373c4182d999ad49d4e6b4fe7.1720316188.git.alison.schofield@intel.com Signed-off-by: Dave Jiang --- Documentation/ABI/testing/debugfs-cxl | 7 ++++--- drivers/cxl/cxlmem.h | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Documentation/ABI/testing/debugfs-cxl b/Documentation/ABI/testing/debugfs-cxl index c61f9b813973..12488c14be64 100644 --- a/Documentation/ABI/testing/debugfs-cxl +++ b/Documentation/ABI/testing/debugfs-cxl @@ -14,9 +14,10 @@ Description: event to its internal Informational Event log, updates the Event Status register, and if configured, interrupts the host. 
It is not an error to inject poison into an address that - already has poison present and no error is returned. The - inject_poison attribute is only visible for devices supporting - the capability. + already has poison present and no error is returned. If the + device returns 'Inject Poison Limit Reached' an -EBUSY error + is returned to the user. The inject_poison attribute is only + visible for devices supporting the capability. What: /sys/kernel/debug/memX/clear_poison diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index f55141c6d64e..40a465c37e03 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -161,7 +161,7 @@ struct cxl_mbox_cmd { C(FWRESET, -ENXIO, "FW failed to activate, needs cold reset"), \ C(HANDLE, -ENXIO, "one or more Event Record Handles were invalid"), \ C(PADDR, -EFAULT, "physical address specified is invalid"), \ - C(POISONLMT, -ENXIO, "poison injection limit has been reached"), \ + C(POISONLMT, -EBUSY, "poison injection limit has been reached"), \ C(MEDIAFAILURE, -ENXIO, "permanent issue with the media"), \ C(ABORT, -ENXIO, "background cmd was aborted by device"), \ C(SECURITY, -ENXIO, "not valid in the current security state"), \ From 3a8617c7df6eb351227aad9b0df647f34a7ef423 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Sat, 6 Jul 2024 18:53:44 -0700 Subject: [PATCH 10/15] cxl/test: Replace ENXIO with EBUSY for inject poison limit reached The CXL driver was recently updated to return EBUSY rather than ENXIO when the device reports that an injection request exceeds the device's limit. That change to EBUSY allows debug users to differentiate between limit reached and inject failures for any other reason. Change cxl-test to also return EBUSY and tidy up the dev_dbg() messaging to emit the correct limit. 
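The core of the change is the limit branch in mock_poison_add(), which now reports the configured limit and returns an errno that the caller propagates (excerpt from the diff below):

    /* Return EBUSY to match the CXL driver handling */
    if (mock_poison_dev_max_injected(cxlds)) {
            dev_dbg(cxlds->dev,
                    "Device poison injection limit has been reached: %d\n",
                    poison_inject_dev_max);
            return -EBUSY;
    }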
Reminder: the cxl-test per device injection limit is a configurable attribute: /sys/bus/platform/drivers/cxl_mock_mem/poison_inject_max Signed-off-by: Alison Schofield Tested-by: Xingtao Yao Reviewed-by: Dan Williams Reviewed-by: Davidlohr Bueso Link: https://patch.msgid.link/ba1b80e1658b644d85d0d5e2287112d00a48b9cf.1720316188.git.alison.schofield@intel.com Signed-off-by: Dave Jiang --- tools/testing/cxl/test/mem.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 94de2cf60f4f..129f179b0ac5 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -1135,27 +1135,28 @@ static bool mock_poison_dev_max_injected(struct cxl_dev_state *cxlds) return (count >= poison_inject_dev_max); } -static bool mock_poison_add(struct cxl_dev_state *cxlds, u64 dpa) +static int mock_poison_add(struct cxl_dev_state *cxlds, u64 dpa) { + /* Return EBUSY to match the CXL driver handling */ if (mock_poison_dev_max_injected(cxlds)) { dev_dbg(cxlds->dev, "Device poison injection limit has been reached: %d\n", - MOCK_INJECT_DEV_MAX); - return false; + poison_inject_dev_max); + return -EBUSY; } for (int i = 0; i < MOCK_INJECT_TEST_MAX; i++) { if (!mock_poison_list[i].cxlds) { mock_poison_list[i].cxlds = cxlds; mock_poison_list[i].dpa = dpa; - return true; + return 0; } } dev_dbg(cxlds->dev, "Mock test poison injection limit has been reached: %d\n", MOCK_INJECT_TEST_MAX); - return false; + return -ENXIO; } static bool mock_poison_found(struct cxl_dev_state *cxlds, u64 dpa) @@ -1179,10 +1180,8 @@ static int mock_inject_poison(struct cxl_dev_state *cxlds, dev_dbg(cxlds->dev, "DPA: 0x%llx already poisoned\n", dpa); return 0; } - if (!mock_poison_add(cxlds, dpa)) - return -ENXIO; - return 0; + return mock_poison_add(cxlds, dpa); } static bool mock_poison_del(struct cxl_dev_state *cxlds, u64 dpa) From 9aa5f6235e16ac6fceed789a30e72addf1abd7d8 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Tue, 2 Jul 2024 22:29:49 -0700 Subject: [PATCH 11/15] cxl/core: Fold cxl_trace_hpa() into cxl_dpa_to_hpa() Although cxl_trace_hpa() is used to populate TRACE EVENTs with HPA addresses the work it performs is a DPA to HPA translation not a trace. Tidy up this naming by moving the minimal work done in cxl_trace_hpa() into cxl_dpa_to_hpa() and use cxl_dpa_to_hpa() for trace event callbacks. 
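After the fold, the event trace path resolves an HPA with the renamed helper directly (excerpt from the cxl_event_trace_record() hunk below):

    dpa = le64_to_cpu(evt->common.phys_addr) & CXL_DPA_MASK;
    cxlr = cxl_dpa_to_region(cxlmd, dpa);
    if (cxlr)
            hpa = cxl_dpa_to_hpa(cxlr, cxlmd, dpa);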
Suggested-by: Dan Williams Signed-off-by: Alison Schofield Reviewed-by: Dan Williams Reviewed-by: Robert Richter Link: https://patch.msgid.link/452a9b0c525b774c72d9d5851515ffa928750132.1719980933.git.alison.schofield@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/core.h | 8 ++++---- drivers/cxl/core/mbox.c | 2 +- drivers/cxl/core/region.c | 33 +++++++++++++-------------------- drivers/cxl/core/trace.h | 4 ++-- 4 files changed, 20 insertions(+), 27 deletions(-) diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 625394486459..72a506c9dbd0 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -28,12 +28,12 @@ int cxl_region_init(void); void cxl_region_exit(void); int cxl_get_poison_by_endpoint(struct cxl_port *port); struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa); -u64 cxl_trace_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, - u64 dpa); +u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, + u64 dpa); #else -static inline u64 -cxl_trace_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, u64 dpa) +static inline u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, + const struct cxl_memdev *cxlmd, u64 dpa) { return ULLONG_MAX; } diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 2626f3fff201..eb0b08e5136f 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -878,7 +878,7 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd, dpa = le64_to_cpu(evt->common.phys_addr) & CXL_DPA_MASK; cxlr = cxl_dpa_to_region(cxlmd, dpa); if (cxlr) - hpa = cxl_trace_hpa(cxlr, cxlmd, dpa); + hpa = cxl_dpa_to_hpa(cxlr, cxlmd, dpa); if (event_type == CXL_CPER_EVENT_GEN_MEDIA) trace_cxl_general_media(cxlmd, type, cxlr, hpa, diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 3c2b6144be23..237c28d5f2cc 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2749,15 +2749,25 @@ static bool cxl_is_hpa_in_range(u64 hpa, struct cxl_region *cxlr, int pos) return false; } -static u64 cxl_dpa_to_hpa(u64 dpa, struct cxl_region *cxlr, - struct cxl_endpoint_decoder *cxled) +u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, + u64 dpa) { u64 dpa_offset, hpa_offset, bits_upper, mask_upper, hpa; struct cxl_region_params *p = &cxlr->params; - int pos = cxled->pos; + struct cxl_endpoint_decoder *cxled = NULL; u16 eig = 0; u8 eiw = 0; + int pos; + for (int i = 0; i < p->nr_targets; i++) { + cxled = p->targets[i]; + if (cxlmd == cxled_to_memdev(cxled)) + break; + } + if (!cxled || cxlmd != cxled_to_memdev(cxled)) + return ULLONG_MAX; + + pos = cxled->pos; ways_to_eiw(p->interleave_ways, &eiw); granularity_to_eig(p->interleave_granularity, &eig); @@ -2797,23 +2807,6 @@ static u64 cxl_dpa_to_hpa(u64 dpa, struct cxl_region *cxlr, return hpa; } -u64 cxl_trace_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, - u64 dpa) -{ - struct cxl_region_params *p = &cxlr->params; - struct cxl_endpoint_decoder *cxled = NULL; - - for (int i = 0; i < p->nr_targets; i++) { - cxled = p->targets[i]; - if (cxlmd == cxled_to_memdev(cxled)) - break; - } - if (!cxled || cxlmd != cxled_to_memdev(cxled)) - return ULLONG_MAX; - - return cxl_dpa_to_hpa(dpa, cxlr, cxled); -} - static struct lock_class_key cxl_pmem_region_key; static int cxl_pmem_region_alloc(struct cxl_region *cxlr) diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index ee5cd4eb2f16..21b76e9c5c60 100644 --- a/drivers/cxl/core/trace.h +++ 
b/drivers/cxl/core/trace.h @@ -704,8 +704,8 @@ TRACE_EVENT(cxl_poison, if (cxlr) { __assign_str(region); memcpy(__entry->uuid, &cxlr->params.uuid, 16); - __entry->hpa = cxl_trace_hpa(cxlr, cxlmd, - __entry->dpa); + __entry->hpa = cxl_dpa_to_hpa(cxlr, cxlmd, + __entry->dpa); } else { __assign_str(region); memset(__entry->uuid, 0, 16); From 3b2fedcd75e3991e77c2a8c3ebcab0ea68b2d69d Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Tue, 2 Jul 2024 22:29:50 -0700 Subject: [PATCH 12/15] cxl: Restore XOR'd position bits during address translation When a device reports a DPA in events like poison, general_media, and dram, the driver translates that DPA back to an HPA. Presently, the CXL driver translation only considers the Modulo position and will report the wrong HPA for XOR configured root decoders. Add a helper function that restores the XOR'd bits during DPA->HPA address translation. Plumb a root decoder callback to the new helper when XOR interleave arithmetic is in use. For Modulo arithmetic, just let the callback be NULL - as in no extra work required. Upon completion of a DPA->HPA translation a couple of checks are performed on the result. One simply confirms that the calculated HPA is within the address range of the region. That test is useful for both Modulo and XOR interleave arithmetic decodes. A second check confirms that the HPA is within an expected chunk based on the endpoints position in the region and the region granularity. An XOR decode disrupts the Modulo pattern making the chunk check useless. To align the checks with the proper decode, pull the region range check inline and use the helper to do the chunk check for Modulo decodes only. A cxl-test unit test is posted for upstream review here: https://lore.kernel.org/20240624210644.495563-1-alison.schofield@intel.com/ Fixes: 28a3ae4ff66c ("cxl/trace: Add an HPA to cxl_poison trace events") Signed-off-by: Alison Schofield Tested-by: Diego Garcia Rodriguez Reviewed-by: Dan Williams Link: https://patch.msgid.link/1a1ac880d9f889bd6384e657e810431b9a0a72e5.1719980933.git.alison.schofield@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/acpi.c | 40 +++++++++++++++++++++++++++++++++++++++ drivers/cxl/core/region.c | 23 +++++++++++++--------- drivers/cxl/cxl.h | 3 +++ 3 files changed, 57 insertions(+), 9 deletions(-) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 571069863c62..6b6ae9c81368 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -74,6 +74,43 @@ static struct cxl_dport *cxl_hb_xor(struct cxl_root_decoder *cxlrd, int pos) return cxlrd->cxlsd.target[n]; } +static u64 cxl_xor_hpa_to_spa(struct cxl_root_decoder *cxlrd, u64 hpa) +{ + struct cxl_cxims_data *cximsd = cxlrd->platform_data; + int hbiw = cxlrd->cxlsd.nr_targets; + u64 val; + int pos; + + /* No xormaps for host bridge interleave ways of 1 or 3 */ + if (hbiw == 1 || hbiw == 3) + return hpa; + + /* + * For root decoders using xormaps (hbiw: 2,4,6,8,12,16) restore + * the position bit to its value before the xormap was applied at + * HPA->DPA translation. + * + * pos is the lowest set bit in an XORMAP + * val is the XORALLBITS(HPA & XORMAP) + * + * XORALLBITS: The CXL spec (3.1 Table 9-22) defines XORALLBITS + * as an operation that outputs a single bit by XORing all the + * bits in the input (hpa & xormap). Implement XORALLBITS using + * hweight64(). If the hamming weight is even the XOR of those + * bits results in val==0, if odd the XOR result is val==1. 
+ */ + + for (int i = 0; i < cximsd->nr_maps; i++) { + if (!cximsd->xormaps[i]) + continue; + pos = __ffs(cximsd->xormaps[i]); + val = (hweight64(hpa & cximsd->xormaps[i]) & 1); + hpa = (hpa & ~(1ULL << pos)) | (val << pos); + } + + return hpa; +} + struct cxl_cxims_context { struct device *dev; struct cxl_root_decoder *cxlrd; @@ -434,6 +471,9 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, cxlrd->qos_class = cfmws->qtg_id; + if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) + cxlrd->hpa_to_spa = cxl_xor_hpa_to_spa; + rc = cxl_decoder_add(cxld, target_map); if (rc) return rc; diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 237c28d5f2cc..23abd0f7b856 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2723,20 +2723,13 @@ struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa) return ctx.cxlr; } -static bool cxl_is_hpa_in_range(u64 hpa, struct cxl_region *cxlr, int pos) +static bool cxl_is_hpa_in_chunk(u64 hpa, struct cxl_region *cxlr, int pos) { struct cxl_region_params *p = &cxlr->params; int gran = p->interleave_granularity; int ways = p->interleave_ways; u64 offset; - /* Is the hpa within this region at all */ - if (hpa < p->res->start || hpa > p->res->end) { - dev_dbg(&cxlr->dev, - "Addr trans fail: hpa 0x%llx not in region\n", hpa); - return false; - } - /* Is the hpa in an expected chunk for its pos(-ition) */ offset = hpa - p->res->start; offset = do_div(offset, gran * ways); @@ -2752,6 +2745,7 @@ static bool cxl_is_hpa_in_range(u64 hpa, struct cxl_region *cxlr, int pos) u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, u64 dpa) { + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); u64 dpa_offset, hpa_offset, bits_upper, mask_upper, hpa; struct cxl_region_params *p = &cxlr->params; struct cxl_endpoint_decoder *cxled = NULL; @@ -2801,7 +2795,18 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, /* Apply the hpa_offset to the region base address */ hpa = hpa_offset + p->res->start; - if (!cxl_is_hpa_in_range(hpa, cxlr, cxled->pos)) + /* Root decoder translation overrides typical modulo decode */ + if (cxlrd->hpa_to_spa) + hpa = cxlrd->hpa_to_spa(cxlrd, hpa); + + if (hpa < p->res->start || hpa > p->res->end) { + dev_dbg(&cxlr->dev, + "Addr trans fail: hpa 0x%llx not in region\n", hpa); + return ULLONG_MAX; + } + + /* Simple chunk check, by pos & gran, only applies to modulo decodes */ + if (!cxlrd->hpa_to_spa && (!cxl_is_hpa_in_chunk(hpa, cxlr, pos))) return ULLONG_MAX; return hpa; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 603c0120cff8..dfc4f27d195c 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -434,12 +434,14 @@ struct cxl_switch_decoder { struct cxl_root_decoder; typedef struct cxl_dport *(*cxl_calc_hb_fn)(struct cxl_root_decoder *cxlrd, int pos); +typedef u64 (*cxl_hpa_to_spa_fn)(struct cxl_root_decoder *cxlrd, u64 hpa); /** * struct cxl_root_decoder - Static platform CXL address decoder * @res: host / parent resource for region allocations * @region_id: region id for next region provisioning event * @calc_hb: which host bridge covers the n'th position by granularity + * @hpa_to_spa: translate CXL host-physical-address to Platform system-physical-address * @platform_data: platform specific configuration data * @range_lock: sync region autodiscovery by address range * @qos_class: QoS performance class cookie @@ -449,6 +451,7 @@ struct cxl_root_decoder { struct resource *res; 
atomic_t region_id; cxl_calc_hb_fn calc_hb; + cxl_hpa_to_spa_fn hpa_to_spa; void *platform_data; struct mutex range_lock; int qos_class; From 82a3e3a235633aa0575fac9507d648dd80f3437f Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Tue, 2 Jul 2024 22:29:51 -0700 Subject: [PATCH 13/15] cxl/region: Verify target positions using the ordered target list When a root decoder is configured, the interleave target list is read from the BIOS-populated CFMWS structure. Per the CXL spec 3.1 Table 9-22, the target list is in interleave order. The CXL driver populates its decoder target list in the same order and stores it in the 'struct cxl_switch_decoder' field "@target: active ordered target list in current decoder configuration". Given the promise of an ordered list, the driver can stop duplicating the work of BIOS and simply check target positions against the ordered list during region configuration. The simplified check against the ordered list is presented here. A follow-on patch will remove the unused code. For Modulo arithmetic this is not a fix, only a simplification. For XOR arithmetic this is a fix for HB IW of 3, 6, 12. Fixes: f9db85bfec0d ("cxl/acpi: Support CXL XOR Interleave Math (CXIMS)") Signed-off-by: Alison Schofield Reviewed-by: Dan Williams Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/35d08d3aba08fee0f9b86ab1cef0c25116ca8a55.1719980933.git.alison.schofield@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 23abd0f7b856..2772828ca6ca 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1559,10 +1559,13 @@ static int cxl_region_attach_position(struct cxl_region *cxlr, const struct cxl_dport *dport, int pos) { struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd; + struct cxl_decoder *cxld = &cxlsd->cxld; + int iw = cxld->interleave_ways; struct cxl_port *iter; int rc; - if (cxlrd->calc_hb(cxlrd, pos) != dport) { + if (dport != cxlrd->cxlsd.target[pos % iw]) { dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n", dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), dev_name(&cxlrd->cxlsd.cxld.dev)); From 8f55ada796565ce801418bf579f31a6a522d0337 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Tue, 2 Jul 2024 22:29:52 -0700 Subject: [PATCH 14/15] cxl: Remove defunct code calculating host bridge target positions The CXL Spec 3.1 Table 9-22 requires that the BIOS populate the CFMWS target list in interleave target order. This means that the calculations the CXL driver added to determine positions when XOR math is in use, along with the entire XOR vs. Modulo callback setup, are not needed. A prior patch added a common method to verify positions. Remove the now unused code related to the cxl_calc_hb_fn.
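With no per-platform position math left to carry, the root decoder allocation below also loses its callback parameter, leaving (excerpt):

    struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
                                                    unsigned int nr_targets);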
Signed-off-by: Alison Schofield Reviewed-by: Dan Williams Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/2e2c32a2d0f1007e920b58712d15edad2e48d857.1719980933.git.alison.schofield@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/acpi.c | 60 ++--------------------------------------- drivers/cxl/core/port.c | 20 +------------- drivers/cxl/cxl.h | 8 +----- 3 files changed, 4 insertions(+), 84 deletions(-) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 6b6ae9c81368..921cee3bb980 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -22,57 +22,6 @@ static const guid_t acpi_cxl_qtg_id_guid = GUID_INIT(0xF365F9A6, 0xA7DE, 0x4071, 0xA6, 0x6A, 0xB4, 0x0C, 0x0B, 0x4F, 0x8E, 0x52); -/* - * Find a targets entry (n) in the host bridge interleave list. - * CXL Specification 3.0 Table 9-22 - */ -static int cxl_xor_calc_n(u64 hpa, struct cxl_cxims_data *cximsd, int iw, - int ig) -{ - int i = 0, n = 0; - u8 eiw; - - /* IW: 2,4,6,8,12,16 begin building 'n' using xormaps */ - if (iw != 3) { - for (i = 0; i < cximsd->nr_maps; i++) - n |= (hweight64(hpa & cximsd->xormaps[i]) & 1) << i; - } - /* IW: 3,6,12 add a modulo calculation to 'n' */ - if (!is_power_of_2(iw)) { - if (ways_to_eiw(iw, &eiw)) - return -1; - hpa &= GENMASK_ULL(51, eiw + ig); - n |= do_div(hpa, 3) << i; - } - return n; -} - -static struct cxl_dport *cxl_hb_xor(struct cxl_root_decoder *cxlrd, int pos) -{ - struct cxl_cxims_data *cximsd = cxlrd->platform_data; - struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd; - struct cxl_decoder *cxld = &cxlsd->cxld; - int ig = cxld->interleave_granularity; - int iw = cxld->interleave_ways; - int n = 0; - u64 hpa; - - if (dev_WARN_ONCE(&cxld->dev, - cxld->interleave_ways != cxlsd->nr_targets, - "misconfigured root decoder\n")) - return NULL; - - hpa = cxlrd->res->start + pos * ig; - - /* Entry (n) is 0 for no interleave (iw == 1) */ - if (iw != 1) - n = cxl_xor_calc_n(hpa, cximsd, iw, ig); - - if (n < 0) - return NULL; - - return cxlrd->cxlsd.target[n]; -} static u64 cxl_xor_hpa_to_spa(struct cxl_root_decoder *cxlrd, u64 hpa) { @@ -398,7 +347,6 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, struct cxl_port *root_port = ctx->root_port; struct cxl_cxims_context cxims_ctx; struct device *dev = ctx->dev; - cxl_calc_hb_fn cxl_calc_hb; struct cxl_decoder *cxld; unsigned int ways, i, ig; int rc; @@ -426,13 +374,9 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, if (rc) return rc; - if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_MODULO) - cxl_calc_hb = cxl_hb_modulo; - else - cxl_calc_hb = cxl_hb_xor; - struct cxl_root_decoder *cxlrd __free(put_cxlrd) = - cxl_root_decoder_alloc(root_port, ways, cxl_calc_hb); + cxl_root_decoder_alloc(root_port, ways); + if (IS_ERR(cxlrd)) return PTR_ERR(cxlrd); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 887ed6e358fb..bc9e18d02598 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1733,21 +1733,6 @@ static int decoder_populate_targets(struct cxl_switch_decoder *cxlsd, return 0; } -struct cxl_dport *cxl_hb_modulo(struct cxl_root_decoder *cxlrd, int pos) -{ - struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd; - struct cxl_decoder *cxld = &cxlsd->cxld; - int iw; - - iw = cxld->interleave_ways; - if (dev_WARN_ONCE(&cxld->dev, iw != cxlsd->nr_targets, - "misconfigured root decoder\n")) - return NULL; - - return cxlrd->cxlsd.target[pos % iw]; -} -EXPORT_SYMBOL_NS_GPL(cxl_hb_modulo, CXL); - static struct lock_class_key cxl_decoder_key; /** @@ -1807,7 +1792,6 @@ static 
int cxl_switch_decoder_init(struct cxl_port *port, * cxl_root_decoder_alloc - Allocate a root level decoder * @port: owning CXL root of this decoder * @nr_targets: static number of downstream targets - * @calc_hb: which host bridge covers the n'th position by granularity * * Return: A new cxl decoder to be registered by cxl_decoder_add(). A * 'CXL root' decoder is one that decodes from a top-level / static platform @@ -1815,8 +1799,7 @@ static int cxl_switch_decoder_init(struct cxl_port *port, * topology. */ struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port, - unsigned int nr_targets, - cxl_calc_hb_fn calc_hb) + unsigned int nr_targets) { struct cxl_root_decoder *cxlrd; struct cxl_switch_decoder *cxlsd; @@ -1838,7 +1821,6 @@ struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port, return ERR_PTR(rc); } - cxlrd->calc_hb = calc_hb; mutex_init(&cxlrd->range_lock); cxld = &cxlsd->cxld; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index dfc4f27d195c..e27a0cd4f107 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -432,15 +432,12 @@ struct cxl_switch_decoder { }; struct cxl_root_decoder; -typedef struct cxl_dport *(*cxl_calc_hb_fn)(struct cxl_root_decoder *cxlrd, - int pos); typedef u64 (*cxl_hpa_to_spa_fn)(struct cxl_root_decoder *cxlrd, u64 hpa); /** * struct cxl_root_decoder - Static platform CXL address decoder * @res: host / parent resource for region allocations * @region_id: region id for next region provisioning event - * @calc_hb: which host bridge covers the n'th position by granularity * @hpa_to_spa: translate CXL host-physical-address to Platform system-physical-address * @platform_data: platform specific configuration data * @range_lock: sync region autodiscovery by address range @@ -450,7 +447,6 @@ typedef u64 (*cxl_hpa_to_spa_fn)(struct cxl_root_decoder *cxlrd, u64 hpa); struct cxl_root_decoder { struct resource *res; atomic_t region_id; - cxl_calc_hb_fn calc_hb; cxl_hpa_to_spa_fn hpa_to_spa; void *platform_data; struct mutex range_lock; @@ -775,9 +771,7 @@ bool is_root_decoder(struct device *dev); bool is_switch_decoder(struct device *dev); bool is_endpoint_decoder(struct device *dev); struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port, - unsigned int nr_targets, - cxl_calc_hb_fn calc_hb); -struct cxl_dport *cxl_hb_modulo(struct cxl_root_decoder *cxlrd, int pos); + unsigned int nr_targets); struct cxl_switch_decoder *cxl_switch_decoder_alloc(struct cxl_port *port, unsigned int nr_targets); int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map); From a0328b397f3339d8d17a6ec356e94b3c110b010c Mon Sep 17 00:00:00 2001 From: Foryun Ma Date: Tue, 4 Jun 2024 11:21:51 +0800 Subject: [PATCH 15/15] cxl/core/pci: Move reading of control register to immediately before usage Relocate the reading of the DVSEC control register to immediately before usage and avoid unnecessary PCI config access from the read if DVSEC capability check, hdm_count check, or device validity check results in failure. 
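The resulting order in cxl_dvsec_rr_decode() reads (excerpt of the relocated block from the diff below, comment added here):

    /* Read CTRL only after the capability and device checks have passed */
    rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl);
    if (rc)
            return rc;

    info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl);
    if (!info->mem_enabled)
            return 0;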
Signed-off-by: Foryun Ma Reviewed-by: Jonathan Cameron Reviewed-by: Alison Schofield Link: https://patch.msgid.link/20240604032151.655-1-foryun.ma@jaguarmicro.com Signed-off-by: Dave Jiang --- drivers/cxl/core/pci.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 8567dd11eaac..a663e7566c48 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -338,10 +338,6 @@ int cxl_dvsec_rr_decode(struct device *dev, int d, if (rc) return rc; - rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl); - if (rc) - return rc; - if (!(cap & CXL_DVSEC_MEM_CAPABLE)) { dev_dbg(dev, "Not MEM Capable\n"); return -ENXIO; @@ -368,6 +364,10 @@ int cxl_dvsec_rr_decode(struct device *dev, int d, * disabled, and they will remain moot after the HDM Decoder * capability is enabled. */ + rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl); + if (rc) + return rc; + info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); if (!info->mem_enabled) return 0;