 Documentation/ABI/testing/sysfs-bus-cxl                    |  11
 Documentation/driver-api/cxl/allocation/page-allocator.rst |  31
 drivers/acpi/numa/hmat.c                                    |  11
 drivers/cxl/acpi.c                                          |  73
 drivers/cxl/core/cdat.c                                     |   4
 drivers/cxl/core/hdm.c                                      |   3
 drivers/cxl/core/pci.c                                      |  87
 drivers/cxl/core/port.c                                     |   1
 drivers/cxl/core/region.c                                   | 311
 drivers/cxl/cxl.h                                           |  29
 drivers/cxl/cxlpci.h                                        |   1
 drivers/cxl/pci.c                                           |   2
 tools/testing/cxl/Kbuild                                    |   3
 tools/testing/cxl/test/Kbuild                               |   1
 tools/testing/cxl/test/cxl.c                                |  86
 tools/testing/cxl/test/cxl_translate.c                      | 445
 tools/testing/cxl/test/mem.c                                |  11
 tools/testing/cxl/test/mock.c                               |  52
 tools/testing/cxl/test/mock.h                               |   4
 19 files changed, 839 insertions(+), 327 deletions(-)
diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl index 6b4e8c7a963d..c80a1b5a03db 100644 --- a/Documentation/ABI/testing/sysfs-bus-cxl +++ b/Documentation/ABI/testing/sysfs-bus-cxl @@ -496,8 +496,17 @@ Description: changed, only freed by writing 0. The kernel makes no guarantees that data is maintained over an address space freeing event, and there is no guarantee that a free followed by an allocate - results in the same address being allocated. + results in the same address being allocated. If extended linear + cache is present, the size indicates extended linear cache size + plus the CXL region size. +What: /sys/bus/cxl/devices/regionZ/extended_linear_cache_size +Date: October, 2025 +KernelVersion: v6.19 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) The size of extended linear cache, if there is an extended + linear cache. Otherwise the attribute will not be visible. What: /sys/bus/cxl/devices/regionZ/mode Date: January, 2023 diff --git a/Documentation/driver-api/cxl/allocation/page-allocator.rst b/Documentation/driver-api/cxl/allocation/page-allocator.rst index 7b8fe1b8d5bb..3fa584a248bd 100644 --- a/Documentation/driver-api/cxl/allocation/page-allocator.rst +++ b/Documentation/driver-api/cxl/allocation/page-allocator.rst @@ -41,37 +41,6 @@ To simplify this, the page allocator will prefer :code:`ZONE_MOVABLE` over will fallback to allocate from :code:`ZONE_NORMAL`. -Zone and Node Quirks -==================== -Let's consider a configuration where the local DRAM capacity is largely onlined -into :code:`ZONE_NORMAL`, with no :code:`ZONE_MOVABLE` capacity present. The -CXL capacity has the opposite configuration - all onlined in -:code:`ZONE_MOVABLE`. - -Under the default allocation policy, the page allocator will completely skip -:code:`ZONE_MOVABLE` as a valid allocation target. This is because, as of -Linux v6.15, the page allocator does (approximately) the following: :: - - for (each zone in local_node): - - for (each node in fallback_order): - - attempt_allocation(gfp_flags); - -Because the local node does not have :code:`ZONE_MOVABLE`, the CXL node is -functionally unreachable for direct allocation. As a result, the only way -for CXL capacity to be used is via `demotion` in the reclaim path. - -This configuration also means that if the DRAM ndoe has :code:`ZONE_MOVABLE` -capacity - when that capacity is depleted, the page allocator will actually -prefer CXL :code:`ZONE_MOVABLE` pages over DRAM :code:`ZONE_NORMAL` pages. - -We may wish to invert this priority in future Linux versions. - -If `demotion` and `swap` are disabled, Linux will begin to cause OOM crashes -when the DRAM nodes are depleted. See the reclaim section for more details. - - CGroups and CPUSets =================== Finally, assuming CXL memory is reachable via the page allocation (i.e. onlined diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index 11e4483685c9..77a81627aaef 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -910,12 +910,13 @@ static void hmat_register_target(struct memory_target *target) * Register generic port perf numbers. The nid may not be * initialized and is still NUMA_NO_NODE. 
*/ - mutex_lock(&target_lock); - if (*(u16 *)target->gen_port_device_handle) { - hmat_update_generic_target(target); - target->registered = true; + scoped_guard(mutex, &target_lock) { + if (*(u16 *)target->gen_port_device_handle) { + hmat_update_generic_target(target); + target->registered = true; + return; + } } - mutex_unlock(&target_lock); hmat_hotplug_target(target); } diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index bd2e282ca93a..77ac940e3013 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -11,25 +11,36 @@ #include "cxlpci.h" #include "cxl.h" -struct cxl_cxims_data { - int nr_maps; - u64 xormaps[] __counted_by(nr_maps); -}; - static const guid_t acpi_cxl_qtg_id_guid = GUID_INIT(0xF365F9A6, 0xA7DE, 0x4071, 0xA6, 0x6A, 0xB4, 0x0C, 0x0B, 0x4F, 0x8E, 0x52); -static u64 cxl_apply_xor_maps(struct cxl_root_decoder *cxlrd, u64 addr) +#define HBIW_TO_NR_MAPS_SIZE (CXL_DECODER_MAX_INTERLEAVE + 1) +static const int hbiw_to_nr_maps[HBIW_TO_NR_MAPS_SIZE] = { + [1] = 0, [2] = 1, [3] = 0, [4] = 2, [6] = 1, [8] = 3, [12] = 2, [16] = 4 +}; + +static const int valid_hbiw[] = { 1, 2, 3, 4, 6, 8, 12, 16 }; + +u64 cxl_do_xormap_calc(struct cxl_cxims_data *cximsd, u64 addr, int hbiw) { - struct cxl_cxims_data *cximsd = cxlrd->platform_data; - int hbiw = cxlrd->cxlsd.nr_targets; + int nr_maps_to_apply = -1; u64 val; int pos; - /* No xormaps for host bridge interleave ways of 1 or 3 */ - if (hbiw == 1 || hbiw == 3) - return addr; + /* + * Strictly validate hbiw since this function is used for testing and + * that nullifies any expectation of trusted parameters from the CXL + * Region Driver. + */ + for (int i = 0; i < ARRAY_SIZE(valid_hbiw); i++) { + if (valid_hbiw[i] == hbiw) { + nr_maps_to_apply = hbiw_to_nr_maps[hbiw]; + break; + } + } + if (nr_maps_to_apply == -1 || nr_maps_to_apply > cximsd->nr_maps) + return ULLONG_MAX; /* * In regions using XOR interleave arithmetic the CXL HPA may not @@ -60,6 +71,14 @@ static u64 cxl_apply_xor_maps(struct cxl_root_decoder *cxlrd, u64 addr) return addr; } +EXPORT_SYMBOL_FOR_MODULES(cxl_do_xormap_calc, "cxl_translate"); + +static u64 cxl_apply_xor_maps(struct cxl_root_decoder *cxlrd, u64 addr) +{ + struct cxl_cxims_data *cximsd = cxlrd->platform_data; + + return cxl_do_xormap_calc(cximsd, addr, cxlrd->cxlsd.nr_targets); +} struct cxl_cxims_context { struct device *dev; @@ -353,7 +372,7 @@ static int cxl_acpi_set_cache_size(struct cxl_root_decoder *cxlrd) rc = hmat_get_extended_linear_cache_size(&res, nid, &cache_size); if (rc) - return rc; + return 0; /* * The cache range is expected to be within the CFMWS. @@ -378,21 +397,18 @@ static void cxl_setup_extended_linear_cache(struct cxl_root_decoder *cxlrd) int rc; rc = cxl_acpi_set_cache_size(cxlrd); - if (!rc) - return; - - if (rc != -EOPNOTSUPP) { + if (rc) { /* - * Failing to support extended linear cache region resize does not + * Failing to retrieve extended linear cache region resize does not * prevent the region from functioning. Only causes cxl list showing * incorrect region size. 
*/ dev_warn(cxlrd->cxlsd.cxld.dev.parent, - "Extended linear cache calculation failed rc:%d\n", rc); - } + "Extended linear cache retrieval failed rc:%d\n", rc); - /* Ignoring return code */ - cxlrd->cache_size = 0; + /* Ignoring return code */ + cxlrd->cache_size = 0; + } } DEFINE_FREE(put_cxlrd, struct cxl_root_decoder *, @@ -453,8 +469,6 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, ig = CXL_DECODER_MIN_GRANULARITY; cxld->interleave_granularity = ig; - cxl_setup_extended_linear_cache(cxlrd); - if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) { if (ways != 1 && ways != 3) { cxims_ctx = (struct cxl_cxims_context) { @@ -470,18 +484,13 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, return -EINVAL; } } + cxlrd->ops.hpa_to_spa = cxl_apply_xor_maps; + cxlrd->ops.spa_to_hpa = cxl_apply_xor_maps; } - cxlrd->qos_class = cfmws->qtg_id; - - if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) { - cxlrd->ops = kzalloc(sizeof(*cxlrd->ops), GFP_KERNEL); - if (!cxlrd->ops) - return -ENOMEM; + cxl_setup_extended_linear_cache(cxlrd); - cxlrd->ops->hpa_to_spa = cxl_apply_xor_maps; - cxlrd->ops->spa_to_hpa = cxl_apply_xor_maps; - } + cxlrd->qos_class = cfmws->qtg_id; rc = cxl_decoder_add(cxld); if (rc) diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index c4bd6e8a0cf0..7120b5f2e31f 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -826,7 +826,7 @@ static struct xarray *cxl_switch_gather_bandwidth(struct cxl_region *cxlr, cxl_coordinates_combine(coords, coords, ctx->coord); /* - * Take the min of the calculated bandwdith and the upstream + * Take the min of the calculated bandwidth and the upstream * switch SSLBIS bandwidth if there's a parent switch */ if (!is_root) @@ -949,7 +949,7 @@ static struct xarray *cxl_hb_gather_bandwidth(struct xarray *xa) /** * cxl_region_update_bandwidth - Update the bandwidth access coordinates of a region * @cxlr: The region being operated on - * @input_xa: xarray holds cxl_perf_ctx wht calculated bandwidth per ACPI0017 instance + * @input_xa: xarray holds cxl_perf_ctx with calculated bandwidth per ACPI0017 instance */ static void cxl_region_update_bandwidth(struct cxl_region *cxlr, struct xarray *input_xa) diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index d3a094ca01ad..1c5d2022c87a 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -905,6 +905,9 @@ static void cxl_decoder_reset(struct cxl_decoder *cxld) if ((cxld->flags & CXL_DECODER_F_ENABLE) == 0) return; + if (test_bit(CXL_DECODER_F_LOCK, &cxld->flags)) + return; + if (port->commit_end == id) cxl_port_commit_reap(cxld); else diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 18825e1505d6..5b023a0178a4 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -71,85 +71,6 @@ struct cxl_dport *__devm_cxl_add_dport_by_dev(struct cxl_port *port, } EXPORT_SYMBOL_NS_GPL(__devm_cxl_add_dport_by_dev, "CXL"); -struct cxl_walk_context { - struct pci_bus *bus; - struct cxl_port *port; - int type; - int error; - int count; -}; - -static int match_add_dports(struct pci_dev *pdev, void *data) -{ - struct cxl_walk_context *ctx = data; - struct cxl_port *port = ctx->port; - int type = pci_pcie_type(pdev); - struct cxl_register_map map; - struct cxl_dport *dport; - u32 lnkcap, port_num; - int rc; - - if (pdev->bus != ctx->bus) - return 0; - if (!pci_is_pcie(pdev)) - return 0; - if (type != ctx->type) - return 0; - if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + 
PCI_EXP_LNKCAP, - &lnkcap)) - return 0; - - rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map); - if (rc) - dev_dbg(&port->dev, "failed to find component registers\n"); - - port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap); - dport = devm_cxl_add_dport(port, &pdev->dev, port_num, map.resource); - if (IS_ERR(dport)) { - ctx->error = PTR_ERR(dport); - return PTR_ERR(dport); - } - ctx->count++; - - return 0; -} - -/** - * devm_cxl_port_enumerate_dports - enumerate downstream ports of the upstream port - * @port: cxl_port whose ->uport_dev is the upstream of dports to be enumerated - * - * Returns a positive number of dports enumerated or a negative error - * code. - */ -int devm_cxl_port_enumerate_dports(struct cxl_port *port) -{ - struct pci_bus *bus = cxl_port_to_pci_bus(port); - struct cxl_walk_context ctx; - int type; - - if (!bus) - return -ENXIO; - - if (pci_is_root_bus(bus)) - type = PCI_EXP_TYPE_ROOT_PORT; - else - type = PCI_EXP_TYPE_DOWNSTREAM; - - ctx = (struct cxl_walk_context) { - .port = port, - .bus = bus, - .type = type, - }; - pci_walk_bus(bus, match_add_dports, &ctx); - - if (ctx.count == 0) - return -ENODEV; - if (ctx.error) - return ctx.error; - return ctx.count; -} -EXPORT_SYMBOL_NS_GPL(devm_cxl_port_enumerate_dports, "CXL"); - static int cxl_dvsec_mem_range_valid(struct cxl_dev_state *cxlds, int id) { struct pci_dev *pdev = to_pci_dev(cxlds->dev); @@ -1217,6 +1138,14 @@ int cxl_gpf_port_setup(struct cxl_dport *dport) return 0; } +struct cxl_walk_context { + struct pci_bus *bus; + struct cxl_port *port; + int type; + int error; + int count; +}; + static int count_dports(struct pci_dev *pdev, void *data) { struct cxl_walk_context *ctx = data; diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 8128fd2b5b31..fef3aa0c6680 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -459,7 +459,6 @@ static void cxl_root_decoder_release(struct device *dev) if (atomic_read(&cxlrd->region_id) >= 0) memregion_free(atomic_read(&cxlrd->region_id)); __cxl_decoder_release(&cxlrd->cxlsd.cxld); - kfree(cxlrd->ops); kfree(cxlrd); } diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 41b64d871c5a..82d229c8f9bf 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -245,6 +245,9 @@ static void cxl_region_decode_reset(struct cxl_region *cxlr, int count) struct cxl_region_params *p = &cxlr->params; int i; + if (test_bit(CXL_REGION_F_LOCK, &cxlr->flags)) + return; + /* * Before region teardown attempt to flush, evict any data cached for * this region, or scream loudly about missing arch / platform support @@ -419,6 +422,9 @@ static ssize_t commit_store(struct device *dev, struct device_attribute *attr, return len; } + if (test_bit(CXL_REGION_F_LOCK, &cxlr->flags)) + return -EPERM; + rc = queue_reset(cxlr); if (rc) return rc; @@ -461,21 +467,6 @@ static ssize_t commit_show(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RW(commit); -static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a, - int n) -{ - struct device *dev = kobj_to_dev(kobj); - struct cxl_region *cxlr = to_cxl_region(dev); - - /* - * Support tooling that expects to find a 'uuid' attribute for all - * regions regardless of mode. 
- */ - if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_PARTMODE_PMEM) - return 0444; - return a->mode; -} - static ssize_t interleave_ways_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -754,6 +745,21 @@ static ssize_t size_show(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RW(size); +static ssize_t extended_linear_cache_size_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct cxl_region *cxlr = to_cxl_region(dev); + struct cxl_region_params *p = &cxlr->params; + ssize_t rc; + + ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem))) + return rc; + return sysfs_emit(buf, "%#llx\n", p->cache_size); +} +static DEVICE_ATTR_RO(extended_linear_cache_size); + static struct attribute *cxl_region_attrs[] = { &dev_attr_uuid.attr, &dev_attr_commit.attr, @@ -762,9 +768,34 @@ static struct attribute *cxl_region_attrs[] = { &dev_attr_resource.attr, &dev_attr_size.attr, &dev_attr_mode.attr, + &dev_attr_extended_linear_cache_size.attr, NULL, }; +static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a, + int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct cxl_region *cxlr = to_cxl_region(dev); + + /* + * Support tooling that expects to find a 'uuid' attribute for all + * regions regardless of mode. + */ + if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_PARTMODE_PMEM) + return 0444; + + /* + * Don't display extended linear cache attribute if there is no + * extended linear cache. + */ + if (a == &dev_attr_extended_linear_cache_size.attr && + cxlr->params.cache_size == 0) + return 0; + + return a->mode; +} + static const struct attribute_group cxl_region_group = { .attrs = cxl_region_attrs, .is_visible = cxl_region_visible, @@ -838,16 +869,16 @@ static int match_free_decoder(struct device *dev, const void *data) return 1; } -static bool region_res_match_cxl_range(const struct cxl_region_params *p, - const struct range *range) +static bool spa_maps_hpa(const struct cxl_region_params *p, + const struct range *range) { if (!p->res) return false; /* - * If an extended linear cache region then the CXL range is assumed - * to be fronted by the DRAM range in current known implementation. - * This assumption will be made until a variant implementation exists. + * The extended linear cache region is constructed by a 1:1 ratio + * where the SPA maps equal amounts of DRAM and CXL HPA capacity with + * CXL decoders at the high end of the SPA range. 
*/ return p->res->start + p->cache_size == range->start && p->res->end == range->end; @@ -865,7 +896,7 @@ static int match_auto_decoder(struct device *dev, const void *data) cxld = to_cxl_decoder(dev); r = &cxld->hpa_range; - if (region_res_match_cxl_range(p, r)) + if (spa_maps_hpa(p, r)) return 1; return 0; @@ -1059,6 +1090,16 @@ static int cxl_rr_assign_decoder(struct cxl_port *port, struct cxl_region *cxlr, return 0; } +static void cxl_region_set_lock(struct cxl_region *cxlr, + struct cxl_decoder *cxld) +{ + if (!test_bit(CXL_DECODER_F_LOCK, &cxld->flags)) + return; + + set_bit(CXL_REGION_F_LOCK, &cxlr->flags); + clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags); +} + /** * cxl_port_attach_region() - track a region's interest in a port by endpoint * @port: port to add a new region reference 'struct cxl_region_ref' @@ -1170,6 +1211,8 @@ static int cxl_port_attach_region(struct cxl_port *port, } } + cxl_region_set_lock(cxlr, cxld); + rc = cxl_rr_ep_add(cxl_rr, cxled); if (rc) { dev_dbg(&cxlr->dev, @@ -1328,7 +1371,7 @@ static int cxl_port_setup_targets(struct cxl_port *port, struct cxl_endpoint_decoder *cxled) { struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); - int parent_iw, parent_ig, ig, iw, rc, inc = 0, pos = cxled->pos; + int parent_iw, parent_ig, ig, iw, rc, pos = cxled->pos; struct cxl_port *parent_port = to_cxl_port(port->dev.parent); struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr); struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); @@ -1465,7 +1508,7 @@ static int cxl_port_setup_targets(struct cxl_port *port, if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) { if (cxld->interleave_ways != iw || (iw > 1 && cxld->interleave_granularity != ig) || - !region_res_match_cxl_range(p, &cxld->hpa_range) || + !spa_maps_hpa(p, &cxld->hpa_range) || ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) { dev_err(&cxlr->dev, "%s:%s %s expected iw: %d ig: %d %pr\n", @@ -1520,9 +1563,8 @@ add_target: cxlsd->target[cxl_rr->nr_targets_set] = ep->dport; cxlsd->cxld.target_map[cxl_rr->nr_targets_set] = ep->dport->port_id; } - inc = 1; + cxl_rr->nr_targets_set++; out_target_set: - cxl_rr->nr_targets_set += inc; dev_dbg(&cxlr->dev, "%s:%s target[%d] = %s for %s:%s @ %d\n", dev_name(port->uport_dev), dev_name(&port->dev), cxl_rr->nr_targets_set - 1, dev_name(ep->dport->dport_dev), @@ -2439,6 +2481,7 @@ static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int i dev->bus = &cxl_bus_type; dev->type = &cxl_region_type; cxlr->id = id; + cxl_region_set_lock(cxlr, &cxlrd->cxlsd.cxld); return cxlr; } @@ -2924,38 +2967,119 @@ static bool cxl_is_hpa_in_chunk(u64 hpa, struct cxl_region *cxlr, int pos) return false; } -static bool has_hpa_to_spa(struct cxl_root_decoder *cxlrd) +#define CXL_POS_ZERO 0 +/** + * cxl_validate_translation_params + * @eiw: encoded interleave ways + * @eig: encoded interleave granularity + * @pos: position in interleave + * + * Callers pass CXL_POS_ZERO when no position parameter needs validating. 
+ * + * Returns: 0 on success, -EINVAL on first invalid parameter + */ +int cxl_validate_translation_params(u8 eiw, u16 eig, int pos) { - return cxlrd->ops && cxlrd->ops->hpa_to_spa; + int ways, gran; + + if (eiw_to_ways(eiw, &ways)) { + pr_debug("%s: invalid eiw=%u\n", __func__, eiw); + return -EINVAL; + } + if (eig_to_granularity(eig, &gran)) { + pr_debug("%s: invalid eig=%u\n", __func__, eig); + return -EINVAL; + } + if (pos < 0 || pos >= ways) { + pr_debug("%s: invalid pos=%d for ways=%u\n", __func__, pos, + ways); + return -EINVAL; + } + + return 0; } +EXPORT_SYMBOL_FOR_MODULES(cxl_validate_translation_params, "cxl_translate"); -static bool has_spa_to_hpa(struct cxl_root_decoder *cxlrd) +u64 cxl_calculate_dpa_offset(u64 hpa_offset, u8 eiw, u16 eig) { - return cxlrd->ops && cxlrd->ops->spa_to_hpa; + u64 dpa_offset, bits_lower, bits_upper, temp; + int ret; + + ret = cxl_validate_translation_params(eiw, eig, CXL_POS_ZERO); + if (ret) + return ULLONG_MAX; + + /* + * DPA offset: CXL Spec 3.2 Section 8.2.4.20.13 + * Lower bits [IG+7:0] pass through unchanged + * (eiw < 8) + * Per spec: DPAOffset[51:IG+8] = (HPAOffset[51:IG+IW+8] >> IW) + * Clear the position bits to isolate upper section, then + * reverse the left shift by eiw that occurred during DPA->HPA + * (eiw >= 8) + * Per spec: DPAOffset[51:IG+8] = HPAOffset[51:IG+IW] / 3 + * Extract upper bits from the correct bit range and divide by 3 + * to recover the original DPA upper bits + */ + bits_lower = hpa_offset & GENMASK_ULL(eig + 7, 0); + if (eiw < 8) { + temp = hpa_offset &= ~GENMASK_ULL(eig + eiw + 8 - 1, 0); + dpa_offset = temp >> eiw; + } else { + bits_upper = div64_u64(hpa_offset >> (eig + eiw), 3); + dpa_offset = bits_upper << (eig + 8); + } + dpa_offset |= bits_lower; + + return dpa_offset; } +EXPORT_SYMBOL_FOR_MODULES(cxl_calculate_dpa_offset, "cxl_translate"); -u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, - u64 dpa) +int cxl_calculate_position(u64 hpa_offset, u8 eiw, u16 eig) { - struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); - u64 dpa_offset, hpa_offset, bits_upper, mask_upper, hpa; - struct cxl_region_params *p = &cxlr->params; - struct cxl_endpoint_decoder *cxled = NULL; - u16 eig = 0; - u8 eiw = 0; - int pos; + unsigned int ways = 0; + u64 shifted, rem; + int pos, ret; - for (int i = 0; i < p->nr_targets; i++) { - cxled = p->targets[i]; - if (cxlmd == cxled_to_memdev(cxled)) - break; + ret = cxl_validate_translation_params(eiw, eig, CXL_POS_ZERO); + if (ret) + return ret; + + if (!eiw) + /* position is 0 if no interleaving */ + return 0; + + /* + * Interleave position: CXL Spec 3.2 Section 8.2.4.20.13 + * eiw < 8 + * Position is in the IW bits at HPA_OFFSET[IG+8+IW-1:IG+8]. + * Per spec "remove IW bits starting with bit position IG+8" + * eiw >= 8 + * Position is not explicitly stored in HPA_OFFSET bits. It is + * derived from the modulo operation of the upper bits using + * the total number of interleave ways. 
+ */ + if (eiw < 8) { + pos = (hpa_offset >> (eig + 8)) & GENMASK(eiw - 1, 0); + } else { + shifted = hpa_offset >> (eig + 8); + eiw_to_ways(eiw, &ways); + div64_u64_rem(shifted, ways, &rem); + pos = rem; } - if (!cxled || cxlmd != cxled_to_memdev(cxled)) - return ULLONG_MAX; - pos = cxled->pos; - ways_to_eiw(p->interleave_ways, &eiw); - granularity_to_eig(p->interleave_granularity, &eig); + return pos; +} +EXPORT_SYMBOL_FOR_MODULES(cxl_calculate_position, "cxl_translate"); + +u64 cxl_calculate_hpa_offset(u64 dpa_offset, int pos, u8 eiw, u16 eig) +{ + u64 mask_upper, hpa_offset, bits_upper; + int ret; + + ret = cxl_validate_translation_params(eiw, eig, pos); + if (ret) + return ULLONG_MAX; /* * The device position in the region interleave set was removed @@ -2967,9 +3091,6 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, * 8.2.4.19.13 Implementation Note: Device Decode Logic */ - /* Remove the dpa base */ - dpa_offset = dpa - cxl_dpa_resource_start(cxled); - mask_upper = GENMASK_ULL(51, eig + 8); if (eiw < 8) { @@ -2984,12 +3105,43 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, /* The lower bits remain unchanged */ hpa_offset |= dpa_offset & GENMASK_ULL(eig + 7, 0); + return hpa_offset; +} +EXPORT_SYMBOL_FOR_MODULES(cxl_calculate_hpa_offset, "cxl_translate"); + +u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, + u64 dpa) +{ + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); + struct cxl_region_params *p = &cxlr->params; + struct cxl_endpoint_decoder *cxled = NULL; + u64 dpa_offset, hpa_offset, hpa; + u16 eig = 0; + u8 eiw = 0; + int pos; + + for (int i = 0; i < p->nr_targets; i++) { + if (cxlmd == cxled_to_memdev(p->targets[i])) { + cxled = p->targets[i]; + break; + } + } + if (!cxled) + return ULLONG_MAX; + + pos = cxled->pos; + ways_to_eiw(p->interleave_ways, &eiw); + granularity_to_eig(p->interleave_granularity, &eig); + + dpa_offset = dpa - cxl_dpa_resource_start(cxled); + hpa_offset = cxl_calculate_hpa_offset(dpa_offset, pos, eiw, eig); + /* Apply the hpa_offset to the region base address */ hpa = hpa_offset + p->res->start + p->cache_size; /* Root decoder translation overrides typical modulo decode */ - if (has_hpa_to_spa(cxlrd)) - hpa = cxlrd->ops->hpa_to_spa(cxlrd, hpa); + if (cxlrd->ops.hpa_to_spa) + hpa = cxlrd->ops.hpa_to_spa(cxlrd, hpa); if (!cxl_resource_contains_addr(p->res, hpa)) { dev_dbg(&cxlr->dev, @@ -2998,7 +3150,7 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, } /* Simple chunk check, by pos & gran, only applies to modulo decodes */ - if (!has_hpa_to_spa(cxlrd) && (!cxl_is_hpa_in_chunk(hpa, cxlr, pos))) + if (!cxlrd->ops.hpa_to_spa && !cxl_is_hpa_in_chunk(hpa, cxlr, pos)) return ULLONG_MAX; return hpa; @@ -3016,8 +3168,6 @@ static int region_offset_to_dpa_result(struct cxl_region *cxlr, u64 offset, struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); struct cxl_endpoint_decoder *cxled; u64 hpa, hpa_offset, dpa_offset; - u64 bits_upper, bits_lower; - u64 shifted, rem, temp; u16 eig = 0; u8 eiw = 0; int pos; @@ -3033,56 +3183,21 @@ static int region_offset_to_dpa_result(struct cxl_region *cxlr, u64 offset, * If the root decoder has SPA to CXL HPA callback, use it. Otherwise * CXL HPA is assumed to equal SPA. 
*/ - if (has_spa_to_hpa(cxlrd)) { - hpa = cxlrd->ops->spa_to_hpa(cxlrd, p->res->start + offset); + if (cxlrd->ops.spa_to_hpa) { + hpa = cxlrd->ops.spa_to_hpa(cxlrd, p->res->start + offset); hpa_offset = hpa - p->res->start; } else { hpa_offset = offset; } - /* - * Interleave position: CXL Spec 3.2 Section 8.2.4.20.13 - * eiw < 8 - * Position is in the IW bits at HPA_OFFSET[IG+8+IW-1:IG+8]. - * Per spec "remove IW bits starting with bit position IG+8" - * eiw >= 8 - * Position is not explicitly stored in HPA_OFFSET bits. It is - * derived from the modulo operation of the upper bits using - * the total number of interleave ways. - */ - if (eiw < 8) { - pos = (hpa_offset >> (eig + 8)) & GENMASK(eiw - 1, 0); - } else { - shifted = hpa_offset >> (eig + 8); - div64_u64_rem(shifted, p->interleave_ways, &rem); - pos = rem; - } + + pos = cxl_calculate_position(hpa_offset, eiw, eig); if (pos < 0 || pos >= p->nr_targets) { dev_dbg(&cxlr->dev, "Invalid position %d for %d targets\n", pos, p->nr_targets); return -ENXIO; } - /* - * DPA offset: CXL Spec 3.2 Section 8.2.4.20.13 - * Lower bits [IG+7:0] pass through unchanged - * (eiw < 8) - * Per spec: DPAOffset[51:IG+8] = (HPAOffset[51:IG+IW+8] >> IW) - * Clear the position bits to isolate upper section, then - * reverse the left shift by eiw that occurred during DPA->HPA - * (eiw >= 8) - * Per spec: DPAOffset[51:IG+8] = HPAOffset[51:IG+IW] / 3 - * Extract upper bits from the correct bit range and divide by 3 - * to recover the original DPA upper bits - */ - bits_lower = hpa_offset & GENMASK_ULL(eig + 7, 0); - if (eiw < 8) { - temp = hpa_offset &= ~((u64)GENMASK(eig + eiw + 8 - 1, 0)); - dpa_offset = temp >> eiw; - } else { - bits_upper = div64_u64(hpa_offset >> (eig + eiw), 3); - dpa_offset = bits_upper << (eig + 8); - } - dpa_offset |= bits_lower; + dpa_offset = cxl_calculate_dpa_offset(hpa_offset, eiw, eig); /* Look-up and return the result: a memdev and a DPA */ for (int i = 0; i < p->nr_targets; i++) { @@ -3398,7 +3513,7 @@ static int match_region_by_range(struct device *dev, const void *data) p = &cxlr->params; guard(rwsem_read)(&cxl_rwsem.region); - return region_res_match_cxl_range(p, r); + return spa_maps_hpa(p, r); } static int cxl_extended_linear_cache_resize(struct cxl_region *cxlr, @@ -3479,6 +3594,10 @@ static int __construct_region(struct cxl_region *cxlr, "Extended linear cache calculation failed rc:%d\n", rc); } + rc = sysfs_update_group(&cxlr->dev.kobj, &cxl_region_group); + if (rc) + return rc; + rc = insert_resource(cxlrd->res, res); if (rc) { /* diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 231ddccf8977..ba17fa86d249 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -451,7 +451,7 @@ struct cxl_root_decoder { void *platform_data; struct mutex range_lock; int qos_class; - struct cxl_rd_ops *ops; + struct cxl_rd_ops ops; struct cxl_switch_decoder cxlsd; }; @@ -517,6 +517,14 @@ enum cxl_partition_mode { */ #define CXL_REGION_F_NEEDS_RESET 1 +/* + * Indicate whether this region is locked due to 1 or more decoders that have + * been locked. The approach of all or nothing is taken with regard to the + * locked attribute. CXL_REGION_F_NEEDS_RESET should not be set if this flag is + * set. 
+ */ +#define CXL_REGION_F_LOCK 2 + /** * struct cxl_region - CXL region * @dev: This region's device @@ -738,6 +746,25 @@ static inline bool is_cxl_root(struct cxl_port *port) return port->uport_dev == port->dev.parent; } +/* Address translation functions exported to cxl_translate test module only */ +int cxl_validate_translation_params(u8 eiw, u16 eig, int pos); +u64 cxl_calculate_hpa_offset(u64 dpa_offset, int pos, u8 eiw, u16 eig); +u64 cxl_calculate_dpa_offset(u64 hpa_offset, u8 eiw, u16 eig); +int cxl_calculate_position(u64 hpa_offset, u8 eiw, u16 eig); +struct cxl_cxims_data { + int nr_maps; + u64 xormaps[] __counted_by(nr_maps); +}; + +#if IS_ENABLED(CONFIG_CXL_ACPI) +u64 cxl_do_xormap_calc(struct cxl_cxims_data *cximsd, u64 addr, int hbiw); +#else +static inline u64 cxl_do_xormap_calc(struct cxl_cxims_data *cximsd, u64 addr, int hbiw) +{ + return ULLONG_MAX; +} +#endif + int cxl_num_decoders_committed(struct cxl_port *port); bool is_cxl_port(const struct device *dev); struct cxl_port *to_cxl_port(const struct device *dev); diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 7ae621e618e7..1d526bea8431 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -127,7 +127,6 @@ static inline bool cxl_pci_flit_256(struct pci_dev *pdev) return lnksta2 & PCI_EXP_LNKSTA2_FLIT; } -int devm_cxl_port_enumerate_dports(struct cxl_port *port); struct cxl_dev_state; void read_cdat_data(struct cxl_port *port); void cxl_cor_error_detected(struct pci_dev *pdev); diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index bd100ac31672..0be4e508affe 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -136,7 +136,7 @@ static irqreturn_t cxl_pci_mbox_irq(int irq, void *id) if (opcode == CXL_MBOX_OP_SANITIZE) { mutex_lock(&cxl_mbox->mbox_mutex); if (mds->security.sanitize_node) - mod_delayed_work(system_wq, &mds->security.poll_dwork, 0); + mod_delayed_work(system_percpu_wq, &mds->security.poll_dwork, 0); mutex_unlock(&cxl_mbox->mbox_mutex); } else { /* short-circuit the wait in __cxl_pci_mbox_send_cmd() */ diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 0d5ce4b74b9f..0e151d0572d1 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -4,13 +4,12 @@ ldflags-y += --wrap=is_acpi_device_node ldflags-y += --wrap=acpi_evaluate_integer ldflags-y += --wrap=acpi_pci_find_root ldflags-y += --wrap=nvdimm_bus_register -ldflags-y += --wrap=devm_cxl_port_enumerate_dports ldflags-y += --wrap=cxl_await_media_ready ldflags-y += --wrap=devm_cxl_add_rch_dport -ldflags-y += --wrap=cxl_rcd_component_reg_phys ldflags-y += --wrap=cxl_endpoint_parse_cdat ldflags-y += --wrap=cxl_dport_init_ras_reporting ldflags-y += --wrap=devm_cxl_endpoint_decoders_setup +ldflags-y += --wrap=hmat_get_extended_linear_cache_size DRIVERS := ../../../drivers CXL_SRC := $(DRIVERS)/cxl diff --git a/tools/testing/cxl/test/Kbuild b/tools/testing/cxl/test/Kbuild index 6b1927897856..af50972c8b6d 100644 --- a/tools/testing/cxl/test/Kbuild +++ b/tools/testing/cxl/test/Kbuild @@ -4,6 +4,7 @@ ccflags-y := -I$(srctree)/drivers/cxl/ -I$(srctree)/drivers/cxl/core obj-m += cxl_test.o obj-m += cxl_mock.o obj-m += cxl_mock_mem.o +obj-m += cxl_translate.o cxl_test-y := cxl.o cxl_mock-y := mock.o diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 2d135ca533d0..81e2aef3627a 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -15,6 +15,7 @@ #include "mock.h" static int interleave_arithmetic; +static bool extended_linear_cache; #define FAKE_QTG_ID 42 @@ 
-26,6 +27,9 @@ static int interleave_arithmetic; #define NR_CXL_PORT_DECODERS 8 #define NR_BRIDGES (NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST + NR_CXL_RCH) +#define MOCK_AUTO_REGION_SIZE_DEFAULT SZ_512M +static int mock_auto_region_size = MOCK_AUTO_REGION_SIZE_DEFAULT; + static struct platform_device *cxl_acpi; static struct platform_device *cxl_host_bridge[NR_CXL_HOST_BRIDGES]; #define NR_MULTI_ROOT (NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS) @@ -426,6 +430,22 @@ static struct cxl_mock_res *alloc_mock_res(resource_size_t size, int align) return res; } +/* Only update CFMWS0 as this is used by the auto region. */ +static void cfmws_elc_update(struct acpi_cedt_cfmws *window, int index) +{ + if (!extended_linear_cache) + return; + + if (index != 0) + return; + + /* + * The window size should be 2x of the CXL region size where half is + * DRAM and half is CXL + */ + window->window_size = mock_auto_region_size * 2; +} + static int populate_cedt(void) { struct cxl_mock_res *res; @@ -450,6 +470,7 @@ static int populate_cedt(void) for (i = cfmws_start; i <= cfmws_end; i++) { struct acpi_cedt_cfmws *window = mock_cfmws[i]; + cfmws_elc_update(window, i); res = alloc_mock_res(window->window_size, SZ_256M); if (!res) return -ENOMEM; @@ -591,6 +612,25 @@ mock_acpi_evaluate_integer(acpi_handle handle, acpi_string pathname, return AE_OK; } +static int +mock_hmat_get_extended_linear_cache_size(struct resource *backing_res, + int nid, resource_size_t *cache_size) +{ + struct acpi_cedt_cfmws *window = mock_cfmws[0]; + struct resource cfmws0_res = + DEFINE_RES_MEM(window->base_hpa, window->window_size); + + if (!extended_linear_cache || + !resource_contains(&cfmws0_res, backing_res)) { + return hmat_get_extended_linear_cache_size(backing_res, + nid, cache_size); + } + + *cache_size = mock_auto_region_size; + + return 0; +} + static struct pci_bus mock_pci_bus[NR_BRIDGES]; static struct acpi_pci_root mock_pci_root[ARRAY_SIZE(mock_pci_bus)] = { [0] = { @@ -738,7 +778,6 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld) struct cxl_endpoint_decoder *cxled; struct cxl_switch_decoder *cxlsd; struct cxl_port *port, *iter; - const int size = SZ_512M; struct cxl_memdev *cxlmd; struct cxl_dport *dport; struct device *dev; @@ -781,9 +820,11 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld) } base = window->base_hpa; + if (extended_linear_cache) + base += mock_auto_region_size; cxld->hpa_range = (struct range) { .start = base, - .end = base + size - 1, + .end = base + mock_auto_region_size - 1, }; cxld->interleave_ways = 2; @@ -792,7 +833,8 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld) cxld->flags = CXL_DECODER_F_ENABLE; cxled->state = CXL_DECODER_STATE_AUTO; port->commit_end = cxld->id; - devm_cxl_dpa_reserve(cxled, 0, size / cxld->interleave_ways, 0); + devm_cxl_dpa_reserve(cxled, 0, + mock_auto_region_size / cxld->interleave_ways, 0); cxld->commit = mock_decoder_commit; cxld->reset = mock_decoder_reset; @@ -841,7 +883,7 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld) cxld->interleave_granularity = 4096; cxld->hpa_range = (struct range) { .start = base, - .end = base + size - 1, + .end = base + mock_auto_region_size - 1, }; put_device(dev); } @@ -995,37 +1037,6 @@ static int get_port_array(struct cxl_port *port, return 0; } -static int mock_cxl_port_enumerate_dports(struct cxl_port *port) -{ - struct platform_device **array; - int i, array_size; - int rc; - - rc = get_port_array(port, &array, &array_size); - if (rc) - return rc; - - for (i = 0; i < array_size; 
i++) { - struct platform_device *pdev = array[i]; - struct cxl_dport *dport; - - if (pdev->dev.parent != port->uport_dev) { - dev_dbg(&port->dev, "%s: mismatch parent %s\n", - dev_name(port->uport_dev), - dev_name(pdev->dev.parent)); - continue; - } - - dport = devm_cxl_add_dport(port, &pdev->dev, pdev->id, - CXL_RESOURCE_NONE); - - if (IS_ERR(dport)) - return PTR_ERR(dport); - } - - return 0; -} - static struct cxl_dport *mock_cxl_add_dport_by_dev(struct cxl_port *port, struct device *dport_dev) { @@ -1114,9 +1125,10 @@ static struct cxl_mock_ops cxl_mock_ops = { .acpi_pci_find_root = mock_acpi_pci_find_root, .devm_cxl_switch_port_decoders_setup = mock_cxl_switch_port_decoders_setup, .devm_cxl_endpoint_decoders_setup = mock_cxl_endpoint_decoders_setup, - .devm_cxl_port_enumerate_dports = mock_cxl_port_enumerate_dports, .cxl_endpoint_parse_cdat = mock_cxl_endpoint_parse_cdat, .devm_cxl_add_dport_by_dev = mock_cxl_add_dport_by_dev, + .hmat_get_extended_linear_cache_size = + mock_hmat_get_extended_linear_cache_size, .list = LIST_HEAD_INIT(cxl_mock_ops.list), }; @@ -1606,6 +1618,8 @@ static __exit void cxl_test_exit(void) module_param(interleave_arithmetic, int, 0444); MODULE_PARM_DESC(interleave_arithmetic, "Modulo:0, XOR:1"); +module_param(extended_linear_cache, bool, 0444); +MODULE_PARM_DESC(extended_linear_cache, "Enable extended linear cache support"); module_init(cxl_test_init); module_exit(cxl_test_exit); MODULE_LICENSE("GPL v2"); diff --git a/tools/testing/cxl/test/cxl_translate.c b/tools/testing/cxl/test/cxl_translate.c new file mode 100644 index 000000000000..2200ae21795c --- /dev/null +++ b/tools/testing/cxl/test/cxl_translate.c @@ -0,0 +1,445 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright(c) 2025 Intel Corporation. All rights reserved. + +/* Preface all log entries with "cxl_translate" */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/moduleparam.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/acpi.h> +#include <cxlmem.h> +#include <cxl.h> + +/* Maximum number of test vectors and entry length */ +#define MAX_TABLE_ENTRIES 128 +#define MAX_ENTRY_LEN 128 + +/* Expected number of parameters in each test vector */ +#define EXPECTED_PARAMS 7 + +/* Module parameters for test vectors */ +static char *table[MAX_TABLE_ENTRIES]; +static int table_num; + +/* Interleave Arithmetic */ +#define MODULO_MATH 0 +#define XOR_MATH 1 + +/* + * XOR mapping configuration + * The test data sets all use the same set of xormaps. When additional + * data sets arrive for validation, this static setup will need to + * be changed to accept xormaps as additional parameters. 
+ */ +struct cxl_cxims_data *cximsd; +static u64 xormaps[] = { + 0x2020900, + 0x4041200, + 0x1010400, + 0x800, +}; + +static int nr_maps = ARRAY_SIZE(xormaps); + +#define HBIW_TO_NR_MAPS_SIZE (CXL_DECODER_MAX_INTERLEAVE + 1) +static const int hbiw_to_nr_maps[HBIW_TO_NR_MAPS_SIZE] = { + [1] = 0, [2] = 1, [3] = 0, [4] = 2, [6] = 1, [8] = 3, [12] = 2, [16] = 4 +}; + +/** + * to_hpa - calculate an HPA offset from a DPA offset and position + * + * dpa_offset: device physical address offset + * pos: devices position in interleave + * r_eiw: region encoded interleave ways + * r_eig: region encoded interleave granularity + * hb_ways: host bridge interleave ways + * math: interleave arithmetic (MODULO_MATH or XOR_MATH) + * + * Returns: host physical address offset + */ +static u64 to_hpa(u64 dpa_offset, int pos, u8 r_eiw, u16 r_eig, u8 hb_ways, + u8 math) +{ + u64 hpa_offset; + + /* Calculate base HPA offset from DPA and position */ + hpa_offset = cxl_calculate_hpa_offset(dpa_offset, pos, r_eiw, r_eig); + + if (math == XOR_MATH) { + cximsd->nr_maps = hbiw_to_nr_maps[hb_ways]; + if (cximsd->nr_maps) + return cxl_do_xormap_calc(cximsd, hpa_offset, hb_ways); + } + return hpa_offset; +} + +/** + * to_dpa - translate an HPA offset to DPA offset + * + * hpa_offset: host physical address offset + * r_eiw: region encoded interleave ways + * r_eig: region encoded interleave granularity + * hb_ways: host bridge interleave ways + * math: interleave arithmetic (MODULO_MATH or XOR_MATH) + * + * Returns: device physical address offset + */ +static u64 to_dpa(u64 hpa_offset, u8 r_eiw, u16 r_eig, u8 hb_ways, u8 math) +{ + u64 offset = hpa_offset; + + if (math == XOR_MATH) { + cximsd->nr_maps = hbiw_to_nr_maps[hb_ways]; + if (cximsd->nr_maps) + offset = + cxl_do_xormap_calc(cximsd, hpa_offset, hb_ways); + } + return cxl_calculate_dpa_offset(offset, r_eiw, r_eig); +} + +/** + * to_pos - extract an interleave position from an HPA offset + * + * hpa_offset: host physical address offset + * r_eiw: region encoded interleave ways + * r_eig: region encoded interleave granularity + * hb_ways: host bridge interleave ways + * math: interleave arithmetic (MODULO_MATH or XOR_MATH) + * + * Returns: devices position in region interleave + */ +static u64 to_pos(u64 hpa_offset, u8 r_eiw, u16 r_eig, u8 hb_ways, u8 math) +{ + u64 offset = hpa_offset; + + /* Reverse XOR mapping if specified */ + if (math == XOR_MATH) + offset = cxl_do_xormap_calc(cximsd, hpa_offset, hb_ways); + + return cxl_calculate_position(offset, r_eiw, r_eig); +} + +/** + * run_translation_test - execute forward and reverse translations + * + * @dpa: device physical address + * @pos: expected position in region interleave + * @r_eiw: region encoded interleave ways + * @r_eig: region encoded interleave granularity + * @hb_ways: host bridge interleave ways + * @math: interleave arithmetic (MODULO_MATH or XOR_MATH) + * @expect_spa: expected system physical address + * + * Returns: 0 on success, -1 on failure + */ +static int run_translation_test(u64 dpa, int pos, u8 r_eiw, u16 r_eig, + u8 hb_ways, int math, u64 expect_hpa) +{ + u64 translated_spa, reverse_dpa; + int reverse_pos; + + /* Test Device to Host translation: DPA + POS -> SPA */ + translated_spa = to_hpa(dpa, pos, r_eiw, r_eig, hb_ways, math); + if (translated_spa != expect_hpa) { + pr_err("Device to host failed: expected HPA %llu, got %llu\n", + expect_hpa, translated_spa); + return -1; + } + + /* Test Host to Device DPA translation: SPA -> DPA */ + reverse_dpa = to_dpa(translated_spa, r_eiw, r_eig, 
hb_ways, math); + if (reverse_dpa != dpa) { + pr_err("Host to Device DPA failed: expected %llu, got %llu\n", + dpa, reverse_dpa); + return -1; + } + + /* Test Host to Device Position translation: SPA -> POS */ + reverse_pos = to_pos(translated_spa, r_eiw, r_eig, hb_ways, math); + if (reverse_pos != pos) { + pr_err("Position lookup failed: expected %d, got %d\n", pos, + reverse_pos); + return -1; + } + + return 0; +} + +/** + * parse_test_vector - parse a single test vector string + * + * entry: test vector string to parse + * dpa: device physical address + * pos: expected position in region interleave + * r_eiw: region encoded interleave ways + * r_eig: region encoded interleave granularity + * hb_ways: host bridge interleave ways + * math: interleave arithmetic (MODULO_MATH or XOR_MATH) + * expect_spa: expected system physical address + * + * Returns: 0 on success, negative error code on failure + */ +static int parse_test_vector(const char *entry, u64 *dpa, int *pos, u8 *r_eiw, + u16 *r_eig, u8 *hb_ways, int *math, + u64 *expect_hpa) +{ + unsigned int tmp_r_eiw, tmp_r_eig, tmp_hb_ways; + int parsed; + + parsed = sscanf(entry, "%llu %d %u %u %u %d %llu", dpa, pos, &tmp_r_eiw, + &tmp_r_eig, &tmp_hb_ways, math, expect_hpa); + + if (parsed != EXPECTED_PARAMS) { + pr_err("Parse error: expected %d parameters, got %d in '%s'\n", + EXPECTED_PARAMS, parsed, entry); + return -EINVAL; + } + if (tmp_r_eiw > U8_MAX || tmp_r_eig > U16_MAX || tmp_hb_ways > U8_MAX) { + pr_err("Parameter overflow in entry: '%s'\n", entry); + return -ERANGE; + } + if (*math != MODULO_MATH && *math != XOR_MATH) { + pr_err("Invalid math type %d in entry: '%s'\n", *math, entry); + return -EINVAL; + } + *r_eiw = tmp_r_eiw; + *r_eig = tmp_r_eig; + *hb_ways = tmp_hb_ways; + + return 0; +} + +/* + * setup_xor_mapping - Initialize XOR mapping data structure + * + * The test data sets all use the same HBIG so we can use one set + * of xormaps, and set the number to apply based on HBIW before + * calling cxl_do_xormap_calc(). + * + * When additional data sets arrive for validation with different + * HBIG's this static setup will need to be updated. 
+ * + * Returns: 0 on success, negative error code on failure + */ +static int setup_xor_mapping(void) +{ + if (nr_maps <= 0) + return -EINVAL; + + cximsd = kzalloc(struct_size(cximsd, xormaps, nr_maps), GFP_KERNEL); + if (!cximsd) + return -ENOMEM; + + memcpy(cximsd->xormaps, xormaps, nr_maps * sizeof(*cximsd->xormaps)); + cximsd->nr_maps = nr_maps; + + return 0; +} + +static int test_random_params(void) +{ + u8 valid_eiws[] = { 0, 1, 2, 3, 4, 8, 9, 10 }; + u16 valid_eigs[] = { 0, 1, 2, 3, 4, 5, 6 }; + int i, ways, pos, reverse_pos; + u64 dpa, hpa, reverse_dpa; + int iterations = 10000; + int failures = 0; + + for (i = 0; i < iterations; i++) { + /* Generate valid random parameters for eiw, eig, pos, dpa */ + u8 eiw = valid_eiws[get_random_u32() % ARRAY_SIZE(valid_eiws)]; + u16 eig = valid_eigs[get_random_u32() % ARRAY_SIZE(valid_eigs)]; + + eiw_to_ways(eiw, &ways); + pos = get_random_u32() % ways; + dpa = get_random_u64() >> 12; + + hpa = cxl_calculate_hpa_offset(dpa, pos, eiw, eig); + reverse_dpa = cxl_calculate_dpa_offset(hpa, eiw, eig); + reverse_pos = cxl_calculate_position(hpa, eiw, eig); + + if (reverse_dpa != dpa || reverse_pos != pos) { + pr_err("test random iter %d FAIL hpa=%llu, dpa=%llu reverse_dpa=%llu, pos=%d reverse_pos=%d eiw=%u eig=%u\n", + i, hpa, dpa, reverse_dpa, pos, reverse_pos, eiw, + eig); + + if (failures++ > 10) { + pr_err("test random too many failures, stop\n"); + break; + } + } + } + pr_info("..... test random: PASS %d FAIL %d\n", i - failures, failures); + + if (failures) + return -EINVAL; + + return 0; +} + +struct param_test { + u8 eiw; + u16 eig; + int pos; + bool expect; /* true: expect pass, false: expect fail */ + const char *desc; +}; + +static struct param_test param_tests[] = { + { 0x0, 0, 0, true, "1-way, min eig=0, pos=0" }, + { 0x0, 3, 0, true, "1-way, mid eig=3, pos=0" }, + { 0x0, 6, 0, true, "1-way, max eig=6, pos=0" }, + { 0x1, 0, 0, true, "2-way, eig=0, pos=0" }, + { 0x1, 3, 1, true, "2-way, eig=3, max pos=1" }, + { 0x1, 6, 1, true, "2-way, eig=6, max pos=1" }, + { 0x2, 0, 0, true, "4-way, eig=0, pos=0" }, + { 0x2, 3, 3, true, "4-way, eig=3, max pos=3" }, + { 0x2, 6, 3, true, "4-way, eig=6, max pos=3" }, + { 0x3, 0, 0, true, "8-way, eig=0, pos=0" }, + { 0x3, 3, 7, true, "8-way, eig=3, max pos=7" }, + { 0x3, 6, 7, true, "8-way, eig=6, max pos=7" }, + { 0x4, 0, 0, true, "16-way, eig=0, pos=0" }, + { 0x4, 3, 15, true, "16-way, eig=3, max pos=15" }, + { 0x4, 6, 15, true, "16-way, eig=6, max pos=15" }, + { 0x8, 0, 0, true, "3-way, eig=0, pos=0" }, + { 0x8, 3, 2, true, "3-way, eig=3, max pos=2" }, + { 0x8, 6, 2, true, "3-way, eig=6, max pos=2" }, + { 0x9, 0, 0, true, "6-way, eig=0, pos=0" }, + { 0x9, 3, 5, true, "6-way, eig=3, max pos=5" }, + { 0x9, 6, 5, true, "6-way, eig=6, max pos=5" }, + { 0xA, 0, 0, true, "12-way, eig=0, pos=0" }, + { 0xA, 3, 11, true, "12-way, eig=3, max pos=11" }, + { 0xA, 6, 11, true, "12-way, eig=6, max pos=11" }, + { 0x5, 0, 0, false, "invalid eiw=5" }, + { 0x7, 0, 0, false, "invalid eiw=7" }, + { 0xB, 0, 0, false, "invalid eiw=0xB" }, + { 0xFF, 0, 0, false, "invalid eiw=0xFF" }, + { 0x1, 7, 0, false, "invalid eig=7 (out of range)" }, + { 0x2, 0x10, 0, false, "invalid eig=0x10" }, + { 0x3, 0xFFFF, 0, false, "invalid eig=0xFFFF" }, + { 0x1, 0, -1, false, "pos < 0" }, + { 0x1, 0, 2, false, "2-way, pos=2 (>= ways)" }, + { 0x2, 0, 4, false, "4-way, pos=4 (>= ways)" }, + { 0x3, 0, 8, false, "8-way, pos=8 (>= ways)" }, + { 0x4, 0, 16, false, "16-way, pos=16 (>= ways)" }, + { 0x8, 0, 3, false, "3-way, pos=3 (>= ways)" }, + { 0x9, 
0, 6, false, "6-way, pos=6 (>= ways)" }, + { 0xA, 0, 12, false, "12-way, pos=12 (>= ways)" }, +}; + +static int test_cxl_validate_translation_params(void) +{ + int i, rc, failures = 0; + bool valid; + + for (i = 0; i < ARRAY_SIZE(param_tests); i++) { + struct param_test *t = ¶m_tests[i]; + + rc = cxl_validate_translation_params(t->eiw, t->eig, t->pos); + valid = (rc == 0); + + if (valid != t->expect) { + pr_err("test params failed: %s\n", t->desc); + failures++; + } + } + pr_info("..... test params: PASS %d FAIL %d\n", i - failures, failures); + + if (failures) + return -EINVAL; + + return 0; +} + +/* + * cxl_translate_init + * + * Run the internal validation tests when no params are passed. + * Otherwise, parse the parameters (test vectors), and kick off + * the translation test. + * + * Returns: 0 on success, negative error code on failure + */ +static int __init cxl_translate_init(void) +{ + int rc, i; + + /* If no tables are passed, validate module params only */ + if (table_num == 0) { + pr_info("Internal validation test start...\n"); + rc = test_cxl_validate_translation_params(); + if (rc) + return rc; + + rc = test_random_params(); + if (rc) + return rc; + + pr_info("Internal validation test completed successfully\n"); + + return 0; + } + + pr_info("CXL translate test module loaded with %d test vectors\n", + table_num); + + rc = setup_xor_mapping(); + if (rc) + return rc; + + /* Process each test vector */ + for (i = 0; i < table_num; i++) { + u64 dpa, expect_spa; + int pos, math; + u8 r_eiw, hb_ways; + u16 r_eig; + + pr_debug("Processing test vector %d: '%s'\n", i, table[i]); + + /* Parse the test vector */ + rc = parse_test_vector(table[i], &dpa, &pos, &r_eiw, &r_eig, + &hb_ways, &math, &expect_spa); + if (rc) { + pr_err("CXL Translate Test %d: FAIL\n" + " Failed to parse test vector '%s'\n", + i, table[i]); + continue; + } + /* Run the translation test */ + rc = run_translation_test(dpa, pos, r_eiw, r_eig, hb_ways, math, + expect_spa); + if (rc) { + pr_err("CXL Translate Test %d: FAIL\n" + " dpa=%llu pos=%d r_eiw=%u r_eig=%u hb_ways=%u math=%s expect_spa=%llu\n", + i, dpa, pos, r_eiw, r_eig, hb_ways, + (math == XOR_MATH) ? "XOR" : "MODULO", + expect_spa); + } else { + pr_info("CXL Translate Test %d: PASS\n", i); + } + } + + kfree(cximsd); + pr_info("CXL translate test completed\n"); + + return 0; +} + +static void __exit cxl_translate_exit(void) +{ + pr_info("CXL translate test module unloaded\n"); +} + +module_param_array(table, charp, &table_num, 0444); +MODULE_PARM_DESC(table, "Test vectors as space-separated decimal strings"); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("cxl_test: cxl address translation test module"); +MODULE_IMPORT_NS("CXL"); + +module_init(cxl_translate_init); +module_exit(cxl_translate_exit); diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index d533481672b7..176dcde570cd 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -250,22 +250,21 @@ static void mes_add_event(struct mock_event_store *mes, * Vary the number of events returned to simulate events occuring while the * logs are being read. 
*/ -static int ret_limit = 0; +static atomic_t event_counter = ATOMIC_INIT(0); static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd) { struct cxl_get_event_payload *pl; struct mock_event_log *log; - u16 nr_overflow; + int ret_limit; u8 log_type; int i; if (cmd->size_in != sizeof(log_type)) return -EINVAL; - ret_limit = (ret_limit + 1) % CXL_TEST_EVENT_RET_MAX; - if (!ret_limit) - ret_limit = 1; + /* Vary return limit from 1 to CXL_TEST_EVENT_RET_MAX */ + ret_limit = (atomic_inc_return(&event_counter) % CXL_TEST_EVENT_RET_MAX) + 1; if (cmd->size_out < struct_size(pl, records, ret_limit)) return -EINVAL; @@ -299,7 +298,7 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd) u64 ns; pl->flags |= CXL_GET_EVENT_FLAG_OVERFLOW; - pl->overflow_err_count = cpu_to_le16(nr_overflow); + pl->overflow_err_count = cpu_to_le16(log->nr_overflow); ns = ktime_get_real_ns(); ns -= 5000000000; /* 5s ago */ pl->first_overflow_timestamp = cpu_to_le64(ns); diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 995269a75cbd..44bce80ef3ff 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -111,6 +111,26 @@ acpi_status __wrap_acpi_evaluate_integer(acpi_handle handle, } EXPORT_SYMBOL(__wrap_acpi_evaluate_integer); +int __wrap_hmat_get_extended_linear_cache_size(struct resource *backing_res, + int nid, + resource_size_t *cache_size) +{ + int index, rc; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops) + rc = ops->hmat_get_extended_linear_cache_size(backing_res, nid, + cache_size); + else + rc = hmat_get_extended_linear_cache_size(backing_res, nid, + cache_size); + + put_cxl_mock_ops(index); + + return rc; +} +EXPORT_SYMBOL_GPL(__wrap_hmat_get_extended_linear_cache_size); + struct acpi_pci_root *__wrap_acpi_pci_find_root(acpi_handle handle) { int index; @@ -172,21 +192,6 @@ int __wrap_devm_cxl_endpoint_decoders_setup(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_endpoint_decoders_setup, "CXL"); -int __wrap_devm_cxl_port_enumerate_dports(struct cxl_port *port) -{ - int rc, index; - struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - - if (ops && ops->is_mock_port(port->uport_dev)) - rc = ops->devm_cxl_port_enumerate_dports(port); - else - rc = devm_cxl_port_enumerate_dports(port); - put_cxl_mock_ops(index); - - return rc; -} -EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_port_enumerate_dports, "CXL"); - int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds) { int rc, index; @@ -226,23 +231,6 @@ struct cxl_dport *__wrap_devm_cxl_add_rch_dport(struct cxl_port *port, } EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_rch_dport, "CXL"); -resource_size_t __wrap_cxl_rcd_component_reg_phys(struct device *dev, - struct cxl_dport *dport) -{ - int index; - resource_size_t component_reg_phys; - struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - - if (ops && ops->is_mock_port(dev)) - component_reg_phys = CXL_RESOURCE_NONE; - else - component_reg_phys = cxl_rcd_component_reg_phys(dev, dport); - put_cxl_mock_ops(index); - - return component_reg_phys; -} -EXPORT_SYMBOL_NS_GPL(__wrap_cxl_rcd_component_reg_phys, "CXL"); - void __wrap_cxl_endpoint_parse_cdat(struct cxl_port *port) { int index; diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h index 4ed932e76aae..2684b89c8aa2 100644 --- a/tools/testing/cxl/test/mock.h +++ b/tools/testing/cxl/test/mock.h @@ -19,12 +19,14 @@ struct cxl_mock_ops { bool (*is_mock_bus)(struct pci_bus *bus); bool (*is_mock_port)(struct device *dev); bool 
(*is_mock_dev)(struct device *dev); - int (*devm_cxl_port_enumerate_dports)(struct cxl_port *port); int (*devm_cxl_switch_port_decoders_setup)(struct cxl_port *port); int (*devm_cxl_endpoint_decoders_setup)(struct cxl_port *port); void (*cxl_endpoint_parse_cdat)(struct cxl_port *port); struct cxl_dport *(*devm_cxl_add_dport_by_dev)(struct cxl_port *port, struct device *dport_dev); + int (*hmat_get_extended_linear_cache_size)(struct resource *backing_res, + int nid, + resource_size_t *cache_size); }; void register_cxl_mock_ops(struct cxl_mock_ops *ops); |
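
A minimal standalone sketch of the modulo (non-XOR) DPA<->HPA offset math that this patch factors into cxl_calculate_hpa_offset() and cxl_calculate_dpa_offset(), per CXL spec 3.2 section 8.2.4.20.13. It is not part of the patch: it is userspace C, covers only eiw < 8 (power-of-two ways) with no XOR interleave, and the sample vector in main() is illustrative rather than one of the patch's test inputs. It can be used to hand-check vectors for the new cxl_translate "table=" module parameter, which parse_test_vector() reads as "dpa pos r_eiw r_eig hb_ways math expect_hpa".

    /* Illustrative only; mirrors the kernel helpers but is not shipped code. */
    #include <stdint.h>
    #include <stdio.h>

    static uint64_t dpa_to_hpa_offset(uint64_t dpa_offset, int pos,
                                      uint8_t eiw, uint16_t eig)
    {
            /* Upper bits shift left by IW; position lands at [IG+8+IW-1:IG+8] */
            uint64_t upper = (dpa_offset >> (eig + 8)) << (eig + 8 + eiw);
            uint64_t lower = dpa_offset & ((1ULL << (eig + 8)) - 1);

            return upper | ((uint64_t)pos << (eig + 8)) | lower;
    }

    static uint64_t hpa_to_dpa_offset(uint64_t hpa_offset, uint8_t eiw, uint16_t eig)
    {
            /* Drop the IW position bits starting at IG+8; keep the low bits */
            uint64_t upper = (hpa_offset >> (eig + 8 + eiw)) << (eig + 8);
            uint64_t lower = hpa_offset & ((1ULL << (eig + 8)) - 1);

            return upper | lower;
    }

    int main(void)
    {
            /*
             * 2-way (eiw=1), 256B granularity (eig=0), device at position 1:
             * dpa_offset 0x100 maps to hpa_offset 0x300, i.e. a hypothetical
             * modulo vector of the form "256 1 1 0 2 0 768".
             */
            uint64_t hpa = dpa_to_hpa_offset(0x100, 1, 1, 0);

            printf("hpa_offset=%#llx dpa_offset=%#llx\n",
                   (unsigned long long)hpa,
                   (unsigned long long)hpa_to_dpa_offset(hpa, 1, 0));
            return 0;
    }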