From 1fc766b5c08417248e0008bca14c3572ac0f1c26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 7 Jun 2022 17:55:55 +0200 Subject: [PATCH 1/9] nvme: add device name to warning in uuid_show() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This provides more context to users. Old message: [ 00.000000] No UUID available providing old NGUID New message: [ 00.000000] block nvme0n1: No UUID available providing old NGUID Fixes: d934f9848a77 ("nvme: provide UUID value to userspace") Signed-off-by: Thomas Weißschuh Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 24165daee3c8..9409b8843872 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3285,8 +3285,8 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, * we have no UUID set */ if (uuid_is_null(&ids->uuid)) { - printk_ratelimited(KERN_WARNING - "No UUID available providing old NGUID\n"); + dev_warn_ratelimited(dev, + "No UUID available providing old NGUID\n"); return sysfs_emit(buf, "%pU\n", ids->nguid); } return sysfs_emit(buf, "%pU\n", &ids->uuid); From 2f0dad1719cbbd690e916a42d937b7605ee63964 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 7 Jun 2022 08:30:29 -0700 Subject: [PATCH 2/9] nvme: add bug report info for global duplicate id The recent global id check is finding poorly implemented devices in the wild. Include relavant device information in the output to help quicken an appropriate quirk patch. Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 1 + drivers/nvme/host/nvme.h | 28 ++++++++++++++++++++++++++++ drivers/nvme/host/pci.c | 16 ++++++++++++++++ 3 files changed, 45 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 9409b8843872..3ab2cfd254a4 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3863,6 +3863,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, if (ret) { dev_err(ctrl->device, "globally duplicate IDs for nsid %d\n", nsid); + nvme_print_device_info(ctrl); return ret; } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 9b72b6ecf33c..0da94b233fed 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -503,6 +503,7 @@ struct nvme_ctrl_ops { void (*submit_async_event)(struct nvme_ctrl *ctrl); void (*delete_ctrl)(struct nvme_ctrl *ctrl); int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size); + void (*print_device_info)(struct nvme_ctrl *ctrl); }; /* @@ -548,6 +549,33 @@ static inline struct request *nvme_cid_to_rq(struct blk_mq_tags *tags, return blk_mq_tag_to_rq(tags, nvme_tag_from_cid(command_id)); } +/* + * Return the length of the string without the space padding + */ +static inline int nvme_strlen(char *s, int len) +{ + while (s[len - 1] == ' ') + len--; + return len; +} + +static inline void nvme_print_device_info(struct nvme_ctrl *ctrl) +{ + struct nvme_subsystem *subsys = ctrl->subsys; + + if (ctrl->ops->print_device_info) { + ctrl->ops->print_device_info(ctrl); + return; + } + + dev_err(ctrl->device, + "VID:%04x model:%.*s firmware:%.*s\n", subsys->vendor_id, + nvme_strlen(subsys->model, sizeof(subsys->model)), + subsys->model, nvme_strlen(subsys->firmware_rev, + sizeof(subsys->firmware_rev)), + subsys->firmware_rev); +} + #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS void nvme_fault_inject_init(struct nvme_fault_inject *fault_inj, const char *dev_name); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 48f4f6eb877b..96579d48002a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2976,6 +2976,21 @@ static int nvme_pci_get_address(struct nvme_ctrl *ctrl, char *buf, int size) return snprintf(buf, size, "%s\n", dev_name(&pdev->dev)); } + +static void nvme_pci_print_device_info(struct nvme_ctrl *ctrl) +{ + struct pci_dev *pdev = to_pci_dev(to_nvme_dev(ctrl)->dev); + struct nvme_subsystem *subsys = ctrl->subsys; + + dev_err(ctrl->device, + "VID:DID %04x:%04x model:%.*s firmware:%.*s\n", + pdev->vendor, pdev->device, + nvme_strlen(subsys->model, sizeof(subsys->model)), + subsys->model, nvme_strlen(subsys->firmware_rev, + sizeof(subsys->firmware_rev)), + subsys->firmware_rev); +} + static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { .name = "pcie", .module = THIS_MODULE, @@ -2987,6 +3002,7 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { .free_ctrl = nvme_pci_free_ctrl, .submit_async_event = nvme_pci_submit_async_event, .get_address = nvme_pci_get_address, + .print_device_info = nvme_pci_print_device_info, }; static int nvme_dev_map(struct nvme_dev *dev) From 4641a8e6e145f595059e695f0f8dbbe608134086 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 6 Jun 2022 09:53:17 -0700 Subject: [PATCH 3/9] nvme-pci: add trouble shooting steps for timeouts Many users have encountered IO timeouts with a CSTS value of 0xffffffff, which indicates a failure to read the register. While there are various potential causes for this observation, faulty NVMe APST has been the culprit quite frequently. Add the recommended troubleshooting steps in the error output when this condition occurs. Signed-off-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 96579d48002a..be053d943731 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1334,6 +1334,14 @@ static void nvme_warn_reset(struct nvme_dev *dev, u32 csts) dev_warn(dev->ctrl.device, "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n", csts, result); + + if (csts != ~0) + return; + + dev_warn(dev->ctrl.device, + "Does your device have a faulty power saving mode enabled?\n"); + dev_warn(dev->ctrl.device, + "Try \"nvme_core.default_ps_max_latency_us=0 pcie_aspm=off\" and report a bug\n"); } static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) From 3765fad508964f433ac111c127d6bedd19bdfa04 Mon Sep 17 00:00:00 2001 From: Stefan Reiter Date: Mon, 6 Jun 2022 13:01:29 +0000 Subject: [PATCH 4/9] nvme-pci: add NVME_QUIRK_BOGUS_NID for ADATA XPG GAMMIX S50 ADATA XPG GAMMIX S50 drives report bogus eui64 values that appear to be the same across drives in one system. Quirk them out so they are not marked as "non globally unique" duplicates. Signed-off-by: Stefan Reiter Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index be053d943731..631add46e439 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3487,6 +3487,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1e4B, 0x1202), /* MAXIO MAP1202 */ .driver_data = NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(0x1cc1, 0x5350), /* ADATA XPG GAMMIX S50 */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061), .driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, }, { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0065), From 2cf7a77ed5f8903606f4f7833d02d67b08650442 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 13 Jun 2022 07:45:47 -0700 Subject: [PATCH 5/9] nvme-pci: phison e12 has bogus namespace ids Add the quirk. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216049 Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 631add46e439..156723d3af83 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3461,6 +3461,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY | NVME_QUIRK_DISABLE_WRITE_ZEROES| NVME_QUIRK_IGNORE_DEV_SUBNQN, }, + { PCI_DEVICE(0x1987, 0x5012), /* Phison E12 */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1987, 0x5016), /* Phison E16 */ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x1b4b, 0x1092), /* Lexar 256 GB SSD */ From c98a879312caf775c9768faed25ce1c013b4df04 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 13 Jun 2022 07:45:48 -0700 Subject: [PATCH 6/9] nvme-pci: smi has bogus namespace ids Add the quirk. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216096 Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 156723d3af83..0d2113468523 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3445,7 +3445,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_VDEVICE(REDHAT, 0x0010), /* Qemu emulated controller */ .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x126f, 0x2263), /* Silicon Motion unidentified */ - .driver_data = NVME_QUIRK_NO_NS_DESC_LIST, }, + .driver_data = NVME_QUIRK_NO_NS_DESC_LIST | + NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1bb1, 0x0100), /* Seagate Nytro Flash Storage */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY | NVME_QUIRK_NO_NS_DESC_LIST, }, From c4f01a776b28378f4f61b53f8cb0e358f4fa3721 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 13 Jun 2022 07:45:49 -0700 Subject: [PATCH 7/9] nvme-pci: sk hynix p31 has bogus namespace ids Add the quirk. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216049 Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 0d2113468523..82b4daa9bf95 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3476,6 +3476,8 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x1c5c, 0x1504), /* SK Hynix PC400 */ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, + { PCI_DEVICE(0x1c5c, 0x174a), /* SK Hynix P31 SSD */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x15b7, 0x2001), /* Sandisk Skyhawk */ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x1d97, 0x2263), /* SPCC */ From 6b961bce50e489186232cef51036ddb8d672bc3b Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Sun, 5 Jun 2022 20:36:48 +0000 Subject: [PATCH 8/9] nvme-pci: avoid the deepest sleep state on ZHITAI TiPro7000 SSDs When ZHITAI TiPro7000 SSDs entered deepest power state(ps4) it has the same APST sleep problem as Kingston A2000. by chance the system crashes and displays the same dmesg info: https://bugzilla.kernel.org/show_bug.cgi?id=195039#c65 As the Archlinux wiki suggest (enlat + exlat) < 25000 is fine and my testing shows no system crashes ever since. Therefore disabling the deepest power state will fix the APST sleep issue. https://wiki.archlinux.org/title/Solid_state_drive/NVMe This is the APST data from 'nvme id-ctrl /dev/nvme1' NVME Identify Controller: vid : 0x1e49 ssvid : 0x1e49 sn : [...] mn : ZHITAI TiPro7000 1TB fr : ZTA32F3Y [...] ps 0 : mp:3.50W operational enlat:5 exlat:5 rrt:0 rrl:0 rwt:0 rwl:0 idle_power:- active_power:- ps 1 : mp:3.30W operational enlat:50 exlat:100 rrt:1 rrl:1 rwt:1 rwl:1 idle_power:- active_power:- ps 2 : mp:2.80W operational enlat:50 exlat:200 rrt:2 rrl:2 rwt:2 rwl:2 idle_power:- active_power:- ps 3 : mp:0.1500W non-operational enlat:500 exlat:5000 rrt:3 rrl:3 rwt:3 rwl:3 idle_power:- active_power:- ps 4 : mp:0.0200W non-operational enlat:2000 exlat:60000 rrt:4 rrl:4 rwt:4 rwl:4 idle_power:- active_power:- Signed-off-by: Ning Wang Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 82b4daa9bf95..b6f536f9ee78 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3494,6 +3494,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1cc1, 0x5350), /* ADATA XPG GAMMIX S50 */ .driver_data = NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(0x1e49, 0x0041), /* ZHITAI TiPro7000 NVMe SSD */ + .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061), .driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, }, { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0065), From 43047e082b90ead395c44b0e8497bc853bd13845 Mon Sep 17 00:00:00 2001 From: "rasheed.hsueh" Date: Fri, 10 Jun 2022 14:27:34 +0800 Subject: [PATCH 9/9] nvme-pci: disable write zeros support on UMIC and Samsung SSDs Like commit 5611ec2b9814 ("nvme-pci: prevent SK hynix PC400 from using Write Zeroes command"), UMIS and Samsung has the same issue: [ 6305.633887] blk_update_request: operation not supported error, dev nvme0n1, sector 340812032 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 So also disable Write Zeroes command on UMIS and Samsung. Signed-off-by: rasheed.hsueh Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index b6f536f9ee78..c7012e85d035 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3482,6 +3482,14 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x1d97, 0x2263), /* SPCC */ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, + { PCI_DEVICE(0x144d, 0xa80b), /* Samsung PM9B1 256G and 512G */ + .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, + { PCI_DEVICE(0x144d, 0xa809), /* Samsung MZALQ256HBJD 256G */ + .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, + { PCI_DEVICE(0x1cc4, 0x6303), /* UMIS RPJTJ512MGE1QDY 512G */ + .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, + { PCI_DEVICE(0x1cc4, 0x6302), /* UMIS RPJTJ256MGE1QDY 256G */ + .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x2646, 0x2262), /* KINGSTON SKC2000 NVMe SSD */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, { PCI_DEVICE(0x2646, 0x2263), /* KINGSTON A2000 NVMe SSD */