From ad512f2023b387d5c3c3624342c2b11de848b491 Mon Sep 17 00:00:00 2001 From: Ajay Joshi Date: Sun, 27 Oct 2019 23:05:47 +0900 Subject: [PATCH 1/7] scsi: sd_zbc: add zone open, close, and finish support Implement REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE and REQ_OP_ZONE_FINISH support to allow explicit control of zone states. Contains contributions from Matias Bjorling, Hans Holmberg, Keith Busch and Damien Le Moal. Reviewed-by: Martin K. Petersen Reviewed-by: Christoph Hellwig Signed-off-by: Ajay Joshi Signed-off-by: Matias Bjorling Signed-off-by: Hans Holmberg Signed-off-by: Keith Busch Signed-off-by: Damien Le Moal Signed-off-by: Jens Axboe --- drivers/scsi/sd.c | 15 +++++++++++++-- drivers/scsi/sd.h | 8 +++++--- drivers/scsi/sd_zbc.c | 22 +++++++++++++--------- 3 files changed, 31 insertions(+), 14 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index ebb40160539f..470ee6dc3f7e 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1291,9 +1291,17 @@ static blk_status_t sd_init_command(struct scsi_cmnd *cmd) case REQ_OP_WRITE: return sd_setup_read_write_cmnd(cmd); case REQ_OP_ZONE_RESET: - return sd_zbc_setup_reset_cmnd(cmd, false); + return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_RESET_WRITE_POINTER, + false); case REQ_OP_ZONE_RESET_ALL: - return sd_zbc_setup_reset_cmnd(cmd, true); + return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_RESET_WRITE_POINTER, + true); + case REQ_OP_ZONE_OPEN: + return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_OPEN_ZONE, false); + case REQ_OP_ZONE_CLOSE: + return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_CLOSE_ZONE, false); + case REQ_OP_ZONE_FINISH: + return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_FINISH_ZONE, false); default: WARN_ON_ONCE(1); return BLK_STS_NOTSUPP; @@ -1961,6 +1969,9 @@ static int sd_done(struct scsi_cmnd *SCpnt) case REQ_OP_WRITE_SAME: case REQ_OP_ZONE_RESET: case REQ_OP_ZONE_RESET_ALL: + case REQ_OP_ZONE_OPEN: + case REQ_OP_ZONE_CLOSE: + case REQ_OP_ZONE_FINISH: if (!result) { good_bytes = blk_rq_bytes(req); 
scsi_set_resid(SCpnt, 0); diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index 1eab779f812b..bf2102a749bc 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -209,7 +209,8 @@ static inline int sd_is_zoned(struct scsi_disk *sdkp) extern int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buffer); extern void sd_zbc_print_zones(struct scsi_disk *sdkp); -extern blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd, bool all); +blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd, + unsigned char op, bool all); extern void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes, struct scsi_sense_hdr *sshdr); extern int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, @@ -225,8 +226,9 @@ static inline int sd_zbc_read_zones(struct scsi_disk *sdkp, static inline void sd_zbc_print_zones(struct scsi_disk *sdkp) {} -static inline blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd, - bool all) +static inline blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd, + unsigned char op, + bool all) { return BLK_STS_TARGET; } diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 1efc69e194f8..39f10ec0dfcf 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -207,13 +207,17 @@ static inline sector_t sd_zbc_zone_sectors(struct scsi_disk *sdkp) } /** - * sd_zbc_setup_reset_cmnd - Prepare a RESET WRITE POINTER scsi command. + * sd_zbc_setup_zone_mgmt_cmnd - Prepare a zone ZBC_OUT command. The operations + * can be RESET WRITE POINTER, OPEN, CLOSE or FINISH. * @cmd: the command to setup - * @all: Reset all zones control. + * @op: Operation to be performed + * @all: All zones control * - * Called from sd_init_command() for a REQ_OP_ZONE_RESET request. + * Called from sd_init_command() for REQ_OP_ZONE_RESET, REQ_OP_ZONE_RESET_ALL, + * REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE or REQ_OP_ZONE_FINISH requests. 
*/ -blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd, bool all) +blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd, + unsigned char op, bool all) { struct request *rq = cmd->request; struct scsi_disk *sdkp = scsi_disk(rq->rq_disk); @@ -234,7 +238,7 @@ blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd, bool all) cmd->cmd_len = 16; memset(cmd->cmnd, 0, cmd->cmd_len); cmd->cmnd[0] = ZBC_OUT; - cmd->cmnd[1] = ZO_RESET_WRITE_POINTER; + cmd->cmnd[1] = op; if (all) cmd->cmnd[14] = 0x1; else @@ -263,14 +267,14 @@ void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes, int result = cmd->result; struct request *rq = cmd->request; - if (req_op(rq) == REQ_OP_ZONE_RESET && + if (op_is_zone_mgmt(req_op(rq)) && result && sshdr->sense_key == ILLEGAL_REQUEST && sshdr->asc == 0x24) { /* - * INVALID FIELD IN CDB error: reset of a conventional - * zone was attempted. Nothing to worry about, so be - * quiet about the error. + * INVALID FIELD IN CDB error: a zone management command was + * attempted on a conventional zone. Nothing to worry about, + * so be quiet about the error. */ rq->rq_flags |= RQF_QUIET; } From 400b6a7b13a3fd71cff087139ce45dd1e5fff444 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 6 Nov 2019 06:35:18 -0800 Subject: [PATCH 2/7] nvme: Add hardware monitoring support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit nvme devices report temperature information in the controller information (for limits) and in the smart log. Currently, the only means to retrieve this information is the nvme command line interface, which requires super-user privileges. At the same time, it would be desirable to be able to use NVMe temperature information for thermal control. This patch adds support to read NVMe temperatures from the kernel using the hwmon API and adds temperature zones for NVMe drives. 
The thermal subsystem can use this information to set thermal policies, and userspace can access it using libsensors and/or the "sensors" command. Example output from the "sensors" command: nvme0-pci-0100 Adapter: PCI adapter Composite: +39.0°C (high = +85.0°C, crit = +85.0°C) Sensor 1: +39.0°C Sensor 2: +41.0°C Reviewed-by: Christoph Hellwig Signed-off-by: Guenter Roeck Signed-off-by: Keith Busch --- drivers/nvme/host/Kconfig | 10 ++ drivers/nvme/host/Makefile | 1 + drivers/nvme/host/core.c | 6 ++ drivers/nvme/host/hwmon.c | 181 +++++++++++++++++++++++++++++++++++++ drivers/nvme/host/nvme.h | 8 ++ 5 files changed, 206 insertions(+) create mode 100644 drivers/nvme/host/hwmon.c diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index 2b36f052bfb9..c6439638a419 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -23,6 +23,16 @@ config NVME_MULTIPATH /dev/nvmeXnY device will show up for each NVMe namespaces, even if it is accessible through multiple controllers. +config NVME_HWMON + bool "NVMe hardware monitoring" + depends on (NVME_CORE=y && HWMON=y) || (NVME_CORE=m && HWMON) + help + This provides support for NVMe hardware monitoring. If enabled, + a hardware monitoring device will be created for each NVMe drive + in the system. + + If unsure, say N. 
+ config NVME_FABRICS tristate diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile index 8a4b671c5f0c..fc7b26be692d 100644 --- a/drivers/nvme/host/Makefile +++ b/drivers/nvme/host/Makefile @@ -14,6 +14,7 @@ nvme-core-$(CONFIG_TRACING) += trace.o nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o nvme-core-$(CONFIG_NVM) += lightnvm.o nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o +nvme-core-$(CONFIG_NVME_HWMON) += hwmon.o nvme-y += pci.o diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index b4214e54f2d2..4be64703aa47 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2760,6 +2760,9 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->oncs = le16_to_cpu(id->oncs); ctrl->mtfa = le16_to_cpu(id->mtfa); ctrl->oaes = le32_to_cpu(id->oaes); + ctrl->wctemp = le16_to_cpu(id->wctemp); + ctrl->cctemp = le16_to_cpu(id->cctemp); + atomic_set(&ctrl->abort_limit, id->acl + 1); ctrl->vwc = id->vwc; if (id->mdts) @@ -2859,6 +2862,9 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) if (ret < 0) return ret; + if (!ctrl->identified) + nvme_hwmon_init(ctrl); + ctrl->identified = true; return 0; diff --git a/drivers/nvme/host/hwmon.c b/drivers/nvme/host/hwmon.c new file mode 100644 index 000000000000..5480cbb84f9f --- /dev/null +++ b/drivers/nvme/host/hwmon.c @@ -0,0 +1,181 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NVM Express hardware monitoring support + * Copyright (c) 2019, Guenter Roeck + */ + +#include +#include + +#include "nvme.h" + +struct nvme_hwmon_data { + struct nvme_ctrl *ctrl; + struct nvme_smart_log log; + struct mutex read_lock; +}; + +static int nvme_hwmon_get_smart_log(struct nvme_hwmon_data *data) +{ + int ret; + + ret = nvme_get_log(data->ctrl, NVME_NSID_ALL, NVME_LOG_SMART, 0, + &data->log, sizeof(data->log), 0); + + return ret <= 0 ? 
ret : -EIO; +} + +static int nvme_hwmon_read(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long *val) +{ + struct nvme_hwmon_data *data = dev_get_drvdata(dev); + struct nvme_smart_log *log = &data->log; + int temp; + int err; + + /* + * First handle attributes which don't require us to read + * the smart log. + */ + switch (attr) { + case hwmon_temp_max: + *val = (data->ctrl->wctemp - 273) * 1000; + return 0; + case hwmon_temp_crit: + *val = (data->ctrl->cctemp - 273) * 1000; + return 0; + default: + break; + } + + mutex_lock(&data->read_lock); + err = nvme_hwmon_get_smart_log(data); + if (err) + goto unlock; + + switch (attr) { + case hwmon_temp_input: + if (!channel) + temp = get_unaligned_le16(log->temperature); + else + temp = le16_to_cpu(log->temp_sensor[channel - 1]); + *val = (temp - 273) * 1000; + break; + case hwmon_temp_alarm: + *val = !!(log->critical_warning & NVME_SMART_CRIT_TEMPERATURE); + break; + default: + err = -EOPNOTSUPP; + break; + } +unlock: + mutex_unlock(&data->read_lock); + return err; +} + +static const char * const nvme_hwmon_sensor_names[] = { + "Composite", + "Sensor 1", + "Sensor 2", + "Sensor 3", + "Sensor 4", + "Sensor 5", + "Sensor 6", + "Sensor 7", + "Sensor 8", +}; + +static int nvme_hwmon_read_string(struct device *dev, + enum hwmon_sensor_types type, u32 attr, + int channel, const char **str) +{ + *str = nvme_hwmon_sensor_names[channel]; + return 0; +} + +static umode_t nvme_hwmon_is_visible(const void *_data, + enum hwmon_sensor_types type, + u32 attr, int channel) +{ + const struct nvme_hwmon_data *data = _data; + + switch (attr) { + case hwmon_temp_crit: + if (!channel && data->ctrl->cctemp) + return 0444; + break; + case hwmon_temp_max: + if (!channel && data->ctrl->wctemp) + return 0444; + break; + case hwmon_temp_alarm: + if (!channel) + return 0444; + break; + case hwmon_temp_input: + case hwmon_temp_label: + if (!channel || data->log.temp_sensor[channel - 1]) + return 0444; + break; + 
default: + break; + } + return 0; +} + +static const struct hwmon_channel_info *nvme_hwmon_info[] = { + HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ), + HWMON_CHANNEL_INFO(temp, + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT | + HWMON_T_LABEL | HWMON_T_ALARM, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL), + NULL +}; + +static const struct hwmon_ops nvme_hwmon_ops = { + .is_visible = nvme_hwmon_is_visible, + .read = nvme_hwmon_read, + .read_string = nvme_hwmon_read_string, +}; + +static const struct hwmon_chip_info nvme_hwmon_chip_info = { + .ops = &nvme_hwmon_ops, + .info = nvme_hwmon_info, +}; + +void nvme_hwmon_init(struct nvme_ctrl *ctrl) +{ + struct device *dev = ctrl->dev; + struct nvme_hwmon_data *data; + struct device *hwmon; + int err; + + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); + if (!data) + return; + + data->ctrl = ctrl; + mutex_init(&data->read_lock); + + err = nvme_hwmon_get_smart_log(data); + if (err) { + dev_warn(dev, "Failed to read smart log (error %d)\n", err); + devm_kfree(dev, data); + return; + } + + hwmon = devm_hwmon_device_register_with_info(dev, "nvme", data, + &nvme_hwmon_chip_info, + NULL); + if (IS_ERR(hwmon)) { + dev_warn(dev, "Failed to instantiate hwmon device\n"); + devm_kfree(dev, data); + } +} diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 2637d9dd278f..258534a7bb6c 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -230,6 +230,8 @@ struct nvme_ctrl { u16 kas; u8 npss; u8 apsta; + u16 wctemp; + u16 cctemp; u32 oaes; u32 aen_result; u32 ctratt; @@ -665,4 +667,10 @@ static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) return dev_to_disk(dev)->private_data; } +#ifdef CONFIG_NVME_HWMON +void nvme_hwmon_init(struct nvme_ctrl *ctrl); +#else +static 
inline void nvme_hwmon_init(struct nvme_ctrl *ctrl) { } +#endif + #endif /* _NVME_H */ From 530436c45ef2e446c12538a400e465929a0b3ade Mon Sep 17 00:00:00 2001 From: Eduard Hasenleithner Date: Tue, 12 Nov 2019 21:55:01 +0100 Subject: [PATCH 3/7] nvme: Discard workaround for non-conformant devices Users observe IOMMU-related errors when performing discard on NVMe devices, because non-compliant devices read beyond the end of the DMA-mapped discard ranges. Two different variants of this behavior have been observed: SM22XX controllers round up the read size to a multiple of 512 bytes, and Phison E12 unconditionally reads the maximum discard size allowed by the spec (256 segments or 4kB). Make nvme_setup_discard unconditionally allocate the maximum DSM buffer so the driver DMA maps a memory range that will always succeed. Link: https://bugzilla.kernel.org/show_bug.cgi?id=202665 many Signed-off-by: Eduard Hasenleithner [changelog, use existing define, kernel coding style] Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 4be64703aa47..9696404a6182 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -574,8 +574,14 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, struct nvme_dsm_range *range; struct bio *bio; - range = kmalloc_array(segments, sizeof(*range), - GFP_ATOMIC | __GFP_NOWARN); + /* + * Some devices do not consider the DSM 'Number of Ranges' field when + * determining how much data to DMA. Always allocate memory for maximum + * number of segments to prevent device reading beyond end of buffer. 
+ */ + static const size_t alloc_size = sizeof(*range) * NVME_DSM_MAX_RANGES; + + range = kzalloc(alloc_size, GFP_ATOMIC | __GFP_NOWARN); if (!range) { /* * If we fail allocation our range, fallback to the controller @@ -615,7 +621,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, req->special_vec.bv_page = virt_to_page(range); req->special_vec.bv_offset = offset_in_page(range); - req->special_vec.bv_len = sizeof(*range) * segments; + req->special_vec.bv_len = alloc_size; req->rq_flags |= RQF_SPECIAL_PAYLOAD; return BLK_STS_OK; From 3aeb6a24f171f0000c81117b4b8b2ac2858b2f27 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 16 Nov 2019 18:50:38 +0100 Subject: [PATCH 4/7] nvmet: add another maintainer Sagi and I have been pretty busy lately, and Chaitanya has been helping a lot with target work and agreed to share the load. Signed-off-by: Christoph Hellwig Signed-off-by: Keith Busch --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 55199ef7fa74..85d684a7a900 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11633,6 +11633,7 @@ F: drivers/nvme/target/fcloop.c NVM EXPRESS TARGET DRIVER M: Christoph Hellwig M: Sagi Grimberg +M: Chaitanya Kulkarni L: linux-nvme@lists.infradead.org T: git://git.infradead.org/nvme.git W: http://git.infradead.org/nvme.git From 52deba0f02a98c150677a9c381cc1991a928bcff Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 15 Nov 2019 00:40:00 +0900 Subject: [PATCH 5/7] nvme: hwmon: provide temperature min and max values for each sensor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the NVMe specification, the over temperature threshold and under temperature threshold features shall be implemented for Composite Temperature if a non-zero WCTEMP field value is reported in the Identify Controller data structure. 
The features are also implemented for all implemented temperature sensors (i.e., all Temperature Sensor fields that report a non-zero value). This provides the over temperature threshold and under temperature threshold for each sensor as temperature min and max values of hwmon sysfs attributes. The WCTEMP is already provided as a temperature max value for Composite Temperature, but this change remains compatible with that, because the default value of the over temperature threshold for Composite Temperature is the WCTEMP. Now the alarm attribute for Composite Temperature indicates that one of the temperatures is outside of a temperature threshold, because there is only a single bit in the Critical Warning field that indicates a temperature is outside of a threshold. Example output from the "sensors" command: nvme-pci-0100 Adapter: PCI adapter Composite: +33.9°C (low = -273.1°C, high = +69.8°C) (crit = +79.8°C) Sensor 1: +34.9°C (low = -273.1°C, high = +65261.8°C) Sensor 2: +31.9°C (low = -273.1°C, high = +65261.8°C) Sensor 5: +47.9°C (low = -273.1°C, high = +65261.8°C) This also adds helper macros for kelvin from/to milli Celsius conversion, and replaces the repeated code in hwmon.c. 
Cc: Keith Busch Cc: Jens Axboe Cc: Christoph Hellwig Cc: Sagi Grimberg Cc: Jean Delvare Reviewed-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Akinobu Mita Signed-off-by: Keith Busch --- drivers/nvme/host/hwmon.c | 106 ++++++++++++++++++++++++++++++++------ include/linux/nvme.h | 6 +++ 2 files changed, 96 insertions(+), 16 deletions(-) diff --git a/drivers/nvme/host/hwmon.c b/drivers/nvme/host/hwmon.c index 5480cbb84f9f..97a84b4b45db 100644 --- a/drivers/nvme/host/hwmon.c +++ b/drivers/nvme/host/hwmon.c @@ -9,12 +9,57 @@ #include "nvme.h" +/* These macros should be moved to linux/temperature.h */ +#define MILLICELSIUS_TO_KELVIN(t) DIV_ROUND_CLOSEST((t) + 273150, 1000) +#define KELVIN_TO_MILLICELSIUS(t) ((t) * 1000L - 273150) + struct nvme_hwmon_data { struct nvme_ctrl *ctrl; struct nvme_smart_log log; struct mutex read_lock; }; +static int nvme_get_temp_thresh(struct nvme_ctrl *ctrl, int sensor, bool under, + long *temp) +{ + unsigned int threshold = sensor << NVME_TEMP_THRESH_SELECT_SHIFT; + u32 status; + int ret; + + if (under) + threshold |= NVME_TEMP_THRESH_TYPE_UNDER; + + ret = nvme_get_features(ctrl, NVME_FEAT_TEMP_THRESH, threshold, NULL, 0, + &status); + if (ret > 0) + return -EIO; + if (ret < 0) + return ret; + *temp = KELVIN_TO_MILLICELSIUS(status & NVME_TEMP_THRESH_MASK); + + return 0; +} + +static int nvme_set_temp_thresh(struct nvme_ctrl *ctrl, int sensor, bool under, + long temp) +{ + unsigned int threshold = sensor << NVME_TEMP_THRESH_SELECT_SHIFT; + int ret; + + temp = MILLICELSIUS_TO_KELVIN(temp); + threshold |= clamp_val(temp, 0, NVME_TEMP_THRESH_MASK); + + if (under) + threshold |= NVME_TEMP_THRESH_TYPE_UNDER; + + ret = nvme_set_features(ctrl, NVME_FEAT_TEMP_THRESH, threshold, NULL, 0, + NULL); + if (ret > 0) + return -EIO; + + return ret; +} + static int nvme_hwmon_get_smart_log(struct nvme_hwmon_data *data) { int ret; @@ -39,10 +84,11 @@ static int nvme_hwmon_read(struct device *dev, enum hwmon_sensor_types type, */ switch (attr) { 
case hwmon_temp_max: - *val = (data->ctrl->wctemp - 273) * 1000; - return 0; + return nvme_get_temp_thresh(data->ctrl, channel, false, val); + case hwmon_temp_min: + return nvme_get_temp_thresh(data->ctrl, channel, true, val); case hwmon_temp_crit: - *val = (data->ctrl->cctemp - 273) * 1000; + *val = KELVIN_TO_MILLICELSIUS(data->ctrl->cctemp); return 0; default: break; @@ -59,7 +105,7 @@ static int nvme_hwmon_read(struct device *dev, enum hwmon_sensor_types type, temp = get_unaligned_le16(log->temperature); else temp = le16_to_cpu(log->temp_sensor[channel - 1]); - *val = (temp - 273) * 1000; + *val = KELVIN_TO_MILLICELSIUS(temp); break; case hwmon_temp_alarm: *val = !!(log->critical_warning & NVME_SMART_CRIT_TEMPERATURE); @@ -73,6 +119,23 @@ unlock: return err; } +static int nvme_hwmon_write(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long val) +{ + struct nvme_hwmon_data *data = dev_get_drvdata(dev); + + switch (attr) { + case hwmon_temp_max: + return nvme_set_temp_thresh(data->ctrl, channel, false, val); + case hwmon_temp_min: + return nvme_set_temp_thresh(data->ctrl, channel, true, val); + default: + break; + } + + return -EOPNOTSUPP; +} + static const char * const nvme_hwmon_sensor_names[] = { "Composite", "Sensor 1", @@ -105,8 +168,10 @@ static umode_t nvme_hwmon_is_visible(const void *_data, return 0444; break; case hwmon_temp_max: - if (!channel && data->ctrl->wctemp) - return 0444; + case hwmon_temp_min: + if ((!channel && data->ctrl->wctemp) || + (channel && data->log.temp_sensor[channel - 1])) + return 0644; break; case hwmon_temp_alarm: if (!channel) @@ -126,16 +191,24 @@ static umode_t nvme_hwmon_is_visible(const void *_data, static const struct hwmon_channel_info *nvme_hwmon_info[] = { HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ), HWMON_CHANNEL_INFO(temp, - HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT | - HWMON_T_LABEL | HWMON_T_ALARM, - HWMON_T_INPUT | HWMON_T_LABEL, - HWMON_T_INPUT | HWMON_T_LABEL, - HWMON_T_INPUT | 
HWMON_T_LABEL, - HWMON_T_INPUT | HWMON_T_LABEL, - HWMON_T_INPUT | HWMON_T_LABEL, - HWMON_T_INPUT | HWMON_T_LABEL, - HWMON_T_INPUT | HWMON_T_LABEL, - HWMON_T_INPUT | HWMON_T_LABEL), + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | + HWMON_T_CRIT | HWMON_T_LABEL | HWMON_T_ALARM, + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | + HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | + HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | + HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | + HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | + HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | + HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | + HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | + HWMON_T_LABEL), NULL }; @@ -143,6 +216,7 @@ static const struct hwmon_ops nvme_hwmon_ops = { .is_visible = nvme_hwmon_is_visible, .read = nvme_hwmon_read, .read_string = nvme_hwmon_read_string, + .write = nvme_hwmon_write, }; static const struct hwmon_chip_info nvme_hwmon_chip_info = { diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 3eca4f7d8510..3d5189f46cb1 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -804,6 +804,12 @@ struct nvme_write_zeroes_cmd { /* Features */ +enum { + NVME_TEMP_THRESH_MASK = 0xffff, + NVME_TEMP_THRESH_SELECT_SHIFT = 16, + NVME_TEMP_THRESH_TYPE_UNDER = 0x100000, +}; + struct nvme_feat_auto_pst { __le64 entries[32]; }; From 6c6aa2f26c6813af38d88718881c0307bb9a54c0 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 15 Nov 2019 00:40:01 +0900 Subject: [PATCH 6/7] nvme: hwmon: add quirk to avoid changing temperature threshold This adds a new quirk NVME_QUIRK_NO_TEMP_THRESH_CHANGE to avoid changing the value of the temperature threshold feature for specific devices that show undesirable behavior. 
Guenter reported: "On my Intel NVME drive (SSDPEKKW512G7), writing any minimum limit on the Composite temperature sensor results in a temperature warning, and that warning is sticky until I reset the controller. It doesn't seem to matter which temperature I write; writing -273000 has the same result." The Intel NVMe has the latest firmware version installed, so this isn't a problem that was ever fixed. Reported-by: Guenter Roeck Cc: Keith Busch Cc: Jens Axboe Cc: Christoph Hellwig Cc: Sagi Grimberg Cc: Jean Delvare Reviewed-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Akinobu Mita Signed-off-by: Keith Busch --- drivers/nvme/host/hwmon.c | 6 +++++- drivers/nvme/host/nvme.h | 5 +++++ drivers/nvme/host/pci.c | 3 ++- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/hwmon.c b/drivers/nvme/host/hwmon.c index 97a84b4b45db..a5af21f5d370 100644 --- a/drivers/nvme/host/hwmon.c +++ b/drivers/nvme/host/hwmon.c @@ -170,8 +170,12 @@ static umode_t nvme_hwmon_is_visible(const void *_data, case hwmon_temp_max: case hwmon_temp_min: if ((!channel && data->ctrl->wctemp) || - (channel && data->log.temp_sensor[channel - 1])) + (channel && data->log.temp_sensor[channel - 1])) { + if (data->ctrl->quirks & + NVME_QUIRK_NO_TEMP_THRESH_CHANGE) + return 0444; return 0644; + } break; case hwmon_temp_alarm: if (!channel) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 258534a7bb6c..34ac79c5e309 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -114,6 +114,11 @@ enum nvme_quirks { * Prevent tag overlap between queues */ NVME_QUIRK_SHARED_TAGS = (1 << 13), + + /* + * Don't change the value of the temperature threshold feature + */ + NVME_QUIRK_NO_TEMP_THRESH_CHANGE = (1 << 14), }; /* diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 1b1b0db45567..c40a672e5047 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3065,7 +3065,8 @@ static const struct pci_device_id 
nvme_id_table[] = { NVME_QUIRK_DEALLOCATE_ZEROES, }, { PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS | - NVME_QUIRK_MEDIUM_PRIO_SQ }, + NVME_QUIRK_MEDIUM_PRIO_SQ | + NVME_QUIRK_NO_TEMP_THRESH_CHANGE }, { PCI_VDEVICE(INTEL, 0xf1a6), /* Intel 760p/Pro 7600p */ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ From 03bf73c315edca28f47451913177e14cd040a216 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Mon, 23 Sep 2019 15:09:58 -0500 Subject: [PATCH 7/7] nbd: prevent memory leak In nbd_add_socket, when krealloc succeeds but nsock's allocation fails, the reallocated memory is leaked. The correct behaviour is to assign the reallocated memory to config->socks right after krealloc succeeds. Reviewed-by: Josef Bacik Signed-off-by: Navid Emamdoost Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index ac07e8c94c79..f471142d264d 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1004,14 +1004,15 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, sockfd_put(sock); return -ENOMEM; } + + config->socks = socks; + nsock = kzalloc(sizeof(struct nbd_sock), GFP_KERNEL); if (!nsock) { sockfd_put(sock); return -ENOMEM; } - config->socks = socks; - nsock->fallback_index = -1; nsock->dead = false; mutex_init(&nsock->tx_lock);