linux/drivers/pci/doe.c

594 lines
16 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
/*
* Data Object Exchange
* PCIe r6.0, sec 6.30 DOE
*
* Copyright (C) 2021 Huawei
* Jonathan Cameron <Jonathan.Cameron@huawei.com>
*
* Copyright (C) 2022 Intel Corporation
* Ira Weiny <ira.weiny@intel.com>
*/
#define dev_fmt(fmt) "DOE: " fmt
#include <linux/bitfield.h>
#include <linux/delay.h>
#include <linux/jiffies.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/pci-doe.h>
#include <linux/workqueue.h>
#define PCI_DOE_PROTOCOL_DISCOVERY 0
/* Timeout of 1 second from 6.30.2 Operation, PCI Spec r6.0 */
#define PCI_DOE_TIMEOUT HZ
#define PCI_DOE_POLL_INTERVAL (PCI_DOE_TIMEOUT / 128)
#define PCI_DOE_FLAG_CANCEL 0
#define PCI_DOE_FLAG_DEAD 1
/* Max data object length is 2^18 dwords */
#define PCI_DOE_MAX_LENGTH (1 << 18)
/**
* struct pci_doe_mb - State for a single DOE mailbox
*
* This state is used to manage a single DOE mailbox capability. All fields
* should be considered opaque to the consumers and the structure passed into
* the helpers below after being created by devm_pci_doe_create()
*
* @pdev: PCI device this mailbox belongs to
* @cap_offset: Capability offset
* @prots: Array of protocols supported (encoded as long values)
* @wq: Wait queue for work item
* @work_queue: Queue of pci_doe_work items
* @flags: Bit array of PCI_DOE_FLAG_* flags
*/
struct pci_doe_mb {
struct pci_dev *pdev;
u16 cap_offset;
struct xarray prots;
wait_queue_head_t wq;
struct workqueue_struct *work_queue;
unsigned long flags;
};
static int pci_doe_wait(struct pci_doe_mb *doe_mb, unsigned long timeout)
{
if (wait_event_timeout(doe_mb->wq,
test_bit(PCI_DOE_FLAG_CANCEL, &doe_mb->flags),
timeout))
return -EIO;
return 0;
}
static void pci_doe_write_ctrl(struct pci_doe_mb *doe_mb, u32 val)
{
struct pci_dev *pdev = doe_mb->pdev;
int offset = doe_mb->cap_offset;
pci_write_config_dword(pdev, offset + PCI_DOE_CTRL, val);
}
static int pci_doe_abort(struct pci_doe_mb *doe_mb)
{
struct pci_dev *pdev = doe_mb->pdev;
int offset = doe_mb->cap_offset;
unsigned long timeout_jiffies;
pci_dbg(pdev, "[%x] Issuing Abort\n", offset);
timeout_jiffies = jiffies + PCI_DOE_TIMEOUT;
pci_doe_write_ctrl(doe_mb, PCI_DOE_CTRL_ABORT);
do {
int rc;
u32 val;
rc = pci_doe_wait(doe_mb, PCI_DOE_POLL_INTERVAL);
if (rc)
return rc;
pci_read_config_dword(pdev, offset + PCI_DOE_STATUS, &val);
/* Abort success! */
if (!FIELD_GET(PCI_DOE_STATUS_ERROR, val) &&
!FIELD_GET(PCI_DOE_STATUS_BUSY, val))
return 0;
} while (!time_after(jiffies, timeout_jiffies));
/* Abort has timed out and the MB is dead */
pci_err(pdev, "[%x] ABORT timed out\n", offset);
return -EIO;
}
static int pci_doe_send_req(struct pci_doe_mb *doe_mb,
struct pci_doe_task *task)
{
struct pci_dev *pdev = doe_mb->pdev;
int offset = doe_mb->cap_offset;
size_t length;
u32 val;
int i;
/*
* Check the DOE busy bit is not set. If it is set, this could indicate
* someone other than Linux (e.g. firmware) is using the mailbox. Note
* it is expected that firmware and OS will negotiate access rights via
* an, as yet to be defined, method.
*/
pci_read_config_dword(pdev, offset + PCI_DOE_STATUS, &val);
if (FIELD_GET(PCI_DOE_STATUS_BUSY, val))
return -EBUSY;
if (FIELD_GET(PCI_DOE_STATUS_ERROR, val))
return -EIO;
/* Length is 2 DW of header + length of payload in DW */
cxl/pci: Fix CDAT retrieval on big endian The CDAT exposed in sysfs differs between little endian and big endian arches: On big endian, every 4 bytes are byte-swapped. PCI Configuration Space is little endian (PCI r3.0 sec 6.1). Accessors such as pci_read_config_dword() implicitly swap bytes on big endian. That way, the macros in include/uapi/linux/pci_regs.h work regardless of the arch's endianness. For an example of implicit byte-swapping, see ppc4xx_pciex_read_config(), which calls in_le32(), which uses lwbrx (Load Word Byte-Reverse Indexed). DOE Read/Write Data Mailbox Registers are unlike other registers in Configuration Space in that they contain or receive a 4 byte portion of an opaque byte stream (a "Data Object" per PCIe r6.0 sec 7.9.24.5f). They need to be copied to or from the request/response buffer verbatim. So amend pci_doe_send_req() and pci_doe_recv_resp() to undo the implicit byte-swapping. The CXL_DOE_TABLE_ACCESS_* and PCI_DOE_DATA_OBJECT_DISC_* macros assume implicit byte-swapping. Byte-swap requests after constructing them with those macros and byte-swap responses before parsing them. Change the request and response type to __le32 to avoid sparse warnings. Per a request from Jonathan, replace sizeof(u32) with sizeof(__le32) for consistency. Fixes: c97006046c79 ("cxl/port: Read CDAT table") Tested-by: Ira Weiny <ira.weiny@intel.com> Signed-off-by: Lukas Wunner <lukas@wunner.de> Reviewed-by: Dan Williams <dan.j.williams@intel.com> Cc: stable@vger.kernel.org # v6.0+ Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Link: https://lore.kernel.org/r/3051114102f41d19df3debbee123129118fc5e6d.1678543498.git.lukas@wunner.de Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-03-11 14:40:01 +00:00
length = 2 + task->request_pl_sz / sizeof(__le32);
if (length > PCI_DOE_MAX_LENGTH)
return -EIO;
if (length == PCI_DOE_MAX_LENGTH)
length = 0;
/* Write DOE Header */
val = FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_VID, task->prot.vid) |
FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, task->prot.type);
pci_write_config_dword(pdev, offset + PCI_DOE_WRITE, val);
pci_write_config_dword(pdev, offset + PCI_DOE_WRITE,
FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH,
length));
cxl/pci: Fix CDAT retrieval on big endian The CDAT exposed in sysfs differs between little endian and big endian arches: On big endian, every 4 bytes are byte-swapped. PCI Configuration Space is little endian (PCI r3.0 sec 6.1). Accessors such as pci_read_config_dword() implicitly swap bytes on big endian. That way, the macros in include/uapi/linux/pci_regs.h work regardless of the arch's endianness. For an example of implicit byte-swapping, see ppc4xx_pciex_read_config(), which calls in_le32(), which uses lwbrx (Load Word Byte-Reverse Indexed). DOE Read/Write Data Mailbox Registers are unlike other registers in Configuration Space in that they contain or receive a 4 byte portion of an opaque byte stream (a "Data Object" per PCIe r6.0 sec 7.9.24.5f). They need to be copied to or from the request/response buffer verbatim. So amend pci_doe_send_req() and pci_doe_recv_resp() to undo the implicit byte-swapping. The CXL_DOE_TABLE_ACCESS_* and PCI_DOE_DATA_OBJECT_DISC_* macros assume implicit byte-swapping. Byte-swap requests after constructing them with those macros and byte-swap responses before parsing them. Change the request and response type to __le32 to avoid sparse warnings. Per a request from Jonathan, replace sizeof(u32) with sizeof(__le32) for consistency. Fixes: c97006046c79 ("cxl/port: Read CDAT table") Tested-by: Ira Weiny <ira.weiny@intel.com> Signed-off-by: Lukas Wunner <lukas@wunner.de> Reviewed-by: Dan Williams <dan.j.williams@intel.com> Cc: stable@vger.kernel.org # v6.0+ Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Link: https://lore.kernel.org/r/3051114102f41d19df3debbee123129118fc5e6d.1678543498.git.lukas@wunner.de Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-03-11 14:40:01 +00:00
for (i = 0; i < task->request_pl_sz / sizeof(__le32); i++)
pci_write_config_dword(pdev, offset + PCI_DOE_WRITE,
cxl/pci: Fix CDAT retrieval on big endian The CDAT exposed in sysfs differs between little endian and big endian arches: On big endian, every 4 bytes are byte-swapped. PCI Configuration Space is little endian (PCI r3.0 sec 6.1). Accessors such as pci_read_config_dword() implicitly swap bytes on big endian. That way, the macros in include/uapi/linux/pci_regs.h work regardless of the arch's endianness. For an example of implicit byte-swapping, see ppc4xx_pciex_read_config(), which calls in_le32(), which uses lwbrx (Load Word Byte-Reverse Indexed). DOE Read/Write Data Mailbox Registers are unlike other registers in Configuration Space in that they contain or receive a 4 byte portion of an opaque byte stream (a "Data Object" per PCIe r6.0 sec 7.9.24.5f). They need to be copied to or from the request/response buffer verbatim. So amend pci_doe_send_req() and pci_doe_recv_resp() to undo the implicit byte-swapping. The CXL_DOE_TABLE_ACCESS_* and PCI_DOE_DATA_OBJECT_DISC_* macros assume implicit byte-swapping. Byte-swap requests after constructing them with those macros and byte-swap responses before parsing them. Change the request and response type to __le32 to avoid sparse warnings. Per a request from Jonathan, replace sizeof(u32) with sizeof(__le32) for consistency. Fixes: c97006046c79 ("cxl/port: Read CDAT table") Tested-by: Ira Weiny <ira.weiny@intel.com> Signed-off-by: Lukas Wunner <lukas@wunner.de> Reviewed-by: Dan Williams <dan.j.williams@intel.com> Cc: stable@vger.kernel.org # v6.0+ Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Link: https://lore.kernel.org/r/3051114102f41d19df3debbee123129118fc5e6d.1678543498.git.lukas@wunner.de Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-03-11 14:40:01 +00:00
le32_to_cpu(task->request_pl[i]));
pci_doe_write_ctrl(doe_mb, PCI_DOE_CTRL_GO);
return 0;
}
static bool pci_doe_data_obj_ready(struct pci_doe_mb *doe_mb)
{
struct pci_dev *pdev = doe_mb->pdev;
int offset = doe_mb->cap_offset;
u32 val;
pci_read_config_dword(pdev, offset + PCI_DOE_STATUS, &val);
if (FIELD_GET(PCI_DOE_STATUS_DATA_OBJECT_READY, val))
return true;
return false;
}
static int pci_doe_recv_resp(struct pci_doe_mb *doe_mb, struct pci_doe_task *task)
{
struct pci_dev *pdev = doe_mb->pdev;
int offset = doe_mb->cap_offset;
size_t length, payload_length;
u32 val;
int i;
/* Read the first dword to get the protocol */
pci_read_config_dword(pdev, offset + PCI_DOE_READ, &val);
if ((FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_VID, val) != task->prot.vid) ||
(FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, val) != task->prot.type)) {
dev_err_ratelimited(&pdev->dev, "[%x] expected [VID, Protocol] = [%04x, %02x], got [%04x, %02x]\n",
doe_mb->cap_offset, task->prot.vid, task->prot.type,
FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_VID, val),
FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, val));
return -EIO;
}
pci_write_config_dword(pdev, offset + PCI_DOE_READ, 0);
/* Read the second dword to get the length */
pci_read_config_dword(pdev, offset + PCI_DOE_READ, &val);
pci_write_config_dword(pdev, offset + PCI_DOE_READ, 0);
length = FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH, val);
/* A value of 0x0 indicates max data object length */
if (!length)
length = PCI_DOE_MAX_LENGTH;
if (length < 2)
return -EIO;
/* First 2 dwords have already been read */
length -= 2;
cxl/pci: Fix CDAT retrieval on big endian The CDAT exposed in sysfs differs between little endian and big endian arches: On big endian, every 4 bytes are byte-swapped. PCI Configuration Space is little endian (PCI r3.0 sec 6.1). Accessors such as pci_read_config_dword() implicitly swap bytes on big endian. That way, the macros in include/uapi/linux/pci_regs.h work regardless of the arch's endianness. For an example of implicit byte-swapping, see ppc4xx_pciex_read_config(), which calls in_le32(), which uses lwbrx (Load Word Byte-Reverse Indexed). DOE Read/Write Data Mailbox Registers are unlike other registers in Configuration Space in that they contain or receive a 4 byte portion of an opaque byte stream (a "Data Object" per PCIe r6.0 sec 7.9.24.5f). They need to be copied to or from the request/response buffer verbatim. So amend pci_doe_send_req() and pci_doe_recv_resp() to undo the implicit byte-swapping. The CXL_DOE_TABLE_ACCESS_* and PCI_DOE_DATA_OBJECT_DISC_* macros assume implicit byte-swapping. Byte-swap requests after constructing them with those macros and byte-swap responses before parsing them. Change the request and response type to __le32 to avoid sparse warnings. Per a request from Jonathan, replace sizeof(u32) with sizeof(__le32) for consistency. Fixes: c97006046c79 ("cxl/port: Read CDAT table") Tested-by: Ira Weiny <ira.weiny@intel.com> Signed-off-by: Lukas Wunner <lukas@wunner.de> Reviewed-by: Dan Williams <dan.j.williams@intel.com> Cc: stable@vger.kernel.org # v6.0+ Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Link: https://lore.kernel.org/r/3051114102f41d19df3debbee123129118fc5e6d.1678543498.git.lukas@wunner.de Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-03-11 14:40:01 +00:00
payload_length = min(length, task->response_pl_sz / sizeof(__le32));
/* Read the rest of the response payload */
for (i = 0; i < payload_length; i++) {
cxl/pci: Fix CDAT retrieval on big endian The CDAT exposed in sysfs differs between little endian and big endian arches: On big endian, every 4 bytes are byte-swapped. PCI Configuration Space is little endian (PCI r3.0 sec 6.1). Accessors such as pci_read_config_dword() implicitly swap bytes on big endian. That way, the macros in include/uapi/linux/pci_regs.h work regardless of the arch's endianness. For an example of implicit byte-swapping, see ppc4xx_pciex_read_config(), which calls in_le32(), which uses lwbrx (Load Word Byte-Reverse Indexed). DOE Read/Write Data Mailbox Registers are unlike other registers in Configuration Space in that they contain or receive a 4 byte portion of an opaque byte stream (a "Data Object" per PCIe r6.0 sec 7.9.24.5f). They need to be copied to or from the request/response buffer verbatim. So amend pci_doe_send_req() and pci_doe_recv_resp() to undo the implicit byte-swapping. The CXL_DOE_TABLE_ACCESS_* and PCI_DOE_DATA_OBJECT_DISC_* macros assume implicit byte-swapping. Byte-swap requests after constructing them with those macros and byte-swap responses before parsing them. Change the request and response type to __le32 to avoid sparse warnings. Per a request from Jonathan, replace sizeof(u32) with sizeof(__le32) for consistency. Fixes: c97006046c79 ("cxl/port: Read CDAT table") Tested-by: Ira Weiny <ira.weiny@intel.com> Signed-off-by: Lukas Wunner <lukas@wunner.de> Reviewed-by: Dan Williams <dan.j.williams@intel.com> Cc: stable@vger.kernel.org # v6.0+ Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Link: https://lore.kernel.org/r/3051114102f41d19df3debbee123129118fc5e6d.1678543498.git.lukas@wunner.de Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-03-11 14:40:01 +00:00
pci_read_config_dword(pdev, offset + PCI_DOE_READ, &val);
task->response_pl[i] = cpu_to_le32(val);
/* Prior to the last ack, ensure Data Object Ready */
if (i == (payload_length - 1) && !pci_doe_data_obj_ready(doe_mb))
return -EIO;
pci_write_config_dword(pdev, offset + PCI_DOE_READ, 0);
}
/* Flush excess length */
for (; i < length; i++) {
pci_read_config_dword(pdev, offset + PCI_DOE_READ, &val);
pci_write_config_dword(pdev, offset + PCI_DOE_READ, 0);
}
/* Final error check to pick up on any since Data Object Ready */
pci_read_config_dword(pdev, offset + PCI_DOE_STATUS, &val);
if (FIELD_GET(PCI_DOE_STATUS_ERROR, val))
return -EIO;
cxl/pci: Fix CDAT retrieval on big endian The CDAT exposed in sysfs differs between little endian and big endian arches: On big endian, every 4 bytes are byte-swapped. PCI Configuration Space is little endian (PCI r3.0 sec 6.1). Accessors such as pci_read_config_dword() implicitly swap bytes on big endian. That way, the macros in include/uapi/linux/pci_regs.h work regardless of the arch's endianness. For an example of implicit byte-swapping, see ppc4xx_pciex_read_config(), which calls in_le32(), which uses lwbrx (Load Word Byte-Reverse Indexed). DOE Read/Write Data Mailbox Registers are unlike other registers in Configuration Space in that they contain or receive a 4 byte portion of an opaque byte stream (a "Data Object" per PCIe r6.0 sec 7.9.24.5f). They need to be copied to or from the request/response buffer verbatim. So amend pci_doe_send_req() and pci_doe_recv_resp() to undo the implicit byte-swapping. The CXL_DOE_TABLE_ACCESS_* and PCI_DOE_DATA_OBJECT_DISC_* macros assume implicit byte-swapping. Byte-swap requests after constructing them with those macros and byte-swap responses before parsing them. Change the request and response type to __le32 to avoid sparse warnings. Per a request from Jonathan, replace sizeof(u32) with sizeof(__le32) for consistency. Fixes: c97006046c79 ("cxl/port: Read CDAT table") Tested-by: Ira Weiny <ira.weiny@intel.com> Signed-off-by: Lukas Wunner <lukas@wunner.de> Reviewed-by: Dan Williams <dan.j.williams@intel.com> Cc: stable@vger.kernel.org # v6.0+ Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Link: https://lore.kernel.org/r/3051114102f41d19df3debbee123129118fc5e6d.1678543498.git.lukas@wunner.de Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-03-11 14:40:01 +00:00
return min(length, task->response_pl_sz / sizeof(__le32)) * sizeof(__le32);
}
static void signal_task_complete(struct pci_doe_task *task, int rv)
{
task->rv = rv;
task->complete(task);
destroy_work_on_stack(&task->work);
}
static void signal_task_abort(struct pci_doe_task *task, int rv)
{
struct pci_doe_mb *doe_mb = task->doe_mb;
struct pci_dev *pdev = doe_mb->pdev;
if (pci_doe_abort(doe_mb)) {
/*
* If the device can't process an abort; set the mailbox dead
* - no more submissions
*/
pci_err(pdev, "[%x] Abort failed marking mailbox dead\n",
doe_mb->cap_offset);
set_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags);
}
signal_task_complete(task, rv);
}
static void doe_statemachine_work(struct work_struct *work)
{
struct pci_doe_task *task = container_of(work, struct pci_doe_task,
work);
struct pci_doe_mb *doe_mb = task->doe_mb;
struct pci_dev *pdev = doe_mb->pdev;
int offset = doe_mb->cap_offset;
unsigned long timeout_jiffies;
u32 val;
int rc;
if (test_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags)) {
signal_task_complete(task, -EIO);
return;
}
/* Send request */
rc = pci_doe_send_req(doe_mb, task);
if (rc) {
/*
* The specification does not provide any guidance on how to
* resolve conflicting requests from other entities.
* Furthermore, it is likely that busy will not be detected
* most of the time. Flag any detection of status busy with an
* error.
*/
if (rc == -EBUSY)
dev_err_ratelimited(&pdev->dev, "[%x] busy detected; another entity is sending conflicting requests\n",
offset);
signal_task_abort(task, rc);
return;
}
timeout_jiffies = jiffies + PCI_DOE_TIMEOUT;
/* Poll for response */
retry_resp:
pci_read_config_dword(pdev, offset + PCI_DOE_STATUS, &val);
if (FIELD_GET(PCI_DOE_STATUS_ERROR, val)) {
signal_task_abort(task, -EIO);
return;
}
if (!FIELD_GET(PCI_DOE_STATUS_DATA_OBJECT_READY, val)) {
if (time_after(jiffies, timeout_jiffies)) {
signal_task_abort(task, -EIO);
return;
}
rc = pci_doe_wait(doe_mb, PCI_DOE_POLL_INTERVAL);
if (rc) {
signal_task_abort(task, rc);
return;
}
goto retry_resp;
}
rc = pci_doe_recv_resp(doe_mb, task);
if (rc < 0) {
signal_task_abort(task, rc);
return;
}
signal_task_complete(task, rc);
}
static void pci_doe_task_complete(struct pci_doe_task *task)
{
complete(task->private);
}
static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid,
u8 *protocol)
{
u32 request_pl = FIELD_PREP(PCI_DOE_DATA_OBJECT_DISC_REQ_3_INDEX,
*index);
cxl/pci: Fix CDAT retrieval on big endian The CDAT exposed in sysfs differs between little endian and big endian arches: On big endian, every 4 bytes are byte-swapped. PCI Configuration Space is little endian (PCI r3.0 sec 6.1). Accessors such as pci_read_config_dword() implicitly swap bytes on big endian. That way, the macros in include/uapi/linux/pci_regs.h work regardless of the arch's endianness. For an example of implicit byte-swapping, see ppc4xx_pciex_read_config(), which calls in_le32(), which uses lwbrx (Load Word Byte-Reverse Indexed). DOE Read/Write Data Mailbox Registers are unlike other registers in Configuration Space in that they contain or receive a 4 byte portion of an opaque byte stream (a "Data Object" per PCIe r6.0 sec 7.9.24.5f). They need to be copied to or from the request/response buffer verbatim. So amend pci_doe_send_req() and pci_doe_recv_resp() to undo the implicit byte-swapping. The CXL_DOE_TABLE_ACCESS_* and PCI_DOE_DATA_OBJECT_DISC_* macros assume implicit byte-swapping. Byte-swap requests after constructing them with those macros and byte-swap responses before parsing them. Change the request and response type to __le32 to avoid sparse warnings. Per a request from Jonathan, replace sizeof(u32) with sizeof(__le32) for consistency. Fixes: c97006046c79 ("cxl/port: Read CDAT table") Tested-by: Ira Weiny <ira.weiny@intel.com> Signed-off-by: Lukas Wunner <lukas@wunner.de> Reviewed-by: Dan Williams <dan.j.williams@intel.com> Cc: stable@vger.kernel.org # v6.0+ Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Link: https://lore.kernel.org/r/3051114102f41d19df3debbee123129118fc5e6d.1678543498.git.lukas@wunner.de Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-03-11 14:40:01 +00:00
__le32 request_pl_le = cpu_to_le32(request_pl);
__le32 response_pl_le;
u32 response_pl;
int rc;
rc = pci_doe(doe_mb, PCI_VENDOR_ID_PCI_SIG, PCI_DOE_PROTOCOL_DISCOVERY,
&request_pl_le, sizeof(request_pl_le),
&response_pl_le, sizeof(response_pl_le));
if (rc < 0)
return rc;
if (rc != sizeof(response_pl_le))
return -EIO;
cxl/pci: Fix CDAT retrieval on big endian The CDAT exposed in sysfs differs between little endian and big endian arches: On big endian, every 4 bytes are byte-swapped. PCI Configuration Space is little endian (PCI r3.0 sec 6.1). Accessors such as pci_read_config_dword() implicitly swap bytes on big endian. That way, the macros in include/uapi/linux/pci_regs.h work regardless of the arch's endianness. For an example of implicit byte-swapping, see ppc4xx_pciex_read_config(), which calls in_le32(), which uses lwbrx (Load Word Byte-Reverse Indexed). DOE Read/Write Data Mailbox Registers are unlike other registers in Configuration Space in that they contain or receive a 4 byte portion of an opaque byte stream (a "Data Object" per PCIe r6.0 sec 7.9.24.5f). They need to be copied to or from the request/response buffer verbatim. So amend pci_doe_send_req() and pci_doe_recv_resp() to undo the implicit byte-swapping. The CXL_DOE_TABLE_ACCESS_* and PCI_DOE_DATA_OBJECT_DISC_* macros assume implicit byte-swapping. Byte-swap requests after constructing them with those macros and byte-swap responses before parsing them. Change the request and response type to __le32 to avoid sparse warnings. Per a request from Jonathan, replace sizeof(u32) with sizeof(__le32) for consistency. Fixes: c97006046c79 ("cxl/port: Read CDAT table") Tested-by: Ira Weiny <ira.weiny@intel.com> Signed-off-by: Lukas Wunner <lukas@wunner.de> Reviewed-by: Dan Williams <dan.j.williams@intel.com> Cc: stable@vger.kernel.org # v6.0+ Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Link: https://lore.kernel.org/r/3051114102f41d19df3debbee123129118fc5e6d.1678543498.git.lukas@wunner.de Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-03-11 14:40:01 +00:00
response_pl = le32_to_cpu(response_pl_le);
*vid = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_VID, response_pl);
*protocol = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL,
response_pl);
*index = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_NEXT_INDEX,
response_pl);
return 0;
}
static void *pci_doe_xa_prot_entry(u16 vid, u8 prot)
{
return xa_mk_value((vid << 8) | prot);
}
static int pci_doe_cache_protocols(struct pci_doe_mb *doe_mb)
{
u8 index = 0;
u8 xa_idx = 0;
do {
int rc;
u16 vid;
u8 prot;
rc = pci_doe_discovery(doe_mb, &index, &vid, &prot);
if (rc)
return rc;
pci_dbg(doe_mb->pdev,
"[%x] Found protocol %d vid: %x prot: %x\n",
doe_mb->cap_offset, xa_idx, vid, prot);
rc = xa_insert(&doe_mb->prots, xa_idx++,
pci_doe_xa_prot_entry(vid, prot), GFP_KERNEL);
if (rc)
return rc;
} while (index);
return 0;
}
static void pci_doe_xa_destroy(void *mb)
{
struct pci_doe_mb *doe_mb = mb;
xa_destroy(&doe_mb->prots);
}
static void pci_doe_destroy_workqueue(void *mb)
{
struct pci_doe_mb *doe_mb = mb;
destroy_workqueue(doe_mb->work_queue);
}
static void pci_doe_flush_mb(void *mb)
{
struct pci_doe_mb *doe_mb = mb;
/* Stop all pending work items from starting */
set_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags);
/* Cancel an in progress work item, if necessary */
set_bit(PCI_DOE_FLAG_CANCEL, &doe_mb->flags);
wake_up(&doe_mb->wq);
/* Flush all work items */
flush_workqueue(doe_mb->work_queue);
}
/**
* pcim_doe_create_mb() - Create a DOE mailbox object
*
* @pdev: PCI device to create the DOE mailbox for
* @cap_offset: Offset of the DOE mailbox
*
* Create a single mailbox object to manage the mailbox protocol at the
* cap_offset specified.
*
* RETURNS: created mailbox object on success
* ERR_PTR(-errno) on failure
*/
struct pci_doe_mb *pcim_doe_create_mb(struct pci_dev *pdev, u16 cap_offset)
{
struct pci_doe_mb *doe_mb;
struct device *dev = &pdev->dev;
int rc;
doe_mb = devm_kzalloc(dev, sizeof(*doe_mb), GFP_KERNEL);
if (!doe_mb)
return ERR_PTR(-ENOMEM);
doe_mb->pdev = pdev;
doe_mb->cap_offset = cap_offset;
init_waitqueue_head(&doe_mb->wq);
xa_init(&doe_mb->prots);
rc = devm_add_action(dev, pci_doe_xa_destroy, doe_mb);
if (rc)
return ERR_PTR(rc);
doe_mb->work_queue = alloc_ordered_workqueue("%s %s DOE [%x]", 0,
dev_driver_string(&pdev->dev),
pci_name(pdev),
doe_mb->cap_offset);
if (!doe_mb->work_queue) {
pci_err(pdev, "[%x] failed to allocate work queue\n",
doe_mb->cap_offset);
return ERR_PTR(-ENOMEM);
}
rc = devm_add_action_or_reset(dev, pci_doe_destroy_workqueue, doe_mb);
if (rc)
return ERR_PTR(rc);
/* Reset the mailbox by issuing an abort */
rc = pci_doe_abort(doe_mb);
if (rc) {
pci_err(pdev, "[%x] failed to reset mailbox with abort command : %d\n",
doe_mb->cap_offset, rc);
return ERR_PTR(rc);
}
/*
* The state machine and the mailbox should be in sync now;
* Set up mailbox flush prior to using the mailbox to query protocols.
*/
rc = devm_add_action_or_reset(dev, pci_doe_flush_mb, doe_mb);
if (rc)
return ERR_PTR(rc);
rc = pci_doe_cache_protocols(doe_mb);
if (rc) {
pci_err(pdev, "[%x] failed to cache protocols : %d\n",
doe_mb->cap_offset, rc);
return ERR_PTR(rc);
}
return doe_mb;
}
EXPORT_SYMBOL_GPL(pcim_doe_create_mb);
/**
* pci_doe_supports_prot() - Return if the DOE instance supports the given
* protocol
* @doe_mb: DOE mailbox capability to query
* @vid: Protocol Vendor ID
* @type: Protocol type
*
* RETURNS: True if the DOE mailbox supports the protocol specified
*/
bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type)
{
unsigned long index;
void *entry;
/* The discovery protocol must always be supported */
if (vid == PCI_VENDOR_ID_PCI_SIG && type == PCI_DOE_PROTOCOL_DISCOVERY)
return true;
xa_for_each(&doe_mb->prots, index, entry)
if (entry == pci_doe_xa_prot_entry(vid, type))
return true;
return false;
}
EXPORT_SYMBOL_GPL(pci_doe_supports_prot);
/**
* pci_doe_submit_task() - Submit a task to be processed by the state machine
*
* @doe_mb: DOE mailbox capability to submit to
* @task: task to be queued
*
* Submit a DOE task (request/response) to the DOE mailbox to be processed.
* Returns upon queueing the task object. If the queue is full this function
* will sleep until there is room in the queue.
*
* task->complete will be called when the state machine is done processing this
* task.
*
PCI/DOE: Silence WARN splat with CONFIG_DEBUG_OBJECTS=y Gregory Price reports a WARN splat with CONFIG_DEBUG_OBJECTS=y upon CXL probing because pci_doe_submit_task() invokes INIT_WORK() instead of INIT_WORK_ONSTACK() for a work_struct that was allocated on the stack. All callers of pci_doe_submit_task() allocate the work_struct on the stack, so replace INIT_WORK() with INIT_WORK_ONSTACK() as a backportable short-term fix. The long-term fix implemented by a subsequent commit is to move to a synchronous API which allocates the work_struct internally in the DOE library. Stacktrace for posterity: WARNING: CPU: 0 PID: 23 at lib/debugobjects.c:545 __debug_object_init.cold+0x18/0x183 CPU: 0 PID: 23 Comm: kworker/u2:1 Not tainted 6.1.0-0.rc1.20221019gitaae703b02f92.17.fc38.x86_64 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Call Trace: pci_doe_submit_task+0x5d/0xd0 pci_doe_discovery+0xb4/0x100 pcim_doe_create_mb+0x219/0x290 cxl_pci_probe+0x192/0x430 local_pci_probe+0x41/0x80 pci_device_probe+0xb3/0x220 really_probe+0xde/0x380 __driver_probe_device+0x78/0x170 driver_probe_device+0x1f/0x90 __driver_attach_async_helper+0x5c/0xe0 async_run_entry_fn+0x30/0x130 process_one_work+0x294/0x5b0 Fixes: 9d24322e887b ("PCI/DOE: Add DOE mailbox support functions") Link: https://lore.kernel.org/linux-cxl/Y1bOniJliOFszvIK@memverge.com/ Reported-by: Gregory Price <gregory.price@memverge.com> Tested-by: Ira Weiny <ira.weiny@intel.com> Tested-by: Gregory Price <gregory.price@memverge.com> Signed-off-by: Lukas Wunner <lukas@wunner.de> Reviewed-by: Ira Weiny <ira.weiny@intel.com> Reviewed-by: Dan Williams <dan.j.williams@intel.com> Reviewed-by: Gregory Price <gregory.price@memverge.com> Cc: stable@vger.kernel.org # v6.0+ Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Acked-by: Bjorn Helgaas <bhelgaas@google.com> Link: https://lore.kernel.org/r/67a9117f463ecdb38a2dbca6a20391ce2f1e7a06.1678543498.git.lukas@wunner.de Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-03-11 14:40:05 +00:00
* @task must be allocated on the stack.
*
* Excess data will be discarded.
*
* RETURNS: 0 when task has been successfully queued, -ERRNO on error
*/
int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task)
{
if (!pci_doe_supports_prot(doe_mb, task->prot.vid, task->prot.type))
return -EINVAL;
/*
* DOE requests must be a whole number of DW and the response needs to
* be big enough for at least 1 DW
*/
cxl/pci: Fix CDAT retrieval on big endian The CDAT exposed in sysfs differs between little endian and big endian arches: On big endian, every 4 bytes are byte-swapped. PCI Configuration Space is little endian (PCI r3.0 sec 6.1). Accessors such as pci_read_config_dword() implicitly swap bytes on big endian. That way, the macros in include/uapi/linux/pci_regs.h work regardless of the arch's endianness. For an example of implicit byte-swapping, see ppc4xx_pciex_read_config(), which calls in_le32(), which uses lwbrx (Load Word Byte-Reverse Indexed). DOE Read/Write Data Mailbox Registers are unlike other registers in Configuration Space in that they contain or receive a 4 byte portion of an opaque byte stream (a "Data Object" per PCIe r6.0 sec 7.9.24.5f). They need to be copied to or from the request/response buffer verbatim. So amend pci_doe_send_req() and pci_doe_recv_resp() to undo the implicit byte-swapping. The CXL_DOE_TABLE_ACCESS_* and PCI_DOE_DATA_OBJECT_DISC_* macros assume implicit byte-swapping. Byte-swap requests after constructing them with those macros and byte-swap responses before parsing them. Change the request and response type to __le32 to avoid sparse warnings. Per a request from Jonathan, replace sizeof(u32) with sizeof(__le32) for consistency. Fixes: c97006046c79 ("cxl/port: Read CDAT table") Tested-by: Ira Weiny <ira.weiny@intel.com> Signed-off-by: Lukas Wunner <lukas@wunner.de> Reviewed-by: Dan Williams <dan.j.williams@intel.com> Cc: stable@vger.kernel.org # v6.0+ Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Link: https://lore.kernel.org/r/3051114102f41d19df3debbee123129118fc5e6d.1678543498.git.lukas@wunner.de Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-03-11 14:40:01 +00:00
if (task->request_pl_sz % sizeof(__le32) ||
task->response_pl_sz < sizeof(__le32))
return -EINVAL;
if (test_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags))
return -EIO;
task->doe_mb = doe_mb;
PCI/DOE: Silence WARN splat with CONFIG_DEBUG_OBJECTS=y Gregory Price reports a WARN splat with CONFIG_DEBUG_OBJECTS=y upon CXL probing because pci_doe_submit_task() invokes INIT_WORK() instead of INIT_WORK_ONSTACK() for a work_struct that was allocated on the stack. All callers of pci_doe_submit_task() allocate the work_struct on the stack, so replace INIT_WORK() with INIT_WORK_ONSTACK() as a backportable short-term fix. The long-term fix implemented by a subsequent commit is to move to a synchronous API which allocates the work_struct internally in the DOE library. Stacktrace for posterity: WARNING: CPU: 0 PID: 23 at lib/debugobjects.c:545 __debug_object_init.cold+0x18/0x183 CPU: 0 PID: 23 Comm: kworker/u2:1 Not tainted 6.1.0-0.rc1.20221019gitaae703b02f92.17.fc38.x86_64 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Call Trace: pci_doe_submit_task+0x5d/0xd0 pci_doe_discovery+0xb4/0x100 pcim_doe_create_mb+0x219/0x290 cxl_pci_probe+0x192/0x430 local_pci_probe+0x41/0x80 pci_device_probe+0xb3/0x220 really_probe+0xde/0x380 __driver_probe_device+0x78/0x170 driver_probe_device+0x1f/0x90 __driver_attach_async_helper+0x5c/0xe0 async_run_entry_fn+0x30/0x130 process_one_work+0x294/0x5b0 Fixes: 9d24322e887b ("PCI/DOE: Add DOE mailbox support functions") Link: https://lore.kernel.org/linux-cxl/Y1bOniJliOFszvIK@memverge.com/ Reported-by: Gregory Price <gregory.price@memverge.com> Tested-by: Ira Weiny <ira.weiny@intel.com> Tested-by: Gregory Price <gregory.price@memverge.com> Signed-off-by: Lukas Wunner <lukas@wunner.de> Reviewed-by: Ira Weiny <ira.weiny@intel.com> Reviewed-by: Dan Williams <dan.j.williams@intel.com> Reviewed-by: Gregory Price <gregory.price@memverge.com> Cc: stable@vger.kernel.org # v6.0+ Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Acked-by: Bjorn Helgaas <bhelgaas@google.com> Link: https://lore.kernel.org/r/67a9117f463ecdb38a2dbca6a20391ce2f1e7a06.1678543498.git.lukas@wunner.de Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-03-11 14:40:05 +00:00
INIT_WORK_ONSTACK(&task->work, doe_statemachine_work);
queue_work(doe_mb->work_queue, &task->work);
return 0;
}
EXPORT_SYMBOL_GPL(pci_doe_submit_task);
/**
* pci_doe() - Perform Data Object Exchange
*
* @doe_mb: DOE Mailbox
* @vendor: Vendor ID
* @type: Data Object Type
* @request: Request payload
* @request_sz: Size of request payload (bytes)
* @response: Response payload
* @response_sz: Size of response payload (bytes)
*
* Submit @request to @doe_mb and store the @response.
* The DOE exchange is performed synchronously and may therefore sleep.
*
* Payloads are treated as opaque byte streams which are transmitted verbatim,
* without byte-swapping. If payloads contain little-endian register values,
* the caller is responsible for conversion with cpu_to_le32() / le32_to_cpu().
*
* RETURNS: Length of received response or negative errno.
* Received data in excess of @response_sz is discarded.
* The length may be smaller than @response_sz and the caller
* is responsible for checking that.
*/
int pci_doe(struct pci_doe_mb *doe_mb, u16 vendor, u8 type,
const void *request, size_t request_sz,
void *response, size_t response_sz)
{
DECLARE_COMPLETION_ONSTACK(c);
struct pci_doe_task task = {
.prot.vid = vendor,
.prot.type = type,
.request_pl = request,
.request_pl_sz = request_sz,
.response_pl = response,
.response_pl_sz = response_sz,
.complete = pci_doe_task_complete,
.private = &c,
};
int rc;
rc = pci_doe_submit_task(doe_mb, &task);
if (rc)
return rc;
wait_for_completion(&c);
return task.rv;
}
EXPORT_SYMBOL_GPL(pci_doe);