linux/drivers/char/ipmi/ipmi_kcs_sm.c
Zhang Yuchen c608966f3f ipmi: fix msg stack when IPMI is disconnected
If you continue to access and send messages at a high frequency (once
every 55s) when the IPMI is disconnected, messages will accumulate in
intf->[hp_]xmit_msg. If it lasts long enough, it takes up a lot of
memory.

The reason is that if IPMI is disconnected, each message will be set to
IDLE after it returns to HOSED through IDLE->ERROR0->HOSED. The next
message goes through the same process when it comes in. This process
needs to wait for IBF_TIMEOUT * (MAX_ERROR_RETRIES + 1) = 55s.

Each message takes 55S to destroy. This results in a continuous increase
in memory.

I find that if I wait 5 seconds after the first message fails, the
status changes to ERROR0 in smi_timeout(). The next message will return
the error code IPMI_NOT_IN_MY_STATE_ERR directly without wait.

This is more in line with our needs.

So instead of setting each message state to IDLE after it reaches the
state HOSED, set state to ERROR0.

After testing, the problem has been solved, no matter how many
consecutive sends, will not cause continuous memory growth. It also
returns to normal immediately after the IPMI is restored.

In addition, the HOSED state should also count as invalid. So the HOSED
is removed from the invalid judgment in start_kcs_transaction().

The verification operations are as follows:

1. Use BPF to record the ipmi_alloc/free_smi_msg().

  $ bpftrace -e 'kretprobe:ipmi_alloc_recv_msg {printf("alloc
      %p\n",retval);} kprobe:free_recv_msg {printf("free  %p\n",arg0)}'

2. Exec `date; time for x in $(seq 1 2); do ipmitool mc info; done`.
3. Record the output of `time` and when free all msgs.

Before:

`time` takes 120s, This is because `ipmitool mc info` send 4 msgs and
waits only 15 seconds for each message. Last msg is free after 440s.

  $ bpftrace -e 'kretprobe:ipmi_alloc_recv_msg {printf("alloc
      %p\n",retval);} kprobe:free_recv_msg {printf("free  %p\n",arg0)}'
  Oct 05 11:40:55 Attaching 2 probes...
  Oct 05 11:41:12 alloc 0xffff9558a05f0c00
  Oct 05 11:41:27 alloc 0xffff9558a05f1a00
  Oct 05 11:41:42 alloc 0xffff9558a05f0000
  Oct 05 11:41:57 alloc 0xffff9558a05f1400
  Oct 05 11:42:07 free  0xffff9558a05f0c00
  Oct 05 11:42:07 alloc 0xffff9558a05f7000
  Oct 05 11:42:22 alloc 0xffff9558a05f2a00
  Oct 05 11:42:37 alloc 0xffff9558a05f5a00
  Oct 05 11:42:52 alloc 0xffff9558a05f3a00
  Oct 05 11:43:02 free  0xffff9558a05f1a00
  Oct 05 11:43:57 free  0xffff9558a05f0000
  Oct 05 11:44:52 free  0xffff9558a05f1400
  Oct 05 11:45:47 free  0xffff9558a05f7000
  Oct 05 11:46:42 free  0xffff9558a05f2a00
  Oct 05 11:47:37 free  0xffff9558a05f5a00
  Oct 05 11:48:32 free  0xffff9558a05f3a00

  $ root@dc00-pb003-t106-n078:~# date;time for x in $(seq 1 2); do
  ipmitool mc info; done

  Wed Oct  5 11:41:12 CST 2022
  No data available
  Get Device ID command failed
  No data available
  No data available
  No valid response received
  Get Device ID command failed: Unspecified error
  No data available
  Get Device ID command failed
  No data available
  No data available
  No valid response received
  No data available
  Get Device ID command failed

  real        1m55.052s
  user        0m0.001s
  sys        0m0.001s

After:

`time` takes 55s, all msgs is returned and free after 55s.

  $ bpftrace -e 'kretprobe:ipmi_alloc_recv_msg {printf("alloc
      %p\n",retval);} kprobe:free_recv_msg {printf("free  %p\n",arg0)}'

  Oct 07 16:30:35 Attaching 2 probes...
  Oct 07 16:30:45 alloc 0xffff955943aa9800
  Oct 07 16:31:00 alloc 0xffff955943aacc00
  Oct 07 16:31:15 alloc 0xffff955943aa8c00
  Oct 07 16:31:30 alloc 0xffff955943aaf600
  Oct 07 16:31:40 free  0xffff955943aa9800
  Oct 07 16:31:40 free  0xffff955943aacc00
  Oct 07 16:31:40 free  0xffff955943aa8c00
  Oct 07 16:31:40 free  0xffff955943aaf600
  Oct 07 16:31:40 alloc 0xffff9558ec8f7e00
  Oct 07 16:31:40 free  0xffff9558ec8f7e00
  Oct 07 16:31:40 alloc 0xffff9558ec8f7800
  Oct 07 16:31:40 free  0xffff9558ec8f7800
  Oct 07 16:31:40 alloc 0xffff9558ec8f7e00
  Oct 07 16:31:40 free  0xffff9558ec8f7e00
  Oct 07 16:31:40 alloc 0xffff9558ec8f7800
  Oct 07 16:31:40 free  0xffff9558ec8f7800

  root@dc00-pb003-t106-n078:~# date;time for x in $(seq 1 2); do
  ipmitool mc info; done
  Fri Oct  7 16:30:45 CST 2022
  No data available
  Get Device ID command failed
  No data available
  No data available
  No valid response received
  Get Device ID command failed: Unspecified error
  Get Device ID command failed: 0xd5 Command not supported in present state
  Get Device ID command failed: Command not supported in present state

  real        0m55.038s
  user        0m0.001s
  sys        0m0.001s

Signed-off-by: Zhang Yuchen <zhangyuchen.lcr@bytedance.com>
Message-Id: <20221009091811.40240-2-zhangyuchen.lcr@bytedance.com>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
2022-10-17 09:51:27 -05:00

543 lines
13 KiB
C

// SPDX-License-Identifier: GPL-2.0+
/*
* ipmi_kcs_sm.c
*
* State machine for handling IPMI KCS interfaces.
*
* Author: MontaVista Software, Inc.
* Corey Minyard <minyard@mvista.com>
* source@mvista.com
*
* Copyright 2002 MontaVista Software Inc.
*/
/*
* This state machine is taken from the state machine in the IPMI spec,
* pretty much verbatim. If you have questions about the states, see
* that document.
*/
#define DEBUG /* So dev_dbg() is always available. */
#include <linux/kernel.h> /* For printk. */
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/string.h>
#include <linux/jiffies.h>
#include <linux/ipmi_msgdefs.h> /* for completion codes */
#include "ipmi_si_sm.h"
/* kcs_debug is a bit-field
* KCS_DEBUG_ENABLE - turned on for now
* KCS_DEBUG_MSG - commands and their responses
* KCS_DEBUG_STATES - state machine
*/
#define KCS_DEBUG_STATES 4
#define KCS_DEBUG_MSG 2
#define KCS_DEBUG_ENABLE 1
static int kcs_debug;
module_param(kcs_debug, int, 0644);
MODULE_PARM_DESC(kcs_debug, "debug bitmask, 1=enable, 2=messages, 4=states");
/* The states the KCS driver may be in. */
enum kcs_states {
/* The KCS interface is currently doing nothing. */
KCS_IDLE,
/*
* We are starting an operation. The data is in the output
* buffer, but nothing has been done to the interface yet. This
* was added to the state machine in the spec to wait for the
* initial IBF.
*/
KCS_START_OP,
/* We have written a write cmd to the interface. */
KCS_WAIT_WRITE_START,
/* We are writing bytes to the interface. */
KCS_WAIT_WRITE,
/*
* We have written the write end cmd to the interface, and
* still need to write the last byte.
*/
KCS_WAIT_WRITE_END,
/* We are waiting to read data from the interface. */
KCS_WAIT_READ,
/*
* State to transition to the error handler, this was added to
* the state machine in the spec to be sure IBF was there.
*/
KCS_ERROR0,
/*
* First stage error handler, wait for the interface to
* respond.
*/
KCS_ERROR1,
/*
* The abort cmd has been written, wait for the interface to
* respond.
*/
KCS_ERROR2,
/*
* We wrote some data to the interface, wait for it to switch
* to read mode.
*/
KCS_ERROR3,
/* The hardware failed to follow the state machine. */
KCS_HOSED
};
#define MAX_KCS_READ_SIZE IPMI_MAX_MSG_LENGTH
#define MAX_KCS_WRITE_SIZE IPMI_MAX_MSG_LENGTH
/* Timeouts in microseconds. */
#define IBF_RETRY_TIMEOUT (5*USEC_PER_SEC)
#define OBF_RETRY_TIMEOUT (5*USEC_PER_SEC)
#define MAX_ERROR_RETRIES 10
#define ERROR0_OBF_WAIT_JIFFIES (2*HZ)
struct si_sm_data {
enum kcs_states state;
struct si_sm_io *io;
unsigned char write_data[MAX_KCS_WRITE_SIZE];
int write_pos;
int write_count;
int orig_write_count;
unsigned char read_data[MAX_KCS_READ_SIZE];
int read_pos;
int truncated;
unsigned int error_retries;
long ibf_timeout;
long obf_timeout;
unsigned long error0_timeout;
};
static unsigned int init_kcs_data_with_state(struct si_sm_data *kcs,
struct si_sm_io *io, enum kcs_states state)
{
kcs->state = state;
kcs->io = io;
kcs->write_pos = 0;
kcs->write_count = 0;
kcs->orig_write_count = 0;
kcs->read_pos = 0;
kcs->error_retries = 0;
kcs->truncated = 0;
kcs->ibf_timeout = IBF_RETRY_TIMEOUT;
kcs->obf_timeout = OBF_RETRY_TIMEOUT;
/* Reserve 2 I/O bytes. */
return 2;
}
static unsigned int init_kcs_data(struct si_sm_data *kcs,
struct si_sm_io *io)
{
return init_kcs_data_with_state(kcs, io, KCS_IDLE);
}
static inline unsigned char read_status(struct si_sm_data *kcs)
{
return kcs->io->inputb(kcs->io, 1);
}
static inline unsigned char read_data(struct si_sm_data *kcs)
{
return kcs->io->inputb(kcs->io, 0);
}
static inline void write_cmd(struct si_sm_data *kcs, unsigned char data)
{
kcs->io->outputb(kcs->io, 1, data);
}
static inline void write_data(struct si_sm_data *kcs, unsigned char data)
{
kcs->io->outputb(kcs->io, 0, data);
}
/* Control codes. */
#define KCS_GET_STATUS_ABORT 0x60
#define KCS_WRITE_START 0x61
#define KCS_WRITE_END 0x62
#define KCS_READ_BYTE 0x68
/* Status bits. */
#define GET_STATUS_STATE(status) (((status) >> 6) & 0x03)
#define KCS_IDLE_STATE 0
#define KCS_READ_STATE 1
#define KCS_WRITE_STATE 2
#define KCS_ERROR_STATE 3
#define GET_STATUS_ATN(status) ((status) & 0x04)
#define GET_STATUS_IBF(status) ((status) & 0x02)
#define GET_STATUS_OBF(status) ((status) & 0x01)
static inline void write_next_byte(struct si_sm_data *kcs)
{
write_data(kcs, kcs->write_data[kcs->write_pos]);
(kcs->write_pos)++;
(kcs->write_count)--;
}
static inline void start_error_recovery(struct si_sm_data *kcs, char *reason)
{
(kcs->error_retries)++;
if (kcs->error_retries > MAX_ERROR_RETRIES) {
if (kcs_debug & KCS_DEBUG_ENABLE)
dev_dbg(kcs->io->dev, "ipmi_kcs_sm: kcs hosed: %s\n",
reason);
kcs->state = KCS_HOSED;
} else {
kcs->error0_timeout = jiffies + ERROR0_OBF_WAIT_JIFFIES;
kcs->state = KCS_ERROR0;
}
}
static inline void read_next_byte(struct si_sm_data *kcs)
{
if (kcs->read_pos >= MAX_KCS_READ_SIZE) {
/* Throw the data away and mark it truncated. */
read_data(kcs);
kcs->truncated = 1;
} else {
kcs->read_data[kcs->read_pos] = read_data(kcs);
(kcs->read_pos)++;
}
write_data(kcs, KCS_READ_BYTE);
}
static inline int check_ibf(struct si_sm_data *kcs, unsigned char status,
long time)
{
if (GET_STATUS_IBF(status)) {
kcs->ibf_timeout -= time;
if (kcs->ibf_timeout < 0) {
start_error_recovery(kcs, "IBF not ready in time");
kcs->ibf_timeout = IBF_RETRY_TIMEOUT;
return 1;
}
return 0;
}
kcs->ibf_timeout = IBF_RETRY_TIMEOUT;
return 1;
}
static inline int check_obf(struct si_sm_data *kcs, unsigned char status,
long time)
{
if (!GET_STATUS_OBF(status)) {
kcs->obf_timeout -= time;
if (kcs->obf_timeout < 0) {
kcs->obf_timeout = OBF_RETRY_TIMEOUT;
start_error_recovery(kcs, "OBF not ready in time");
return 1;
}
return 0;
}
kcs->obf_timeout = OBF_RETRY_TIMEOUT;
return 1;
}
static void clear_obf(struct si_sm_data *kcs, unsigned char status)
{
if (GET_STATUS_OBF(status))
read_data(kcs);
}
static void restart_kcs_transaction(struct si_sm_data *kcs)
{
kcs->write_count = kcs->orig_write_count;
kcs->write_pos = 0;
kcs->read_pos = 0;
kcs->state = KCS_WAIT_WRITE_START;
kcs->ibf_timeout = IBF_RETRY_TIMEOUT;
kcs->obf_timeout = OBF_RETRY_TIMEOUT;
write_cmd(kcs, KCS_WRITE_START);
}
static int start_kcs_transaction(struct si_sm_data *kcs, unsigned char *data,
unsigned int size)
{
unsigned int i;
if (size < 2)
return IPMI_REQ_LEN_INVALID_ERR;
if (size > MAX_KCS_WRITE_SIZE)
return IPMI_REQ_LEN_EXCEEDED_ERR;
if (kcs->state != KCS_IDLE) {
dev_warn(kcs->io->dev, "KCS in invalid state %d\n", kcs->state);
return IPMI_NOT_IN_MY_STATE_ERR;
}
if (kcs_debug & KCS_DEBUG_MSG) {
dev_dbg(kcs->io->dev, "%s -", __func__);
for (i = 0; i < size; i++)
pr_cont(" %02x", data[i]);
pr_cont("\n");
}
kcs->error_retries = 0;
memcpy(kcs->write_data, data, size);
kcs->write_count = size;
kcs->orig_write_count = size;
kcs->write_pos = 0;
kcs->read_pos = 0;
kcs->state = KCS_START_OP;
kcs->ibf_timeout = IBF_RETRY_TIMEOUT;
kcs->obf_timeout = OBF_RETRY_TIMEOUT;
return 0;
}
static int get_kcs_result(struct si_sm_data *kcs, unsigned char *data,
unsigned int length)
{
if (length < kcs->read_pos) {
kcs->read_pos = length;
kcs->truncated = 1;
}
memcpy(data, kcs->read_data, kcs->read_pos);
if ((length >= 3) && (kcs->read_pos < 3)) {
/* Guarantee that we return at least 3 bytes, with an
error in the third byte if it is too short. */
data[2] = IPMI_ERR_UNSPECIFIED;
kcs->read_pos = 3;
}
if (kcs->truncated) {
/*
* Report a truncated error. We might overwrite
* another error, but that's too bad, the user needs
* to know it was truncated.
*/
data[2] = IPMI_ERR_MSG_TRUNCATED;
kcs->truncated = 0;
}
return kcs->read_pos;
}
/*
* This implements the state machine defined in the IPMI manual, see
* that for details on how this works. Divide that flowchart into
* sections delimited by "Wait for IBF" and this will become clear.
*/
static enum si_sm_result kcs_event(struct si_sm_data *kcs, long time)
{
unsigned char status;
unsigned char state;
status = read_status(kcs);
if (kcs_debug & KCS_DEBUG_STATES)
dev_dbg(kcs->io->dev,
"KCS: State = %d, %x\n", kcs->state, status);
/* All states wait for ibf, so just do it here. */
if (!check_ibf(kcs, status, time))
return SI_SM_CALL_WITH_DELAY;
/* Just about everything looks at the KCS state, so grab that, too. */
state = GET_STATUS_STATE(status);
switch (kcs->state) {
case KCS_IDLE:
/* If there's and interrupt source, turn it off. */
clear_obf(kcs, status);
if (GET_STATUS_ATN(status))
return SI_SM_ATTN;
else
return SI_SM_IDLE;
case KCS_START_OP:
if (state != KCS_IDLE_STATE) {
start_error_recovery(kcs,
"State machine not idle at start");
break;
}
clear_obf(kcs, status);
write_cmd(kcs, KCS_WRITE_START);
kcs->state = KCS_WAIT_WRITE_START;
break;
case KCS_WAIT_WRITE_START:
if (state != KCS_WRITE_STATE) {
start_error_recovery(
kcs,
"Not in write state at write start");
break;
}
read_data(kcs);
if (kcs->write_count == 1) {
write_cmd(kcs, KCS_WRITE_END);
kcs->state = KCS_WAIT_WRITE_END;
} else {
write_next_byte(kcs);
kcs->state = KCS_WAIT_WRITE;
}
break;
case KCS_WAIT_WRITE:
if (state != KCS_WRITE_STATE) {
start_error_recovery(kcs,
"Not in write state for write");
break;
}
clear_obf(kcs, status);
if (kcs->write_count == 1) {
write_cmd(kcs, KCS_WRITE_END);
kcs->state = KCS_WAIT_WRITE_END;
} else {
write_next_byte(kcs);
}
break;
case KCS_WAIT_WRITE_END:
if (state != KCS_WRITE_STATE) {
start_error_recovery(kcs,
"Not in write state"
" for write end");
break;
}
clear_obf(kcs, status);
write_next_byte(kcs);
kcs->state = KCS_WAIT_READ;
break;
case KCS_WAIT_READ:
if ((state != KCS_READ_STATE) && (state != KCS_IDLE_STATE)) {
start_error_recovery(
kcs,
"Not in read or idle in read state");
break;
}
if (state == KCS_READ_STATE) {
if (!check_obf(kcs, status, time))
return SI_SM_CALL_WITH_DELAY;
read_next_byte(kcs);
} else {
/*
* We don't implement this exactly like the state
* machine in the spec. Some broken hardware
* does not write the final dummy byte to the
* read register. Thus obf will never go high
* here. We just go straight to idle, and we
* handle clearing out obf in idle state if it
* happens to come in.
*/
clear_obf(kcs, status);
kcs->orig_write_count = 0;
kcs->state = KCS_IDLE;
return SI_SM_TRANSACTION_COMPLETE;
}
break;
case KCS_ERROR0:
clear_obf(kcs, status);
status = read_status(kcs);
if (GET_STATUS_OBF(status))
/* controller isn't responding */
if (time_before(jiffies, kcs->error0_timeout))
return SI_SM_CALL_WITH_TICK_DELAY;
write_cmd(kcs, KCS_GET_STATUS_ABORT);
kcs->state = KCS_ERROR1;
break;
case KCS_ERROR1:
clear_obf(kcs, status);
write_data(kcs, 0);
kcs->state = KCS_ERROR2;
break;
case KCS_ERROR2:
if (state != KCS_READ_STATE) {
start_error_recovery(kcs,
"Not in read state for error2");
break;
}
if (!check_obf(kcs, status, time))
return SI_SM_CALL_WITH_DELAY;
clear_obf(kcs, status);
write_data(kcs, KCS_READ_BYTE);
kcs->state = KCS_ERROR3;
break;
case KCS_ERROR3:
if (state != KCS_IDLE_STATE) {
start_error_recovery(kcs,
"Not in idle state for error3");
break;
}
if (!check_obf(kcs, status, time))
return SI_SM_CALL_WITH_DELAY;
clear_obf(kcs, status);
if (kcs->orig_write_count) {
restart_kcs_transaction(kcs);
} else {
kcs->state = KCS_IDLE;
return SI_SM_TRANSACTION_COMPLETE;
}
break;
case KCS_HOSED:
break;
}
if (kcs->state == KCS_HOSED) {
init_kcs_data_with_state(kcs, kcs->io, KCS_ERROR0);
return SI_SM_HOSED;
}
return SI_SM_CALL_WITHOUT_DELAY;
}
static int kcs_size(void)
{
return sizeof(struct si_sm_data);
}
static int kcs_detect(struct si_sm_data *kcs)
{
/*
* It's impossible for the KCS status register to be all 1's,
* (assuming a properly functioning, self-initialized BMC)
* but that's what you get from reading a bogus address, so we
* test that first.
*/
if (read_status(kcs) == 0xff)
return 1;
return 0;
}
static void kcs_cleanup(struct si_sm_data *kcs)
{
}
const struct si_sm_handlers kcs_smi_handlers = {
.init_data = init_kcs_data,
.start_transaction = start_kcs_transaction,
.get_result = get_kcs_result,
.event = kcs_event,
.detect = kcs_detect,
.cleanup = kcs_cleanup,
.size = kcs_size,
};