ipmi_watchdog: restore settings when the BMC is reset
If the BMC gets reset, it will return 0x80 response errors. In less than a week:

    # grep "Error 80 on cmd 22" /var/log/kernel | wc -l
    378681

In this case, it is probably a good idea to restore the IPMI settings.

Signed-off-by: Corey Minyard <cminyard@mvista.com>
Tested-by: Arkadiusz Miśkiewicz <a.miskiewicz@gmail.com>
Reported-by: Arkadiusz Miśkiewicz <a.miskiewicz@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									ff05b6f7ae
								
							
						
					
					
						commit
						b75d91f7ca
					
				| @ -139,6 +139,8 @@ | |||||||
| #define IPMI_WDOG_SET_TIMER		0x24 | #define IPMI_WDOG_SET_TIMER		0x24 | ||||||
| #define IPMI_WDOG_GET_TIMER		0x25 | #define IPMI_WDOG_GET_TIMER		0x25 | ||||||
| 
 | 
 | ||||||
|  | #define IPMI_WDOG_TIMER_NOT_INIT_RESP	0x80 | ||||||
|  | 
 | ||||||
| /* These are here until the real ones get into the watchdog.h interface. */ | /* These are here until the real ones get into the watchdog.h interface. */ | ||||||
| #ifndef WDIOC_GETTIMEOUT | #ifndef WDIOC_GETTIMEOUT | ||||||
| #define	WDIOC_GETTIMEOUT        _IOW(WATCHDOG_IOCTL_BASE, 20, int) | #define	WDIOC_GETTIMEOUT        _IOW(WATCHDOG_IOCTL_BASE, 20, int) | ||||||
| @ -596,6 +598,7 @@ static int ipmi_heartbeat(void) | |||||||
| 	struct kernel_ipmi_msg            msg; | 	struct kernel_ipmi_msg            msg; | ||||||
| 	int                               rv; | 	int                               rv; | ||||||
| 	struct ipmi_system_interface_addr addr; | 	struct ipmi_system_interface_addr addr; | ||||||
|  | 	int				  timeout_retries = 0; | ||||||
| 
 | 
 | ||||||
| 	if (ipmi_ignore_heartbeat) | 	if (ipmi_ignore_heartbeat) | ||||||
| 		return 0; | 		return 0; | ||||||
| @ -616,6 +619,7 @@ static int ipmi_heartbeat(void) | |||||||
| 
 | 
 | ||||||
| 	mutex_lock(&heartbeat_lock); | 	mutex_lock(&heartbeat_lock); | ||||||
| 
 | 
 | ||||||
|  | restart: | ||||||
| 	atomic_set(&heartbeat_tofree, 2); | 	atomic_set(&heartbeat_tofree, 2); | ||||||
| 
 | 
 | ||||||
| 	/*
 | 	/*
 | ||||||
| @ -653,7 +657,33 @@ static int ipmi_heartbeat(void) | |||||||
| 	/* Wait for the heartbeat to be sent. */ | 	/* Wait for the heartbeat to be sent. */ | ||||||
| 	wait_for_completion(&heartbeat_wait); | 	wait_for_completion(&heartbeat_wait); | ||||||
| 
 | 
 | ||||||
| 	if (heartbeat_recv_msg.msg.data[0] != 0) { | 	if (heartbeat_recv_msg.msg.data[0] == IPMI_WDOG_TIMER_NOT_INIT_RESP)  { | ||||||
|  | 		timeout_retries++; | ||||||
|  | 		if (timeout_retries > 3) { | ||||||
|  | 			printk(KERN_ERR PFX ": Unable to restore the IPMI" | ||||||
|  | 			       " watchdog's settings, giving up.\n"); | ||||||
|  | 			rv = -EIO; | ||||||
|  | 			goto out_unlock; | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		/*
 | ||||||
|  | 		 * The timer was not initialized, that means the BMC was | ||||||
|  | 		 * probably reset and lost the watchdog information.  Attempt | ||||||
|  | 		 * to restore the timer's info.  Note that we still hold | ||||||
|  | 		 * the heartbeat lock, to keep a heartbeat from happening | ||||||
|  | 		 * in this process, so must say no heartbeat to avoid a | ||||||
|  | 		 * deadlock on this mutex. | ||||||
|  | 		 */ | ||||||
|  | 		rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB); | ||||||
|  | 		if (rv) { | ||||||
|  | 			printk(KERN_ERR PFX ": Unable to send the command to" | ||||||
|  | 			       " set the watchdog's settings, giving up.\n"); | ||||||
|  | 			goto out_unlock; | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		/* We might need a new heartbeat, so do it now */ | ||||||
|  | 		goto restart; | ||||||
|  | 	} else if (heartbeat_recv_msg.msg.data[0] != 0) { | ||||||
| 		/*
 | 		/*
 | ||||||
| 		 * Got an error in the heartbeat response.  It was already | 		 * Got an error in the heartbeat response.  It was already | ||||||
| 		 * reported in ipmi_wdog_msg_handler, but we should return | 		 * reported in ipmi_wdog_msg_handler, but we should return | ||||||
| @ -662,6 +692,7 @@ static int ipmi_heartbeat(void) | |||||||
| 		rv = -EINVAL; | 		rv = -EINVAL; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | out_unlock: | ||||||
| 	mutex_unlock(&heartbeat_lock); | 	mutex_unlock(&heartbeat_lock); | ||||||
| 
 | 
 | ||||||
| 	return rv; | 	return rv; | ||||||
| @ -922,11 +953,15 @@ static struct miscdevice ipmi_wdog_miscdev = { | |||||||
| static void ipmi_wdog_msg_handler(struct ipmi_recv_msg *msg, | static void ipmi_wdog_msg_handler(struct ipmi_recv_msg *msg, | ||||||
| 				  void                 *handler_data) | 				  void                 *handler_data) | ||||||
| { | { | ||||||
| 	if (msg->msg.data[0] != 0) { | 	if (msg->msg.cmd == IPMI_WDOG_RESET_TIMER && | ||||||
|  | 			msg->msg.data[0] == IPMI_WDOG_TIMER_NOT_INIT_RESP) | ||||||
|  | 		printk(KERN_INFO PFX "response: The IPMI controller appears" | ||||||
|  | 		       " to have been reset, will attempt to reinitialize" | ||||||
|  | 		       " the watchdog timer\n"); | ||||||
|  | 	else if (msg->msg.data[0] != 0) | ||||||
| 		printk(KERN_ERR PFX "response: Error %x on cmd %x\n", | 		printk(KERN_ERR PFX "response: Error %x on cmd %x\n", | ||||||
| 		       msg->msg.data[0], | 		       msg->msg.data[0], | ||||||
| 		       msg->msg.cmd); | 		       msg->msg.cmd); | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
| 	ipmi_free_recv_msg(msg); | 	ipmi_free_recv_msg(msg); | ||||||
| } | } | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user