staging/lustre/lnet: peer aliveness status and NI status
A couple of changes to improve aliveness detection: - When LNet receives a message, it can determine that the peer which sent the message is alive - When LNet receives a message from a remote network, it can determine that the router is alive and that the NI status on the router is UP. Signed-off-by: Liang Zhen <liang.zhen@intel.com> Reviewed-on: http://review.whamcloud.com/12453 Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-5485 Reviewed-by: James Simmons <uja.ornl@gmail.com> Reviewed-by: Isaac Huang <he.huang@intel.com> Signed-off-by: Oleg Drokin <oleg.drokin@intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
parent
62e4941354
commit
af3fa7c71b
@ -636,6 +636,7 @@ lnet_net2rnethash(__u32 net)
|
||||
}
|
||||
|
||||
extern lnd_t the_lolnd;
|
||||
extern int avoid_asym_router_failure;
|
||||
|
||||
int lnet_cpt_of_nid_locked(lnet_nid_t nid);
|
||||
int lnet_cpt_of_nid(lnet_nid_t nid);
|
||||
@ -851,6 +852,7 @@ int lnet_peer_buffer_credits(lnet_ni_t *ni);
|
||||
|
||||
int lnet_router_checker_start(void);
|
||||
void lnet_router_checker_stop(void);
|
||||
void lnet_router_ni_update_locked(lnet_peer_t *gw, __u32 net);
|
||||
void lnet_swap_pinginfo(lnet_ping_info_t *info);
|
||||
|
||||
int lnet_ping_target_init(void);
|
||||
@ -870,4 +872,12 @@ void lnet_peer_tables_destroy(void);
|
||||
int lnet_peer_tables_create(void);
|
||||
void lnet_debug_peer(lnet_nid_t nid);
|
||||
|
||||
/*
 * Record that peer @lp has just been seen alive.
 *
 * Stamps both lp_last_query and lp_last_alive with the current time in
 * seconds.  If the peer is not currently flagged alive (lp_alive is zero),
 * lnet_notify_locked() is called with the fresh timestamp.
 *
 * NOTE(review): the "_locked" suffix of the callee suggests the caller must
 * already hold the relevant LNet lock, and the (0, 1) arguments presumably
 * mean "do not notify the LND" / "peer is alive" -- confirm against the
 * definition of lnet_notify_locked().
 */
static inline void lnet_peer_set_alive(lnet_peer_t *lp)
{
	/* same store order as a chained assignment: query first, then alive */
	lp->lp_last_query = get_seconds();
	lp->lp_last_alive = lp->lp_last_query;

	/* already known to be alive: nothing to report */
	if (lp->lp_alive)
		return;

	lnet_notify_locked(lp, 0, 1, lp->lp_last_alive);
}
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -1877,6 +1877,19 @@ lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid,
|
||||
goto drop;
|
||||
}
|
||||
|
||||
if (lnet_isrouter(msg->msg_rxpeer)) {
|
||||
lnet_peer_set_alive(msg->msg_rxpeer);
|
||||
if (avoid_asym_router_failure &&
|
||||
LNET_NIDNET(src_nid) != LNET_NIDNET(from_nid)) {
|
||||
/* received a remote message from router, update
|
||||
* remote NI status on this router.
|
||||
* NB: multi-hop routed message will be ignored.
|
||||
*/
|
||||
lnet_router_ni_update_locked(msg->msg_rxpeer,
|
||||
LNET_NIDNET(src_nid));
|
||||
}
|
||||
}
|
||||
|
||||
lnet_msg_commit(msg, cpt);
|
||||
|
||||
if (!for_me) {
|
||||
|
@ -84,7 +84,7 @@ static int check_routers_before_use;
|
||||
module_param(check_routers_before_use, int, 0444);
|
||||
MODULE_PARM_DESC(check_routers_before_use, "Assume routers are down and ping them before use");
|
||||
|
||||
static int avoid_asym_router_failure = 1;
|
||||
int avoid_asym_router_failure = 1;
|
||||
module_param(avoid_asym_router_failure, int, 0644);
|
||||
MODULE_PARM_DESC(avoid_asym_router_failure, "Avoid asymmetrical router failures (0 to disable)");
|
||||
|
||||
@ -783,6 +783,21 @@ lnet_wait_known_routerstate(void)
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
lnet_router_ni_update_locked(lnet_peer_t *gw, __u32 net)
|
||||
{
|
||||
lnet_route_t *rte;
|
||||
|
||||
if ((gw->lp_ping_feats & LNET_PING_FEAT_NI_STATUS) != 0) {
|
||||
list_for_each_entry(rte, &gw->lp_routes, lr_gwlist) {
|
||||
if (rte->lr_net == net) {
|
||||
rte->lr_downis = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
lnet_update_ni_status_locked(void)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user