2006-01-02 18:04:38 +00:00
|
|
|
/*
|
|
|
|
* net/tipc/link.h: Include file for TIPC link code
|
2007-02-09 14:25:21 +00:00
|
|
|
*
|
2014-08-22 22:09:07 +00:00
|
|
|
* Copyright (c) 1995-2006, 2013-2014, Ericsson AB
|
2011-01-07 16:43:40 +00:00
|
|
|
* Copyright (c) 2004-2005, 2010-2011, Wind River Systems
|
2006-01-02 18:04:38 +00:00
|
|
|
* All rights reserved.
|
|
|
|
*
|
2006-01-11 12:30:43 +00:00
|
|
|
* Redistribution and use in source and binary forms, with or without
|
2006-01-02 18:04:38 +00:00
|
|
|
* modification, are permitted provided that the following conditions are met:
|
|
|
|
*
|
2006-01-11 12:30:43 +00:00
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* 3. Neither the names of the copyright holders nor the names of its
|
|
|
|
* contributors may be used to endorse or promote products derived from
|
|
|
|
* this software without specific prior written permission.
|
2006-01-02 18:04:38 +00:00
|
|
|
*
|
2006-01-11 12:30:43 +00:00
|
|
|
* Alternatively, this software may be distributed under the terms of the
|
|
|
|
* GNU General Public License ("GPL") version 2 as published by the Free
|
|
|
|
* Software Foundation.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
2006-01-02 18:04:38 +00:00
|
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _TIPC_LINK_H
|
|
|
|
#define _TIPC_LINK_H
|
|
|
|
|
2014-11-20 09:29:07 +00:00
|
|
|
#include <net/genetlink.h>
|
2006-01-02 18:04:38 +00:00
|
|
|
#include "msg.h"
|
|
|
|
#include "node.h"
|
|
|
|
|
2015-01-09 07:27:01 +00:00
|
|
|
/* TIPC-specific error codes
|
|
|
|
*/
|
|
|
|
#define ELINKCONG EAGAIN /* link congestion <=> resource unavailable */
|
|
|
|
|
2014-02-13 22:29:08 +00:00
|
|
|
/* Out-of-range value for link sequence numbers
|
2011-10-24 19:26:24 +00:00
|
|
|
*/
|
|
|
|
#define INVALID_LINK_SEQ 0x10000
|
|
|
|
|
2015-07-30 22:24:21 +00:00
|
|
|
/* Link FSM events:
|
2014-02-13 22:29:08 +00:00
|
|
|
*/
|
tipc: clean up definitions and usage of link flags
The status flag LINK_STOPPED is not needed any more, since the
mechanism for delayed deletion of links has been removed.
Likewise, LINK_STARTED and LINK_START_EVT are unnecessary,
because we can just as well start the link timer directly from
inside tipc_link_create().
We eliminate these flags in this commit.
Instead of the above flags, we now introduce three new link modes,
TIPC_LINK_OPEN, TIPC_LINK_BLOCKED and TIPC_LINK_TUNNEL. The values
indicate whether, and in the case of TIPC_LINK_TUNNEL, which, messages
the link is allowed to receive in this state. TIPC_LINK_BLOCKED also
blocks timer-driven protocol messages to be sent out, and any change
to the link FSM. Since the modes are mutually exclusive, we convert
them to state values, and rename the 'flags' field in struct tipc_link
to 'exec_mode'.
Finally, we move the #defines for link FSM states and events from link.h
into enums inside the file link.c, which is the real usage scope of
these definitions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-16 20:54:25 +00:00
|
|
|
enum {
|
2015-07-30 22:24:21 +00:00
|
|
|
LINK_ESTABLISH_EVT = 0xec1ab1e,
|
|
|
|
LINK_PEER_RESET_EVT = 0x9eed0e,
|
|
|
|
LINK_FAILURE_EVT = 0xfa110e,
|
|
|
|
LINK_RESET_EVT = 0x10ca1d0e,
|
|
|
|
LINK_FAILOVER_BEGIN_EVT = 0xfa110bee,
|
|
|
|
LINK_FAILOVER_END_EVT = 0xfa110ede,
|
|
|
|
LINK_SYNCH_BEGIN_EVT = 0xc1ccbee,
|
|
|
|
LINK_SYNCH_END_EVT = 0xc1ccede
|
tipc: clean up definitions and usage of link flags
The status flag LINK_STOPPED is not needed any more, since the
mechanism for delayed deletion of links has been removed.
Likewise, LINK_STARTED and LINK_START_EVT are unnecessary,
because we can just as well start the link timer directly from
inside tipc_link_create().
We eliminate these flags in this commit.
Instead of the above flags, we now introduce three new link modes,
TIPC_LINK_OPEN, TIPC_LINK_BLOCKED and TIPC_LINK_TUNNEL. The values
indicate whether, and in the case of TIPC_LINK_TUNNEL, which, messages
the link is allowed to receive in this state. TIPC_LINK_BLOCKED also
blocks timer-driven protocol messages to be sent out, and any change
to the link FSM. Since the modes are mutually exclusive, we convert
them to state values, and rename the 'flags' field in struct tipc_link
to 'exec_mode'.
Finally, we move the #defines for link FSM states and events from link.h
into enums inside the file link.c, which is the real usage scope of
these definitions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-16 20:54:25 +00:00
|
|
|
};
|
2014-02-13 22:29:08 +00:00
|
|
|
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-16 20:54:31 +00:00
|
|
|
/* Events returned from link at packet reception or at timeout
|
tipc: improve link FSM implementation
The link FSM implementation is currently unnecessarily complex.
It sometimes checks for conditional state outside the FSM data
before deciding next state, and often performs actions directly
inside the FSM logics.
In this commit, we create a second, simpler FSM implementation,
that as far as possible acts only on states and events that it is
strictly defined for, and postpone any actions until it is finished
with its decisions. It also returns an event flag field and an a
buffer queue which may potentially contain a protocol message to
be sent by the caller.
Unfortunately, we cannot yet make the FSM "clean", in the sense
that its decisions are only based on FSM state and event, and that
state changes happen only here. That will have to wait until the
activate/reset logics has been cleaned up in a future commit.
We also rename the link states as follows:
WORKING_WORKING -> TIPC_LINK_WORKING
WORKING_UNKNOWN -> TIPC_LINK_PROBING
RESET_UNKNOWN -> TIPC_LINK_RESETTING
RESET_RESET -> TIPC_LINK_ESTABLISHING
The existing FSM function, link_state_event(), is still needed for
a while, so we redesign it to make use of the new function.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-16 20:54:27 +00:00
|
|
|
*/
|
|
|
|
enum {
|
|
|
|
TIPC_LINK_UP_EVT = 1,
|
|
|
|
TIPC_LINK_DOWN_EVT = (1 << 1)
|
|
|
|
};
|
|
|
|
|
2014-02-13 22:29:08 +00:00
|
|
|
/* Starting value for maximum packet size negotiation on unicast links
|
2006-01-02 18:04:38 +00:00
|
|
|
* (unless bearer MTU is less)
|
|
|
|
*/
|
|
|
|
#define MAX_PKT_DEFAULT 1500
|
|
|
|
|
2012-07-11 13:40:43 +00:00
|
|
|
struct tipc_stats {
|
|
|
|
u32 sent_info; /* used in counting # sent packets */
|
|
|
|
u32 recv_info; /* used in counting # recv'd packets */
|
|
|
|
u32 sent_states;
|
|
|
|
u32 recv_states;
|
|
|
|
u32 sent_probes;
|
|
|
|
u32 recv_probes;
|
|
|
|
u32 sent_nacks;
|
|
|
|
u32 recv_nacks;
|
|
|
|
u32 sent_acks;
|
|
|
|
u32 sent_bundled;
|
|
|
|
u32 sent_bundles;
|
|
|
|
u32 recv_bundled;
|
|
|
|
u32 recv_bundles;
|
|
|
|
u32 retransmitted;
|
|
|
|
u32 sent_fragmented;
|
|
|
|
u32 sent_fragments;
|
|
|
|
u32 recv_fragmented;
|
|
|
|
u32 recv_fragments;
|
|
|
|
u32 link_congs; /* # port sends blocked by congestion */
|
|
|
|
u32 deferred_recv;
|
|
|
|
u32 duplicates;
|
|
|
|
u32 max_queue_sz; /* send queue size high water mark */
|
|
|
|
u32 accu_queue_sz; /* used for send queue size profiling */
|
|
|
|
u32 queue_sz_counts; /* used for send queue size profiling */
|
|
|
|
u32 msg_length_counts; /* used for message length profiling */
|
|
|
|
u32 msg_lengths_total; /* used for message length profiling */
|
|
|
|
u32 msg_length_profile[7]; /* used for msg. length profiling */
|
|
|
|
};
|
|
|
|
|
2006-01-02 18:04:38 +00:00
|
|
|
/**
|
2011-12-30 01:58:42 +00:00
|
|
|
* struct tipc_link - TIPC link data structure
|
2006-01-02 18:04:38 +00:00
|
|
|
* @addr: network address of link's peer node
|
|
|
|
* @name: link name character string
|
|
|
|
* @media_addr: media address to use when sending messages over link
|
|
|
|
* @timer: link timer
|
|
|
|
* @owner: pointer to peer node
|
tipc: add reference count to struct tipc_link
When a bearer is disabled, all pertaining links will be reset and
deleted. However, if there is a second active link towards a killed
link's destination, the delete has to be postponed until the failover
is finished. During this interval, we currently put the link in zombie
mode, i.e., we take it out of traffic, delete its timer, but leave it
attached to the owner node structure until all missing packets have
been received. When this is done, we detach the link from its node
and delete it, assuming that the synchronous timer deletion that was
initiated earlier in a different thread has finished.
This is unsafe, as the failover may finish before del_timer_sync()
has returned in the other thread.
We fix this by adding an atomic reference counter of type kref in
struct tipc_link. The counter keeps track of the references kept
to the link by the owner node and the timer. We then do a conditional
delete, based on the reference counter, both after the failover has
been finished and when the timer expires, if applicable. Whoever
comes last, will actually delete the link. This approach also implies
that we can make the deletion of the timer asynchronous.
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-03 13:59:17 +00:00
|
|
|
* @refcnt: reference counter for permanent references (owner node & timer)
|
2006-01-02 18:04:38 +00:00
|
|
|
* @peer_session: link session # being used by peer end of link
|
|
|
|
* @peer_bearer_id: bearer id used by link's peer endpoint
|
tipc: decouple the relationship between bearer and link
Currently on both paths of message transmission and reception, the
read lock of tipc_net_lock must be held before bearer is accessed,
while the write lock of tipc_net_lock has to be taken before bearer
is configured. Although it can ensure that bearer is always valid on
the two data paths, link and bearer is closely bound together.
So as the part of effort of removing tipc_net_lock, the locking
policy of bearer protection will be adjusted as below: on the two
data paths, RCU is used, and on the configuration path of bearer,
RTNL lock is applied.
Now RCU just covers the path of message reception. To make it possible
to protect the path of message transmission with RCU, link should not
use its stored bearer pointer to access bearer, but it should use the
bearer identity of its attached bearer as index to get bearer instance
from bearer_list array, which can help us decouple the relationship
between bearer and link. As a result, bearer on the path of message
transmission can be safely protected by RCU when we access bearer_list
array within RCU lock protection.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Tested-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 02:55:46 +00:00
|
|
|
* @bearer_id: local bearer id used by link
|
2007-02-09 14:25:21 +00:00
|
|
|
* @tolerance: minimum link continuity loss needed to reset link [in ms]
|
2015-05-14 14:46:15 +00:00
|
|
|
* @keepalive_intv: link keepalive timer interval
|
2006-01-02 18:04:38 +00:00
|
|
|
* @abort_limit: # of unacknowledged continuity probes needed to reset link
|
|
|
|
* @state: current state of link FSM
|
2015-05-14 14:46:15 +00:00
|
|
|
* @silent_intv_cnt: # of timer intervals without any reception from peer
|
2006-01-02 18:04:38 +00:00
|
|
|
* @proto_msg: template for control messages generated by link
|
|
|
|
* @pmsg: convenience pointer to "proto_msg" field
|
|
|
|
* @priority: current link priority
|
tipc: decouple the relationship between bearer and link
Currently on both paths of message transmission and reception, the
read lock of tipc_net_lock must be held before bearer is accessed,
while the write lock of tipc_net_lock has to be taken before bearer
is configured. Although it can ensure that bearer is always valid on
the two data paths, link and bearer is closely bound together.
So as the part of effort of removing tipc_net_lock, the locking
policy of bearer protection will be adjusted as below: on the two
data paths, RCU is used, and on the configuration path of bearer,
RTNL lock is applied.
Now RCU just covers the path of message reception. To make it possible
to protect the path of message transmission with RCU, link should not
use its stored bearer pointer to access bearer, but it should use the
bearer identity of its attached bearer as index to get bearer instance
from bearer_list array, which can help us decouple the relationship
between bearer and link. As a result, bearer on the path of message
transmission can be safely protected by RCU when we access bearer_list
array within RCU lock protection.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Tested-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 02:55:46 +00:00
|
|
|
* @net_plane: current link network plane ('A' through 'H')
|
tipc: introduce starvation free send algorithm
Currently, we only use a single counter; the length of the backlog
queue, to determine whether a message should be accepted to the queue
or not. Each time a message is being sent, the queue length is compared
to a threshold value for the message's importance priority. If the queue
length is beyond this threshold, the message is rejected. This algorithm
implies a risk of starvation of low importance senders during very high
load, because it may take a long time before the backlog queue has
decreased enough to accept a lower level message.
We now eliminate this risk by introducing a counter for each importance
priority. When a message is sent, we check only the queue level for that
particular message's priority. If that is ok, the message can be added
to the backlog, irrespective of the queue level for other priorities.
This way, each level is guaranteed a certain portion of the total
bandwidth, and any risk of starvation is eliminated.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-25 16:07:24 +00:00
|
|
|
* @backlog_limit: backlog queue congestion thresholds (indexed by importance)
|
2006-01-02 18:04:38 +00:00
|
|
|
* @exp_msg_count: # of tunnelled messages expected during link changeover
|
2015-05-14 14:46:15 +00:00
|
|
|
* @reset_rcv_checkpt: seq # of last acknowledged message at time of link reset
|
tipc: simplify link mtu negotiation
When a link is being established, the two endpoints advertise their
respective interface MTU in the transmitted RESET and ACTIVATE messages.
If there is any difference, the lower of the two MTUs will be selected
for use by both endpoints.
However, as a remnant of earlier attempts to introduce TIPC level
routing. there also exists an MTU discovery mechanism. If an intermediate
node has a lower MTU than the two endpoints, they will discover this
through a bisectional approach, and finally adopt this MTU for common use.
Since there is no TIPC level routing, and probably never will be,
this mechanism doesn't make any sense, and only serves to make the
link level protocol unecessarily complex.
In this commit, we eliminate the MTU discovery algorithm,and fall back
to the simple MTU advertising approach. This change is fully backwards
compatible.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-04-02 13:33:02 +00:00
|
|
|
* @mtu: current maximum packet size for this link
|
|
|
|
* @advertised_mtu: advertised own mtu when link is being established
|
2015-03-13 20:08:10 +00:00
|
|
|
* @transmitq: queue for sent, non-acked messages
|
|
|
|
* @backlogq: queue for messages waiting to be sent
|
2015-05-14 14:46:15 +00:00
|
|
|
* @snt_nxt: next sequence number to use for outbound messages
|
2006-01-02 18:04:38 +00:00
|
|
|
* @last_retransmitted: sequence number of most recently retransmitted message
|
|
|
|
* @stale_count: # of identical retransmit requests made by peer
|
2015-05-14 14:46:15 +00:00
|
|
|
* @rcv_nxt: next sequence number to expect for inbound messages
|
2014-11-26 03:41:53 +00:00
|
|
|
* @deferred_queue: deferred queue saved OOS b'cast message received from node
|
2006-01-02 18:04:38 +00:00
|
|
|
* @unacked_window: # of inbound messages rx'd without ack'ing back to peer
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 13:36:41 +00:00
|
|
|
* @inputq: buffer queue for messages to be delivered upwards
|
|
|
|
* @namedq: buffer queue for name table messages to be delivered upwards
|
2006-01-02 18:04:38 +00:00
|
|
|
* @next_out: ptr to first unsent outbound message in queue
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 13:36:41 +00:00
|
|
|
* @wakeupq: linked list of wakeup msgs waiting for link congestion to abate
|
2006-01-02 18:04:38 +00:00
|
|
|
* @long_msg_seq_no: next identifier to use for outbound fragmented messages
|
2014-05-14 09:39:12 +00:00
|
|
|
* @reasm_buf: head of partially reassembled inbound message fragments
|
2006-01-02 18:04:38 +00:00
|
|
|
* @stats: collects statistics regarding link activity
|
|
|
|
*/
|
2011-12-30 01:58:42 +00:00
|
|
|
struct tipc_link {
|
2006-01-02 18:04:38 +00:00
|
|
|
u32 addr;
|
|
|
|
char name[TIPC_MAX_LINK_NAME];
|
2015-07-30 22:24:26 +00:00
|
|
|
struct tipc_media_addr *media_addr;
|
2008-09-03 06:38:32 +00:00
|
|
|
struct tipc_node *owner;
|
2006-01-02 18:04:38 +00:00
|
|
|
|
|
|
|
/* Management and link supervision data */
|
|
|
|
u32 peer_session;
|
|
|
|
u32 peer_bearer_id;
|
tipc: decouple the relationship between bearer and link
Currently on both paths of message transmission and reception, the
read lock of tipc_net_lock must be held before bearer is accessed,
while the write lock of tipc_net_lock has to be taken before bearer
is configured. Although it can ensure that bearer is always valid on
the two data paths, link and bearer is closely bound together.
So as the part of effort of removing tipc_net_lock, the locking
policy of bearer protection will be adjusted as below: on the two
data paths, RCU is used, and on the configuration path of bearer,
RTNL lock is applied.
Now RCU just covers the path of message reception. To make it possible
to protect the path of message transmission with RCU, link should not
use its stored bearer pointer to access bearer, but it should use the
bearer identity of its attached bearer as index to get bearer instance
from bearer_list array, which can help us decouple the relationship
between bearer and link. As a result, bearer on the path of message
transmission can be safely protected by RCU when we access bearer_list
array within RCU lock protection.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Tested-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 02:55:46 +00:00
|
|
|
u32 bearer_id;
|
2006-01-02 18:04:38 +00:00
|
|
|
u32 tolerance;
|
2015-05-14 14:46:15 +00:00
|
|
|
unsigned long keepalive_intv;
|
2006-01-02 18:04:38 +00:00
|
|
|
u32 abort_limit;
|
2015-07-30 22:24:21 +00:00
|
|
|
u32 state;
|
2015-05-14 14:46:15 +00:00
|
|
|
u32 silent_intv_cnt;
|
2006-01-02 18:04:38 +00:00
|
|
|
struct {
|
|
|
|
unchar hdr[INT_H_SIZE];
|
|
|
|
unchar body[TIPC_MAX_IF_NAME];
|
|
|
|
} proto_msg;
|
|
|
|
struct tipc_msg *pmsg;
|
|
|
|
u32 priority;
|
tipc: decouple the relationship between bearer and link
Currently on both paths of message transmission and reception, the
read lock of tipc_net_lock must be held before bearer is accessed,
while the write lock of tipc_net_lock has to be taken before bearer
is configured. Although it can ensure that bearer is always valid on
the two data paths, link and bearer is closely bound together.
So as the part of effort of removing tipc_net_lock, the locking
policy of bearer protection will be adjusted as below: on the two
data paths, RCU is used, and on the configuration path of bearer,
RTNL lock is applied.
Now RCU just covers the path of message reception. To make it possible
to protect the path of message transmission with RCU, link should not
use its stored bearer pointer to access bearer, but it should use the
bearer identity of its attached bearer as index to get bearer instance
from bearer_list array, which can help us decouple the relationship
between bearer and link. As a result, bearer on the path of message
transmission can be safely protected by RCU when we access bearer_list
array within RCU lock protection.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Tested-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 02:55:46 +00:00
|
|
|
char net_plane;
|
2006-01-02 18:04:38 +00:00
|
|
|
|
2015-07-30 22:24:19 +00:00
|
|
|
/* Failover/synch */
|
|
|
|
u16 drop_point;
|
|
|
|
struct sk_buff *failover_reasm_skb;
|
2006-01-02 18:04:38 +00:00
|
|
|
|
2007-02-09 14:25:21 +00:00
|
|
|
/* Max packet negotiation */
|
tipc: simplify link mtu negotiation
When a link is being established, the two endpoints advertise their
respective interface MTU in the transmitted RESET and ACTIVATE messages.
If there is any difference, the lower of the two MTUs will be selected
for use by both endpoints.
However, as a remnant of earlier attempts to introduce TIPC level
routing. there also exists an MTU discovery mechanism. If an intermediate
node has a lower MTU than the two endpoints, they will discover this
through a bisectional approach, and finally adopt this MTU for common use.
Since there is no TIPC level routing, and probably never will be,
this mechanism doesn't make any sense, and only serves to make the
link level protocol unecessarily complex.
In this commit, we eliminate the MTU discovery algorithm,and fall back
to the simple MTU advertising approach. This change is fully backwards
compatible.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-04-02 13:33:02 +00:00
|
|
|
u16 mtu;
|
|
|
|
u16 advertised_mtu;
|
2006-01-02 18:04:38 +00:00
|
|
|
|
|
|
|
/* Sending */
|
2015-03-13 20:08:10 +00:00
|
|
|
struct sk_buff_head transmq;
|
|
|
|
struct sk_buff_head backlogq;
|
tipc: introduce starvation free send algorithm
Currently, we only use a single counter; the length of the backlog
queue, to determine whether a message should be accepted to the queue
or not. Each time a message is being sent, the queue length is compared
to a threshold value for the message's importance priority. If the queue
length is beyond this threshold, the message is rejected. This algorithm
implies a risk of starvation of low importance senders during very high
load, because it may take a long time before the backlog queue has
decreased enough to accept a lower level message.
We now eliminate this risk by introducing a counter for each importance
priority. When a message is sent, we check only the queue level for that
particular message's priority. If that is ok, the message can be added
to the backlog, irrespective of the queue level for other priorities.
This way, each level is guaranteed a certain portion of the total
bandwidth, and any risk of starvation is eliminated.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-25 16:07:24 +00:00
|
|
|
struct {
|
|
|
|
u16 len;
|
|
|
|
u16 limit;
|
|
|
|
} backlog[5];
|
2015-05-14 14:46:15 +00:00
|
|
|
u16 snd_nxt;
|
|
|
|
u16 last_retransm;
|
tipc: improve sequence number checking
The sequence number of an incoming packet is currently only checked
for less than, equality to, or bigger than the next expected number,
meaning that the receive window in practice becomes one half sequence
number cycle, or U16_MAX/2. This does not make sense, and may not even
be safe if there are extreme delays in the network. Any packet sent by
the peer during the ongoing cycle must belong inside his current send
window, or should otherwise be dropped if possible.
Since a link endpoint cannot know its peer's current send window, it
has to base this sanity check on a worst-case assumption, i.e., that
the peer is using a maximum sized window of 8191 packets. Using this
assumption, we now add a check that the sequence number is not bigger
than next_expected + TIPC_MAX_LINK_WIN. We also re-order the checks
done, so that the receive window test is performed before the gap test.
This way, we are guaranteed that no packet with illegal sequence numbers
are ever added to the deferred queue.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-15 18:52:42 +00:00
|
|
|
u16 window;
|
2007-02-09 14:25:21 +00:00
|
|
|
u32 stale_count;
|
2006-01-02 18:04:38 +00:00
|
|
|
|
|
|
|
/* Reception */
|
2015-05-14 14:46:15 +00:00
|
|
|
u16 rcv_nxt;
|
2015-03-13 20:08:10 +00:00
|
|
|
u32 rcv_unacked;
|
|
|
|
struct sk_buff_head deferdq;
|
2015-07-16 20:54:21 +00:00
|
|
|
struct sk_buff_head *inputq;
|
|
|
|
struct sk_buff_head *namedq;
|
2006-01-02 18:04:38 +00:00
|
|
|
|
|
|
|
/* Congestion handling */
|
tipc: resolve race problem at unicast message reception
TIPC handles message cardinality and sequencing at the link layer,
before passing messages upwards to the destination sockets. During the
upcall from link to socket no locks are held. It is therefore possible,
and we see it happen occasionally, that messages arriving in different
threads and delivered in sequence still bypass each other before they
reach the destination socket. This must not happen, since it violates
the sequentiality guarantee.
We solve this by adding a new input buffer queue to the link structure.
Arriving messages are added safely to the tail of that queue by the
link, while the head of the queue is consumed, also safely, by the
receiving socket. Sequentiality is secured per socket by only allowing
buffers to be dequeued inside the socket lock. Since there may be multiple
simultaneous readers of the queue, we use a 'filter' parameter to reduce
the risk that they peek the same buffer from the queue, hence also
reducing the risk of contention on the receiving socket locks.
This solves the sequentiality problem, and seems to cause no measurable
performance degradation.
A nice side effect of this change is that lock handling in the functions
tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that
will enable future simplifications of those functions.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-05 13:36:41 +00:00
|
|
|
struct sk_buff_head wakeupq;
|
2006-01-02 18:04:38 +00:00
|
|
|
|
tipc: message reassembly using fragment chain
When the first fragment of a long data data message is received on a link, a
reassembly buffer large enough to hold the data from this and all subsequent
fragments of the message is allocated. The payload of each new fragment is
copied into this buffer upon arrival. When the last fragment is received, the
reassembled message is delivered upwards to the port/socket layer.
Not only is this an inefficient approach, but it may also cause bursts of
reassembly failures in low memory situations. since we may fail to allocate
the necessary large buffer in the first place. Furthermore, after 100 subsequent
such failures the link will be reset, something that in reality aggravates the
situation.
To remedy this problem, this patch introduces a different approach. Instead of
allocating a big reassembly buffer, we now append the arriving fragments
to a reassembly chain on the link, and deliver the whole chain up to the
socket layer once the last fragment has been received. This is safe because
the retransmission layer of a TIPC link always delivers packets in strict
uninterrupted order, to the reassembly layer as to all other upper layers.
Hence there can never be more than one fragment chain pending reassembly at
any given time in a link, and we can trust (but still verify) that the
fragments will be chained up in the correct order.
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-11-06 08:28:06 +00:00
|
|
|
/* Fragmentation/reassembly */
|
2014-05-14 09:39:12 +00:00
|
|
|
struct sk_buff *reasm_buf;
|
2006-01-02 18:04:38 +00:00
|
|
|
|
2007-02-09 14:25:21 +00:00
|
|
|
/* Statistics */
|
2012-07-11 13:40:43 +00:00
|
|
|
struct tipc_stats stats;
|
2006-01-02 18:04:38 +00:00
|
|
|
};
|
|
|
|
|
2015-07-30 22:24:26 +00:00
|
|
|
bool tipc_link_create(struct tipc_node *n, struct tipc_bearer *b, u32 session,
|
|
|
|
u32 ownnode, u32 peer, struct tipc_media_addr *maddr,
|
|
|
|
struct sk_buff_head *inputq, struct sk_buff_head *namedq,
|
|
|
|
struct tipc_link **link);
|
2015-07-30 22:24:19 +00:00
|
|
|
void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
|
|
|
|
int mtyp, struct sk_buff_head *xmitq);
|
2015-07-30 22:24:20 +00:00
|
|
|
void tipc_link_build_bcast_sync_msg(struct tipc_link *l,
|
|
|
|
struct sk_buff_head *xmitq);
|
2015-07-30 22:24:21 +00:00
|
|
|
int tipc_link_fsm_evt(struct tipc_link *l, int evt);
|
2011-12-30 01:58:42 +00:00
|
|
|
void tipc_link_reset_fragments(struct tipc_link *l_ptr);
|
2015-07-30 22:24:21 +00:00
|
|
|
bool tipc_link_is_up(struct tipc_link *l);
|
|
|
|
bool tipc_link_is_reset(struct tipc_link *l);
|
tipc: delay ESTABLISH state event when link is established
Link establishing, just like link teardown, is a non-atomic action, in
the sense that discovering that conditions are right to establish a link,
and the actual adding of the link to one of the node's send slots is done
in two different lock contexts. The link FSM is designed to help bridging
the gap between the two contexts in a safe manner.
We have now discovered a weakness in the implementaton of this FSM.
Because we directly let the link go from state LINK_ESTABLISHING to
state LINK_ESTABLISHED already in the first lock context, we are unable
to distinguish between a fully established link, i.e., a link that has
been added to its slot, and a link that has not yet reached the second
lock context. It may hence happen that a manual intervention, e.g., when
disabling an interface, causes the function tipc_node_link_down() to try
removing the link from the node slots, decrementing its active link
counter etc, although the link was never added there in the first place.
We solve this by delaying the actual state change until we reach the
second lock context, inside the function tipc_node_link_up(). This
makes it possible for potentail callers of __tipc_node_link_down() to
know if they should proceed or not, and the problem is solved.
Unforunately, the situation described above also has a second problem.
Since there by necessity is a tipc_node_link_up() call pending once
the node lock has been released, we must defuse that call by setting
the link back from LINK_ESTABLISHING to LINK_RESET state. This forces
us to make a slight modification to the link FSM, which will now look
as follows.
+------------------------------------+
|RESET_EVT |
| |
| +--------------+
| +-----------------| SYNCHING |-----------------+
| |FAILURE_EVT +--------------+ PEER_RESET_EVT|
| | A | |
| | | | |
| | | | |
| | |SYNCH_ |SYNCH_ |
| | |BEGIN_EVT |END_EVT |
| | | | |
| V | V V
| +-------------+ +--------------+ +------------+
| | RESETTING |<---------| ESTABLISHED |--------->| PEER_RESET |
| +-------------+ FAILURE_ +--------------+ PEER_ +------------+
| | EVT | A RESET_EVT |
| | | | |
| | +----------------+ | |
| RESET_EVT| |RESET_EVT | |
| | | | |
| | | |ESTABLISH_EVT |
| | | +-------------+ | |
| | | | RESET_EVT | | |
| | | | | | |
| V V V | | |
| +-------------+ +--------------+ RESET_EVT|
+--->| RESET |--------->| ESTABLISHING |<----------------+
+-------------+ PEER_ +--------------+
| A RESET_EVT |
| | |
| | |
|FAILOVER_ |FAILOVER_ |FAILOVER_
|BEGIN_EVT |END_EVT |BEGIN_EVT
| | |
V | |
+-------------+ |
| FAILINGOVER |<----------------+
+-------------+
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-10-15 18:52:44 +00:00
|
|
|
bool tipc_link_is_establishing(struct tipc_link *l);
|
2015-07-30 22:24:21 +00:00
|
|
|
bool tipc_link_is_synching(struct tipc_link *l);
|
|
|
|
bool tipc_link_is_failingover(struct tipc_link *l);
|
|
|
|
bool tipc_link_is_blocked(struct tipc_link *l);
|
2011-12-30 01:58:42 +00:00
|
|
|
int tipc_link_is_active(struct tipc_link *l_ptr);
|
2014-01-07 22:02:44 +00:00
|
|
|
void tipc_link_purge_queues(struct tipc_link *l_ptr);
|
2015-06-28 13:44:44 +00:00
|
|
|
void tipc_link_purge_backlog(struct tipc_link *l);
|
2011-12-30 01:58:42 +00:00
|
|
|
void tipc_link_reset(struct tipc_link *l_ptr);
|
2015-01-09 07:27:06 +00:00
|
|
|
int __tipc_link_xmit(struct net *net, struct tipc_link *link,
|
|
|
|
struct sk_buff_head *list);
|
2015-07-16 20:54:24 +00:00
|
|
|
int tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list,
|
|
|
|
struct sk_buff_head *xmitq);
|
2014-02-18 08:06:46 +00:00
|
|
|
void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob,
|
tipc: simplify link mtu negotiation
When a link is being established, the two endpoints advertise their
respective interface MTU in the transmitted RESET and ACTIVATE messages.
If there is any difference, the lower of the two MTUs will be selected
for use by both endpoints.
However, as a remnant of earlier attempts to introduce TIPC level
routing. there also exists an MTU discovery mechanism. If an intermediate
node has a lower MTU than the two endpoints, they will discover this
through a bisectional approach, and finally adopt this MTU for common use.
Since there is no TIPC level routing, and probably never will be,
this mechanism doesn't make any sense, and only serves to make the
link level protocol unecessarily complex.
In this commit, we eliminate the MTU discovery algorithm,and fall back
to the simple MTU advertising approach. This change is fully backwards
compatible.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-04-02 13:33:02 +00:00
|
|
|
u32 gap, u32 tolerance, u32 priority);
|
2014-11-26 03:41:48 +00:00
|
|
|
void tipc_link_push_packets(struct tipc_link *l_ptr);
|
2014-11-26 03:41:53 +00:00
|
|
|
u32 tipc_link_defer_pkt(struct sk_buff_head *list, struct sk_buff *buf);
|
2011-12-30 01:58:42 +00:00
|
|
|
void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window);
|
|
|
|
void tipc_link_retransmit(struct tipc_link *l_ptr,
|
|
|
|
struct sk_buff *start, u32 retransmits);
|
2014-11-26 03:41:52 +00:00
|
|
|
struct sk_buff *tipc_skb_queue_next(const struct sk_buff_head *list,
|
|
|
|
const struct sk_buff *skb);
|
2006-01-02 18:04:38 +00:00
|
|
|
|
2014-11-20 09:29:12 +00:00
|
|
|
int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb);
|
|
|
|
int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info);
|
2014-11-20 09:29:13 +00:00
|
|
|
int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info);
|
2014-11-20 09:29:14 +00:00
|
|
|
int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info);
|
2014-11-20 09:29:07 +00:00
|
|
|
int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]);
|
2015-07-16 20:54:28 +00:00
|
|
|
int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq);
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/tranmsits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-16 20:54:31 +00:00
|
|
|
int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
|
|
|
|
struct sk_buff_head *xmitq);
|
2010-05-11 14:30:11 +00:00
|
|
|
|
2006-01-02 18:04:38 +00:00
|
|
|
#endif
|