tcp: add TCPMemoryPressuresChrono counter
DRAM supply shortage and poor memory pressure tracking in TCP stack make any change in SO_SNDBUF/SO_RCVBUF (or equivalent autotuning limits) and tcp_mem[] quite hazardous. TCPMemoryPressures SNMP counter is an indication of tcp_mem sysctl limits being hit, but it only tracks the number of transitions. If TCP stack behavior under stress were perfect: 1) It would maintain memory usage close to the limit. 2) Memory pressure state would be entered for short times. We certainly prefer 100 events lasting 10ms compared to one event lasting 200 seconds. This patch adds a new SNMP counter tracking cumulative duration of memory pressure events, given in ms units. $ cat /proc/sys/net/ipv4/tcp_mem 3088 4117 6176 $ grep TCP /proc/net/sockstat TCP: inuse 180 orphan 0 tw 2 alloc 234 mem 4140 $ nstat -n ; sleep 10 ; nstat |grep Pressure TcpExtTCPMemoryPressures 1700 TcpExtTCPMemoryPressuresChrono 5209 v2: Used EXPORT_SYMBOL_GPL() instead of EXPORT_SYMBOL() as David instructed. Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
06fcb3b69f
commit
0604475119
@ -1080,6 +1080,7 @@ struct proto {
|
||||
bool (*stream_memory_free)(const struct sock *sk);
|
||||
/* Memory pressure */
|
||||
void (*enter_memory_pressure)(struct sock *sk);
|
||||
void (*leave_memory_pressure)(struct sock *sk);
|
||||
atomic_long_t *memory_allocated; /* Current allocated memory. */
|
||||
struct percpu_counter *sockets_allocated; /* Current number of sockets. */
|
||||
/*
|
||||
@ -1088,7 +1089,7 @@ struct proto {
|
||||
* All the __sk_mem_schedule() is of this nature: accounting
|
||||
* is strict, actions are advisory and have some latency.
|
||||
*/
|
||||
int *memory_pressure;
|
||||
unsigned long *memory_pressure;
|
||||
long *sysctl_mem;
|
||||
int *sysctl_wmem;
|
||||
int *sysctl_rmem;
|
||||
@ -1193,25 +1194,6 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
|
||||
return !!*sk->sk_prot->memory_pressure;
|
||||
}
|
||||
|
||||
static inline void sk_leave_memory_pressure(struct sock *sk)
|
||||
{
|
||||
int *memory_pressure = sk->sk_prot->memory_pressure;
|
||||
|
||||
if (!memory_pressure)
|
||||
return;
|
||||
|
||||
if (*memory_pressure)
|
||||
*memory_pressure = 0;
|
||||
}
|
||||
|
||||
static inline void sk_enter_memory_pressure(struct sock *sk)
|
||||
{
|
||||
if (!sk->sk_prot->enter_memory_pressure)
|
||||
return;
|
||||
|
||||
sk->sk_prot->enter_memory_pressure(sk);
|
||||
}
|
||||
|
||||
static inline long
|
||||
sk_memory_allocated(const struct sock *sk)
|
||||
{
|
||||
|
@ -276,7 +276,7 @@ extern int sysctl_tcp_pacing_ca_ratio;
|
||||
|
||||
extern atomic_long_t tcp_memory_allocated;
|
||||
extern struct percpu_counter tcp_sockets_allocated;
|
||||
extern int tcp_memory_pressure;
|
||||
extern unsigned long tcp_memory_pressure;
|
||||
|
||||
/* optimized version of sk_under_memory_pressure() for TCP sockets */
|
||||
static inline bool tcp_under_memory_pressure(const struct sock *sk)
|
||||
@ -1320,6 +1320,7 @@ extern void tcp_openreq_init_rwin(struct request_sock *req,
|
||||
const struct dst_entry *dst);
|
||||
|
||||
void tcp_enter_memory_pressure(struct sock *sk);
|
||||
void tcp_leave_memory_pressure(struct sock *sk);
|
||||
|
||||
static inline int keepalive_intvl_when(const struct tcp_sock *tp)
|
||||
{
|
||||
|
@ -228,6 +228,7 @@ enum
|
||||
LINUX_MIB_TCPABORTONLINGER, /* TCPAbortOnLinger */
|
||||
LINUX_MIB_TCPABORTFAILED, /* TCPAbortFailed */
|
||||
LINUX_MIB_TCPMEMORYPRESSURES, /* TCPMemoryPressures */
|
||||
LINUX_MIB_TCPMEMORYPRESSURESCHRONO, /* TCPMemoryPressuresChrono */
|
||||
LINUX_MIB_TCPSACKDISCARD, /* TCPSACKDiscard */
|
||||
LINUX_MIB_TCPDSACKIGNOREDOLD, /* TCPDSACKIgnoredOld */
|
||||
LINUX_MIB_TCPDSACKIGNOREDNOUNDO, /* TCPDSACKIgnoredNoUndo */
|
||||
|
@ -2076,6 +2076,26 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
|
||||
}
|
||||
EXPORT_SYMBOL(sock_cmsg_send);
|
||||
|
||||
static void sk_enter_memory_pressure(struct sock *sk)
|
||||
{
|
||||
if (!sk->sk_prot->enter_memory_pressure)
|
||||
return;
|
||||
|
||||
sk->sk_prot->enter_memory_pressure(sk);
|
||||
}
|
||||
|
||||
static void sk_leave_memory_pressure(struct sock *sk)
|
||||
{
|
||||
if (sk->sk_prot->leave_memory_pressure) {
|
||||
sk->sk_prot->leave_memory_pressure(sk);
|
||||
} else {
|
||||
unsigned long *memory_pressure = sk->sk_prot->memory_pressure;
|
||||
|
||||
if (memory_pressure && *memory_pressure)
|
||||
*memory_pressure = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* On 32bit arches, an skb frag is limited to 2^15 */
|
||||
#define SKB_FRAG_PAGE_ORDER get_order(32768)
|
||||
|
||||
|
@ -447,7 +447,7 @@ static void dn_destruct(struct sock *sk)
|
||||
dst_release(rcu_dereference_check(sk->sk_dst_cache, 1));
|
||||
}
|
||||
|
||||
static int dn_memory_pressure;
|
||||
static unsigned long dn_memory_pressure;
|
||||
|
||||
static void dn_enter_memory_pressure(struct sock *sk)
|
||||
{
|
||||
|
@ -250,6 +250,7 @@ static const struct snmp_mib snmp4_net_list[] = {
|
||||
SNMP_MIB_ITEM("TCPAbortOnLinger", LINUX_MIB_TCPABORTONLINGER),
|
||||
SNMP_MIB_ITEM("TCPAbortFailed", LINUX_MIB_TCPABORTFAILED),
|
||||
SNMP_MIB_ITEM("TCPMemoryPressures", LINUX_MIB_TCPMEMORYPRESSURES),
|
||||
SNMP_MIB_ITEM("TCPMemoryPressuresChrono", LINUX_MIB_TCPMEMORYPRESSURESCHRONO),
|
||||
SNMP_MIB_ITEM("TCPSACKDiscard", LINUX_MIB_TCPSACKDISCARD),
|
||||
SNMP_MIB_ITEM("TCPDSACKIgnoredOld", LINUX_MIB_TCPDSACKIGNOREDOLD),
|
||||
SNMP_MIB_ITEM("TCPDSACKIgnoredNoUndo", LINUX_MIB_TCPDSACKIGNOREDNOUNDO),
|
||||
|
@ -320,17 +320,36 @@ struct tcp_splice_state {
|
||||
* All the __sk_mem_schedule() is of this nature: accounting
|
||||
* is strict, actions are advisory and have some latency.
|
||||
*/
|
||||
int tcp_memory_pressure __read_mostly;
|
||||
EXPORT_SYMBOL(tcp_memory_pressure);
|
||||
unsigned long tcp_memory_pressure __read_mostly;
|
||||
EXPORT_SYMBOL_GPL(tcp_memory_pressure);
|
||||
|
||||
void tcp_enter_memory_pressure(struct sock *sk)
|
||||
{
|
||||
if (!tcp_memory_pressure) {
|
||||
unsigned long val;
|
||||
|
||||
if (tcp_memory_pressure)
|
||||
return;
|
||||
val = jiffies;
|
||||
|
||||
if (!val)
|
||||
val--;
|
||||
if (!cmpxchg(&tcp_memory_pressure, 0, val))
|
||||
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURES);
|
||||
tcp_memory_pressure = 1;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(tcp_enter_memory_pressure);
|
||||
EXPORT_SYMBOL_GPL(tcp_enter_memory_pressure);
|
||||
|
||||
void tcp_leave_memory_pressure(struct sock *sk)
|
||||
{
|
||||
unsigned long val;
|
||||
|
||||
if (!tcp_memory_pressure)
|
||||
return;
|
||||
val = xchg(&tcp_memory_pressure, 0);
|
||||
if (val)
|
||||
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURESCHRONO,
|
||||
jiffies_to_msecs(jiffies - val));
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tcp_leave_memory_pressure);
|
||||
|
||||
/* Convert seconds to retransmits based on initial and max timeout */
|
||||
static u8 secs_to_retrans(int seconds, int timeout, int rto_max)
|
||||
|
@ -2387,6 +2387,7 @@ struct proto tcp_prot = {
|
||||
.unhash = inet_unhash,
|
||||
.get_port = inet_csk_get_port,
|
||||
.enter_memory_pressure = tcp_enter_memory_pressure,
|
||||
.leave_memory_pressure = tcp_leave_memory_pressure,
|
||||
.stream_memory_free = tcp_stream_memory_free,
|
||||
.sockets_allocated = &tcp_sockets_allocated,
|
||||
.orphan_count = &tcp_orphan_count,
|
||||
|
@ -1910,6 +1910,7 @@ struct proto tcpv6_prot = {
|
||||
.unhash = inet_unhash,
|
||||
.get_port = inet_csk_get_port,
|
||||
.enter_memory_pressure = tcp_enter_memory_pressure,
|
||||
.leave_memory_pressure = tcp_leave_memory_pressure,
|
||||
.stream_memory_free = tcp_stream_memory_free,
|
||||
.sockets_allocated = &tcp_sockets_allocated,
|
||||
.memory_allocated = &tcp_memory_allocated,
|
||||
|
@ -103,7 +103,7 @@ static int sctp_autobind(struct sock *sk);
|
||||
static void sctp_sock_migrate(struct sock *, struct sock *,
|
||||
struct sctp_association *, sctp_socket_type_t);
|
||||
|
||||
static int sctp_memory_pressure;
|
||||
static unsigned long sctp_memory_pressure;
|
||||
static atomic_long_t sctp_memory_allocated;
|
||||
struct percpu_counter sctp_sockets_allocated;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user