diff options
-rw-r--r-- | include/linux/tcp.h | 6 | ||||
-rw-r--r-- | include/net/tcp.h | 5 | ||||
-rw-r--r-- | net/ipv4/Makefile | 1 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 14 | ||||
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_recovery.c | 32 |
6 files changed, 60 insertions, 0 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 8c54863dfc38..5dce9705fe84 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -194,6 +194,12 @@ struct tcp_sock { u32 window_clamp; /* Maximal window to advertise */ u32 rcv_ssthresh; /* Current window clamp */ + /* Information of the most recently (s)acked skb */ + struct tcp_rack { + struct skb_mstamp mstamp; /* (Re)sent time of the skb */ + u8 advanced; /* mstamp advanced since last lost marking */ + u8 reord; /* reordering detected */ + } rack; u16 advmss; /* Advertised MSS */ u8 unused; u8 nonagle : 4,/* Disable Nagle algorithm? */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 4a43152229ea..3c3a9fe057d3 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1750,6 +1750,11 @@ int tcpv4_offload_init(void); void tcp_v4_init(void); void tcp_init(void); +/* tcp_recovery.c */ + +extern void tcp_rack_advance(struct tcp_sock *tp, + const struct skb_mstamp *xmit_time, u8 sacked); + /* * Save and compile IPv4 options, return a pointer to it */ diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 89aacb630a53..c29809f765dc 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -8,6 +8,7 @@ obj-y := route.o inetpeer.o protocol.o \ inet_timewait_sock.o inet_connection_sock.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \ + tcp_recovery.o \ tcp_offload.o datagram.o raw.o udp.o udplite.o \ udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o \ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 1e97e73e5ecf..ce8370525832 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1173,6 +1173,8 @@ static u8 tcp_sacktag_one(struct sock *sk, return sacked; if (!(sacked & TCPCB_SACKED_ACKED)) { + tcp_rack_advance(tp, xmit_time, sacked); + if (sacked & TCPCB_SACKED_RETRANS) { /* If the segment is not tagged as lost, * we do not clear RETRANS, believing @@ -2256,6 +2258,16 @@ static bool tcp_tsopt_ecr_before(const struct tcp_sock *tp, u32 when) before(tp->rx_opt.rcv_tsecr, when); } +/* skb is spurious retransmitted if the returned timestamp echo + * reply is prior to the skb transmission time + */ +static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp, + const struct sk_buff *skb) +{ + return (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) && + tcp_tsopt_ecr_before(tp, tcp_skb_timestamp(skb)); +} + /* Nothing was retransmitted or returned timestamp is less * than timestamp of the first retransmission. */ @@ -3135,6 +3147,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (sacked & TCPCB_SACKED_ACKED) tp->sacked_out -= acked_pcount; + else if (tcp_is_sack(tp) && !tcp_skb_spurious_retrans(tp, skb)) + tcp_rack_advance(tp, &skb->skb_mstamp, sacked); if (sacked & TCPCB_LOST) tp->lost_out -= acked_pcount; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b875c288daaa..1fd5d413a664 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -548,6 +548,8 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, tcp_ecn_openreq_child(newtp, req); newtp->fastopen_rsk = NULL; newtp->syn_data_acked = 0; + newtp->rack.mstamp.v64 = 0; + newtp->rack.advanced = 0; newtp->saved_syn = req->saved_syn; req->saved_syn = NULL; diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c new file mode 100644 index 000000000000..8f66a6584845 --- /dev/null +++ b/net/ipv4/tcp_recovery.c @@ -0,0 +1,32 @@ +#include <linux/tcp.h> +#include <net/tcp.h> + +/* Record the most recently (re)sent time among the (s)acked packets */ +void tcp_rack_advance(struct tcp_sock *tp, + const struct skb_mstamp *xmit_time, u8 sacked) +{ + if (tp->rack.mstamp.v64 && + !skb_mstamp_after(xmit_time, &tp->rack.mstamp)) + return; + + if (sacked & TCPCB_RETRANS) { + struct skb_mstamp now; + + /* If the sacked packet was retransmitted, it's ambiguous + * whether the retransmission or the original (or the prior + * retransmission) was sacked. + * + * If the original is lost, there is no ambiguity. Otherwise + * we assume the original can be delayed up to aRTT + min_rtt. + * the aRTT term is bounded by the fast recovery or timeout, + * so it's at least one RTT (i.e., retransmission is at least + * an RTT later). + */ + skb_mstamp_get(&now); + if (skb_mstamp_us_delta(&now, xmit_time) < tcp_min_rtt(tp)) + return; + } + + tp->rack.mstamp = *xmit_time; + tp->rack.advanced = 1; +} |