diff options
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r-- | net/ipv4/tcp.c | 132 |
1 files changed, 58 insertions, 74 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f5c336f8b0c8..bc7f419184aa 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -287,8 +287,8 @@ enum { TCP_CMSG_TS = 2 }; -struct percpu_counter tcp_orphan_count; -EXPORT_SYMBOL_GPL(tcp_orphan_count); +DEFINE_PER_CPU(unsigned int, tcp_orphan_count); +EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count); long sysctl_tcp_mem[3] __read_mostly; EXPORT_SYMBOL(sysctl_tcp_mem); @@ -325,11 +325,6 @@ struct tcp_splice_state { unsigned long tcp_memory_pressure __read_mostly; EXPORT_SYMBOL_GPL(tcp_memory_pressure); -DEFINE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key); -EXPORT_SYMBOL(tcp_rx_skb_cache_key); - -DEFINE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key); - void tcp_enter_memory_pressure(struct sock *sk) { unsigned long val; @@ -644,7 +639,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) } EXPORT_SYMBOL(tcp_ioctl); -static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) +void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) { TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; tp->pushed_seq = tp->write_seq; @@ -655,15 +650,13 @@ static inline bool forced_push(const struct tcp_sock *tp) return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1)); } -static void skb_entail(struct sock *sk, struct sk_buff *skb) +void tcp_skb_entail(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); - skb->csum = 0; tcb->seq = tcb->end_seq = tp->write_seq; tcb->tcp_flags = TCPHDR_ACK; - tcb->sacked = 0; __skb_header_release(skb); tcp_add_write_queue_tail(sk, skb); sk_wmem_queued_add(sk, skb->truesize); @@ -858,30 +851,15 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, } EXPORT_SYMBOL(tcp_splice_read); -struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, - bool force_schedule) +struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, + bool force_schedule) { struct sk_buff *skb; - if (likely(!size)) { - skb = sk->sk_tx_skb_cache; - if (skb) { - skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); - sk->sk_tx_skb_cache = NULL; - pskb_trim(skb, 0); - INIT_LIST_HEAD(&skb->tcp_tsorted_anchor); - skb_shinfo(skb)->tx_flags = 0; - memset(TCP_SKB_CB(skb), 0, sizeof(struct tcp_skb_cb)); - return skb; - } - } - /* The TCP header must be at least 32-bit aligned. */ - size = ALIGN(size, 4); - if (unlikely(tcp_under_memory_pressure(sk))) sk_mem_reclaim_partial(sk); - skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp); + skb = alloc_skb_fclone(size + MAX_TCP_HEADER, gfp); if (likely(skb)) { bool mem_scheduled; @@ -892,12 +870,8 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, mem_scheduled = sk_wmem_schedule(sk, skb->truesize); } if (likely(mem_scheduled)) { - skb_reserve(skb, sk->sk_prot->max_header); - /* - * Make sure that we have exactly size bytes - * available to the caller, no more, no less. - */ - skb->reserved_tailroom = skb->end - skb->tail - size; + skb_reserve(skb, MAX_TCP_HEADER); + skb->ip_summed = CHECKSUM_PARTIAL; INIT_LIST_HEAD(&skb->tcp_tsorted_anchor); return skb; } @@ -950,18 +924,20 @@ int tcp_send_mss(struct sock *sk, int *size_goal, int flags) * importantly be able to generate EPOLLOUT for Edge Trigger epoll() * users. */ -void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb) +void tcp_remove_empty_skb(struct sock *sk) { - if (skb && !skb->len) { + struct sk_buff *skb = tcp_write_queue_tail(sk); + + if (skb && TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { tcp_unlink_write_queue(skb, sk); if (tcp_write_queue_empty(sk)) tcp_chrono_stop(sk, TCP_CHRONO_BUSY); - sk_wmem_free_skb(sk, skb); + tcp_wmem_free_skb(sk, skb); } } -struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags, - struct page *page, int offset, size_t *size) +static struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags, + struct page *page, int offset, size_t *size) { struct sk_buff *skb = tcp_write_queue_tail(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -974,15 +950,15 @@ new_segment: if (!sk_stream_memory_free(sk)) return NULL; - skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, - tcp_rtx_and_write_queues_empty(sk)); + skb = tcp_stream_alloc_skb(sk, 0, sk->sk_allocation, + tcp_rtx_and_write_queues_empty(sk)); if (!skb) return NULL; #ifdef CONFIG_TLS_DEVICE skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED); #endif - skb_entail(sk, skb); + tcp_skb_entail(sk, skb); copy = size_goal; } @@ -1013,7 +989,6 @@ new_segment: skb->truesize += copy; sk_wmem_queued_add(sk, copy); sk_mem_charge(sk, copy); - skb->ip_summed = CHECKSUM_PARTIAL; WRITE_ONCE(tp->write_seq, tp->write_seq + copy); TCP_SKB_CB(skb)->end_seq += copy; tcp_skb_pcount_set(skb, 0); @@ -1104,7 +1079,7 @@ out: return copied; do_error: - tcp_remove_empty_skb(sk, tcp_write_queue_tail(sk)); + tcp_remove_empty_skb(sk); if (copied) goto out; out_err: @@ -1303,15 +1278,14 @@ new_segment: goto restart; } first_skb = tcp_rtx_and_write_queues_empty(sk); - skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, - first_skb); + skb = tcp_stream_alloc_skb(sk, 0, sk->sk_allocation, + first_skb); if (!skb) goto wait_for_space; process_backlog++; - skb->ip_summed = CHECKSUM_PARTIAL; - skb_entail(sk, skb); + tcp_skb_entail(sk, skb); copy = size_goal; /* All packets are restored as if they have @@ -1326,14 +1300,7 @@ new_segment: if (copy > msg_data_left(msg)) copy = msg_data_left(msg); - /* Where to copy to? */ - if (skb_availroom(skb) > 0 && !zc) { - /* We have some space in skb head. Superb! */ - copy = min_t(int, copy, skb_availroom(skb)); - err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy); - if (err) - goto do_fault; - } else if (!zc) { + if (!zc) { bool merge = true; int i = skb_shinfo(skb)->nr_frags; struct page_frag *pfrag = sk_page_frag(sk); @@ -1432,9 +1399,7 @@ out_nopush: return copied + copied_syn; do_error: - skb = tcp_write_queue_tail(sk); -do_fault: - tcp_remove_empty_skb(sk, skb); + tcp_remove_empty_skb(sk); if (copied + copied_syn) goto out; @@ -2687,11 +2652,36 @@ void tcp_shutdown(struct sock *sk, int how) } EXPORT_SYMBOL(tcp_shutdown); +int tcp_orphan_count_sum(void) +{ + int i, total = 0; + + for_each_possible_cpu(i) + total += per_cpu(tcp_orphan_count, i); + + return max(total, 0); +} + +static int tcp_orphan_cache; +static struct timer_list tcp_orphan_timer; +#define TCP_ORPHAN_TIMER_PERIOD msecs_to_jiffies(100) + +static void tcp_orphan_update(struct timer_list *unused) +{ + WRITE_ONCE(tcp_orphan_cache, tcp_orphan_count_sum()); + mod_timer(&tcp_orphan_timer, jiffies + TCP_ORPHAN_TIMER_PERIOD); +} + +static bool tcp_too_many_orphans(int shift) +{ + return READ_ONCE(tcp_orphan_cache) << shift > sysctl_tcp_max_orphans; +} + bool tcp_check_oom(struct sock *sk, int shift) { bool too_many_orphans, out_of_socket_memory; - too_many_orphans = tcp_too_many_orphans(sk, shift); + too_many_orphans = tcp_too_many_orphans(shift); out_of_socket_memory = tcp_out_of_memory(sk); if (too_many_orphans) @@ -2800,7 +2790,7 @@ adjudge_to_death: /* remove backlog if any, without releasing ownership. */ __release_sock(sk); - percpu_counter_inc(sk->sk_prot->orphan_count); + this_cpu_inc(tcp_orphan_count); /* Have we already been destroyed by a softirq or backlog? */ if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE) @@ -2903,7 +2893,7 @@ static void tcp_rtx_queue_purge(struct sock *sk) * list_del(&skb->tcp_tsorted_anchor) */ tcp_rtx_queue_unlink(skb, sk); - sk_wmem_free_skb(sk, skb); + tcp_wmem_free_skb(sk, skb); } } @@ -2914,14 +2904,9 @@ void tcp_write_queue_purge(struct sock *sk) tcp_chrono_stop(sk, TCP_CHRONO_BUSY); while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { tcp_skb_tsorted_anchor_cleanup(skb); - sk_wmem_free_skb(sk, skb); + tcp_wmem_free_skb(sk, skb); } tcp_rtx_queue_purge(sk); - skb = sk->sk_tx_skb_cache; - if (skb) { - __kfree_skb(skb); - sk->sk_tx_skb_cache = NULL; - } INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue); sk_mem_reclaim(sk); tcp_clear_all_retrans_hints(tcp_sk(sk)); @@ -2958,10 +2943,6 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_clear_xmit_timers(sk); __skb_queue_purge(&sk->sk_receive_queue); - if (sk->sk_rx_skb_cache) { - __kfree_skb(sk->sk_rx_skb_cache); - sk->sk_rx_skb_cache = NULL; - } WRITE_ONCE(tp->copied_seq, tp->rcv_nxt); tp->urg_data = 0; tcp_write_queue_purge(sk); @@ -4502,7 +4483,10 @@ void __init tcp_init(void) sizeof_field(struct sk_buff, cb)); percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); - percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL); + + timer_setup(&tcp_orphan_timer, tcp_orphan_update, TIMER_DEFERRABLE); + mod_timer(&tcp_orphan_timer, jiffies + TCP_ORPHAN_TIMER_PERIOD); + inet_hashinfo_init(&tcp_hashinfo); inet_hashinfo2_init(&tcp_hashinfo, "tcp_listen_portaddr_hash", thash_entries, 21, /* one slot per 2 MB*/ |