summaryrefslogtreecommitdiff
path: root/net/ipv4/tcp_output.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-10-18 09:31:37 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2014-10-18 09:31:37 -0700
commit2e923b0251932ad4a82cc87ec1443a1f1d17073e (patch)
treed12032bc9bcfbb8a57659275d1b9b582f23f2ecc /net/ipv4/tcp_output.c
parentffd8221bc348f8c282d1271883dbe629ea8ae289 (diff)
parentf2d9da1a8375cbe53df5b415d059429013a3a79f (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller: 1) Include fixes for netrom and dsa (Fabian Frederick and Florian Fainelli) 2) Fix FIXED_PHY support in stmmac, from Giuseppe CAVALLARO. 3) Several SKB use after free fixes (vxlan, openvswitch, vxlan, ip_tunnel, fou), from Li ROngQing. 4) fec driver PTP support fixes from Luwei Zhou and Nimrod Andy. 5) Use after free in virtio_net, from Michael S Tsirkin. 6) Fix flow mask handling for megaflows in openvswitch, from Pravin B Shelar. 7) ISDN gigaset and capi bug fixes from Tilman Schmidt. 8) Fix route leak in ip_send_unicast_reply(), from Vasily Averin. 9) Fix two eBPF JIT bugs on x86, from Alexei Starovoitov. 10) TCP_SKB_CB() reorganization caused a few regressions, fixed by Cong Wang and Eric Dumazet. 11) Don't overwrite end of SKB when parsing malformed sctp ASCONF chunks, from Daniel Borkmann. 12) Don't call sock_kfree_s() with NULL pointers, this function also has the side effect of adjusting the socket memory usage. From Cong Wang. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (90 commits) bna: fix skb->truesize underestimation net: dsa: add includes for ethtool and phy_fixed definitions openvswitch: Set flow-key members. netrom: use linux/uaccess.h dsa: Fix conversion from host device to mii bus tipc: fix bug in bundled buffer reception ipv6: introduce tcp_v6_iif() sfc: add support for skb->xmit_more r8152: return -EBUSY for runtime suspend ipv4: fix a potential use after free in fou.c ipv4: fix a potential use after free in ip_tunnel_core.c hyperv: Add handling of IP header with option field in netvsc_set_hash() openvswitch: Create right mask with disabled megaflows vxlan: fix a free after use openvswitch: fix a use after free ipv4: dst_entry leak in ip_send_unicast_reply() ipv4: clean up cookie_v4_check() ipv4: share tcp_v4_save_options() with cookie_v4_check() ipv4: call __ip_options_echo() in cookie_v4_check() atm: simplify lanai.c by using module_pci_driver ...
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--net/ipv4/tcp_output.c34
1 files changed, 25 insertions, 9 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index becd98ce9a1c..3af21296d967 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -839,26 +839,38 @@ void tcp_wfree(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
struct tcp_sock *tp = tcp_sk(sk);
+ int wmem;
+
+ /* Keep one reference on sk_wmem_alloc.
+ * Will be released by sk_free() from here or tcp_tasklet_func()
+ */
+ wmem = atomic_sub_return(skb->truesize - 1, &sk->sk_wmem_alloc);
+
+ /* If this softirq is serviced by ksoftirqd, we are likely under stress.
+ * Wait until our queues (qdisc + devices) are drained.
+ * This gives :
+ * - less callbacks to tcp_write_xmit(), reducing stress (batches)
+ * - chance for incoming ACK (processed by another cpu maybe)
+ * to migrate this flow (skb->ooo_okay will be eventually set)
+ */
+ if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current)
+ goto out;
if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) &&
!test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) {
unsigned long flags;
struct tsq_tasklet *tsq;
- /* Keep a ref on socket.
- * This last ref will be released in tcp_tasklet_func()
- */
- atomic_sub(skb->truesize - 1, &sk->sk_wmem_alloc);
-
/* queue this socket to tasklet queue */
local_irq_save(flags);
tsq = this_cpu_ptr(&tsq_tasklet);
list_add(&tp->tsq_node, &tsq->head);
tasklet_schedule(&tsq->tasklet);
local_irq_restore(flags);
- } else {
- sock_wfree(skb);
+ return;
}
+out:
+ sk_free(sk);
}
/* This routine actually transmits TCP packets queued in by
@@ -914,9 +926,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
tcp_ca_event(sk, CA_EVENT_TX_START);
/* if no packet is in qdisc/device queue, then allow XPS to select
- * another queue.
+ * another queue. We can be called from tcp_tsq_handler()
+ * which holds one reference to sk_wmem_alloc.
+ *
+ * TODO: Ideally, in-flight pure ACK packets should not matter here.
+ * One way to get this would be to set skb->truesize = 2 on them.
*/
- skb->ooo_okay = sk_wmem_alloc_get(sk) == 0;
+ skb->ooo_okay = sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1);
skb_push(skb, tcp_header_size);
skb_reset_transport_header(skb);