Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/af_inet.c                       |  52
-rw-r--r--  net/ipv4/devinet.c                       |   6
-rw-r--r--  net/ipv4/fib_frontend.c                  |   6
-rw-r--r--  net/ipv4/gre.c                           |   1
-rw-r--r--  net/ipv4/inet_diag.c                     |   2
-rw-r--r--  net/ipv4/inetpeer.c                      | 280
-rw-r--r--  net/ipv4/ip_fragment.c                   |   5
-rw-r--r--  net/ipv4/ip_input.c                      |   4
-rw-r--r--  net/ipv4/ip_output.c                     |   2
-rw-r--r--  net/ipv4/ipconfig.c                      |  75
-rw-r--r--  net/ipv4/ipmr.c                          |   3
-rw-r--r--  net/ipv4/netfilter/ipt_CLUSTERIP.c       |  26
-rw-r--r--  net/ipv4/netfilter/nf_defrag_ipv4.c      |   2
-rw-r--r--  net/ipv4/netfilter/nf_nat_snmp_basic.c   | 210
-rw-r--r--  net/ipv4/netfilter/nf_nat_standalone.c   |   2
-rw-r--r--  net/ipv4/raw.c                           |  36
-rw-r--r--  net/ipv4/route.c                         |  30
-rw-r--r--  net/ipv4/syncookies.c                    |   1
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c               |  14
-rw-r--r--  net/ipv4/tcp_input.c                     |  46
-rw-r--r--  net/ipv4/tcp_ipv4.c                      |  11
-rw-r--r--  net/ipv4/tcp_minisocks.c                 |   6
-rw-r--r--  net/ipv4/udp.c                           |   2
-rw-r--r--  net/ipv4/xfrm4_policy.c                  |   2
24 files changed, 344 insertions, 480 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index ef1528af7abf..1b745d412cf6 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1440,11 +1440,11 @@ EXPORT_SYMBOL_GPL(inet_ctl_sock_create); unsigned long snmp_fold_field(void __percpu *mib[], int offt) { unsigned long res = 0; - int i; + int i, j; for_each_possible_cpu(i) { - res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt); - res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt); + for (j = 0; j < SNMP_ARRAY_SZ; j++) + res += *(((unsigned long *) per_cpu_ptr(mib[j], i)) + offt); } return res; } @@ -1458,28 +1458,19 @@ u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset) int cpu; for_each_possible_cpu(cpu) { - void *bhptr, *userptr; + void *bhptr; struct u64_stats_sync *syncp; - u64 v_bh, v_user; + u64 v; unsigned int start; - /* first mib used by softirq context, we must use _bh() accessors */ - bhptr = per_cpu_ptr(SNMP_STAT_BHPTR(mib), cpu); + bhptr = per_cpu_ptr(mib[0], cpu); syncp = (struct u64_stats_sync *)(bhptr + syncp_offset); do { start = u64_stats_fetch_begin_bh(syncp); - v_bh = *(((u64 *) bhptr) + offt); + v = *(((u64 *) bhptr) + offt); } while (u64_stats_fetch_retry_bh(syncp, start)); - /* second mib used in USER context */ - userptr = per_cpu_ptr(SNMP_STAT_USRPTR(mib), cpu); - syncp = (struct u64_stats_sync *)(userptr + syncp_offset); - do { - start = u64_stats_fetch_begin(syncp); - v_user = *(((u64 *) userptr) + offt); - } while (u64_stats_fetch_retry(syncp, start)); - - res += v_bh + v_user; + res += v; } return res; } @@ -1491,25 +1482,28 @@ int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align) BUG_ON(ptr == NULL); ptr[0] = __alloc_percpu(mibsize, align); if (!ptr[0]) - goto err0; + return -ENOMEM; +#if SNMP_ARRAY_SZ == 2 ptr[1] = __alloc_percpu(mibsize, align); - if (!ptr[1]) - goto err1; + if (!ptr[1]) { + free_percpu(ptr[0]); + ptr[0] = NULL; + return -ENOMEM; + } +#endif return 0; -err1: - free_percpu(ptr[0]); - ptr[0] = NULL; -err0: - return -ENOMEM; } EXPORT_SYMBOL_GPL(snmp_mib_init); -void snmp_mib_free(void __percpu *ptr[2]) +void snmp_mib_free(void __percpu *ptr[SNMP_ARRAY_SZ]) { + int i; + BUG_ON(ptr == NULL); - free_percpu(ptr[0]); - free_percpu(ptr[1]); - ptr[0] = ptr[1] = NULL; + for (i = 0; i < SNMP_ARRAY_SZ; i++) { + free_percpu(ptr[i]); + ptr[i] = NULL; + } } EXPORT_SYMBOL_GPL(snmp_mib_free); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 0d4a184af16f..37b3c188d8b3 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1833,8 +1833,8 @@ void __init devinet_init(void) rtnl_af_register(&inet_af_ops); - rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL); - rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL); - rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr); + rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL); + rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL); + rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL); } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 22524716fe70..92fc5f69f5da 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1124,9 +1124,9 @@ static struct pernet_operations fib_net_ops = { void __init ip_fib_init(void) { - rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL); - rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL); - rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib); + rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL); + 
rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL); + rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL); register_pernet_subsys(&fib_net_ops); register_netdevice_notifier(&fib_netdev_notifier); diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index c6933f2ea310..9dbe10875fbd 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -16,7 +16,6 @@ #include <linux/skbuff.h> #include <linux/in.h> #include <linux/netdevice.h> -#include <linux/version.h> #include <linux/spinlock.h> #include <net/protocol.h> #include <net/gre.h> diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 3267d3898437..389a2e6a17fd 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -869,7 +869,7 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } return netlink_dump_start(idiagnl, skb, nlh, - inet_diag_dump, NULL); + inet_diag_dump, NULL, 0); } return inet_diag_get_exact(skb, nlh); diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index ce616d92cc54..dafbf2c98b28 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -54,15 +54,11 @@ * 1. Nodes may appear in the tree only with the pool lock held. * 2. Nodes may disappear from the tree only with the pool lock held * AND reference count being 0. - * 3. Nodes appears and disappears from unused node list only under - * "inet_peer_unused_lock". - * 4. Global variable peer_total is modified under the pool lock. - * 5. struct inet_peer fields modification: + * 3. Global variable peer_total is modified under the pool lock. + * 4. struct inet_peer fields modification: * avl_left, avl_right, avl_parent, avl_height: pool lock - * unused: unused node list lock * refcnt: atomically against modifications on other CPU; * usually under some other lock to prevent node disappearing - * dtime: unused node list lock * daddr: unchangeable * ip_id_count: atomic value (no lock needed) */ @@ -104,19 +100,6 @@ int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries m * aggressively at this stage */ int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */ -int inet_peer_gc_mintime __read_mostly = 10 * HZ; -int inet_peer_gc_maxtime __read_mostly = 120 * HZ; - -static struct { - struct list_head list; - spinlock_t lock; -} unused_peers = { - .list = LIST_HEAD_INIT(unused_peers.list), - .lock = __SPIN_LOCK_UNLOCKED(unused_peers.lock), -}; - -static void peer_check_expire(unsigned long dummy); -static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0); /* Called from ip_output.c:ip_init */ @@ -142,21 +125,6 @@ void __init inet_initpeers(void) 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); - /* All the timers, started at system startup tend - to synchronize. Perturb it a bit. - */ - peer_periodic_timer.expires = jiffies - + net_random() % inet_peer_gc_maxtime - + inet_peer_gc_maxtime; - add_timer(&peer_periodic_timer); -} - -/* Called with or without local BH being disabled. 
*/ -static void unlink_from_unused(struct inet_peer *p) -{ - spin_lock_bh(&unused_peers.lock); - list_del_init(&p->unused); - spin_unlock_bh(&unused_peers.lock); } static int addr_compare(const struct inetpeer_addr *a, @@ -203,20 +171,6 @@ static int addr_compare(const struct inetpeer_addr *a, u; \ }) -static bool atomic_add_unless_return(atomic_t *ptr, int a, int u, int *newv) -{ - int cur, old = atomic_read(ptr); - - while (old != u) { - *newv = old + a; - cur = atomic_cmpxchg(ptr, old, *newv); - if (cur == old) - return true; - old = cur; - } - return false; -} - /* * Called with rcu_read_lock() * Because we hold no lock against a writer, its quite possible we fall @@ -225,8 +179,7 @@ static bool atomic_add_unless_return(atomic_t *ptr, int a, int u, int *newv) * We exit from this function if number of links exceeds PEER_MAXDEPTH */ static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr, - struct inet_peer_base *base, - int *newrefcnt) + struct inet_peer_base *base) { struct inet_peer *u = rcu_dereference(base->root); int count = 0; @@ -235,11 +188,9 @@ static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr, int cmp = addr_compare(daddr, &u->daddr); if (cmp == 0) { /* Before taking a reference, check if this entry was - * deleted, unlink_from_pool() sets refcnt=-1 to make - * distinction between an unused entry (refcnt=0) and - * a freed one. + * deleted (refcnt=-1) */ - if (!atomic_add_unless_return(&u->refcnt, 1, -1, newrefcnt)) + if (!atomic_add_unless(&u->refcnt, 1, -1)) u = NULL; return u; } @@ -366,137 +317,96 @@ static void inetpeer_free_rcu(struct rcu_head *head) kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu)); } -/* May be called with local BH enabled. */ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base, struct inet_peer __rcu **stack[PEER_MAXDEPTH]) { - int do_free; - - do_free = 0; - - write_seqlock_bh(&base->lock); - /* Check the reference counter. It was artificially incremented by 1 - * in cleanup() function to prevent sudden disappearing. If we can - * atomically (because of lockless readers) take this last reference, - * it's safe to remove the node and free it later. - * We use refcnt=-1 to alert lockless readers this entry is deleted. - */ - if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) { - struct inet_peer __rcu ***stackptr, ***delp; - if (lookup(&p->daddr, stack, base) != p) - BUG(); - delp = stackptr - 1; /* *delp[0] == p */ - if (p->avl_left == peer_avl_empty_rcu) { - *delp[0] = p->avl_right; - --stackptr; - } else { - /* look for a node to insert instead of p */ - struct inet_peer *t; - t = lookup_rightempty(p, base); - BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t); - **--stackptr = t->avl_left; - /* t is removed, t->daddr > x->daddr for any - * x in p->avl_left subtree. - * Put t in the old place of p. 
*/ - RCU_INIT_POINTER(*delp[0], t); - t->avl_left = p->avl_left; - t->avl_right = p->avl_right; - t->avl_height = p->avl_height; - BUG_ON(delp[1] != &p->avl_left); - delp[1] = &t->avl_left; /* was &p->avl_left */ - } - peer_avl_rebalance(stack, stackptr, base); - base->total--; - do_free = 1; + struct inet_peer __rcu ***stackptr, ***delp; + + if (lookup(&p->daddr, stack, base) != p) + BUG(); + delp = stackptr - 1; /* *delp[0] == p */ + if (p->avl_left == peer_avl_empty_rcu) { + *delp[0] = p->avl_right; + --stackptr; + } else { + /* look for a node to insert instead of p */ + struct inet_peer *t; + t = lookup_rightempty(p, base); + BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t); + **--stackptr = t->avl_left; + /* t is removed, t->daddr > x->daddr for any + * x in p->avl_left subtree. + * Put t in the old place of p. */ + RCU_INIT_POINTER(*delp[0], t); + t->avl_left = p->avl_left; + t->avl_right = p->avl_right; + t->avl_height = p->avl_height; + BUG_ON(delp[1] != &p->avl_left); + delp[1] = &t->avl_left; /* was &p->avl_left */ } - write_sequnlock_bh(&base->lock); - - if (do_free) - call_rcu(&p->rcu, inetpeer_free_rcu); - else - /* The node is used again. Decrease the reference counter - * back. The loop "cleanup -> unlink_from_unused - * -> unlink_from_pool -> putpeer -> link_to_unused - * -> cleanup (for the same node)" - * doesn't really exist because the entry will have a - * recent deletion time and will not be cleaned again soon. - */ - inet_putpeer(p); + peer_avl_rebalance(stack, stackptr, base); + base->total--; + call_rcu(&p->rcu, inetpeer_free_rcu); } static struct inet_peer_base *family_to_base(int family) { - return (family == AF_INET ? &v4_peers : &v6_peers); + return family == AF_INET ? &v4_peers : &v6_peers; } -static struct inet_peer_base *peer_to_base(struct inet_peer *p) +/* perform garbage collect on all items stacked during a lookup */ +static int inet_peer_gc(struct inet_peer_base *base, + struct inet_peer __rcu **stack[PEER_MAXDEPTH], + struct inet_peer __rcu ***stackptr) { - return family_to_base(p->daddr.family); -} - -/* May be called with local BH enabled. */ -static int cleanup_once(unsigned long ttl, struct inet_peer __rcu **stack[PEER_MAXDEPTH]) -{ - struct inet_peer *p = NULL; - - /* Remove the first entry from the list of unused nodes. */ - spin_lock_bh(&unused_peers.lock); - if (!list_empty(&unused_peers.list)) { - __u32 delta; + struct inet_peer *p, *gchead = NULL; + __u32 delta, ttl; + int cnt = 0; - p = list_first_entry(&unused_peers.list, struct inet_peer, unused); + if (base->total >= inet_peer_threshold) + ttl = 0; /* be aggressive */ + else + ttl = inet_peer_maxttl + - (inet_peer_maxttl - inet_peer_minttl) / HZ * + base->total / inet_peer_threshold * HZ; + stackptr--; /* last stack slot is peer_avl_empty */ + while (stackptr > stack) { + stackptr--; + p = rcu_deref_locked(**stackptr, base); delta = (__u32)jiffies - p->dtime; - - if (delta < ttl) { - /* Do not prune fresh entries. */ - spin_unlock_bh(&unused_peers.lock); - return -1; + if (atomic_read(&p->refcnt) == 0 && delta >= ttl && + atomic_cmpxchg(&p->refcnt, 0, -1) == 0) { + p->gc_next = gchead; + gchead = p; } - - list_del_init(&p->unused); - - /* Grab an extra reference to prevent node disappearing - * before unlink_from_pool() call. */ - atomic_inc(&p->refcnt); } - spin_unlock_bh(&unused_peers.lock); - - if (p == NULL) - /* It means that the total number of USED entries has - * grown over inet_peer_threshold. It shouldn't really - * happen because of entry limits in route cache. 
*/ - return -1; - - unlink_from_pool(p, peer_to_base(p), stack); - return 0; + while ((p = gchead) != NULL) { + gchead = p->gc_next; + cnt++; + unlink_from_pool(p, base, stack); + } + return cnt; } -/* Called with or without local BH being disabled. */ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) { struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; struct inet_peer_base *base = family_to_base(daddr->family); struct inet_peer *p; unsigned int sequence; - int invalidated, newrefcnt = 0; + int invalidated, gccnt = 0; - /* Look up for the address quickly, lockless. + /* Attempt a lockless lookup first. * Because of a concurrent writer, we might not find an existing entry. */ rcu_read_lock(); sequence = read_seqbegin(&base->lock); - p = lookup_rcu(daddr, base, &newrefcnt); + p = lookup_rcu(daddr, base); invalidated = read_seqretry(&base->lock, sequence); rcu_read_unlock(); - if (p) { -found: /* The existing node has been found. - * Remove the entry from unused list if it was there. - */ - if (newrefcnt == 1) - unlink_from_unused(p); + if (p) return p; - } /* If no writer did a change during our lookup, we can return early. */ if (!create && !invalidated) @@ -506,11 +416,17 @@ found: /* The existing node has been found. * At least, nodes should be hot in our cache. */ write_seqlock_bh(&base->lock); +relookup: p = lookup(daddr, stack, base); if (p != peer_avl_empty) { - newrefcnt = atomic_inc_return(&p->refcnt); + atomic_inc(&p->refcnt); write_sequnlock_bh(&base->lock); - goto found; + return p; + } + if (!gccnt) { + gccnt = inet_peer_gc(base, stack, stackptr); + if (gccnt && create) + goto relookup; } p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL; if (p) { @@ -525,7 +441,6 @@ found: /* The existing node has been found. p->pmtu_expires = 0; p->pmtu_orig = 0; memset(&p->redirect_learned, 0, sizeof(p->redirect_learned)); - INIT_LIST_HEAD(&p->unused); /* Link the node. */ @@ -534,63 +449,14 @@ found: /* The existing node has been found. } write_sequnlock_bh(&base->lock); - if (base->total >= inet_peer_threshold) - /* Remove one less-recently-used entry. */ - cleanup_once(0, stack); - return p; } - -static int compute_total(void) -{ - return v4_peers.total + v6_peers.total; -} EXPORT_SYMBOL_GPL(inet_getpeer); -/* Called with local BH disabled. */ -static void peer_check_expire(unsigned long dummy) -{ - unsigned long now = jiffies; - int ttl, total; - struct inet_peer __rcu **stack[PEER_MAXDEPTH]; - - total = compute_total(); - if (total >= inet_peer_threshold) - ttl = inet_peer_minttl; - else - ttl = inet_peer_maxttl - - (inet_peer_maxttl - inet_peer_minttl) / HZ * - total / inet_peer_threshold * HZ; - while (!cleanup_once(ttl, stack)) { - if (jiffies != now) - break; - } - - /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime - * interval depending on the total number of entries (more entries, - * less interval). 
*/ - total = compute_total(); - if (total >= inet_peer_threshold) - peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime; - else - peer_periodic_timer.expires = jiffies - + inet_peer_gc_maxtime - - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ * - total / inet_peer_threshold * HZ; - add_timer(&peer_periodic_timer); -} - void inet_putpeer(struct inet_peer *p) { - local_bh_disable(); - - if (atomic_dec_and_lock(&p->refcnt, &unused_peers.lock)) { - list_add_tail(&p->unused, &unused_peers.list); - p->dtime = (__u32)jiffies; - spin_unlock(&unused_peers.lock); - } - - local_bh_enable(); + p->dtime = (__u32)jiffies; + atomic_dec(&p->refcnt); } EXPORT_SYMBOL_GPL(inet_putpeer); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 0ad6035f6366..0e0ab98abc6f 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -261,8 +261,9 @@ static void ip_expire(unsigned long arg) * Only an end host needs to send an ICMP * "Fragment Reassembly Timeout" message, per RFC792. */ - if (qp->user == IP_DEFRAG_CONNTRACK_IN && - skb_rtable(head)->rt_type != RTN_LOCAL) + if (qp->user == IP_DEFRAG_AF_PACKET || + (qp->user == IP_DEFRAG_CONNTRACK_IN && + skb_rtable(head)->rt_type != RTN_LOCAL)) goto out_rcu_unlock; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index c8f48efc5fd3..073a9b01c40c 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -165,7 +165,7 @@ int ip_call_ra_chain(struct sk_buff *skb) (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dev->ifindex) && net_eq(sock_net(sk), dev_net(dev))) { - if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { + if (ip_is_fragment(ip_hdr(skb))) { if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) return 1; } @@ -256,7 +256,7 @@ int ip_local_deliver(struct sk_buff *skb) * Reassemble IP fragments. 
*/ - if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { + if (ip_is_fragment(ip_hdr(skb))) { if (ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER)) return 0; } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 84f26e8e6c60..54119d5aae8f 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -489,7 +489,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) if (first_len - hlen > mtu || ((first_len - hlen) & 7) || - (iph->frag_off & htons(IP_MF|IP_OFFSET)) || + ip_is_fragment(iph) || skb_cloned(skb)) goto slow_path; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index ab7e5542c1cf..472a8c4f1dc0 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -861,41 +861,44 @@ static void __init ic_do_bootp_ext(u8 *ext) #endif switch (*ext++) { - case 1: /* Subnet mask */ - if (ic_netmask == NONE) - memcpy(&ic_netmask, ext+1, 4); - break; - case 3: /* Default gateway */ - if (ic_gateway == NONE) - memcpy(&ic_gateway, ext+1, 4); - break; - case 6: /* DNS server */ - servers= *ext/4; - if (servers > CONF_NAMESERVERS_MAX) - servers = CONF_NAMESERVERS_MAX; - for (i = 0; i < servers; i++) { - if (ic_nameservers[i] == NONE) - memcpy(&ic_nameservers[i], ext+1+4*i, 4); - } - break; - case 12: /* Host name */ - ic_bootp_string(utsname()->nodename, ext+1, *ext, __NEW_UTS_LEN); - ic_host_name_set = 1; - break; - case 15: /* Domain name (DNS) */ - ic_bootp_string(ic_domain, ext+1, *ext, sizeof(ic_domain)); - break; - case 17: /* Root path */ - if (!root_server_path[0]) - ic_bootp_string(root_server_path, ext+1, *ext, sizeof(root_server_path)); - break; - case 26: /* Interface MTU */ - memcpy(&mtu, ext+1, sizeof(mtu)); - ic_dev_mtu = ntohs(mtu); - break; - case 40: /* NIS Domain name (_not_ DNS) */ - ic_bootp_string(utsname()->domainname, ext+1, *ext, __NEW_UTS_LEN); - break; + case 1: /* Subnet mask */ + if (ic_netmask == NONE) + memcpy(&ic_netmask, ext+1, 4); + break; + case 3: /* Default gateway */ + if (ic_gateway == NONE) + memcpy(&ic_gateway, ext+1, 4); + break; + case 6: /* DNS server */ + servers= *ext/4; + if (servers > CONF_NAMESERVERS_MAX) + servers = CONF_NAMESERVERS_MAX; + for (i = 0; i < servers; i++) { + if (ic_nameservers[i] == NONE) + memcpy(&ic_nameservers[i], ext+1+4*i, 4); + } + break; + case 12: /* Host name */ + ic_bootp_string(utsname()->nodename, ext+1, *ext, + __NEW_UTS_LEN); + ic_host_name_set = 1; + break; + case 15: /* Domain name (DNS) */ + ic_bootp_string(ic_domain, ext+1, *ext, sizeof(ic_domain)); + break; + case 17: /* Root path */ + if (!root_server_path[0]) + ic_bootp_string(root_server_path, ext+1, *ext, + sizeof(root_server_path)); + break; + case 26: /* Interface MTU */ + memcpy(&mtu, ext+1, sizeof(mtu)); + ic_dev_mtu = ntohs(mtu); + break; + case 40: /* NIS Domain name (_not_ DNS) */ + ic_bootp_string(utsname()->domainname, ext+1, *ext, + __NEW_UTS_LEN); + break; } } @@ -932,7 +935,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str goto drop; /* Fragments are not supported */ - if (h->frag_off & htons(IP_OFFSET | IP_MF)) { + if (ip_is_fragment(h)) { if (net_ratelimit()) printk(KERN_ERR "DHCP/BOOTP: Ignoring fragmented " "reply.\n"); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 30a7763c400e..aae2bd8cd924 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2544,7 +2544,8 @@ int __init ip_mr_init(void) goto add_proto_fail; } #endif - rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute); + rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, + NULL, ipmr_rtm_dumproute, 
NULL); return 0; #ifdef CONFIG_IP_PIMSM_V2 diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 5c9e97c79017..db8d22db425f 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -317,19 +317,19 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) hash = clusterip_hashfn(skb, cipinfo->config); switch (ctinfo) { - case IP_CT_NEW: - ct->mark = hash; - break; - case IP_CT_RELATED: - case IP_CT_RELATED_REPLY: - /* FIXME: we don't handle expectations at the - * moment. they can arrive on a different node than - * the master connection (e.g. FTP passive mode) */ - case IP_CT_ESTABLISHED: - case IP_CT_ESTABLISHED_REPLY: - break; - default: - break; + case IP_CT_NEW: + ct->mark = hash; + break; + case IP_CT_RELATED: + case IP_CT_RELATED_REPLY: + /* FIXME: we don't handle expectations at the moment. + * They can arrive on a different node than + * the master connection (e.g. FTP passive mode) */ + case IP_CT_ESTABLISHED: + case IP_CT_ESTABLISHED_REPLY: + break; + default: /* Prevent gcc warnings */ + break; } #ifdef DEBUG diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index f3a9b42b16c6..9bb1b8a37a22 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -82,7 +82,7 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, #endif #endif /* Gather fragments. */ - if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { + if (ip_is_fragment(ip_hdr(skb))) { enum ip_defrag_users user = nf_ct_defrag_user(hooknum, skb); if (nf_ct_ipv4_gather_frags(skb, user)) return NF_STOLEN; diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 8812a02078ab..076b7c8c4aa4 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -719,117 +719,115 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx, l = 0; switch (type) { - case SNMP_INTEGER: - len = sizeof(long); - if (!asn1_long_decode(ctx, end, &l)) { - kfree(id); - return 0; - } - *obj = kmalloc(sizeof(struct snmp_object) + len, - GFP_ATOMIC); - if (*obj == NULL) { - kfree(id); - if (net_ratelimit()) - pr_notice("OOM in bsalg (%d)\n", __LINE__); - return 0; - } - (*obj)->syntax.l[0] = l; - break; - case SNMP_OCTETSTR: - case SNMP_OPAQUE: - if (!asn1_octets_decode(ctx, end, &p, &len)) { - kfree(id); - return 0; - } - *obj = kmalloc(sizeof(struct snmp_object) + len, - GFP_ATOMIC); - if (*obj == NULL) { - kfree(p); - kfree(id); - if (net_ratelimit()) - pr_notice("OOM in bsalg (%d)\n", __LINE__); - return 0; - } - memcpy((*obj)->syntax.c, p, len); + case SNMP_INTEGER: + len = sizeof(long); + if (!asn1_long_decode(ctx, end, &l)) { + kfree(id); + return 0; + } + *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); + if (*obj == NULL) { + kfree(id); + if (net_ratelimit()) + pr_notice("OOM in bsalg (%d)\n", __LINE__); + return 0; + } + (*obj)->syntax.l[0] = l; + break; + case SNMP_OCTETSTR: + case SNMP_OPAQUE: + if (!asn1_octets_decode(ctx, end, &p, &len)) { + kfree(id); + return 0; + } + *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); + if (*obj == NULL) { kfree(p); - break; - case SNMP_NULL: - case SNMP_NOSUCHOBJECT: - case SNMP_NOSUCHINSTANCE: - case SNMP_ENDOFMIBVIEW: - len = 0; - *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC); - if (*obj == NULL) { - kfree(id); - if (net_ratelimit()) - pr_notice("OOM in bsalg (%d)\n", __LINE__); - return 0; - } - if 
(!asn1_null_decode(ctx, end)) { - kfree(id); - kfree(*obj); - *obj = NULL; - return 0; - } - break; - case SNMP_OBJECTID: - if (!asn1_oid_decode(ctx, end, (unsigned long **)&lp, &len)) { - kfree(id); - return 0; - } - len *= sizeof(unsigned long); - *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); - if (*obj == NULL) { - kfree(lp); - kfree(id); - if (net_ratelimit()) - pr_notice("OOM in bsalg (%d)\n", __LINE__); - return 0; - } - memcpy((*obj)->syntax.ul, lp, len); + kfree(id); + if (net_ratelimit()) + pr_notice("OOM in bsalg (%d)\n", __LINE__); + return 0; + } + memcpy((*obj)->syntax.c, p, len); + kfree(p); + break; + case SNMP_NULL: + case SNMP_NOSUCHOBJECT: + case SNMP_NOSUCHINSTANCE: + case SNMP_ENDOFMIBVIEW: + len = 0; + *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC); + if (*obj == NULL) { + kfree(id); + if (net_ratelimit()) + pr_notice("OOM in bsalg (%d)\n", __LINE__); + return 0; + } + if (!asn1_null_decode(ctx, end)) { + kfree(id); + kfree(*obj); + *obj = NULL; + return 0; + } + break; + case SNMP_OBJECTID: + if (!asn1_oid_decode(ctx, end, (unsigned long **)&lp, &len)) { + kfree(id); + return 0; + } + len *= sizeof(unsigned long); + *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); + if (*obj == NULL) { kfree(lp); - break; - case SNMP_IPADDR: - if (!asn1_octets_decode(ctx, end, &p, &len)) { - kfree(id); - return 0; - } - if (len != 4) { - kfree(p); - kfree(id); - return 0; - } - *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); - if (*obj == NULL) { - kfree(p); - kfree(id); - if (net_ratelimit()) - pr_notice("OOM in bsalg (%d)\n", __LINE__); - return 0; - } - memcpy((*obj)->syntax.uc, p, len); + kfree(id); + if (net_ratelimit()) + pr_notice("OOM in bsalg (%d)\n", __LINE__); + return 0; + } + memcpy((*obj)->syntax.ul, lp, len); + kfree(lp); + break; + case SNMP_IPADDR: + if (!asn1_octets_decode(ctx, end, &p, &len)) { + kfree(id); + return 0; + } + if (len != 4) { kfree(p); - break; - case SNMP_COUNTER: - case SNMP_GAUGE: - case SNMP_TIMETICKS: - len = sizeof(unsigned long); - if (!asn1_ulong_decode(ctx, end, &ul)) { - kfree(id); - return 0; - } - *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); - if (*obj == NULL) { - kfree(id); - if (net_ratelimit()) - pr_notice("OOM in bsalg (%d)\n", __LINE__); - return 0; - } - (*obj)->syntax.ul[0] = ul; - break; - default: kfree(id); return 0; + } + *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); + if (*obj == NULL) { + kfree(p); + kfree(id); + if (net_ratelimit()) + pr_notice("OOM in bsalg (%d)\n", __LINE__); + return 0; + } + memcpy((*obj)->syntax.uc, p, len); + kfree(p); + break; + case SNMP_COUNTER: + case SNMP_GAUGE: + case SNMP_TIMETICKS: + len = sizeof(unsigned long); + if (!asn1_ulong_decode(ctx, end, &ul)) { + kfree(id); + return 0; + } + *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); + if (*obj == NULL) { + kfree(id); + if (net_ratelimit()) + pr_notice("OOM in bsalg (%d)\n", __LINE__); + return 0; + } + (*obj)->syntax.ul[0] = ul; + break; + default: + kfree(id); + return 0; } (*obj)->syntax_len = len; diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 483b76d042da..a6e606e84820 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -88,7 +88,7 @@ nf_nat_fn(unsigned int hooknum, /* We never see fragments: conntrack defrags on pre-routing and local-out, and nf_nat_out protects post-routing. 
*/ - NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))); + NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb))); ct = nf_ct_get(skb, &ctinfo); /* Can't track? It's not due to stress, or conntrack would diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index c9893d43242e..08526786dc39 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -825,28 +825,28 @@ static int compat_raw_getsockopt(struct sock *sk, int level, int optname, static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg) { switch (cmd) { - case SIOCOUTQ: { - int amount = sk_wmem_alloc_get(sk); + case SIOCOUTQ: { + int amount = sk_wmem_alloc_get(sk); - return put_user(amount, (int __user *)arg); - } - case SIOCINQ: { - struct sk_buff *skb; - int amount = 0; - - spin_lock_bh(&sk->sk_receive_queue.lock); - skb = skb_peek(&sk->sk_receive_queue); - if (skb != NULL) - amount = skb->len; - spin_unlock_bh(&sk->sk_receive_queue.lock); - return put_user(amount, (int __user *)arg); - } + return put_user(amount, (int __user *)arg); + } + case SIOCINQ: { + struct sk_buff *skb; + int amount = 0; + + spin_lock_bh(&sk->sk_receive_queue.lock); + skb = skb_peek(&sk->sk_receive_queue); + if (skb != NULL) + amount = skb->len; + spin_unlock_bh(&sk->sk_receive_queue.lock); + return put_user(amount, (int __user *)arg); + } - default: + default: #ifdef CONFIG_IP_MROUTE - return ipmr_ioctl(sk, cmd, (void __user *)arg); + return ipmr_ioctl(sk, cmd, (void __user *)arg); #else - return -ENOIOCTLCMD; + return -ENOIOCTLCMD; #endif } } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index aa13ef105110..a8ccd9bca09c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1439,20 +1439,20 @@ static int ip_error(struct sk_buff *skb) int code; switch (rt->dst.error) { - case EINVAL: - default: - goto out; - case EHOSTUNREACH: - code = ICMP_HOST_UNREACH; - break; - case ENETUNREACH: - code = ICMP_NET_UNREACH; - IP_INC_STATS_BH(dev_net(rt->dst.dev), - IPSTATS_MIB_INNOROUTES); - break; - case EACCES: - code = ICMP_PKT_FILTERED; - break; + case EINVAL: + default: + goto out; + case EHOSTUNREACH: + code = ICMP_HOST_UNREACH; + break; + case ENETUNREACH: + code = ICMP_NET_UNREACH; + IP_INC_STATS_BH(dev_net(rt->dst.dev), + IPSTATS_MIB_INNOROUTES); + break; + case EACCES: + code = ICMP_PKT_FILTERED; + break; } if (!rt->peer) @@ -3303,7 +3303,7 @@ int __init ip_rt_init(void) xfrm_init(); xfrm4_init(ip_rt_max_size); #endif - rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL); + rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL); #ifdef CONFIG_SYSCTL register_pernet_subsys(&sysctl_route_ops); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 26461492a847..92bb9434b338 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -316,6 +316,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, ireq->wscale_ok = tcp_opt.wscale_ok; ireq->tstamp_ok = tcp_opt.saw_tstamp; req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; + treq->snt_synack = tcp_opt.saw_tstamp ? 
tcp_opt.rcv_tsecr : 0; /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 57d0752e239a..69fd7201129a 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -398,20 +398,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec_jiffies, }, { - .procname = "inet_peer_gc_mintime", - .data = &inet_peer_gc_mintime, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "inet_peer_gc_maxtime", - .data = &inet_peer_gc_maxtime, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { .procname = "tcp_orphan_retries", .data = &sysctl_tcp_orphan_retries, .maxlen = sizeof(int), diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bef9f04c22ba..ea0d2183df4b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -880,6 +880,11 @@ static void tcp_init_metrics(struct sock *sk) tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH); if (tp->snd_ssthresh > tp->snd_cwnd_clamp) tp->snd_ssthresh = tp->snd_cwnd_clamp; + } else { + /* ssthresh may have been reduced unnecessarily during. + * 3WHS. Restore it back to its initial default. + */ + tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; } if (dst_metric(dst, RTAX_REORDERING) && tp->reordering != dst_metric(dst, RTAX_REORDERING)) { @@ -887,10 +892,7 @@ static void tcp_init_metrics(struct sock *sk) tp->reordering = dst_metric(dst, RTAX_REORDERING); } - if (dst_metric(dst, RTAX_RTT) == 0) - goto reset; - - if (!tp->srtt && dst_metric_rtt(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3)) + if (dst_metric(dst, RTAX_RTT) == 0 || tp->srtt == 0) goto reset; /* Initial rtt is determined from SYN,SYN-ACK. @@ -916,19 +918,26 @@ static void tcp_init_metrics(struct sock *sk) tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); } tcp_set_rto(sk); - if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) { reset: - /* Play conservative. If timestamps are not - * supported, TCP will fail to recalculate correct - * rtt, if initial rto is too small. FORGET ALL AND RESET! + if (tp->srtt == 0) { + /* RFC2988bis: We've failed to get a valid RTT sample from + * 3WHS. This is most likely due to retransmission, + * including spurious one. Reset the RTO back to 3secs + * from the more aggressive 1sec to avoid more spurious + * retransmission. */ - if (!tp->rx_opt.saw_tstamp && tp->srtt) { - tp->srtt = 0; - tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT; - inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; - } + tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK; + inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK; } - tp->snd_cwnd = tcp_init_cwnd(tp, dst); + /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been + * retransmitted. In light of RFC2988bis' more aggressive 1sec + * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK + * retransmission has occurred. 
+ */ + if (tp->total_retrans > 1) + tp->snd_cwnd = 1; + else + tp->snd_cwnd = tcp_init_cwnd(tp, dst); tp->snd_cwnd_stamp = tcp_time_stamp; } @@ -3112,12 +3121,13 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) tcp_xmit_retransmit_queue(sk); } -static void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt) +void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt) { tcp_rtt_estimator(sk, seq_rtt); tcp_set_rto(sk); inet_csk(sk)->icsk_backoff = 0; } +EXPORT_SYMBOL(tcp_valid_rtt_meas); /* Read draft-ietf-tcplw-high-performance before mucking * with this code. (Supersedes RFC1323) @@ -5806,12 +5816,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, tp->rx_opt.snd_wscale; tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); - /* tcp_ack considers this ACK as duplicate - * and does not calculate rtt. - * Force it here. - */ - tcp_ack_update_rtt(sk, 0, 0); - if (tp->rx_opt.tstamp_ok) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 708dc203b034..955b8e65b69e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -429,8 +429,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) break; icsk->icsk_backoff--; - inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) << - icsk->icsk_backoff; + inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) : + TCP_TIMEOUT_INIT) << icsk->icsk_backoff; tcp_bound_rto(sk); skb = tcp_write_queue_head(sk); @@ -1384,6 +1384,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) isn = tcp_v4_init_sequence(skb); } tcp_rsk(req)->snt_isn = isn; + tcp_rsk(req)->snt_synack = tcp_time_stamp; if (tcp_v4_send_synack(sk, dst, req, (struct request_values *)&tmp_ext) || @@ -1458,6 +1459,10 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; tcp_initialize_rcv_mss(newsk); + if (tcp_rsk(req)->snt_synack) + tcp_valid_rtt_meas(newsk, + tcp_time_stamp - tcp_rsk(req)->snt_synack); + newtp->total_retrans = req->retrans; #ifdef CONFIG_TCP_MD5SIG /* Copy over the MD5 key from the original socket */ @@ -1855,7 +1860,7 @@ static int tcp_v4_init_sock(struct sock *sk) * algorithms that we must have the following bandaid to talk * efficiently to them. -DaveM */ - tp->snd_cwnd = 2; + tp->snd_cwnd = TCP_INIT_CWND; /* See draft-stevens-tcpca-spec-01 for discussion of the * initialization of these values. diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 80b1f80759ab..d2fe4e06b472 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -486,7 +486,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, * algorithms that we must have the following bandaid to talk * efficiently to them. -DaveM */ - newtp->snd_cwnd = 2; + newtp->snd_cwnd = TCP_INIT_CWND; newtp->snd_cwnd_cnt = 0; newtp->bytes_acked = 0; @@ -720,6 +720,10 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); return NULL; } + if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr) + tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr; + else if (req->retrans) /* don't take RTT sample if retrans && ~TS */ + tcp_rsk(req)->snt_synack = 0; /* OK, ACK is valid, create big socket and * feed this segment to it. 
It will repeat all diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 48cd88e62553..95e0d3b8977f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -105,6 +105,7 @@ #include <net/route.h> #include <net/checksum.h> #include <net/xfrm.h> +#include <trace/events/udp.h> #include "udp_impl.h" struct udp_table udp_table __read_mostly; @@ -1366,6 +1367,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) is_udplite); UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); kfree_skb(skb); + trace_udp_fail_queue_rcv_skb(rc, sk); return -1; } diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 981e43eaf704..fc5368ad2b0d 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -117,7 +117,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) memset(fl4, 0, sizeof(struct flowi4)); fl4->flowi4_mark = skb->mark; - if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { + if (!ip_is_fragment(iph)) { switch (iph->protocol) { case IPPROTO_UDP: case IPPROTO_UDPLITE: |
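
Notes on the changes above (illustrative sketches, not kernel source):

The af_inet.c hunk folds the old two-pointer MIB scheme (separate softirq and user blocks) into an SNMP_ARRAY_SZ-sized array that snmp_fold_field() walks uniformly for every possible CPU. A minimal userspace model of that fold follows; all sizes, array shapes and the field index are invented for the illustration.

#include <stdio.h>

#define NR_CPUS       4
#define SNMP_ARRAY_SZ 2
#define MIB_ENTRIES   8

/* mib[j][cpu][field] stands in for per_cpu_ptr(mib[j], cpu) + offt */
static unsigned long mib[SNMP_ARRAY_SZ][NR_CPUS][MIB_ENTRIES];

static unsigned long snmp_fold_field(int field)
{
	unsigned long res = 0;
	int cpu, j;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		for (j = 0; j < SNMP_ARRAY_SZ; j++)
			res += mib[j][cpu][field];
	return res;
}

int main(void)
{
	mib[0][1][3] = 10;	/* e.g. softirq-context increments on CPU 1 */
	mib[1][2][3] = 5;	/* e.g. process-context increments on CPU 2 */
	printf("field 3 total = %lu\n", snmp_fold_field(3));
	return 0;
}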
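The inetpeer.c rewrite drops the unused-peer list and the periodic peer_check_expire() timer; reclaim now happens inline in inet_getpeer(), where inet_peer_gc() walks the nodes stacked during the lookup and frees any unreferenced entry older than a TTL that shrinks as the pool fills. A standalone sketch of that TTL scaling, assuming HZ=1000 and the default tunables visible in the diff:

#include <stdio.h>

#define HZ 1000	/* assumed tick rate for the illustration */

static const unsigned int inet_peer_threshold = 65536 + 128;
static const unsigned int inet_peer_minttl    = 120 * HZ;	/* 120 sec */
static const unsigned int inet_peer_maxttl    = 10 * 60 * HZ;	/* 10 min  */

static unsigned int peer_gc_ttl(unsigned int total)
{
	if (total >= inet_peer_threshold)
		return 0;	/* pool is full: reclaim any unreferenced entry */
	/* Divide by HZ before multiplying by total, as the kernel expression
	 * does, so the intermediate product stays within 32 bits. */
	return inet_peer_maxttl -
	       (inet_peer_maxttl - inet_peer_minttl) / HZ *
	       total / inet_peer_threshold * HZ;
}

int main(void)
{
	unsigned int totals[] = { 0, 16384, 32768, 49152, 65536 + 128 };
	unsigned int i;

	for (i = 0; i < sizeof(totals) / sizeof(totals[0]); i++)
		printf("total=%6u -> ttl=%u jiffies\n",
		       totals[i], peer_gc_ttl(totals[i]));
	return 0;
}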
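Several hunks (ip_input.c, ip_output.c, ipconfig.c, nf_defrag_ipv4.c, nf_nat_standalone.c, xfrm4_policy.c) replace the open-coded frag_off test with an ip_is_fragment() helper: a packet belongs to a fragment train when either the More-Fragments flag is set or the fragment offset is non-zero. A self-contained userspace sketch of that check; the struct here is a stub, not the kernel's struct iphdr.

#include <arpa/inet.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define IP_OFFSET 0x1FFF	/* fragment offset mask (host order) */
#define IP_MF     0x2000	/* "more fragments" flag */

struct ip_hdr_stub {
	uint16_t frag_off;	/* network byte order, as on the wire */
};

static bool ip_is_fragment(const struct ip_hdr_stub *iph)
{
	return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0;
}

int main(void)
{
	struct ip_hdr_stub first = { .frag_off = htons(IP_MF) };	/* first fragment */
	struct ip_hdr_stub last  = { .frag_off = htons(185) };		/* offset only    */
	struct ip_hdr_stub whole = { .frag_off = htons(0x4000) };	/* DF, no fragment */

	printf("first: %d, last: %d, whole: %d\n",
	       ip_is_fragment(&first), ip_is_fragment(&last),
	       ip_is_fragment(&whole));
	return 0;
}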
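The tcp_ipv4.c, tcp_minisocks.c and syncookies.c hunks record when the SYN-ACK was sent (snt_synack), or recover it from the echoed timestamp, so the handshake itself yields an RTT sample for the child socket instead of a forced fake one. A rough userspace sketch of the idea, using the textbook RFC 6298 smoothing rather than the kernel's fixed-point tcp_rtt_estimator(); the timestamps are hypothetical.

#include <stdio.h>

struct rtt_est {
	double srtt;	/* smoothed RTT, seconds */
	double rttvar;	/* RTT variation, seconds */
	int    valid;
};

static void rtt_sample(struct rtt_est *e, double r)
{
	if (!e->valid) {
		e->srtt   = r;
		e->rttvar = r / 2.0;
		e->valid  = 1;
	} else {
		double err = e->srtt - r;
		e->rttvar = 0.75 * e->rttvar + 0.25 * (err < 0 ? -err : err);
		e->srtt   = 0.875 * e->srtt + 0.125 * r;
	}
}

int main(void)
{
	struct rtt_est e = { 0 };
	double synack_sent = 10.000, ack_received = 10.083;

	rtt_sample(&e, ack_received - synack_sent);	/* handshake sample */
	rtt_sample(&e, 0.095);				/* later data sample */
	printf("srtt=%.3fs rttvar=%.3fs rto>=%.3fs\n",
	       e.srtt, e.rttvar, e.srtt + 4.0 * e.rttvar);
	return 0;
}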