diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/Kconfig | 8 | ||||
-rw-r--r-- | net/ipv4/cipso_ipv4.c | 24 | ||||
-rw-r--r-- | net/ipv4/gre_offload.c | 2 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 10 | ||||
-rw-r--r-- | net/ipv4/inet_hashtables.c | 5 | ||||
-rw-r--r-- | net/ipv4/ipip.c | 59 | ||||
-rw-r--r-- | net/ipv4/netfilter/ipt_SYNPROXY.c | 3 | ||||
-rw-r--r-- | net/ipv4/route.c | 2 | ||||
-rw-r--r-- | net/ipv4/syncookies.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 22 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 10 | ||||
-rw-r--r-- | net/ipv4/udp.c | 23 | ||||
-rw-r--r-- | net/ipv4/udp_offload.c | 2 |
14 files changed, 93 insertions, 81 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 91a2557942fa..f48fe6fc7e8c 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -70,11 +70,9 @@ config IP_MULTIPLE_TABLES address into account. Furthermore, the TOS (Type-Of-Service) field of the packet can be used for routing decisions as well. - If you are interested in this, please see the preliminary - documentation at <http://www.compendium.com.ar/policy-routing.txt> - and <ftp://post.tepkom.ru/pub/vol2/Linux/docs/advanced-routing.tex>. - You will need supporting software from - <ftp://ftp.tux.org/pub/net/ip-routing/>. + If you need more information, see the Linux Advanced + Routing and Traffic Control documentation at + <http://lartc.org/howto/lartc.rpdb.html> If unsure, say N. diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 2ae8f54cb321..82178cc69c96 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1951,7 +1951,7 @@ int cipso_v4_req_setattr(struct request_sock *req, buf = NULL; req_inet = inet_rsk(req); - opt = xchg(&req_inet->opt, opt); + opt = xchg((__force struct ip_options_rcu **)&req_inet->ireq_opt, opt); if (opt) kfree_rcu(opt, rcu); @@ -1973,11 +1973,13 @@ req_setattr_failure: * values on failure. * */ -static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr) +static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr) { + struct ip_options_rcu *opt = rcu_dereference_protected(*opt_ptr, 1); int hdr_delta = 0; - struct ip_options_rcu *opt = *opt_ptr; + if (!opt || opt->opt.cipso == 0) + return 0; if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) { u8 cipso_len; u8 cipso_off; @@ -2039,14 +2041,10 @@ static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr) */ void cipso_v4_sock_delattr(struct sock *sk) { - int hdr_delta; - struct ip_options_rcu *opt; struct inet_sock *sk_inet; + int hdr_delta; sk_inet = inet_sk(sk); - opt = rcu_dereference_protected(sk_inet->inet_opt, 1); - if (!opt || opt->opt.cipso == 0) - return; hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt); if (sk_inet->is_icsk && hdr_delta > 0) { @@ -2066,15 +2064,7 @@ void cipso_v4_sock_delattr(struct sock *sk) */ void cipso_v4_req_delattr(struct request_sock *req) { - struct ip_options_rcu *opt; - struct inet_request_sock *req_inet; - - req_inet = inet_rsk(req); - opt = req_inet->opt; - if (!opt || opt->opt.cipso == 0) - return; - - cipso_v4_delopt(&req_inet->opt); + cipso_v4_delopt(&inet_rsk(req)->ireq_opt); } /** diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 416bb304a281..1859c473b21a 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -86,7 +86,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, greh = (struct gre_base_hdr *)skb_transport_header(skb); pcsum = (__sum16 *)(greh + 1); - if (gso_partial) { + if (gso_partial && skb_is_gso(skb)) { unsigned int partial_adj; /* Adjust checksum to account for the fact that diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index c039c937ba90..b47a59cb3573 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -475,6 +475,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) } spin_unlock_bh(&queue->fastopenq.lock); } + mem_cgroup_sk_alloc(newsk); out: release_sock(sk); if (req) @@ -539,9 +540,11 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, { const struct inet_request_sock *ireq = inet_rsk(req); struct net *net = read_pnet(&ireq->ireq_net); - struct ip_options_rcu *opt = ireq->opt; + struct ip_options_rcu *opt; struct rtable *rt; + opt = ireq_opt_deref(ireq); + flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), @@ -575,10 +578,9 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, struct flowi4 *fl4; struct rtable *rt; + opt = rcu_dereference(ireq->ireq_opt); fl4 = &newinet->cork.fl.u.ip4; - rcu_read_lock(); - opt = rcu_dereference(newinet->inet_opt); flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), @@ -591,13 +593,11 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, goto no_route; if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) goto route_err; - rcu_read_unlock(); return &rt->dst; route_err: ip_rt_put(rt); no_route: - rcu_read_unlock(); __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 597bb4cfe805..e7d15fb0d94d 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -456,10 +456,7 @@ static int inet_reuseport_add_sock(struct sock *sk, return reuseport_add_sock(sk, sk2); } - /* Initial allocation may have already happened via setsockopt */ - if (!rcu_access_pointer(sk->sk_reuseport_cb)) - return reuseport_alloc(sk); - return 0; + return reuseport_alloc(sk); } int __inet_hash(struct sock *sk, struct sock *osk) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index fb1ad22b5e29..cdd627355ed1 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -128,43 +128,68 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly; static int ipip_err(struct sk_buff *skb, u32 info) { - -/* All the routers (except for Linux) return only - 8 bytes of packet payload. It means, that precise relaying of - ICMP in the real Internet is absolutely infeasible. - */ + /* All the routers (except for Linux) return only + * 8 bytes of packet payload. It means, that precise relaying of + * ICMP in the real Internet is absolutely infeasible. + */ struct net *net = dev_net(skb->dev); struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); const struct iphdr *iph = (const struct iphdr *)skb->data; - struct ip_tunnel *t; - int err; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; + struct ip_tunnel *t; + int err = 0; + + switch (type) { + case ICMP_DEST_UNREACH: + switch (code) { + case ICMP_SR_FAILED: + /* Impossible event. */ + goto out; + default: + /* All others are translated to HOST_UNREACH. + * rfc2003 contains "deep thoughts" about NET_UNREACH, + * I believe they are just ether pollution. --ANK + */ + break; + } + break; + + case ICMP_TIME_EXCEEDED: + if (code != ICMP_EXC_TTL) + goto out; + break; + + case ICMP_REDIRECT: + break; + + default: + goto out; + } - err = -ENOENT; t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, iph->daddr, iph->saddr, 0); - if (!t) + if (!t) { + err = -ENOENT; goto out; + } if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->parms.link, 0, iph->protocol, 0); - err = 0; + ipv4_update_pmtu(skb, net, info, t->parms.link, 0, + iph->protocol, 0); goto out; } if (type == ICMP_REDIRECT) { - ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, - iph->protocol, 0); - err = 0; + ipv4_redirect(skb, net, t->parms.link, 0, iph->protocol, 0); goto out; } - if (t->parms.iph.daddr == 0) + if (t->parms.iph.daddr == 0) { + err = -ENOENT; goto out; + } - err = 0; if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) goto out; diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 811689e523c3..f75fc6b53115 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -330,7 +330,8 @@ static unsigned int ipv4_synproxy_hook(void *priv, if (synproxy == NULL) return NF_ACCEPT; - if (nf_is_loopback_packet(skb)) + if (nf_is_loopback_packet(skb) || + ip_hdr(skb)->protocol != IPPROTO_TCP) return NF_ACCEPT; thoff = ip_hdrlen(skb); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ac6fde5d45f1..3d9f1c2f81c5 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2513,7 +2513,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or struct rtable *ort = (struct rtable *) dst_orig; struct rtable *rt; - rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0); + rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0); if (rt) { struct dst_entry *new = &rt->dst; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index b1bb1b3a1082..77cf32a80952 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -355,7 +355,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) */ - ireq->opt = tcp_v4_save_options(sock_net(sk), skb); + RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(sock_net(sk), skb)); if (security_inet_conn_request(sk, skb, req)) { reqsk_free(req); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c5d7656beeee..7eec3383702b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6196,7 +6196,7 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, struct inet_request_sock *ireq = inet_rsk(req); kmemcheck_annotate_bitfield(ireq, flags); - ireq->opt = NULL; + ireq->ireq_opt = NULL; #if IS_ENABLED(CONFIG_IPV6) ireq->pktopts = NULL; #endif diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 85164d4d3e53..5b027c69cbc5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -877,7 +877,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, - ireq->opt); + ireq_opt_deref(ireq)); err = net_xmit_eval(err); } @@ -889,7 +889,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, */ static void tcp_v4_reqsk_destructor(struct request_sock *req) { - kfree(inet_rsk(req)->opt); + kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1)); } #ifdef CONFIG_TCP_MD5SIG @@ -1265,10 +1265,11 @@ static void tcp_v4_init_req(struct request_sock *req, struct sk_buff *skb) { struct inet_request_sock *ireq = inet_rsk(req); + struct net *net = sock_net(sk_listener); sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); - ireq->opt = tcp_v4_save_options(sock_net(sk_listener), skb); + RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb)); } static struct dst_entry *tcp_v4_route_req(const struct sock *sk, @@ -1355,10 +1356,9 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, sk_daddr_set(newsk, ireq->ir_rmt_addr); sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); newsk->sk_bound_dev_if = ireq->ir_iif; - newinet->inet_saddr = ireq->ir_loc_addr; - inet_opt = ireq->opt; - rcu_assign_pointer(newinet->inet_opt, inet_opt); - ireq->opt = NULL; + newinet->inet_saddr = ireq->ir_loc_addr; + inet_opt = rcu_dereference(ireq->ireq_opt); + RCU_INIT_POINTER(newinet->inet_opt, inet_opt); newinet->mc_index = inet_iif(skb); newinet->mc_ttl = ip_hdr(skb)->ttl; newinet->rcv_tos = ip_hdr(skb)->tos; @@ -1403,9 +1403,12 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, if (__inet_inherit_port(sk, newsk) < 0) goto put_and_exit; *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); - if (*own_req) + if (likely(*own_req)) { tcp_move_syn(newtp, req); - + ireq->ireq_opt = NULL; + } else { + newinet->inet_opt = NULL; + } return newsk; exit_overflow: @@ -1416,6 +1419,7 @@ exit: tcp_listendrop(sk); return NULL; put_and_exit: + newinet->inet_opt = NULL; inet_csk_prepare_forced_close(newsk); tcp_done(newsk); goto exit; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 0bc9e46a5369..ae60dd3faed0 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -739,8 +739,10 @@ static void tcp_tsq_handler(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); if (tp->lost_out > tp->retrans_out && - tp->snd_cwnd > tcp_packets_in_flight(tp)) + tp->snd_cwnd > tcp_packets_in_flight(tp)) { + tcp_mstamp_refresh(tp); tcp_xmit_retransmit_queue(sk); + } tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle, 0, GFP_ATOMIC); @@ -2237,6 +2239,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, sent_pkts = 0; + tcp_mstamp_refresh(tp); if (!push_one) { /* Do MTU probing. */ result = tcp_mtu_probe(sk); @@ -2248,7 +2251,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, } max_segs = tcp_tso_segs(sk, mss_now); - tcp_mstamp_refresh(tp); while ((skb = tcp_send_head(sk))) { unsigned int limit; @@ -2841,8 +2843,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : -ENOBUFS; - if (!err) + if (!err) { skb->skb_mstamp = tp->tcp_mstamp; + tcp_rate_skb_sent(sk, skb); + } } else { err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 5676237d2b0f..ebfbccae62fd 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -231,10 +231,7 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot) } } - /* Initial allocation may have already happened via setsockopt */ - if (!rcu_access_pointer(sk->sk_reuseport_cb)) - return reuseport_alloc(sk); - return 0; + return reuseport_alloc(sk); } /** @@ -1061,7 +1058,7 @@ back_from_confirm: /* ... which is an evident application bug. --ANK */ release_sock(sk); - net_dbg_ratelimited("cork app bug 2\n"); + net_dbg_ratelimited("socket already corked\n"); err = -EINVAL; goto out; } @@ -1144,7 +1141,7 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset, if (unlikely(!up->pending)) { release_sock(sk); - net_dbg_ratelimited("udp cork app bug 3\n"); + net_dbg_ratelimited("cork failed\n"); return -EINVAL; } @@ -2240,20 +2237,16 @@ int udp_v4_early_demux(struct sk_buff *skb) iph = ip_hdr(skb); uh = udp_hdr(skb); - if (skb->pkt_type == PACKET_BROADCAST || - skb->pkt_type == PACKET_MULTICAST) { + if (skb->pkt_type == PACKET_MULTICAST) { in_dev = __in_dev_get_rcu(skb->dev); if (!in_dev) return 0; - /* we are supposed to accept bcast packets */ - if (skb->pkt_type == PACKET_MULTICAST) { - ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr, - iph->protocol); - if (!ours) - return 0; - } + ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr, + iph->protocol); + if (!ours) + return 0; sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, uh->source, iph->saddr, diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 97658bfc1b58..e360d55be555 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -120,7 +120,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, * will be using a length value equal to only one MSS sized * segment instead of the entire frame. */ - if (gso_partial) { + if (gso_partial && skb_is_gso(skb)) { uh->len = htons(skb_shinfo(skb)->gso_size + SKB_GSO_CB(skb)->data_offset + skb->head - (unsigned char *)uh); |