diff options
Diffstat (limited to 'net/ipv4')
72 files changed, 804 insertions, 552 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index aff93e7cdb31..52bdb881a506 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket @@ -58,11 +59,6 @@ * Some other random speedups. * Cyrus Durgin : Cleaned up file for kmod hacks. * Andi Kleen : Fix inet_stream_connect TCP race. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #define pr_fmt(fmt) "IPv4: " fmt diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 850a6f13a082..05eb42f347e8 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* linux/net/ipv4/arp.c * * Copyright (C) 1994 by Florian La Roche @@ -7,11 +8,6 @@ * high-level addresses) into a low-level hardware address (like an Ethernet * address). * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Fixes: * Alan Cox : Removed the Ethernet assumptions in * Florian's code diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 300921417f89..7bd29e694603 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -1,14 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * common UDP/RAW code * Linux INET implementation * * Authors: * Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/types.h> diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index ed2e2dc745cd..7874303220c5 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * NET3 IP device support routines. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Derived from the IP parts of dev.c 1.0.19 * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> @@ -749,8 +745,7 @@ static void check_lifetime(struct work_struct *work) ifap = &ifa->ifa_dev->ifa_list; tmp = rtnl_dereference(*ifap); while (tmp) { - tmp = rtnl_dereference(tmp->ifa_next); - if (rtnl_dereference(*ifap) == ifa) { + if (tmp == ifa) { inet_del_ifa(ifa->ifa_dev, ifap, 1); break; @@ -1292,6 +1287,7 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) { const struct in_ifaddr *ifa; __be32 addr = 0; + unsigned char localnet_scope = RT_SCOPE_HOST; struct in_device *in_dev; struct net *net = dev_net(dev); int master_idx; @@ -1301,10 +1297,13 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) if (!in_dev) goto no_in_dev; + if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev))) + localnet_scope = RT_SCOPE_LINK; + in_dev_for_each_ifa_rcu(ifa, in_dev) { if (ifa->ifa_flags & IFA_F_SECONDARY) continue; - if (ifa->ifa_scope > scope) + if (min(ifa->ifa_scope, localnet_scope) > scope) continue; if (!dst || inet_ifa_match(dst, ifa)) { addr = ifa->ifa_local; @@ -1357,14 +1356,20 @@ EXPORT_SYMBOL(inet_select_addr); static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst, __be32 local, int scope) { + unsigned char localnet_scope = RT_SCOPE_HOST; const struct in_ifaddr *ifa; __be32 addr = 0; int same = 0; + if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev))) + localnet_scope = RT_SCOPE_LINK; + in_dev_for_each_ifa_rcu(ifa, in_dev) { + unsigned char min_scope = min(ifa->ifa_scope, localnet_scope); + if (!addr && (local == ifa->ifa_local || !local) && - ifa->ifa_scope <= scope) { + min_scope <= scope) { addr = ifa->ifa_local; if (same) break; @@ -1379,7 +1384,7 @@ static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst, if (inet_ifa_match(addr, ifa)) break; /* No, then can we use new local src? */ - if (ifa->ifa_scope <= scope) { + if (min_scope <= scope) { addr = ifa->ifa_local; break; } diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 8edcfa66d1e5..2e5e377f50a1 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * IPV4 GSO/GRO offload support * Linux INET implementation @@ -5,10 +6,6 @@ * Copyright (C) 2016 secunet Security Networks AG * Author: Steffen Klassert <steffen.klassert@secunet.com> * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * * ESP GRO support */ diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 5ea2750982f2..317339cd7f03 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket @@ -6,11 +7,6 @@ * IPv4 Forwarding Information Base: FIB frontend. * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/module.h> @@ -675,6 +671,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { [RTA_IP_PROTO] = { .type = NLA_U8 }, [RTA_SPORT] = { .type = NLA_U16 }, [RTA_DPORT] = { .type = NLA_U16 }, + [RTA_NH_ID] = { .type = NLA_U32 }, }; int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla, @@ -812,6 +809,18 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, if (err < 0) goto errout; break; + case RTA_NH_ID: + cfg->fc_nh_id = nla_get_u32(attr); + break; + } + } + + if (cfg->fc_nh_id) { + if (cfg->fc_oif || cfg->fc_gw_family || + cfg->fc_encap || cfg->fc_mp) { + NL_SET_ERR_MSG(extack, + "Nexthop specification and nexthop id are mutually exclusive"); + return -EINVAL; } } @@ -838,6 +847,12 @@ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) goto errout; + if (cfg.fc_nh_id && !nexthop_find_by_id(net, cfg.fc_nh_id)) { + NL_SET_ERR_MSG(extack, "Nexthop id does not exist"); + err = -EINVAL; + goto errout; + } + tb = fib_get_table(net, cfg.fc_table); if (!tb) { NL_SET_ERR_MSG(extack, "FIB table does not exist"); @@ -897,10 +912,15 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, NL_SET_ERR_MSG(extack, "Invalid values in header for FIB dump request"); return -EINVAL; } + if (rtm->rtm_flags & ~(RTM_F_CLONED | RTM_F_PREFIX)) { NL_SET_ERR_MSG(extack, "Invalid flags for FIB dump request"); return -EINVAL; } + if (rtm->rtm_flags & RTM_F_CLONED) + filter->dump_routes = false; + else + filter->dump_exceptions = false; filter->dump_all_families = (rtm->rtm_family == AF_UNSPEC); filter->flags = rtm->rtm_flags; @@ -947,9 +967,10 @@ EXPORT_SYMBOL_GPL(ip_valid_fib_dump_req); static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { + struct fib_dump_filter filter = { .dump_routes = true, + .dump_exceptions = true }; const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); - struct fib_dump_filter filter = {}; unsigned int h, s_h; unsigned int e = 0, s_e; struct fib_table *tb; @@ -966,8 +987,8 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) filter.flags = rtm->rtm_flags & (RTM_F_PREFIX | RTM_F_CLONED); } - /* fib entries are never clones and ipv4 does not use prefix flag */ - if (filter.flags & (RTM_F_PREFIX | RTM_F_CLONED)) + /* ipv4 does not use prefix flag */ + if (filter.flags & RTM_F_PREFIX) return skb->len; if (filter.table_id) { diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 88807c138df4..b43a7ba5c6a4 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket @@ -8,11 +9,6 @@ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * Thomas Graf <tgraf@suug.ch> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Fixes: * Rani Assaf : local_rule cannot be deleted * Marc Boucher : routing by fwmark diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 4282bdcacf96..2db089e10ba0 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket @@ -6,11 +7,6 @@ * IPv4 Forwarding Information Base: semantics. * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/uaccess.h> @@ -329,14 +325,32 @@ static inline unsigned int fib_devindex_hashfn(unsigned int val) (val >> (DEVINDEX_HASHBITS * 2))) & mask; } -static inline unsigned int fib_info_hashfn(const struct fib_info *fi) +static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope, + u32 prefsrc, u32 priority) +{ + unsigned int val = init_val; + + val ^= (protocol << 8) | scope; + val ^= prefsrc; + val ^= priority; + + return val; +} + +static unsigned int fib_info_hashfn_result(unsigned int val) { unsigned int mask = (fib_info_hash_size - 1); - unsigned int val = fi->fib_nhs; - val ^= (fi->fib_protocol << 8) | fi->fib_scope; - val ^= (__force u32)fi->fib_prefsrc; - val ^= fi->fib_priority; + return (val ^ (val >> 7) ^ (val >> 12)) & mask; +} + +static inline unsigned int fib_info_hashfn(struct fib_info *fi) +{ + unsigned int val; + + val = fib_info_hashfn_1(fi->fib_nhs, fi->fib_protocol, + fi->fib_scope, (__force u32)fi->fib_prefsrc, + fi->fib_priority); if (fi->nh) { val ^= fib_devindex_hashfn(fi->nh->id); @@ -346,7 +360,40 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi) } endfor_nexthops(fi) } - return (val ^ (val >> 7) ^ (val >> 12)) & mask; + return fib_info_hashfn_result(val); +} + +/* no metrics, only nexthop id */ +static struct fib_info *fib_find_info_nh(struct net *net, + const struct fib_config *cfg) +{ + struct hlist_head *head; + struct fib_info *fi; + unsigned int hash; + + hash = fib_info_hashfn_1(fib_devindex_hashfn(cfg->fc_nh_id), + cfg->fc_protocol, cfg->fc_scope, + (__force u32)cfg->fc_prefsrc, + cfg->fc_priority); + hash = fib_info_hashfn_result(hash); + head = &fib_info_hash[hash]; + + hlist_for_each_entry(fi, head, fib_hash) { + if (!net_eq(fi->fib_net, net)) + continue; + if (!fi->nh || fi->nh->id != cfg->fc_nh_id) + continue; + if (cfg->fc_protocol == fi->fib_protocol && + cfg->fc_scope == fi->fib_scope && + cfg->fc_prefsrc == fi->fib_prefsrc && + cfg->fc_priority == fi->fib_priority && + cfg->fc_type == fi->fib_type && + cfg->fc_table == fi->fib_tb_id && + !((cfg->fc_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK)) + return fi; + } + + return NULL; } static struct fib_info *fib_find_info(struct fib_info *nfi) @@ -793,6 +840,12 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority) return 1; + if (cfg->fc_nh_id) { + if (fi->nh && cfg->fc_nh_id == fi->nh->id) + return 0; + return 1; + } + if (cfg->fc_oif || cfg->fc_gw_family) { struct fib_nh *nh = fib_info_nh(fi, 0); @@ -1003,7 +1056,7 @@ static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table, { struct net_device *dev; struct fib_result res; - int err; + int err = 0; if (nh->fib_nh_flags & RTNH_F_ONLINK) { unsigned int addr_type; @@ -1306,6 +1359,23 @@ struct fib_info *fib_create_info(struct fib_config *cfg, goto err_inval; } + if (cfg->fc_nh_id) { + if (!cfg->fc_mx) { + fi = fib_find_info_nh(net, cfg); + if (fi) { + fi->fib_treeref++; + return fi; + } + } + + nh = nexthop_find_by_id(net, cfg->fc_nh_id); + if (!nh) { + NL_SET_ERR_MSG(extack, "Nexthop id does not exist"); + goto err_inval; + } + nhs = 0; + } + #ifdef CONFIG_IP_ROUTE_MULTIPATH if (cfg->fc_mp) { nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len, extack); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 716f2d66cb3f..4400f5051977 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1,8 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. * * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet * & Swedish University of Agricultural Sciences. @@ -18,28 +15,19 @@ * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. * http://www.csc.kth.se/~snilsson/software/dyntrie2/ * - * * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999 * - * * Code from fib_hash has been reused which includes the following header: * - * * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * IPv4 FIB: lookup engine and maintenance routines. * - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Substantial contributions to this work comes from: * * David S. Miller, <davem@davemloft.net> @@ -350,12 +338,18 @@ static struct tnode *tnode_alloc(int bits) static inline void empty_child_inc(struct key_vector *n) { - ++tn_info(n)->empty_children ? : ++tn_info(n)->full_children; + tn_info(n)->empty_children++; + + if (!tn_info(n)->empty_children) + tn_info(n)->full_children++; } static inline void empty_child_dec(struct key_vector *n) { - tn_info(n)->empty_children-- ? : tn_info(n)->full_children--; + if (!tn_info(n)->empty_children) + tn_info(n)->full_children--; + + tn_info(n)->empty_children--; } static struct key_vector *leaf_new(t_key key, struct fib_alias *fa) @@ -2096,22 +2090,26 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, { unsigned int flags = NLM_F_MULTI; __be32 xkey = htonl(l->key); + int i, s_i, i_fa, s_fa, err; struct fib_alias *fa; - int i, s_i; - if (filter->filter_set) + if (filter->filter_set || + !filter->dump_exceptions || !filter->dump_routes) flags |= NLM_F_DUMP_FILTERED; s_i = cb->args[4]; + s_fa = cb->args[5]; i = 0; /* rcu_read_lock is hold by caller */ hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { - int err; + struct fib_info *fi = fa->fa_info; if (i < s_i) goto next; + i_fa = 0; + if (tb->tb_id != fa->tb_id) goto next; @@ -2120,29 +2118,43 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, goto next; if ((filter->protocol && - fa->fa_info->fib_protocol != filter->protocol)) + fi->fib_protocol != filter->protocol)) goto next; if (filter->dev && - !fib_info_nh_uses_dev(fa->fa_info, filter->dev)) + !fib_info_nh_uses_dev(fi, filter->dev)) goto next; } - err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, RTM_NEWROUTE, - tb->tb_id, fa->fa_type, - xkey, KEYLENGTH - fa->fa_slen, - fa->fa_tos, fa->fa_info, flags); - if (err < 0) { - cb->args[4] = i; - return err; + if (filter->dump_routes && !s_fa) { + err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, RTM_NEWROUTE, + tb->tb_id, fa->fa_type, + xkey, KEYLENGTH - fa->fa_slen, + fa->fa_tos, fi, flags); + if (err < 0) + goto stop; + i_fa++; + } + + if (filter->dump_exceptions) { + err = fib_dump_info_fnhe(skb, cb, tb->tb_id, fi, + &i_fa, s_fa); + if (err < 0) + goto stop; } + next: i++; } cb->args[4] = i; return skb->len; + +stop: + cb->args[4] = i; + cb->args[5] = i_fa; + return err; } /* rcu_read_lock needs to be hold by caller from readside */ diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 7c4a41dc04bb..293acfb36376 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -1,13 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * GRE over IPv4 demultiplexer driver * * Authors: Dmitry Kozlov (xeb@mail.ru) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 6c63524f598a..4de7e962d3da 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * IPV4 GSO/GRO offload support * Linux INET implementation * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * GRE GSO support */ diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 49d6b037b113..1510e951f451 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1,13 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * NET3: Implementation of the ICMP protocol layer. * * Alan Cox, <alan@lxorguk.ukuu.org.uk> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Some of the function names and the icmp unreach table for this * module were derived from [icmp.c 1.0.11 06/02/93] by * Ross Biro, Fred N. van Kempen, Mark Evans, Alan Cox, Gerhard Koerting. @@ -59,7 +55,6 @@ * * - Should use skb_pull() instead of all the manual checking. * This would also greatly simply some upper layer error handlers. --AK - * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index fa5732bcfc76..9a206931a342 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Linux NET3: Internet Group Management Protocol [IGMP] * @@ -11,11 +12,6 @@ * Authors: * Alan Cox <alan@lxorguk.ukuu.org.uk> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Fixes: * * Alan Cox : Added lots of __inline__ to optimise diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 474e15762b62..f5c163d4771b 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket @@ -6,11 +7,6 @@ * Support for INET connection oriented protocols. * * Authors: See the TCP sources - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or(at your option) any later version. */ #include <linux/module.h> @@ -755,10 +751,6 @@ drop: static void reqsk_queue_hash_req(struct request_sock *req, unsigned long timeout) { - req->num_retrans = 0; - req->num_timeout = 0; - req->sk = NULL; - timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED); mod_timer(&req->rsk_timer, jiffies + timeout); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 5731670c560b..bbb005eb5218 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * inet_diag.c Module for monitoring INET transport protocols sockets. * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/kernel.h> diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 35e9784fab4e..d666756be5f1 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * inet fragments management * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Pavel Emelyanov <xemul@openvz.org> * Started as consolidation of ipv4/ip_fragment.c, * ipv6/reassembly. and ipv6 nf conntrack reassembly @@ -149,10 +145,9 @@ static void inet_frags_free_cb(void *ptr, void *arg) inet_frag_destroy(fq); } -static void fqdir_rwork_fn(struct work_struct *work) +static void fqdir_work_fn(struct work_struct *work) { - struct fqdir *fqdir = container_of(to_rcu_work(work), - struct fqdir, destroy_rwork); + struct fqdir *fqdir = container_of(work, struct fqdir, destroy_work); struct inet_frags *f = fqdir->f; rhashtable_free_and_destroy(&fqdir->rhashtable, inet_frags_free_cb, NULL); @@ -191,18 +186,8 @@ EXPORT_SYMBOL(fqdir_init); void fqdir_exit(struct fqdir *fqdir) { - fqdir->high_thresh = 0; /* prevent creation of new frags */ - - fqdir->dead = true; - - /* call_rcu is supposed to provide memory barrier semantics, - * separating the setting of fqdir->dead with the destruction - * work. This implicit barrier is paired with inet_frag_kill(). - */ - - INIT_RCU_WORK(&fqdir->destroy_rwork, fqdir_rwork_fn); - queue_rcu_work(system_wq, &fqdir->destroy_rwork); - + INIT_WORK(&fqdir->destroy_work, fqdir_work_fn); + queue_work(system_wq, &fqdir->destroy_work); } EXPORT_SYMBOL(fqdir_exit); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index af28f332ba89..97824864e40d 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket @@ -6,11 +7,6 @@ * Generic INET transport hashtables * * Authors: Lotsa people, from code originally in tcp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/module.h> diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 1ffaec056821..4385eb9e781f 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -143,6 +143,10 @@ static void ip_expire(struct timer_list *t) net = qp->q.fqdir->net; rcu_read_lock(); + + if (qp->q.fqdir->dead) + goto out_rcu_unlock; + spin_lock(&qp->q.lock); if (qp->q.flags & INET_FRAG_COMPLETE) @@ -676,6 +680,11 @@ static int __net_init ipv4_frags_init_net(struct net *net) return res; } +static void __net_exit ipv4_frags_pre_exit_net(struct net *net) +{ + fqdir_pre_exit(net->ipv4.fqdir); +} + static void __net_exit ipv4_frags_exit_net(struct net *net) { ip4_frags_ns_ctl_unregister(net); @@ -683,8 +692,9 @@ static void __net_exit ipv4_frags_exit_net(struct net *net) } static struct pernet_operations ip4_frags_ops = { - .init = ipv4_frags_init_net, - .exit = ipv4_frags_exit_net, + .init = ipv4_frags_init_net, + .pre_exit = ipv4_frags_pre_exit_net, + .exit = ipv4_frags_exit_net, }; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 4b0526441476..a53a543fe055 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1,13 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Linux NET3: GRE over IP protocol decoder. * * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index ed97724c5e33..1e2392b7c64e 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket @@ -14,7 +15,6 @@ * Jorge Cwik, <jorge@laser.satlink.net> * Arnt Gulbrandsen, <agulbra@nvg.unit.no> * - * * Fixes: * Alan Cox : Commented a couple of minor bits of surplus code * Alan Cox : Undefining IP_FORWARD doesn't include the code @@ -96,8 +96,6 @@ * Jos Vos : Do accounting *before* call_in_firewall * Willy Konynenberg : Transparent proxying support * - * - * * To Fix: * IP fragmentation wants rewriting cleanly. The RFC815 algorithm is much more efficient * and could be made very efficient with the addition of some virtual memory hacks to permit @@ -106,11 +104,6 @@ * interleaved copy algorithm so that fragmenting has a one copy overhead. Actual packet * output should probably do its own fragmentation at the UDP/RAW layer. TCP shouldn't cause * fragmentation anyway. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #define pr_fmt(fmt) "IPv4: " fmt diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index f5636ab0b9c3..cdd6c3418b9e 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -994,7 +994,7 @@ static int __ip_append_data(struct sock *sk, uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb)); if (!uarg) return -ENOBUFS; - extra_uref = !skb; /* only extra ref if !MSG_MORE */ + extra_uref = !skb_zcopy(skb); /* only ref on new uarg */ if (rt->dst.dev->features & NETIF_F_SG && csummode == CHECKSUM_PARTIAL) { paged = true; @@ -1632,7 +1632,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, const struct ip_options *sopt, __be32 daddr, __be32 saddr, const struct ip_reply_arg *arg, - unsigned int len) + unsigned int len, u64 transmit_time) { struct ip_options_data replyopts; struct ipcm_cookie ipc; @@ -1648,6 +1648,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, ipcm_init(&ipc); ipc.addr = daddr; + ipc.sockc.transmit_time = transmit_time; if (replyopts.opt.opt.optlen) { ipc.opt = &replyopts.opt; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index a5d8cad18ead..38c02bb62e2c 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013 Nicira, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 30c1c264bdfc..1452a97914a0 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013 Nicira, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -89,9 +76,12 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, __ip_select_ident(net, iph, skb_shinfo(skb)->gso_segs ?: 1); err = ip_local_out(net, sk, skb); - if (unlikely(net_xmit_eval(err))) - pkt_len = 0; - iptunnel_xmit_stats(dev, pkt_len); + + if (dev) { + if (unlikely(net_xmit_eval(err))) + pkt_len = 0; + iptunnel_xmit_stats(dev, pkt_len); + } } EXPORT_SYMBOL_GPL(iptunnel_xmit); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 254a42e83ff9..cfb025606793 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -1,15 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Linux NET3: IP/IP protocol decoder modified to support * virtual tunnel interface * * Authors: * Saurabh Mohan (saurabh.mohan@vyatta.com) 05/07/2012 - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * */ /* diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 9119d012ba46..2f4cdcc13d53 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -1,13 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * IP Payload Compression Protocol (IPComp) - RFC3173. * * Copyright (c) 2003 James Morris <jmorris@intercode.com.au> * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * * Todo: * - Tunable compression parameters. * - Compression stats. diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index fe10b9a2efc8..43adfc1641ba 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Linux NET3: IP/IP protocol decoder. * @@ -16,12 +17,6 @@ * Carlos Picoto : GRE over IP support * Alexey Kuznetsov: Reworked. Really, now it is truncated version of ipv4/ip_gre.c. * I do not want to merge them together. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * */ /* tunnel.c: an IP tunnel driver diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 2c61e10a60e3..c07bc82cbbe9 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1,14 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * IP multicast routing support for mrouted 3.6/3.8 * * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk> * Linux Consultancy and Custom Driver Development * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Fixes: * Michael Chastain : Incorrect size of copying. * Alan Cox : Added the cache manager code @@ -23,7 +19,6 @@ * Carlos Picoto : PIMv1 Support * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header * Relax this requirement to work with older peers. - * */ #include <linux/uaccess.h> diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index e77872c93c20..10b91ebdf213 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1,13 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Packet matching code. * * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org> * Copyright (C) 2006-2010 Patrick McHardy <kaber@trash.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/cache.h> diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index a2a88ab07f7b..4d6bf7ac0792 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -1,13 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* Cluster IP hashmark target * (C) 2003-2004 by Harald Welte <laforge@netfilter.org> * based on ideas of Fabio Olive Leite <olive@unixforge.org> * * Development of this code funded by SuSE Linux AG, http://www.suse.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 9f6751893660..5930d3b02555 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* iptables module for the IPv4 and TCP ECN bits, Version 1.5 * * (C) 2002 by Harald Welte <laforge@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/in.h> diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index e8bed3390e58..e16b98ee6266 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -1,13 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This is a module which is used for rejecting packets. */ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 0c80616c00b5..8e7f84ec783d 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -1,9 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013 Patrick McHardy <kaber@trash.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/netfilter_ipv4/ip_tables.h> diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index 7c6c20eaf4db..161ba412cb08 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c @@ -1,9 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* Kernel module to match AH parameters. */ /* (C) 1999-2000 Yon Uriarte <yon@astaro.de> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/in.h> diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index 0b10d8812828..59031670b16a 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2011 Florian Westphal <fw@strlen.de> * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * * based on fib_frontend.c; Author: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 9ac92ea7b93c..9d54b4017e50 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -1,13 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This is the 1999 rewrite of IP Firewalling, aiming for kernel 2.3.x. * * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * */ #include <linux/module.h> diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index dea138ca8925..bb9266ea3785 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This is the 1999 rewrite of IP Firewalling, aiming for kernel 2.3.x. * * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/module.h> #include <linux/netfilter_ipv4/ip_tables.h> diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 007da0882412..ad33687b7444 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> * (C) 2011 Patrick McHardy <kaber@trash.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/module.h> diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index e5379fe57b64..ac633c1db97e 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * "security" table * @@ -10,10 +11,6 @@ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling * Copyright (C) 2000-2004 Netfilter Core Team <coreteam <at> netfilter.org> * Copyright (C) 2008 Red Hat, Inc., James Morris <jmorris <at> redhat.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/module.h> #include <linux/netfilter_ipv4/ip_tables.h> diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index a0d3ad60a411..8115611aa47d 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -1,9 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/types.h> diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c index df5c2a2061a4..7a83f881efa9 100644 --- a/net/ipv4/netfilter/nf_log_arp.c +++ b/net/ipv4/netfilter/nf_log_arp.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * (C) 2014 by Pablo Neira Ayuso <pablo@netfilter.org> * @@ -5,10 +6,6 @@ * * Bart De Schuymer <bdschuym@pandora.be> * Harald Welte <laforge@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c index 1e6f28c97d3a..4b2d49cc9f1a 100644 --- a/net/ipv4/netfilter/nf_log_ipv4.c +++ b/net/ipv4/netfilter/nf_log_ipv4.c @@ -1,9 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index 076b6b29d66d..87b711fd5a44 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -1,11 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * H.323 extension for NAT alteration. * * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net> * Copyright (c) 2006-2012 Patrick McHardy <kaber@trash.net> * - * This source code is licensed under General Public License version 2. - * * Based on the 'brute force' H.323 NAT module by * Jozsef Kadlecsik <kadlec@netfilter.org> */ diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 7dc3c324b911..2361fdac2c43 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -1,9 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/module.h> diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c index 4824b1e183a1..36a28d46149c 100644 --- a/net/ipv4/netfilter/nf_socket_ipv4.c +++ b/net/ipv4/netfilter/nf_socket_ipv4.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2007-2008 BalaBit IT Ltd. * Author: Krisztian Kovacs - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c index 40c93b3bd731..b2bae0b0e42a 100644 --- a/net/ipv4/netfilter/nf_tproxy_ipv4.c +++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2007-2008 BalaBit IT Ltd. * Author: Krisztian Kovacs - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * */ #include <net/netfilter/nf_tproxy.h> diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c index 0af3d8df70dd..abf89b972094 100644 --- a/net/ipv4/netfilter/nft_dup_ipv4.c +++ b/net/ipv4/netfilter/nft_dup_ipv4.c @@ -1,9 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ #include <linux/kernel.h> diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index c8888e52591f..ce294113dbcd 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -1,8 +1,4 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ +// SPDX-License-Identifier: GPL-2.0-only #include <linux/kernel.h> #include <linux/init.h> diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c index 517ce93699de..7e6fd5cde50f 100644 --- a/net/ipv4/netfilter/nft_reject_ipv4.c +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> * Copyright (c) 2013 Eric Leblond <eric@regit.org> * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * * Development of this code funded by Astaro AG (http://www.astaro.com/) */ diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 5e48762b6b5f..5fe5a3981d43 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -517,6 +517,47 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash) } EXPORT_SYMBOL_GPL(nexthop_select_path); +int nexthop_for_each_fib6_nh(struct nexthop *nh, + int (*cb)(struct fib6_nh *nh, void *arg), + void *arg) +{ + struct nh_info *nhi; + int err; + + if (nh->is_group) { + struct nh_group *nhg; + int i; + + nhg = rcu_dereference_rtnl(nh->nh_grp); + for (i = 0; i < nhg->num_nh; i++) { + struct nh_grp_entry *nhge = &nhg->nh_entries[i]; + + nhi = rcu_dereference_rtnl(nhge->nh->nh_info); + err = cb(&nhi->fib6_nh, arg); + if (err) + return err; + } + } else { + nhi = rcu_dereference_rtnl(nh->nh_info); + err = cb(&nhi->fib6_nh, arg); + if (err) + return err; + } + + return 0; +} +EXPORT_SYMBOL_GPL(nexthop_for_each_fib6_nh); + +static int check_src_addr(const struct in6_addr *saddr, + struct netlink_ext_ack *extack) +{ + if (!ipv6_addr_any(saddr)) { + NL_SET_ERR_MSG(extack, "IPv6 routes using source address can not use nexthop objects"); + return -EINVAL; + } + return 0; +} + int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, struct netlink_ext_ack *extack) { @@ -528,10 +569,8 @@ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, * routing it can not use nexthop objects. mlxsw also does not allow * fib6_src on routes. */ - if (!ipv6_addr_any(&cfg->fc_src)) { - NL_SET_ERR_MSG(extack, "IPv6 routes using source address can not use nexthop objects"); + if (cfg && check_src_addr(&cfg->fc_src, extack) < 0) return -EINVAL; - } if (nh->is_group) { struct nh_group *nhg; @@ -552,6 +591,25 @@ no_v4_nh: } EXPORT_SYMBOL_GPL(fib6_check_nexthop); +/* if existing nexthop has ipv6 routes linked to it, need + * to verify this new spec works with ipv6 + */ +static int fib6_check_nh_list(struct nexthop *old, struct nexthop *new, + struct netlink_ext_ack *extack) +{ + struct fib6_info *f6i; + + if (list_empty(&old->f6i_list)) + return 0; + + list_for_each_entry(f6i, &old->f6i_list, nh_list) { + if (check_src_addr(&f6i->fib6_src.addr, extack) < 0) + return -EINVAL; + } + + return fib6_check_nexthop(new, NULL, extack); +} + static int nexthop_check_scope(struct nexthop *nh, u8 scope, struct netlink_ext_ack *extack) { @@ -600,6 +658,21 @@ out: return err; } +static int fib_check_nh_list(struct nexthop *old, struct nexthop *new, + struct netlink_ext_ack *extack) +{ + struct fib_info *fi; + + list_for_each_entry(fi, &old->fi_list, nh_list) { + int err; + + err = fib_check_nexthop(new, fi->fib_scope, extack); + if (err) + return err; + } + return 0; +} + static void nh_group_rebalance(struct nh_group *nhg) { int total = 0; @@ -692,6 +765,7 @@ static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo) } } +/* not called for nexthop replace */ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh) { struct fib6_info *f6i, *tmp; @@ -746,10 +820,171 @@ static void remove_nexthop(struct net *net, struct nexthop *nh, nexthop_put(nh); } +/* if any FIB entries reference this nexthop, any dst entries + * need to be regenerated + */ +static void nh_rt_cache_flush(struct net *net, struct nexthop *nh) +{ + struct fib6_info *f6i; + + if (!list_empty(&nh->fi_list)) + rt_cache_flush(net); + + list_for_each_entry(f6i, &nh->f6i_list, nh_list) + ipv6_stub->fib6_update_sernum(net, f6i); +} + +static int replace_nexthop_grp(struct net *net, struct nexthop *old, + struct nexthop *new, + struct netlink_ext_ack *extack) +{ + struct nh_group *oldg, *newg; + int i; + + if (!new->is_group) { + NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with a nexthop."); + return -EINVAL; + } + + oldg = rtnl_dereference(old->nh_grp); + newg = rtnl_dereference(new->nh_grp); + + /* update parents - used by nexthop code for cleanup */ + for (i = 0; i < newg->num_nh; i++) + newg->nh_entries[i].nh_parent = old; + + rcu_assign_pointer(old->nh_grp, newg); + + for (i = 0; i < oldg->num_nh; i++) + oldg->nh_entries[i].nh_parent = new; + + rcu_assign_pointer(new->nh_grp, oldg); + + return 0; +} + +static int replace_nexthop_single(struct net *net, struct nexthop *old, + struct nexthop *new, + struct netlink_ext_ack *extack) +{ + struct nh_info *oldi, *newi; + + if (new->is_group) { + NL_SET_ERR_MSG(extack, "Can not replace a nexthop with a nexthop group."); + return -EINVAL; + } + + oldi = rtnl_dereference(old->nh_info); + newi = rtnl_dereference(new->nh_info); + + newi->nh_parent = old; + oldi->nh_parent = new; + + old->protocol = new->protocol; + old->nh_flags = new->nh_flags; + + rcu_assign_pointer(old->nh_info, newi); + rcu_assign_pointer(new->nh_info, oldi); + + return 0; +} + +static void __nexthop_replace_notify(struct net *net, struct nexthop *nh, + struct nl_info *info) +{ + struct fib6_info *f6i; + + if (!list_empty(&nh->fi_list)) { + struct fib_info *fi; + + /* expectation is a few fib_info per nexthop and then + * a lot of routes per fib_info. So mark the fib_info + * and then walk the fib tables once + */ + list_for_each_entry(fi, &nh->fi_list, nh_list) + fi->nh_updated = true; + + fib_info_notify_update(net, info); + + list_for_each_entry(fi, &nh->fi_list, nh_list) + fi->nh_updated = false; + } + + list_for_each_entry(f6i, &nh->f6i_list, nh_list) + ipv6_stub->fib6_rt_update(net, f6i, info); +} + +/* send RTM_NEWROUTE with REPLACE flag set for all FIB entries + * linked to this nexthop and for all groups that the nexthop + * is a member of + */ +static void nexthop_replace_notify(struct net *net, struct nexthop *nh, + struct nl_info *info) +{ + struct nh_grp_entry *nhge; + + __nexthop_replace_notify(net, nh, info); + + list_for_each_entry(nhge, &nh->grp_list, nh_list) + __nexthop_replace_notify(net, nhge->nh_parent, info); +} + static int replace_nexthop(struct net *net, struct nexthop *old, struct nexthop *new, struct netlink_ext_ack *extack) { - return -EEXIST; + bool new_is_reject = false; + struct nh_grp_entry *nhge; + int err; + + /* check that existing FIB entries are ok with the + * new nexthop definition + */ + err = fib_check_nh_list(old, new, extack); + if (err) + return err; + + err = fib6_check_nh_list(old, new, extack); + if (err) + return err; + + if (!new->is_group) { + struct nh_info *nhi = rtnl_dereference(new->nh_info); + + new_is_reject = nhi->reject_nh; + } + + list_for_each_entry(nhge, &old->grp_list, nh_list) { + /* if new nexthop is a blackhole, any groups using this + * nexthop cannot have more than 1 path + */ + if (new_is_reject && + nexthop_num_path(nhge->nh_parent) > 1) { + NL_SET_ERR_MSG(extack, "Blackhole nexthop can not be a member of a group with more than one path"); + return -EINVAL; + } + + err = fib_check_nh_list(nhge->nh_parent, new, extack); + if (err) + return err; + + err = fib6_check_nh_list(nhge->nh_parent, new, extack); + if (err) + return err; + } + + if (old->is_group) + err = replace_nexthop_grp(net, old, new, extack); + else + err = replace_nexthop_single(net, old, new, extack); + + if (!err) { + nh_rt_cache_flush(net, old); + + __remove_nexthop(net, new, NULL); + nexthop_put(new); + } + + return err; } /* called with rtnl_lock held */ @@ -761,6 +996,7 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh, bool replace = !!(cfg->nlflags & NLM_F_REPLACE); bool create = !!(cfg->nlflags & NLM_F_CREATE); u32 new_id = new_nh->id; + int replace_notify = 0; int rc = -EEXIST; pp = &root->rb_node; @@ -780,8 +1016,10 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh, pp = &next->rb_right; } else if (replace) { rc = replace_nexthop(net, nh, new_nh, extack); - if (!rc) + if (!rc) { new_nh = nh; /* send notification with old nh */ + replace_notify = 1; + } goto out; } else { /* id already exists and not a replace */ @@ -802,6 +1040,8 @@ out: if (!rc) { nh_base_seq_inc(net); nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo); + if (replace_notify) + nexthop_replace_notify(net, new_nh, &cfg->nlinfo); } return rc; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 834be7daeb32..9d24ef5c5d8f 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket @@ -5,11 +6,6 @@ * * "Ping" sockets * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Based on ipv4/udp.c code. * * Authors: Vasiliy Kulikov / Openwall (for Linux 2.6), @@ -17,7 +13,6 @@ * * Pavel gave all rights to bugs to Vasiliy, * none of the bugs are Pavel's now. - * */ #include <linux/uaccess.h> diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 4746f963c439..cc90243ccf76 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket @@ -25,11 +26,6 @@ * split functions for more readibility. * Andi Kleen : Add support for /proc/net/netstat * Arnaldo C. Melo : Convert to seq_file - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/types.h> #include <net/net_namespace.h> @@ -291,6 +287,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPAckCompressed", LINUX_MIB_TCPACKCOMPRESSED), SNMP_MIB_ITEM("TCPZeroWindowDrop", LINUX_MIB_TCPZEROWINDOWDROP), SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP), + SNMP_MIB_ITEM("TCPWqueueTooBig", LINUX_MIB_TCPWQUEUETOOBIG), SNMP_MIB_ITEM("TCPFastOpenPassiveAltKey", LINUX_MIB_TCPFASTOPENPASSIVEALTKEY), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 92d249e053be..9a8c0892622b 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket @@ -16,11 +17,6 @@ * Richard Colella : Hang on hash collision * Vince Laviano : Modified inet_del_protocol() to correctly * maintain copy bit. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/cache.h> #include <linux/module.h> diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 0e482f07b37f..0b8e06ca75d6 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket @@ -30,11 +31,6 @@ * Alan Cox : Added IP_HDRINCL option. * Alan Cox : Skip broadcast check if BSDism set. * David S. Miller : New socket lookup architecture. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/types.h> diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4a1168451f3a..6aee412a68bd 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket @@ -55,11 +56,6 @@ * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes. * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect * Ilia Sotnikov : Removed TOS from hash calculations - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #define pr_fmt(fmt) "IPv4: " fmt @@ -1934,6 +1930,23 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, hash_keys.basic.ip_proto = fl4->flowi4_proto; } break; + case 2: + memset(&hash_keys, 0, sizeof(hash_keys)); + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + /* skb is currently provided only when forwarding */ + if (skb) { + struct flow_keys keys; + + skb_flow_dissect_flow_keys(skb, &keys, 0); + + hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; + hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; + } else { + /* Same as case 0 */ + hash_keys.addrs.v4addrs.src = fl4->saddr; + hash_keys.addrs.v4addrs.dst = fl4->daddr; + } + break; } mhash = flow_hash_from_keys(&hash_keys); @@ -1986,7 +1999,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, u32 itag = 0; struct rtable *rth; struct flowi4 fl4; - bool do_cache; + bool do_cache = true; /* IP on this device is disabled. */ @@ -2063,6 +2076,9 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (res->type == RTN_BROADCAST) { if (IN_DEV_BFORWARD(in_dev)) goto make_route; + /* not do cache if bc_forwarding is enabled */ + if (IPV4_DEVCONF_ALL(net, BC_FORWARDING)) + do_cache = false; goto brd_input; } @@ -2100,18 +2116,15 @@ brd_input: RT_CACHE_STAT_INC(in_brd); local_input: - do_cache = false; - if (res->fi) { - if (!itag) { - struct fib_nh_common *nhc = FIB_RES_NHC(*res); + do_cache &= res->fi && !itag; + if (do_cache) { + struct fib_nh_common *nhc = FIB_RES_NHC(*res); - rth = rcu_dereference(nhc->nhc_rth_input); - if (rt_cache_valid(rth)) { - skb_dst_set_noref(skb, &rth->dst); - err = 0; - goto out; - } - do_cache = true; + rth = rcu_dereference(nhc->nhc_rth_input); + if (rt_cache_valid(rth)) { + skb_dst_set_noref(skb, &rth->dst); + err = 0; + goto out; } } @@ -2686,7 +2699,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, r->rtm_family = AF_INET; r->rtm_dst_len = 32; r->rtm_src_len = 0; - r->rtm_tos = fl4->flowi4_tos; + r->rtm_tos = fl4 ? fl4->flowi4_tos : 0; r->rtm_table = table_id < 256 ? table_id : RT_TABLE_COMPAT; if (nla_put_u32(skb, RTA_TABLE, table_id)) goto nla_put_failure; @@ -2714,7 +2727,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid)) goto nla_put_failure; #endif - if (!rt_is_input_route(rt) && + if (fl4 && !rt_is_input_route(rt) && fl4->saddr != src) { if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr)) goto nla_put_failure; @@ -2754,36 +2767,40 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, if (rtnetlink_put_metrics(skb, metrics) < 0) goto nla_put_failure; - if (fl4->flowi4_mark && - nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark)) - goto nla_put_failure; - - if (!uid_eq(fl4->flowi4_uid, INVALID_UID) && - nla_put_u32(skb, RTA_UID, - from_kuid_munged(current_user_ns(), fl4->flowi4_uid))) - goto nla_put_failure; + if (fl4) { + if (fl4->flowi4_mark && + nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark)) + goto nla_put_failure; - error = rt->dst.error; + if (!uid_eq(fl4->flowi4_uid, INVALID_UID) && + nla_put_u32(skb, RTA_UID, + from_kuid_munged(current_user_ns(), + fl4->flowi4_uid))) + goto nla_put_failure; - if (rt_is_input_route(rt)) { + if (rt_is_input_route(rt)) { #ifdef CONFIG_IP_MROUTE - if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) && - IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { - int err = ipmr_get_route(net, skb, - fl4->saddr, fl4->daddr, - r, portid); - - if (err <= 0) { - if (err == 0) - return 0; - goto nla_put_failure; - } - } else + if (ipv4_is_multicast(dst) && + !ipv4_is_local_multicast(dst) && + IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { + int err = ipmr_get_route(net, skb, + fl4->saddr, fl4->daddr, + r, portid); + + if (err <= 0) { + if (err == 0) + return 0; + goto nla_put_failure; + } + } else #endif - if (nla_put_u32(skb, RTA_IIF, fl4->flowi4_iif)) - goto nla_put_failure; + if (nla_put_u32(skb, RTA_IIF, fl4->flowi4_iif)) + goto nla_put_failure; + } } + error = rt->dst.error; + if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0) goto nla_put_failure; @@ -2795,6 +2812,79 @@ nla_put_failure: return -EMSGSIZE; } +static int fnhe_dump_bucket(struct net *net, struct sk_buff *skb, + struct netlink_callback *cb, u32 table_id, + struct fnhe_hash_bucket *bucket, int genid, + int *fa_index, int fa_start) +{ + int i; + + for (i = 0; i < FNHE_HASH_SIZE; i++) { + struct fib_nh_exception *fnhe; + + for (fnhe = rcu_dereference(bucket[i].chain); fnhe; + fnhe = rcu_dereference(fnhe->fnhe_next)) { + struct rtable *rt; + int err; + + if (*fa_index < fa_start) + goto next; + + if (fnhe->fnhe_genid != genid) + goto next; + + if (fnhe->fnhe_expires && + time_after(jiffies, fnhe->fnhe_expires)) + goto next; + + rt = rcu_dereference(fnhe->fnhe_rth_input); + if (!rt) + rt = rcu_dereference(fnhe->fnhe_rth_output); + if (!rt) + goto next; + + err = rt_fill_info(net, fnhe->fnhe_daddr, 0, rt, + table_id, NULL, skb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq); + if (err) + return err; +next: + (*fa_index)++; + } + } + + return 0; +} + +int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb, + u32 table_id, struct fib_info *fi, + int *fa_index, int fa_start) +{ + struct net *net = sock_net(cb->skb->sk); + int nhsel, genid = fnhe_genid(net); + + for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) { + struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel); + struct fnhe_hash_bucket *bucket; + int err; + + if (nhc->nhc_flags & RTNH_F_DEAD) + continue; + + bucket = rcu_dereference(nhc->nhc_exceptions); + if (!bucket) + continue; + + err = fnhe_dump_bucket(net, skb, cb, table_id, bucket, genid, + fa_index, fa_start); + if (err) + return err; + } + + return 0; +} + static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst, u8 ip_proto, __be16 sport, __be16 dport) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 008545f63667..535b69326f66 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -1,13 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Syncookies implementation for the Linux kernel * * Copyright (C) 1997 Andi Kleen * Based on ideas by D.J.Bernstein and Eric Schenk. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/tcp.h> diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 90f09e47198b..7d66306b5f39 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -39,6 +39,8 @@ static int ip_local_port_range_min[] = { 1, 1 }; static int ip_local_port_range_max[] = { 65535, 65535 }; static int tcp_adv_win_scale_min = -31; static int tcp_adv_win_scale_max = 31; +static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS; +static int tcp_min_snd_mss_max = 65535; static int ip_privileged_port_min; static int ip_privileged_port_max = 65535; static int ip_ttl_min = 1; @@ -363,8 +365,7 @@ static int proc_tcp_fastopen_key(struct ctl_table *table, int write, } } tcp_fastopen_reset_cipher(net, NULL, key, - backup_data ? key + 4 : NULL, - TCP_FASTOPEN_KEY_LENGTH); + backup_data ? key + 4 : NULL); } bad_key: @@ -601,6 +602,18 @@ static struct ctl_table ipv4_table[] = { .extra1 = &sysctl_fib_sync_mem_min, .extra2 = &sysctl_fib_sync_mem_max, }, + { + .procname = "tcp_rx_skb_cache", + .data = &tcp_rx_skb_cache_key.key, + .mode = 0644, + .proc_handler = proc_do_static_key, + }, + { + .procname = "tcp_tx_skb_cache", + .data = &tcp_tx_skb_cache_key.key, + .mode = 0644, + .proc_handler = proc_do_static_key, + }, { } }; @@ -800,6 +813,15 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec, }, { + .procname = "tcp_min_snd_mss", + .data = &init_net.ipv4.sysctl_tcp_min_snd_mss, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &tcp_min_snd_mss_min, + .extra2 = &tcp_min_snd_mss_max, + }, + { .procname = "tcp_probe_threshold", .data = &init_net.ipv4.sysctl_tcp_probe_threshold, .maxlen = sizeof(int), @@ -1008,7 +1030,7 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_fib_multipath_hash_policy, .extra1 = &zero, - .extra2 = &one, + .extra2 = &two, }, #endif { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 27ce13ece510..47c217905864 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket @@ -205,11 +206,6 @@ * Hirokazu Takahashi : Use copy_from_user() instead of * csum_and_copy_from_user() if possible. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or(at your option) any later version. - * * Description of States: * * TCP_SYN_SENT sent a connection request, waiting for ack @@ -321,6 +317,11 @@ struct tcp_splice_state { unsigned long tcp_memory_pressure __read_mostly; EXPORT_SYMBOL_GPL(tcp_memory_pressure); +DEFINE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key); +EXPORT_SYMBOL(tcp_rx_skb_cache_key); + +DEFINE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key); + void tcp_enter_memory_pressure(struct sock *sk) { unsigned long val; @@ -2740,6 +2741,21 @@ static int tcp_repair_options_est(struct sock *sk, return 0; } +DEFINE_STATIC_KEY_FALSE(tcp_tx_delay_enabled); +EXPORT_SYMBOL(tcp_tx_delay_enabled); + +static void tcp_enable_tx_delay(void) +{ + if (!static_branch_unlikely(&tcp_tx_delay_enabled)) { + static int __tcp_tx_delay_enabled = 0; + + if (cmpxchg(&__tcp_tx_delay_enabled, 0, 1) == 0) { + static_branch_enable(&tcp_tx_delay_enabled); + pr_info("TCP_TX_DELAY enabled\n"); + } + } +} + /* * Socket option code for TCP. */ @@ -2806,8 +2822,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, if (optlen == TCP_FASTOPEN_KEY_BUF_LENGTH) backup_key = key + TCP_FASTOPEN_KEY_LENGTH; - return tcp_fastopen_reset_cipher(net, sk, key, backup_key, - TCP_FASTOPEN_KEY_LENGTH); + return tcp_fastopen_reset_cipher(net, sk, key, backup_key); } default: /* fallthru */ @@ -3091,6 +3106,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level, else tp->recvmsg_inq = val; break; + case TCP_TX_DELAY: + if (val) + tcp_enable_tx_delay(); + tp->tcp_tx_delay = val; + break; default: err = -ENOPROTOOPT; break; @@ -3550,6 +3570,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = tp->fastopen_no_cookie; break; + case TCP_TX_DELAY: + val = tp->tcp_tx_delay; + break; + case TCP_TIMESTAMP: val = tcp_time_stamp_raw() + tp->tsoffset; break; @@ -3883,6 +3907,7 @@ void __init tcp_init(void) unsigned long limit; unsigned int i; + BUILD_BUG_ON(TCP_MIN_SND_MSS <= MAX_TCP_OPTION_SPACE); BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb)); diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 477cb4aa456c..79f705450c16 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* DataCenter TCP (DCTCP) congestion control. * * http://simula.stanford.edu/~alizade/Site/DCTCP.html @@ -33,11 +34,6 @@ * Daniel Borkmann <dborkman@redhat.com> * Florian Westphal <fw@strlen.de> * Glenn Judd <glenn.judd@morganstanley.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or (at - * your option) any later version. */ #include <linux/module.h> diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 81148f7a2323..a3a386236d93 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * tcp_diag.c Module for monitoring TCP transport protocols sockets. * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/module.h> diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 8e1580485c9e..3fd451271a70 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -30,21 +30,15 @@ void tcp_fastopen_init_key_once(struct net *net) * for a valid cookie, so this is an acceptable risk. */ get_random_bytes(key, sizeof(key)); - tcp_fastopen_reset_cipher(net, NULL, key, NULL, sizeof(key)); + tcp_fastopen_reset_cipher(net, NULL, key, NULL); } static void tcp_fastopen_ctx_free(struct rcu_head *head) { struct tcp_fastopen_context *ctx = container_of(head, struct tcp_fastopen_context, rcu); - int i; - /* We own ctx, thus no need to hold the Fastopen-lock */ - for (i = 0; i < TCP_FASTOPEN_KEY_MAX; i++) { - if (ctx->tfm[i]) - crypto_free_cipher(ctx->tfm[i]); - } - kfree(ctx); + kzfree(ctx); } void tcp_fastopen_destroy_cipher(struct sock *sk) @@ -72,54 +66,29 @@ void tcp_fastopen_ctx_destroy(struct net *net) call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free); } -struct tcp_fastopen_context *tcp_fastopen_alloc_ctx(void *primary_key, - void *backup_key, - unsigned int len) -{ - struct tcp_fastopen_context *new_ctx; - void *key = primary_key; - int err, i; - - new_ctx = kmalloc(sizeof(*new_ctx), GFP_KERNEL); - if (!new_ctx) - return ERR_PTR(-ENOMEM); - for (i = 0; i < TCP_FASTOPEN_KEY_MAX; i++) - new_ctx->tfm[i] = NULL; - for (i = 0; i < (backup_key ? 2 : 1); i++) { - new_ctx->tfm[i] = crypto_alloc_cipher("aes", 0, 0); - if (IS_ERR(new_ctx->tfm[i])) { - err = PTR_ERR(new_ctx->tfm[i]); - new_ctx->tfm[i] = NULL; - pr_err("TCP: TFO aes cipher alloc error: %d\n", err); - goto out; - } - err = crypto_cipher_setkey(new_ctx->tfm[i], key, len); - if (err) { - pr_err("TCP: TFO cipher key error: %d\n", err); - goto out; - } - memcpy(&new_ctx->key[i * TCP_FASTOPEN_KEY_LENGTH], key, len); - key = backup_key; - } - return new_ctx; -out: - tcp_fastopen_ctx_free(&new_ctx->rcu); - return ERR_PTR(err); -} - int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, - void *primary_key, void *backup_key, - unsigned int len) + void *primary_key, void *backup_key) { struct tcp_fastopen_context *ctx, *octx; struct fastopen_queue *q; int err = 0; - ctx = tcp_fastopen_alloc_ctx(primary_key, backup_key, len); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); + ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) { + err = -ENOMEM; goto out; } + + ctx->key[0].key[0] = get_unaligned_le64(primary_key); + ctx->key[0].key[1] = get_unaligned_le64(primary_key + 8); + if (backup_key) { + ctx->key[1].key[0] = get_unaligned_le64(backup_key); + ctx->key[1].key[1] = get_unaligned_le64(backup_key + 8); + ctx->num = 2; + } else { + ctx->num = 1; + } + spin_lock(&net->ipv4.tcp_fastopen_ctx_lock); if (sk) { q = &inet_csk(sk)->icsk_accept_queue.fastopenq; @@ -141,31 +110,29 @@ out: static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req, struct sk_buff *syn, - struct crypto_cipher *tfm, + const siphash_key_t *key, struct tcp_fastopen_cookie *foc) { + BUILD_BUG_ON(TCP_FASTOPEN_COOKIE_SIZE != sizeof(u64)); + if (req->rsk_ops->family == AF_INET) { const struct iphdr *iph = ip_hdr(syn); - __be32 path[4] = { iph->saddr, iph->daddr, 0, 0 }; - crypto_cipher_encrypt_one(tfm, foc->val, (void *)path); + foc->val[0] = cpu_to_le64(siphash(&iph->saddr, + sizeof(iph->saddr) + + sizeof(iph->daddr), + key)); foc->len = TCP_FASTOPEN_COOKIE_SIZE; return true; } - #if IS_ENABLED(CONFIG_IPV6) if (req->rsk_ops->family == AF_INET6) { const struct ipv6hdr *ip6h = ipv6_hdr(syn); - struct tcp_fastopen_cookie tmp; - struct in6_addr *buf; - int i; - - crypto_cipher_encrypt_one(tfm, tmp.val, - (void *)&ip6h->saddr); - buf = &tmp.addr; - for (i = 0; i < 4; i++) - buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i]; - crypto_cipher_encrypt_one(tfm, foc->val, (void *)buf); + + foc->val[0] = cpu_to_le64(siphash(&ip6h->saddr, + sizeof(ip6h->saddr) + + sizeof(ip6h->daddr), + key)); foc->len = TCP_FASTOPEN_COOKIE_SIZE; return true; } @@ -173,11 +140,8 @@ static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req, return false; } -/* Generate the fastopen cookie by doing aes128 encryption on both - * the source and destination addresses. Pad 0s for IPv4 or IPv4-mapped-IPv6 - * addresses. For the longer IPv6 addresses use CBC-MAC. - * - * XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE. +/* Generate the fastopen cookie by applying SipHash to both the source and + * destination addresses. */ static void tcp_fastopen_cookie_gen(struct sock *sk, struct request_sock *req, @@ -189,7 +153,7 @@ static void tcp_fastopen_cookie_gen(struct sock *sk, rcu_read_lock(); ctx = tcp_fastopen_get_ctx(sk); if (ctx) - __tcp_fastopen_cookie_gen_cipher(req, syn, ctx->tfm[0], foc); + __tcp_fastopen_cookie_gen_cipher(req, syn, &ctx->key[0], foc); rcu_read_unlock(); } @@ -253,7 +217,7 @@ static int tcp_fastopen_cookie_gen_check(struct sock *sk, if (!ctx) goto out; for (i = 0; i < tcp_fastopen_context_len(ctx); i++) { - __tcp_fastopen_cookie_gen_cipher(req, syn, ctx->tfm[i], foc); + __tcp_fastopen_cookie_gen_cipher(req, syn, &ctx->key[i], foc); if (tcp_fastopen_cookie_match(foc, orig)) { ret = i + 1; goto out; @@ -274,10 +238,6 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, struct sock *child; bool own_req; - req->num_retrans = 0; - req->num_timeout = 0; - req->sk = NULL; - child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, NULL, &own_req); if (!child) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 08a477e74cf3..b71efeb0ae5b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -119,7 +119,7 @@ void clean_acked_data_enable(struct inet_connection_sock *icsk, void (*cad)(struct sock *sk, u32 ack_seq)) { icsk->icsk_clean_acked = cad; - static_branch_inc(&clean_acked_data_enabled.key); + static_branch_deferred_inc(&clean_acked_data_enabled); } EXPORT_SYMBOL_GPL(clean_acked_data_enable); @@ -1302,7 +1302,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, TCP_SKB_CB(skb)->seq += shifted; tcp_skb_pcount_add(prev, pcount); - BUG_ON(tcp_skb_pcount(skb) < pcount); + WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount); tcp_skb_pcount_add(skb, -pcount); /* When we're adding to gso_segs == 1, gso_size will be zero, @@ -1368,6 +1368,21 @@ static int skb_can_shift(const struct sk_buff *skb) return !skb_headlen(skb) && skb_is_nonlinear(skb); } +int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, + int pcount, int shiftlen) +{ + /* TCP min gso_size is 8 bytes (TCP_MIN_GSO_SIZE) + * Since TCP_SKB_CB(skb)->tcp_gso_segs is 16 bits, we need + * to make sure not storing more than 65535 * 8 bytes per skb, + * even if current MSS is bigger. + */ + if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE)) + return 0; + if (unlikely(tcp_skb_pcount(to) + pcount > 65535)) + return 0; + return skb_shift(to, from, shiftlen); +} + /* Try collapsing SACK blocks spanning across multiple skbs to a single * skb. */ @@ -1473,7 +1488,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una)) goto fallback; - if (!skb_shift(prev, skb, len)) + if (!tcp_skb_shift(prev, skb, pcount, len)) goto fallback; if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack)) goto out; @@ -1491,11 +1506,10 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, goto out; len = skb->len; - if (skb_shift(prev, skb, len)) { - pcount += tcp_skb_pcount(skb); - tcp_shifted_skb(sk, prev, skb, state, tcp_skb_pcount(skb), + pcount = tcp_skb_pcount(skb); + if (tcp_skb_shift(prev, skb, pcount, len)) + tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, 0); - } out: return prev; @@ -2648,7 +2662,7 @@ static void tcp_process_loss(struct sock *sk, int flag, int num_dupack, struct tcp_sock *tp = tcp_sk(sk); bool recovered = !before(tp->snd_una, tp->high_seq); - if ((flag & FLAG_SND_UNA_ADVANCED) && + if ((flag & FLAG_SND_UNA_ADVANCED || tp->fastopen_rsk) && tcp_try_undo_loss(sk, false)) return; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 59b7edd8719c..d57641cb3477 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket @@ -7,18 +8,12 @@ * * IPv4 specific functions * - * * code split from: * linux/ipv4/tcp.c * linux/ipv4/tcp_input.c * linux/ipv4/tcp_output.c * * See tcp.c for author information - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ /* @@ -667,8 +662,9 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) int genhash; struct sock *sk1 = NULL; #endif - struct net *net; + u64 transmit_time = 0; struct sock *ctl_sk; + struct net *net; /* Never send a reset in response to a reset. */ if (th->rst) @@ -772,13 +768,16 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); local_bh_disable(); ctl_sk = this_cpu_read(*net->ipv4.tcp_sk); - if (sk) + if (sk) { ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? inet_twsk(sk)->tw_mark : sk->sk_mark; + transmit_time = tcp_transmit_time(sk); + } ip_send_unicast_reply(ctl_sk, skb, &TCP_SKB_CB(skb)->header.h4.opt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, - &arg, arg.iov[0].iov_len); + &arg, arg.iov[0].iov_len, + transmit_time); ctl_sk->sk_mark = 0; __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); @@ -813,6 +812,7 @@ static void tcp_v4_send_ack(const struct sock *sk, struct net *net = sock_net(sk); struct ip_reply_arg arg; struct sock *ctl_sk; + u64 transmit_time; memset(&rep.th, 0, sizeof(struct tcphdr)); memset(&arg, 0, sizeof(arg)); @@ -864,13 +864,14 @@ static void tcp_v4_send_ack(const struct sock *sk, arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL); local_bh_disable(); ctl_sk = this_cpu_read(*net->ipv4.tcp_sk); - if (sk) - ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? - inet_twsk(sk)->tw_mark : sk->sk_mark; + ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? + inet_twsk(sk)->tw_mark : sk->sk_mark; + transmit_time = tcp_transmit_time(sk); ip_send_unicast_reply(ctl_sk, skb, &TCP_SKB_CB(skb)->header.h4.opt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, - &arg, arg.iov[0].iov_len); + &arg, arg.iov[0].iov_len, + transmit_time); ctl_sk->sk_mark = 0; __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); @@ -2633,6 +2634,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_ecn_fallback = 1; net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS; + net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS; net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD; net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 7c35731816e2..8bcaf2586b68 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -274,7 +274,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; tcptw->tw_ts_offset = tp->tsoffset; tcptw->tw_last_oow_ack_time = 0; - + tcptw->tw_tx_delay = tp->tcp_tx_delay; #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -283,6 +283,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr; tw->tw_tclass = np->tclass; tw->tw_flowlabel = be32_to_cpu(np->flow_label & IPV6_FLOWLABEL_MASK); + tw->tw_txhash = sk->sk_txhash; tw->tw_ipv6only = sk->sk_ipv6only; } #endif diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 0fbf7d4df9da..e09147ac9a99 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * IPV4 GSO/GRO offload support * Linux INET implementation * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * TCPv4 GSO/GRO support */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f429e856e263..4af1f5dae9d3 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1153,6 +1153,8 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, memset(skb->cb, 0, max(sizeof(struct inet_skb_parm), sizeof(struct inet6_skb_parm))); + tcp_add_tx_delay(skb, tp); + err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); if (unlikely(err > 0)) { @@ -1296,6 +1298,12 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, if (nsize < 0) nsize = 0; + if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf && + tcp_queue != TCP_FRAG_IN_WRITE_QUEUE)) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG); + return -ENOMEM; + } + if (skb_unclone(skb, gfp)) return -ENOMEM; @@ -1454,8 +1462,7 @@ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu) mss_now -= icsk->icsk_ext_hdr_len; /* Then reserve room for full set of TCP options and 8 bytes of data */ - if (mss_now < 48) - mss_now = 48; + mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss); return mss_now; } @@ -2234,6 +2241,18 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes); limit <<= factor; + if (static_branch_unlikely(&tcp_tx_delay_enabled) && + tcp_sk(sk)->tcp_tx_delay) { + u64 extra_bytes = (u64)sk->sk_pacing_rate * tcp_sk(sk)->tcp_tx_delay; + + /* TSQ is based on skb truesize sum (sk_wmem_alloc), so we + * approximate our needs assuming an ~100% skb->truesize overhead. + * USEC_PER_SEC is approximated by 2^20. + * do_div(extra_bytes, USEC_PER_SEC/2) is replaced by a right shift. + */ + extra_bytes >>= (20 - 1); + limit += extra_bytes; + } if (refcount_read(&sk->sk_wmem_alloc) > limit) { /* Always send skb if rtx queue is empty. * No need to wait for TX completion to call us back, @@ -2747,7 +2766,7 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) if (next_skb_size <= skb_availroom(skb)) skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size), next_skb_size); - else if (!skb_shift(skb, next_skb, next_skb_size)) + else if (!tcp_skb_shift(skb, next_skb, 1, next_skb_size)) return false; } tcp_highest_sack_replace(sk, next_skb, skb); @@ -3212,6 +3231,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, int tcp_header_size; struct tcphdr *th; int mss; + u64 now; skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); if (unlikely(!skb)) { @@ -3243,13 +3263,14 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, mss = tcp_mss_clamp(tp, dst_metric_advmss(dst)); memset(&opts, 0, sizeof(opts)); + now = tcp_clock_ns(); #ifdef CONFIG_SYN_COOKIES if (unlikely(req->cookie_ts)) skb->skb_mstamp_ns = cookie_init_timestamp(req); else #endif { - skb->skb_mstamp_ns = tcp_clock_ns(); + skb->skb_mstamp_ns = now; if (!tcp_rsk(req)->snt_synack) /* Timestamp first SYNACK */ tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb); } @@ -3292,8 +3313,9 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, rcu_read_unlock(); #endif - /* Do not fool tcpdump (if any), clean our debris */ - skb->tstamp = 0; + skb->skb_mstamp_ns = now; + tcp_add_tx_delay(skb, tp); + return skb; } EXPORT_SYMBOL(tcp_make_synack); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 5bad937ce779..c801cd37cc2a 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -155,6 +155,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; mss = min(net->ipv4.sysctl_tcp_base_mss, mss); mss = max(mss, 68 - tcp_sk(sk)->tcp_header_len); + mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss); icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); } tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f40059b2a8a0..1b971bd95786 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket @@ -69,12 +70,6 @@ * a single port at the same time. * Derek Atkins <derek@ihtfp.com>: Add Encapulation Support * James Chapman : Add L2TP encapsulation type. - * - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #define pr_fmt(fmt) "UDP: " fmt @@ -130,17 +125,6 @@ EXPORT_SYMBOL(udp_memory_allocated); #define MAX_UDP_PORTS 65536 #define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN) -/* IPCB reference means this can not be used from early demux */ -static bool udp_lib_exact_dif_match(struct net *net, struct sk_buff *skb) -{ -#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) - if (!net->ipv4.sysctl_udp_l3mdev_accept && - skb && ipv4_l3mdev_skb(IPCB(skb)->flags)) - return true; -#endif - return false; -} - static int udp_lib_lport_inuse(struct net *net, __u16 num, const struct udp_hslot *hslot, unsigned long *bitmap, @@ -369,7 +353,7 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum) static int compute_score(struct sock *sk, struct net *net, __be32 saddr, __be16 sport, __be32 daddr, unsigned short hnum, - int dif, int sdif, bool exact_dif) + int dif, int sdif) { int score; struct inet_sock *inet; @@ -425,7 +409,7 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr, static struct sock *udp4_lib_lookup2(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, unsigned int hnum, - int dif, int sdif, bool exact_dif, + int dif, int sdif, struct udp_hslot *hslot2, struct sk_buff *skb) { @@ -437,7 +421,7 @@ static struct sock *udp4_lib_lookup2(struct net *net, badness = 0; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { score = compute_score(sk, net, saddr, sport, - daddr, hnum, dif, sdif, exact_dif); + daddr, hnum, dif, sdif); if (score > badness) { if (sk->sk_reuseport) { hash = udp_ehashfn(net, daddr, hnum, @@ -465,7 +449,6 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, unsigned short hnum = ntohs(dport); unsigned int hash2, slot2; struct udp_hslot *hslot2; - bool exact_dif = udp_lib_exact_dif_match(net, skb); hash2 = ipv4_portaddr_hash(net, daddr, hnum); slot2 = hash2 & udptable->mask; @@ -473,7 +456,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, result = udp4_lib_lookup2(net, saddr, sport, daddr, hnum, dif, sdif, - exact_dif, hslot2, skb); + hslot2, skb); if (!result) { hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum); slot2 = hash2 & udptable->mask; @@ -481,7 +464,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, result = udp4_lib_lookup2(net, saddr, sport, htonl(INADDR_ANY), hnum, dif, sdif, - exact_dif, hslot2, skb); + hslot2, skb); } if (IS_ERR(result)) return NULL; @@ -503,7 +486,11 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, struct sock *udp4_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport) { - return __udp4_lib_lookup_skb(skb, sport, dport, &udp_table); + const struct iphdr *iph = ip_hdr(skb); + + return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport, + iph->daddr, dport, inet_iif(skb), + inet_sdif(skb), &udp_table, NULL); } EXPORT_SYMBOL_GPL(udp4_lib_lookup_skb); @@ -538,8 +525,7 @@ static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk, (inet->inet_dport != rmt_port && inet->inet_dport) || (inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) || ipv6_only_sock(sk) || - (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif && - sk->sk_bound_dev_if != sdif)) + !udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif)) return false; if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif, sdif)) return false; @@ -1779,6 +1765,10 @@ try_again: sin->sin_addr.s_addr = ip_hdr(skb)->saddr; memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); *addr_len = sizeof(*sin); + + if (cgroup_bpf_enabled) + BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, + (struct sockaddr *)sin); } if (udp_sk(sk)->gro_enabled) diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 5cbb9be05295..910555a4d9fe 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * udp_diag.c Module for monitoring UDP transport protocols sockets. * * Authors: Pavel Emelyanov, <xemul@parallels.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index f1c46df279e8..a3908e55ed89 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * IPV4 GSO/GRO offload support * Linux INET implementation * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * UDPv4 GSO support */ @@ -228,6 +224,11 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, seg = segs; uh = udp_hdr(seg); + /* preserve TX timestamp flags and TS key for first segment */ + skb_shinfo(seg)->tskey = skb_shinfo(gso_skb)->tskey; + skb_shinfo(seg)->tx_flags |= + (skb_shinfo(gso_skb)->tx_flags & SKBTX_ANY_TSTAMP); + /* compute checksum adjustment based on old length versus new */ newlen = htons(sizeof(*uh) + mss); check = csum16_add(csum16_sub(uh->check, uh->len), newlen); diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 3c94b8f0ff27..5936d66d1ce2 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * UDPLITE An implementation of the UDP-Lite protocol (RFC 3828). * @@ -5,10 +6,6 @@ * * Changes: * Fixes: - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #define pr_fmt(fmt) "UDPLite: " fmt diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 9bb8905088c7..ecff3fce9807 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * xfrm4_output.c - Common IPsec encapsulation code for IPv4. * Copyright (c) 2004 Herbert Xu <herbert@gondor.apana.org.au> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/if_ether.h> diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c index bcab48944c15..8a4285712808 100644 --- a/net/ipv4/xfrm4_protocol.c +++ b/net/ipv4/xfrm4_protocol.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* xfrm4_protocol.c - Generic xfrm protocol multiplexer. * * Copyright (C) 2013 secunet Security Networks AG @@ -7,11 +8,6 @@ * * Based on: * net/ipv4/tunnel4.c - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/init.h> |