author		Linus Torvalds <torvalds@linux-foundation.org>	2019-05-07 22:03:58 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2019-05-07 22:03:58 -0700
commit		80f232121b69cc69a31ccb2b38c1665d770b0710 (patch)
tree		106263eac4ff03b899df695e00dd11e593e74fe2 /net
parent		82efe439599439a5e1e225ce5740e6cfb777a7dd (diff)
parent		a9e41a529681b38087c91ebc0bb91e12f510ca2d (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
"Highlights:
1) Support AES128-CCM ciphers in kTLS, from Vakul Garg.
2) Add fib_sync_mem to control the amount of dirty memory we allow to
queue up between synchronize RCU calls, from David Ahern.
3) Make flow classifier more lockless, from Vlad Buslov.
4) Add PHY downshift support to aquantia driver, from Heiner
Kallweit.
5) Add SKB cache for TCP rx and tx, from Eric Dumazet. This reduces
contention on SLAB spinlocks in heavy RPC workloads (see the sketch
after this list).
6) Partial GSO offload support in XFRM, from Boris Pismenny.
7) Add fast link down support to ethtool, from Heiner Kallweit.
8) Use siphash for IP ID generator, from Eric Dumazet (see the sketch
after this list).
9) Pull nexthops even further out from ipv4/ipv6 routes and FIB
entries, from David Ahern.
10) Move skb->xmit_more into a per-cpu variable, from Florian
Westphal (see the sketch after this list).
11) Improve eBPF verifier speed and increase maximum program size,
from Alexei Starovoitov.
12) Eliminate per-bucket spinlocks in rhashtable, and instead use bit
spinlocks. From Neil Brown (see the sketch after this list).
13) Allow tunneling with GUE encap in ipvs, from Jacky Hu.
14) Improve link partner cap detection in generic PHY code, from
Heiner Kallweit.
15) Add layer 2 encap support to bpf_skb_adjust_room(), from Alan
Maguire.
16) Remove SKB list implementation assumptions in SCTP, yours truly.
17) Various cleanups, optimizations, and simplifications in r8169
driver. From Heiner Kallweit.
18) Add memory accounting on TX and RX path of SCTP, from Xin Long.
19) Switch PHY drivers over to use dynamic feature detection, from
Heiner Kallweit.
20) Support flow steering without masking in dpaa2-eth, from Ioana
Ciocoi.
21) Implement ndo_get_devlink_port in netdevsim driver, from Jiri
Pirko.
22) Increase the strictness of parsing for current and future netlink
attributes, and export such policies to userspace. From Johannes
Berg.
23) Allow DSA tag drivers to be modular, from Andrew Lunn.
24) Remove legacy DSA probing support, also from Andrew Lunn.
25) Allow ll_temac driver to be used on non-x86 platforms, from Esben
Haabendal.
26) Add a generic tracepoint for TX queue timeouts to ease debugging,
from Cong Wang.
27) More indirect call optimizations, from Paolo Abeni"
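The SKB cache in (5) amounts to a small per-CPU free list that is consulted before falling back to the shared slab allocator. Below is a minimal userspace sketch of that structure, using a per-thread stash as a stand-in for per-CPU data; the names and sizes (skb_alloc, skb_free, SKB_CACHE_SIZE) are illustrative, not the kernel's.

#include <stdio.h>
#include <stdlib.h>

#define SKB_CACHE_SIZE 8
#define SKB_BYTES      2048

static _Thread_local void *skb_cache[SKB_CACHE_SIZE];
static _Thread_local int skb_cached;

static void *skb_alloc(void)
{
	if (skb_cached > 0)
		return skb_cache[--skb_cached];	/* lock-free fast path */
	return malloc(SKB_BYTES);		/* slow path: shared allocator */
}

static void skb_free(void *skb)
{
	if (skb_cached < SKB_CACHE_SIZE) {
		skb_cache[skb_cached++] = skb;	/* keep it for this CPU */
		return;
	}
	free(skb);
}

int main(void)
{
	void *a = skb_alloc();
	skb_free(a);			/* goes into the cache ... */
	void *b = skb_alloc();		/* ... and is recycled here */
	printf("recycled: %s\n", a == b ? "yes" : "no");
	free(b);
	return 0;
}

The point of the design is that alloc/free pairs on the same CPU never touch a shared lock, which is exactly the pattern of request/response RPC traffic.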
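The IP ID change in (8) keys the ID sequence on the packet's address tuple plus a boot-time secret, so remote hosts cannot observe one global counter across destinations. A sketch of the shape of it, with a toy keyed mixer standing in for SipHash and an invented table size (IDENT_SLOTS); only the structure, not the exact kernel code:

#include <stdint.h>
#include <stdio.h>

#define IDENT_SLOTS 2048		/* table of per-slot counters */

static uint64_t ident_key[2];		/* secret key, random at boot */
static uint16_t ident_bucket[IDENT_SLOTS];

/* Stand-in for SipHash: any keyed 64-bit PRF fits the sketch. */
static uint64_t keyed_mix(uint64_t x, uint64_t k0, uint64_t k1)
{
	x ^= k0;
	x ^= x >> 33; x *= 0xff51afd7ed558ccdULL;
	x ^= x >> 33; x *= 0xc4ceb9fe1a85ec53ULL;
	x ^= x >> 33;
	return x ^ k1;
}

/* The (saddr, daddr, proto) tuple is hashed with the secret key to
 * select a private counter, so IDs leak no cross-destination state.
 */
static uint16_t ip_id_next(uint32_t saddr, uint32_t daddr, uint8_t proto)
{
	uint64_t in = ((uint64_t)saddr << 32) | ((uint64_t)daddr ^ proto);
	uint64_t h = keyed_mix(in, ident_key[0], ident_key[1]);

	return ++ident_bucket[h % IDENT_SLOTS];
}

int main(void)
{
	ident_key[0] = 0x0123456789abcdefULL;	/* would be random at boot */
	ident_key[1] = 0xfedcba9876543210ULL;

	printf("id=%u\n", ip_id_next(0x0a000001, 0x08080808, 6));
	printf("id=%u\n", ip_id_next(0x0a000001, 0x08080808, 6));
	return 0;
}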
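The xmit_more change in (10) moves a per-packet hint out of struct sk_buff into per-CPU state that drivers query at transmit time; since xmit paths run with bottom halves disabled, the flag cannot be preempted away. A userspace sketch using a per-thread flag as the analogue (netdev_xmit_more() is the real kernel accessor name; the rest of the names here are invented):

#include <stdbool.h>
#include <stdio.h>

/* Per-thread stand-in for the kernel's per-CPU softnet state. */
static _Thread_local struct {
	bool more;
} netdev_xmit;

static bool netdev_xmit_more(void)	/* what a driver would query */
{
	return netdev_xmit.more;
}

static void xmit_one(int pkt, bool more)
{
	netdev_xmit.more = more;
	/* driver xmit routine: batch doorbell writes until !more */
	printf("tx pkt %d, ring doorbell: %s\n", pkt,
	       netdev_xmit_more() ? "deferred" : "now");
}

int main(void)
{
	xmit_one(1, true);
	xmit_one(2, true);
	xmit_one(3, false);	/* last packet of the batch flushes */
	return 0;
}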
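The rhashtable change in (12) repurposes the low bit of each bucket-head pointer as that bucket's lock, so no separate lock array is needed and lock plus head fit in one word. A self-contained C11 sketch of the pointer-bit locking idea; illustrative only, the kernel uses bit_spin_lock() together with RCU:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct node { int key; struct node *next; };

#define LOCK_BIT 1UL

static void bucket_lock(_Atomic uintptr_t *bucket)
{
	uintptr_t old;

	for (;;) {
		old = atomic_load(bucket) & ~LOCK_BIT;	/* expect unlocked */
		if (atomic_compare_exchange_weak(bucket, &old,
						 old | LOCK_BIT))
			return;	/* we set the bit; bucket is ours */
	}
}

static void bucket_unlock(_Atomic uintptr_t *bucket)
{
	atomic_fetch_and(bucket, ~LOCK_BIT);
}

static void bucket_insert(_Atomic uintptr_t *bucket, struct node *n)
{
	bucket_lock(bucket);
	n->next = (struct node *)(atomic_load(bucket) & ~LOCK_BIT);
	/* keep the lock bit set while publishing the new head */
	atomic_store(bucket, (uintptr_t)n | LOCK_BIT);
	bucket_unlock(bucket);
}

int main(void)
{
	static _Atomic uintptr_t bucket;
	struct node a = { .key = 1 }, b = { .key = 2 };

	bucket_insert(&bucket, &a);
	bucket_insert(&bucket, &b);

	for (struct node *n = (struct node *)(atomic_load(&bucket) & ~LOCK_BIT);
	     n; n = n->next)
		printf("key %d\n", n->key);
	return 0;
}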
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1763 commits)
cxgb4: Fix error path in cxgb4_init_module
net: phy: improve pause mode reporting in phy_print_status
dt-bindings: net: Fix a typo in the phy-mode list for ethernet bindings
net: macb: Change interrupt and napi enable order in open
net: ll_temac: Improve error message on error IRQ
net/sched: remove block pointer from common offload structure
net: ethernet: support of_get_mac_address new ERR_PTR error
net: usb: smsc: fix warning reported by kbuild test robot
staging: octeon-ethernet: Fix of_get_mac_address ERR_PTR check
net: dsa: support of_get_mac_address new ERR_PTR error
net: dsa: sja1105: Fix status initialization in sja1105_get_ethtool_stats
vrf: sit mtu should not be updated when vrf netdev is the link
net: dsa: Fix error cleanup path in dsa_init_module
l2tp: Fix possible NULL pointer dereference
taprio: add null check on sched_nest to avoid potential null pointer dereference
net: mvpp2: cls: fix less than zero check on a u32 variable
net_sched: sch_fq: handle non connected flows
net_sched: sch_fq: do not assume EDT packets are ordered
net: hns3: use devm_kcalloc when allocating desc_cb
net: hns3: some cleanup for struct hns3_enet_ring
...
Diffstat (limited to 'net')
489 files changed, 14217 insertions, 9258 deletions
diff --git a/net/6lowpan/nhc.c b/net/6lowpan/nhc.c
index 4fa2fdda174d..9e56fb98f33c 100644
--- a/net/6lowpan/nhc.c
+++ b/net/6lowpan/nhc.c
@@ -18,7 +18,7 @@
 #include "nhc.h"
 
 static struct rb_root rb_root = RB_ROOT;
-static struct lowpan_nhc *lowpan_nexthdr_nhcs[NEXTHDR_MAX];
+static struct lowpan_nhc *lowpan_nexthdr_nhcs[NEXTHDR_MAX + 1];
 static DEFINE_SPINLOCK(lowpan_nhc_lock);
 
 static int lowpan_nhc_insert(struct lowpan_nhc *nhc)
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index dc4411165e43..1f99678751df 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -75,6 +75,14 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg,
 	return 0;
 }
 
+static void vlan_stacked_transfer_operstate(const struct net_device *rootdev,
+					    struct net_device *dev,
+					    struct vlan_dev_priv *vlan)
+{
+	if (!(vlan->flags & VLAN_FLAG_BRIDGE_BINDING))
+		netif_stacked_transfer_operstate(rootdev, dev);
+}
+
 void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 {
 	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
@@ -180,7 +188,7 @@ int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack)
 	/* Account for reference in struct vlan_dev_priv */
 	dev_hold(real_dev);
 
-	netif_stacked_transfer_operstate(real_dev, dev);
+	vlan_stacked_transfer_operstate(real_dev, dev, vlan);
 	linkwatch_fire_event(dev); /* _MUST_ call rfc2863_policy() */
 
 	/* So, got the sucker initialized, now lets place
@@ -399,7 +407,8 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 	case NETDEV_CHANGE:
 		/* Propagate real device state to vlan devices */
 		vlan_group_for_each_dev(grp, i, vlandev)
-			netif_stacked_transfer_operstate(dev, vlandev);
+			vlan_stacked_transfer_operstate(dev, vlandev,
+							vlan_dev_priv(vlandev));
 		break;
 
 	case NETDEV_CHANGEADDR:
@@ -446,7 +455,8 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		dev_close_many(&close_list, false);
 
 		list_for_each_entry_safe(vlandev, tmp, &close_list, close_list) {
-			netif_stacked_transfer_operstate(dev, vlandev);
+			vlan_stacked_transfer_operstate(dev, vlandev,
+							vlan_dev_priv(vlandev));
 			list_del_init(&vlandev->close_list);
 		}
 		list_del(&close_list);
@@ -463,7 +473,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 			if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING))
 				dev_change_flags(vlandev, flgs | IFF_UP,
 						 extack);
 
-			netif_stacked_transfer_operstate(dev, vlandev);
+			vlan_stacked_transfer_operstate(dev, vlandev, vlan);
 		}
 		break;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 8d77b6ee4477..f044ae56a313 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -223,7 +223,8 @@ int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask)
 	u32 old_flags = vlan->flags;
 
 	if (mask & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP |
-		     VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP))
+		     VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP |
+		     VLAN_FLAG_BRIDGE_BINDING))
 		return -EINVAL;
 
 	vlan->flags = (old_flags & ~mask) | (flags & mask);
@@ -296,7 +297,8 @@ static int vlan_dev_open(struct net_device *dev)
 	if (vlan->flags & VLAN_FLAG_MVRP)
 		vlan_mvrp_request_join(dev);
 
-	if (netif_carrier_ok(real_dev))
+	if (netif_carrier_ok(real_dev) &&
+	    !(vlan->flags & VLAN_FLAG_BRIDGE_BINDING))
 		netif_carrier_on(dev);
 	return 0;
@@ -326,7 +328,8 @@ static int vlan_dev_stop(struct net_device *dev)
 	if (!ether_addr_equal(dev->dev_addr, real_dev->dev_addr))
 		dev_uc_del(real_dev, dev->dev_addr);
 
-	netif_carrier_off(dev);
+	if (!(vlan->flags & VLAN_FLAG_BRIDGE_BINDING))
+		netif_carrier_off(dev);
 	return 0;
 }
@@ -550,7 +553,8 @@ static const struct net_device_ops vlan_netdev_ops;
 
 static int vlan_dev_init(struct net_device *dev)
 {
-	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
+	struct net_device *real_dev = vlan->real_dev;
 
 	netif_carrier_off(dev);
@@ -561,6 +565,9 @@ static int vlan_dev_init(struct net_device *dev)
 					  (1<<__LINK_STATE_DORMANT))) |
 		     (1<<__LINK_STATE_PRESENT);
 
+	if (vlan->flags & VLAN_FLAG_BRIDGE_BINDING)
+		dev->state |= (1 << __LINK_STATE_NOCARRIER);
+
 	dev->hw_features = NETIF_F_HW_CSUM | NETIF_F_SG |
 			   NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE |
 			   NETIF_F_GSO_ENCAP_ALL |
@@ -591,8 +598,7 @@ static int vlan_dev_init(struct net_device *dev)
 #endif
 
 	dev->needed_headroom = real_dev->needed_headroom;
-	if (vlan_hw_offload_capable(real_dev->features,
-				    vlan_dev_priv(dev)->vlan_proto)) {
+	if (vlan_hw_offload_capable(real_dev->features, vlan->vlan_proto)) {
 		dev->header_ops      = &vlan_passthru_header_ops;
 		dev->hard_header_len = real_dev->hard_header_len;
 	} else {
@@ -606,8 +612,8 @@ static int vlan_dev_init(struct net_device *dev)
 
 	vlan_dev_set_lockdep_class(dev, vlan_dev_get_lock_subclass(dev));
 
-	vlan_dev_priv(dev)->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats);
-	if (!vlan_dev_priv(dev)->vlan_pcpu_stats)
+	vlan->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats);
+	if (!vlan->vlan_pcpu_stats)
 		return -ENOMEM;
 
 	return 0;
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 9b60c1e399e2..24eebbc92364 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -35,8 +35,8 @@ static inline int vlan_validate_qos_map(struct nlattr *attr)
 {
 	if (!attr)
 		return 0;
-	return nla_validate_nested(attr, IFLA_VLAN_QOS_MAX, vlan_map_policy,
-				   NULL);
+	return nla_validate_nested_deprecated(attr, IFLA_VLAN_QOS_MAX,
+					      vlan_map_policy, NULL);
 }
 
 static int vlan_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -84,7 +84,8 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[],
 		flags = nla_data(data[IFLA_VLAN_FLAGS]);
 		if ((flags->flags & flags->mask) &
 		    ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP |
-		      VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP)) {
+		      VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP |
+		      VLAN_FLAG_BRIDGE_BINDING)) {
 			NL_SET_ERR_MSG_MOD(extack, "Invalid VLAN flags");
 			return -EINVAL;
 		}
@@ -226,7 +227,7 @@ static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 			goto nla_put_failure;
 	}
 	if (vlan->nr_ingress_mappings) {
-		nest = nla_nest_start(skb, IFLA_VLAN_INGRESS_QOS);
+		nest = nla_nest_start_noflag(skb, IFLA_VLAN_INGRESS_QOS);
 		if (nest == NULL)
 			goto nla_put_failure;
@@ -244,7 +245,7 @@ static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	}
 
 	if (vlan->nr_egress_mappings) {
-		nest = nla_nest_start(skb, IFLA_VLAN_EGRESS_QOS);
+		nest = nla_nest_start_noflag(skb, IFLA_VLAN_EGRESS_QOS);
 		if (nest == NULL)
 			goto nla_put_failure;
diff --git a/net/Kconfig b/net/Kconfig
index 1efe1f9ee492..3e8fdd688329 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -429,11 +429,8 @@ config NET_SOCK_MSG
 	  with the help of BPF programs.
 
 config NET_DEVLINK
-	bool "Network physical/parent device Netlink interface"
-	help
-	  Network physical/parent device Netlink interface provides
-	  infrastructure to support access to physical chip-wide config and
-	  monitoring.
+	bool
+	default n
 
 config PAGE_POOL
 	bool
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index dbe8b1993be9..a2555023c654 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1806,12 +1806,6 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		rc = put_user(amount, (int __user *)argp);
 		break;
 	}
-	case SIOCGSTAMP:
-		rc = sock_get_timestamp(sk, argp);
-		break;
-	case SIOCGSTAMPNS:
-		rc = sock_get_timestampns(sk, argp);
-		break;
 	/* Routing */
 	case SIOCADDRT:
 	case SIOCDELRT:
@@ -1871,6 +1865,7 @@ static const struct proto_ops atalk_dgram_ops = {
 	.getname	= atalk_getname,
 	.poll		= datagram_poll,
 	.ioctl		= atalk_ioctl,
+	.gettstamp	= sock_gettstamp,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= atalk_compat_ioctl,
 #endif
diff --git a/net/atm/clip.c b/net/atm/clip.c
index d795b9c5aea4..b9e67e589a7b 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -345,8 +345,8 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb,
 		return NETDEV_TX_OK;
 	}
 	rt = (struct rtable *) dst;
-	if (rt->rt_gateway)
-		daddr = &rt->rt_gateway;
+	if (rt->rt_gw_family == AF_INET)
+		daddr = &rt->rt_gw4;
 	else
 		daddr = &ip_hdr(skb)->daddr;
 	n = dst_neigh_lookup(dst, daddr);
diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
index 2ff0e5e470e3..d955b683aa7c 100644
--- a/net/atm/ioctl.c
+++ b/net/atm/ioctl.c
@@ -81,22 +81,6 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd,
 				 (int __user *)argp) ? -EFAULT : 0;
 		goto done;
 	}
-	case SIOCGSTAMP: /* borrowed from IP */
-#ifdef CONFIG_COMPAT
-		if (compat)
-			error = compat_sock_get_timestamp(sk, argp);
-		else
-#endif
-			error = sock_get_timestamp(sk, argp);
-		goto done;
-	case SIOCGSTAMPNS: /* borrowed from IP */
-#ifdef CONFIG_COMPAT
-		if (compat)
-			error = compat_sock_get_timestampns(sk, argp);
-		else
-#endif
-			error = sock_get_timestampns(sk, argp);
-		goto done;
 	case ATM_SETSC:
 		net_warn_ratelimited("ATM_SETSC is obsolete; used by %s:%d\n",
 				     current->comm, task_pid_nr(current));
diff --git a/net/atm/lec.c b/net/atm/lec.c
index ad4f829193f0..a0311493b01b 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -726,9 +726,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg)
 	struct lec_priv *priv;
 
 	if (arg < 0)
-		i = 0;
-	else
-		i = arg;
+		arg = 0;
 	if (arg >= MAX_LEC_ITF)
 		return -EINVAL;
 	i = array_index_nospec(arg, MAX_LEC_ITF);
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index 2cb10af16afc..02bd2a436bdf 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -118,6 +118,7 @@ static const struct proto_ops pvc_proto_ops = {
 #ifdef CONFIG_COMPAT
 	.compat_ioctl =	vcc_compat_ioctl,
 #endif
+	.gettstamp =	sock_gettstamp,
 	.listen =	sock_no_listen,
 	.shutdown =	pvc_shutdown,
 	.setsockopt =	pvc_setsockopt,
diff --git a/net/atm/svc.c b/net/atm/svc.c
index 2f91b766ac42..908cbb8654f5 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -641,6 +641,7 @@ static const struct proto_ops svc_proto_ops = {
 #ifdef CONFIG_COMPAT
 	.compat_ioctl =	svc_compat_ioctl,
 #endif
+	.gettstamp =	sock_gettstamp,
 	.listen =	svc_listen,
 	.shutdown =	svc_shutdown,
 	.setsockopt =	svc_setsockopt,
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 5d01edf8d819..012c0b6fc4f6 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1714,14 +1714,6 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		break;
 	}
 
-	case SIOCGSTAMP:
-		res = sock_get_timestamp(sk, argp);
-		break;
-
-	case SIOCGSTAMPNS:
-		res = sock_get_timestampns(sk, argp);
-		break;
-
 	case SIOCAX25ADDUID:	/* Add a uid to the uid/call map table */
 	case SIOCAX25DELUID:	/* Delete a uid from the uid/call map table */
 	case SIOCAX25GETUID: {
@@ -1888,8 +1880,8 @@ static int ax25_info_show(struct seq_file *seq, void *v)
 	 * magic dev src_addr dest_addr,digi1,digi2,.. st vs vr va t1 t1 t2 t2 t3 t3 idle idle n2 n2 rtt window paclen Snd-Q Rcv-Q inode
 	 */
 
-	seq_printf(seq, "%8.8lx %s %s%s ",
-		   (long) ax25,
+	seq_printf(seq, "%p %s %s%s ",
+		   ax25,
 		   ax25->ax25_dev == NULL? "???" : ax25->ax25_dev->dev->name,
 		   ax2asc(buf, &ax25->source_addr),
 		   ax25->iamdigi? "*":"");
@@ -1950,6 +1942,7 @@ static const struct proto_ops ax25_proto_ops = {
 	.getname	= ax25_getname,
 	.poll		= datagram_poll,
 	.ioctl		= ax25_ioctl,
+	.gettstamp	= sock_gettstamp,
 	.listen		= ax25_listen,
 	.shutdown	= ax25_shutdown,
 	.setsockopt	= ax25_setsockopt,
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index a31db5e9ac8e..a3d188dfbe75 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -2,18 +2,6 @@
 # Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
 #
 # Marek Lindner, Simon Wunderlich
-#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of version 2 of the GNU General Public
-# License as published by the Free Software Foundation.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, see <http://www.gnu.org/licenses/>.
 #
 # B.A.T.M.A.N meshing protocol
 
@@ -109,6 +97,18 @@ config BATMAN_ADV_DEBUG
 	  buffer. The output is controlled via the batadv netdev specific
 	  log_level setting.
 
+config BATMAN_ADV_SYSFS
+	bool "batman-adv sysfs entries"
+	depends on BATMAN_ADV
+	default y
+	help
+	  Say Y here if you want to enable batman-adv device configuration and
+	  status interface through sysfs attributes. It is replaced by the
+	  batadv generic netlink family but still used by various userspace
+	  tools and scripts.
+
+	  If unsure, say Y.
+
 config BATMAN_ADV_TRACING
 	bool "B.A.T.M.A.N. tracing support"
 	depends on BATMAN_ADV
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index a887ecc3efa1..fd63e116d9ff 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -2,19 +2,6 @@
 # Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
 #
 # Marek Lindner, Simon Wunderlich
-#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of version 2 of the GNU General Public
-# License as published by the Free Software Foundation.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, see <http://www.gnu.org/licenses/>.
-#
 
 obj-$(CONFIG_BATMAN_ADV) += batman-adv.o
 batman-adv-y += bat_algo.o
@@ -41,7 +28,7 @@ batman-adv-y += originator.o
 batman-adv-y += routing.o
 batman-adv-y += send.o
 batman-adv-y += soft-interface.o
-batman-adv-y += sysfs.o
+batman-adv-$(CONFIG_BATMAN_ADV_SYSFS) += sysfs.o
 batman-adv-$(CONFIG_BATMAN_ADV_TRACING) += trace.o
 batman-adv-y += tp_meter.o
 batman-adv-y += translation-table.o
diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c
index 7b7e15641fef..fa39eaaab9d7 100644
--- a/net/batman-adv/bat_algo.c
+++ b/net/batman-adv/bat_algo.c
@@ -2,18 +2,6 @@
 /* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "main.h"
diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h
index 25e7bb51928c..cb7d57d16c9d 100644
--- a/net/batman-adv/bat_algo.h
+++ b/net/batman-adv/bat_algo.h
@@ -2,18 +2,6 @@
 /* Copyright (C) 2011-2019 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Linus Lüssing
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_BAT_ALGO_H_
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index de61091af666..bd4138ddf7e0 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -2,18 +2,6 @@
 /* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "bat_iv_ogm.h"
diff --git a/net/batman-adv/bat_iv_ogm.h b/net/batman-adv/bat_iv_ogm.h
index 785f6666273c..c7a9ba305bfc 100644
--- a/net/batman-adv/bat_iv_ogm.h
+++ b/net/batman-adv/bat_iv_ogm.h
@@ -2,18 +2,6 @@
 /* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_BAT_IV_OGM_H_
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 445594ed58af..231b4aab4d8d 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -2,18 +2,6 @@
 /* Copyright (C) 2013-2019 B.A.T.M.A.N. contributors:
  *
  * Linus Lüssing, Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "bat_v.h"
diff --git a/net/batman-adv/bat_v.h b/net/batman-adv/bat_v.h
index 465a4fc23354..37833db098e6 100644
--- a/net/batman-adv/bat_v.h
+++ b/net/batman-adv/bat_v.h
@@ -2,18 +2,6 @@
 /* Copyright (C) 2011-2019 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Linus Lüssing
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_BAT_V_H_
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index d5df0114f08a..2614a9caee00 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -2,18 +2,6 @@
 /* Copyright (C) 2011-2019 B.A.T.M.A.N. contributors:
  *
  * Linus Lüssing, Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "bat_v_elp.h"
*/ #include "bat_v_elp.h" diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h index 75f189ee4a1c..bb3d40f73bfe 100644 --- a/net/batman-adv/bat_v_elp.h +++ b/net/batman-adv/bat_v_elp.h @@ -2,18 +2,6 @@ /* Copyright (C) 2013-2019 B.A.T.M.A.N. contributors: * * Linus Lüssing, Marek Lindner - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_BAT_V_ELP_H_ diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index c9698ad41854..fad95ef64e01 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -2,18 +2,6 @@ /* Copyright (C) 2013-2019 B.A.T.M.A.N. contributors: * * Antonio Quartulli - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "bat_v_ogm.h" diff --git a/net/batman-adv/bat_v_ogm.h b/net/batman-adv/bat_v_ogm.h index f67cf7ee06b2..616bf2ea8755 100644 --- a/net/batman-adv/bat_v_ogm.h +++ b/net/batman-adv/bat_v_ogm.h @@ -2,18 +2,6 @@ /* Copyright (C) 2013-2019 B.A.T.M.A.N. contributors: * * Antonio Quartulli - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_BAT_V_OGM_H_ diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c index 63e134e763e3..7f04a6acf14e 100644 --- a/net/batman-adv/bitarray.c +++ b/net/batman-adv/bitarray.c @@ -2,18 +2,6 @@ /* Copyright (C) 2006-2019 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h
index f3a05ad9afad..84ad2d2b6ac9 100644
--- a/net/batman-adv/bitarray.h
+++ b/net/batman-adv/bitarray.h
@@ -2,18 +2,6 @@
 /* Copyright (C) 2006-2019 B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_BITARRAY_H_
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 4fb01108e5f5..663a53b6d36e 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -2,18 +2,6 @@
 /* Copyright (C) 2011-2019 B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "bridge_loop_avoidance.h"
@@ -59,7 +47,6 @@
 #include "netlink.h"
 #include "originator.h"
 #include "soft-interface.h"
-#include "sysfs.h"
 #include "translation-table.h"
 
 static const u8 batadv_announce_mac[4] = {0x43, 0x05, 0x43, 0x05};
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index 31771c751efb..012d72c8d064 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -2,18 +2,6 @@
 /* Copyright (C) 2011-2019 B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_BLA_H_
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 3b9d1ad2f467..d38d70ccdd5a 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -2,18 +2,6 @@
 /* Copyright (C) 2010-2019 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "debugfs.h"
diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h
index c0b8694041ec..7fac680cf740 100644
--- a/net/batman-adv/debugfs.h
+++ b/net/batman-adv/debugfs.h
@@ -2,18 +2,6 @@
 /* Copyright (C) 2010-2019 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_DEBUGFS_H_
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 310a4f353008..b0af3a11d406 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -2,18 +2,6 @@
 /* Copyright (C) 2011-2019 B.A.T.M.A.N. contributors:
  *
  * Antonio Quartulli
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "distributed-arp-table.h"
@@ -667,7 +655,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst,
 }
 
 /**
- * batadv_dat_send_data() - send a payload to the selected candidates
+ * batadv_dat_forward_data() - copy and send payload to the selected candidates
  * @bat_priv: the bat priv with all the soft interface information
  * @skb: payload to send
  * @ip: the DHT key
@@ -680,9 +668,9 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst,
  * Return: true if the packet is sent to at least one candidate, false
  * otherwise.
  */
-static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
-				 struct sk_buff *skb, __be32 ip,
-				 unsigned short vid, int packet_subtype)
+static bool batadv_dat_forward_data(struct batadv_priv *bat_priv,
+				    struct sk_buff *skb, __be32 ip,
+				    unsigned short vid, int packet_subtype)
 {
 	int i;
 	bool ret = false;
@@ -1277,8 +1265,8 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
 		ret = true;
 	} else {
 		/* Send the request to the DHT */
-		ret = batadv_dat_send_data(bat_priv, skb, ip_dst, vid,
-					   BATADV_P_DAT_DHT_GET);
+		ret = batadv_dat_forward_data(bat_priv, skb, ip_dst, vid,
+					      BATADV_P_DAT_DHT_GET);
 	}
 out:
 	if (dat_entry)
@@ -1392,8 +1380,10 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
 	/* Send the ARP reply to the candidates for both the IP addresses that
 	 * the node obtained from the ARP reply
 	 */
-	batadv_dat_send_data(bat_priv, skb, ip_src, vid, BATADV_P_DAT_DHT_PUT);
-	batadv_dat_send_data(bat_priv, skb, ip_dst, vid, BATADV_P_DAT_DHT_PUT);
+	batadv_dat_forward_data(bat_priv, skb, ip_src, vid,
+				BATADV_P_DAT_DHT_PUT);
+	batadv_dat_forward_data(bat_priv, skb, ip_dst, vid,
+				BATADV_P_DAT_DHT_PUT);
 }
@@ -1444,7 +1434,6 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
 			   hw_src, &ip_src, hw_dst, &ip_dst,
 			   dat_entry->mac_addr, &dat_entry->ip);
 		dropped = true;
-		goto out;
 	}
 
 	/* Update our internal cache with both the IP addresses the node got
@@ -1453,6 +1442,9 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
 	batadv_dat_entry_add(bat_priv, ip_src, hw_src, vid);
 	batadv_dat_entry_add(bat_priv, ip_dst, hw_dst, vid);
 
+	if (dropped)
+		goto out;
+
 	/* If BLA is enabled, only forward ARP replies if we have claimed the
 	 * source of the ARP reply or if no one else of the same backbone has
 	 * already claimed that client. This prevents that different gateways
@@ -1708,8 +1700,10 @@ static void batadv_dat_put_dhcp(struct batadv_priv *bat_priv, u8 *chaddr,
 	batadv_dat_entry_add(bat_priv, yiaddr, chaddr, vid);
 	batadv_dat_entry_add(bat_priv, ip_dst, hw_dst, vid);
 
-	batadv_dat_send_data(bat_priv, skb, yiaddr, vid, BATADV_P_DAT_DHT_PUT);
-	batadv_dat_send_data(bat_priv, skb, ip_dst, vid, BATADV_P_DAT_DHT_PUT);
+	batadv_dat_forward_data(bat_priv, skb, yiaddr, vid,
+				BATADV_P_DAT_DHT_PUT);
+	batadv_dat_forward_data(bat_priv, skb, ip_dst, vid,
+				BATADV_P_DAT_DHT_PUT);
 
 	consume_skb(skb);
diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h
index 68c0ff321acd..110c27447d70 100644
--- a/net/batman-adv/distributed-arp-table.h
+++ b/net/batman-adv/distributed-arp-table.h
@@ -2,18 +2,6 @@
 /* Copyright (C) 2011-2019 B.A.T.M.A.N. contributors:
  *
  * Antonio Quartulli
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_DISTRIBUTED_ARP_TABLE_H_
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index b506d15b8230..385fccdcf69d 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -2,18 +2,6 @@
 /* Copyright (C) 2013-2019 B.A.T.M.A.N. contributors:
  *
  * Martin Hundebøll <martin@hundeboll.net>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "fragmentation.h"
diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h
index abdac26579bf..d6074ba2ada7 100644
--- a/net/batman-adv/fragmentation.h
+++ b/net/batman-adv/fragmentation.h
@@ -2,18 +2,6 @@
 /* Copyright (C) 2013-2019 B.A.T.M.A.N. contributors:
  *
  * Martin Hundebøll <martin@hundeboll.net>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_FRAGMENTATION_H_
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index f5811f61aa92..47df4c678988 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -2,18 +2,6 @@
 /* Copyright (C) 2009-2019 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "gateway_client.h"
@@ -53,7 +41,6 @@
 #include "originator.h"
 #include "routing.h"
 #include "soft-interface.h"
-#include "sysfs.h"
 #include "translation-table.h"
 
 /* These are the offsets of the "hw type" and "hw address length" in the dhcp
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index b5732c8be81a..0e14026feebd 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -2,18 +2,6 @@
 /* Copyright (C) 2009-2019 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_GATEWAY_CLIENT_H_
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index e064de45e22c..dac097f9be03 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -2,18 +2,6 @@
 /* Copyright (C) 2009-2019 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "gateway_common.h"
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
index 128467a0fb89..5cf50736c635 100644
--- a/net/batman-adv/gateway_common.h
+++ b/net/batman-adv/gateway_common.h
@@ -2,18 +2,6 @@
 /* Copyright (C) 2009-2019 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_GATEWAY_COMMON_H_
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 96ef7c70b4d9..79d1731b8306 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -2,18 +2,6 @@
 /* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "hard-interface.h"
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index 48de28c83401..c8ef6aa0e865 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -2,18 +2,6 @@
 /* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_HARD_INTERFACE_H_
diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c
index 56a08ce193d5..a9d4e176f4de 100644
--- a/net/batman-adv/hash.c
+++ b/net/batman-adv/hash.c
@@ -2,18 +2,6 @@
 /* Copyright (C) 2006-2019 B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "hash.h"
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index 37507b6d4006..ceef171f7f98 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -2,18 +2,6 @@
 /* Copyright (C) 2006-2019 B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_HASH_H_
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 3ff32125f4b5..0a91c8661357 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -2,18 +2,6 @@
 /* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "icmp_socket.h"
*/ #include "icmp_socket.h" diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h index 5f8926522ff0..35eecbfd2e65 100644 --- a/net/batman-adv/icmp_socket.h +++ b/net/batman-adv/icmp_socket.h @@ -2,18 +2,6 @@ /* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_ICMP_SOCKET_H_ diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c index e8ff13598c08..f79ebd5b46e9 100644 --- a/net/batman-adv/log.c +++ b/net/batman-adv/log.c @@ -2,18 +2,6 @@ /* Copyright (C) 2010-2019 B.A.T.M.A.N. contributors: * * Marek Lindner - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "log.h" diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h index 660e9bcc85a2..5504637e63d8 100644 --- a/net/batman-adv/log.h +++ b/net/batman-adv/log.h @@ -2,18 +2,6 @@ /* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_LOG_H_ diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 75750870cf04..dabcaff87e34 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -2,18 +2,6 @@ /* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ #include "main.h" @@ -22,6 +10,7 @@ #include <linux/build_bug.h> #include <linux/byteorder/generic.h> #include <linux/crc32c.h> +#include <linux/device.h> #include <linux/errno.h> #include <linux/genetlink.h> #include <linux/gfp.h> @@ -31,6 +20,7 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/kernel.h> +#include <linux/kobject.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/module.h> @@ -40,6 +30,7 @@ #include <linux/rcupdate.h> #include <linux/seq_file.h> #include <linux/skbuff.h> +#include <linux/slab.h> #include <linux/spinlock.h> #include <linux/stddef.h> #include <linux/string.h> @@ -84,6 +75,22 @@ struct workqueue_struct *batadv_event_workqueue; static void batadv_recv_handler_init(void); +#define BATADV_UEV_TYPE_VAR "BATTYPE=" +#define BATADV_UEV_ACTION_VAR "BATACTION=" +#define BATADV_UEV_DATA_VAR "BATDATA=" + +static char *batadv_uev_action_str[] = { + "add", + "del", + "change", + "loopdetect", +}; + +static char *batadv_uev_type_str[] = { + "gw", + "bla", +}; + static int __init batadv_init(void) { int ret; @@ -678,6 +685,60 @@ bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid) return ap_isolation_enabled; } +/** + * batadv_throw_uevent() - Send an uevent with batman-adv specific env data + * @bat_priv: the bat priv with all the soft interface information + * @type: subsystem type of event. Stored in uevent's BATTYPE + * @action: action type of event. Stored in uevent's BATACTION + * @data: string with additional information to the event (ignored for + * BATADV_UEV_DEL). Stored in uevent's BATDATA + * + * Return: 0 on success or negative error number in case of failure + */ +int batadv_throw_uevent(struct batadv_priv *bat_priv, enum batadv_uev_type type, + enum batadv_uev_action action, const char *data) +{ + int ret = -ENOMEM; + struct kobject *bat_kobj; + char *uevent_env[4] = { NULL, NULL, NULL, NULL }; + + bat_kobj = &bat_priv->soft_iface->dev.kobj; + + uevent_env[0] = kasprintf(GFP_ATOMIC, + "%s%s", BATADV_UEV_TYPE_VAR, + batadv_uev_type_str[type]); + if (!uevent_env[0]) + goto out; + + uevent_env[1] = kasprintf(GFP_ATOMIC, + "%s%s", BATADV_UEV_ACTION_VAR, + batadv_uev_action_str[action]); + if (!uevent_env[1]) + goto out; + + /* If the event is DEL, ignore the data field */ + if (action != BATADV_UEV_DEL) { + uevent_env[2] = kasprintf(GFP_ATOMIC, + "%s%s", BATADV_UEV_DATA_VAR, data); + if (!uevent_env[2]) + goto out; + } + + ret = kobject_uevent_env(bat_kobj, KOBJ_CHANGE, uevent_env); +out: + kfree(uevent_env[0]); + kfree(uevent_env[1]); + kfree(uevent_env[2]); + + if (ret) + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "Impossible to send uevent for (%s,%s,%s) event (err: %d)\n", + batadv_uev_type_str[type], + batadv_uev_action_str[action], + (action == BATADV_UEV_DEL ? "NULL" : data), ret); + return ret; +} + module_init(batadv_init); module_exit(batadv_exit); diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 3ed669d7dc6b..f827e441025f 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -2,18 +2,6 @@ /* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 3ed669d7dc6b..f827e441025f 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -2,18 +2,6 @@
 /* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _NET_BATMAN_ADV_MAIN_H_
@@ -394,5 +382,7 @@ static inline void batadv_add_counter(struct batadv_priv *bat_priv, size_t idx,
 
 unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len);
 bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid);
+int batadv_throw_uevent(struct batadv_priv *bat_priv, enum batadv_uev_type type,
+			enum batadv_uev_action action, const char *data);
 
 #endif /* _NET_BATMAN_ADV_MAIN_H_ */
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index f91b1b6265cf..3feb9435b715 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -2,18 +2,6 @@
 /* Copyright (C) 2014-2019 B.A.T.M.A.N. contributors:
  *
  * Linus Lüssing
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "multicast.h"
@@ -66,6 +54,7 @@
 #include "hash.h"
 #include "log.h"
 #include "netlink.h"
+#include "send.h"
 #include "soft-interface.h"
 #include "translation-table.h"
 #include "tvlv.h"
@@ -991,6 +980,7 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
 {
 	int ret, tt_count, ip_count, unsnoop_count, total_count;
 	bool is_unsnoopable = false;
+	unsigned int mcast_fanout;
 	struct ethhdr *ethhdr;
 
 	ret = batadv_mcast_forw_mode_check(bat_priv, skb, &is_unsnoopable);
@@ -1025,8 +1015,203 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
 	case 0:
 		return BATADV_FORW_NONE;
 	default:
-		return BATADV_FORW_ALL;
+		mcast_fanout = atomic_read(&bat_priv->multicast_fanout);
+
+		if (!unsnoop_count && total_count <= mcast_fanout)
+			return BATADV_FORW_SOME;
 	}
+
+	return BATADV_FORW_ALL;
+}
+
+/**
+ * batadv_mcast_forw_tt() - forwards a packet to multicast listeners
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast packet to transmit
+ * @vid: the vlan identifier
+ *
+ * Sends copies of a frame with multicast destination to any multicast
+ * listener registered in the translation table. A transmission is performed
+ * via a batman-adv unicast packet for each such destination node.
+ *
+ * Return: NET_XMIT_DROP on memory allocation failure, NET_XMIT_SUCCESS
+ * otherwise.
+ */
+static int
+batadv_mcast_forw_tt(struct batadv_priv *bat_priv, struct sk_buff *skb,
+		     unsigned short vid)
+{
+	int ret = NET_XMIT_SUCCESS;
+	struct sk_buff *newskb;
+
+	struct batadv_tt_orig_list_entry *orig_entry;
+
+	struct batadv_tt_global_entry *tt_global;
+	const u8 *addr = eth_hdr(skb)->h_dest;
+
+	tt_global = batadv_tt_global_hash_find(bat_priv, addr, vid);
+	if (!tt_global)
+		goto out;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(orig_entry, &tt_global->orig_list, list) {
+		newskb = skb_copy(skb, GFP_ATOMIC);
+		if (!newskb) {
+			ret = NET_XMIT_DROP;
+			break;
+		}
+
+		batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0,
+					orig_entry->orig_node, vid);
+	}
+	rcu_read_unlock();
+
+	batadv_tt_global_entry_put(tt_global);
+
+out:
+	return ret;
+}
+ */ +static int +batadv_mcast_forw_tt(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid) +{ + int ret = NET_XMIT_SUCCESS; + struct sk_buff *newskb; + + struct batadv_tt_orig_list_entry *orig_entry; + + struct batadv_tt_global_entry *tt_global; + const u8 *addr = eth_hdr(skb)->h_dest; + + tt_global = batadv_tt_global_hash_find(bat_priv, addr, vid); + if (!tt_global) + goto out; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_entry, &tt_global->orig_list, list) { + newskb = skb_copy(skb, GFP_ATOMIC); + if (!newskb) { + ret = NET_XMIT_DROP; + break; + } + + batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0, + orig_entry->orig_node, vid); + } + rcu_read_unlock(); + + batadv_tt_global_entry_put(tt_global); + +out: + return ret; +} + +/** + * batadv_mcast_forw_want_all_ipv4() - forward to nodes with want-all-ipv4 + * @bat_priv: the bat priv with all the soft interface information + * @skb: the multicast packet to transmit + * @vid: the vlan identifier + * + * Sends copies of a frame with multicast destination to any node with a + * BATADV_MCAST_WANT_ALL_IPV4 flag set. A transmission is performed via a + * batman-adv unicast packet for each such destination node. + * + * Return: NET_XMIT_DROP on memory allocation failure, NET_XMIT_SUCCESS + * otherwise. + */ +static int +batadv_mcast_forw_want_all_ipv4(struct batadv_priv *bat_priv, + struct sk_buff *skb, unsigned short vid) +{ + struct batadv_orig_node *orig_node; + int ret = NET_XMIT_SUCCESS; + struct sk_buff *newskb; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, + &bat_priv->mcast.want_all_ipv4_list, + mcast_want_all_ipv4_node) { + newskb = skb_copy(skb, GFP_ATOMIC); + if (!newskb) { + ret = NET_XMIT_DROP; + break; + } + + batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0, + orig_node, vid); + } + rcu_read_unlock(); + return ret; +} + +/** + * batadv_mcast_forw_want_all_ipv6() - forward to nodes with want-all-ipv6 + * @bat_priv: the bat priv with all the soft interface information + * @skb: The multicast packet to transmit + * @vid: the vlan identifier + * + * Sends copies of a frame with multicast destination to any node with a + * BATADV_MCAST_WANT_ALL_IPV6 flag set. A transmission is performed via a + * batman-adv unicast packet for each such destination node. + * + * Return: NET_XMIT_DROP on memory allocation failure, NET_XMIT_SUCCESS + * otherwise. + */ +static int +batadv_mcast_forw_want_all_ipv6(struct batadv_priv *bat_priv, + struct sk_buff *skb, unsigned short vid) +{ + struct batadv_orig_node *orig_node; + int ret = NET_XMIT_SUCCESS; + struct sk_buff *newskb; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, + &bat_priv->mcast.want_all_ipv6_list, + mcast_want_all_ipv6_node) { + newskb = skb_copy(skb, GFP_ATOMIC); + if (!newskb) { + ret = NET_XMIT_DROP; + break; + } + + batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0, + orig_node, vid); + } + rcu_read_unlock(); + return ret; +} + +/** + * batadv_mcast_forw_want_all() - forward packet to nodes in a want-all list + * @bat_priv: the bat priv with all the soft interface information + * @skb: the multicast packet to transmit + * @vid: the vlan identifier + * + * Sends copies of a frame with multicast destination to any node with a + * BATADV_MCAST_WANT_ALL_IPV4 or BATADV_MCAST_WANT_ALL_IPV6 flag set. A + * transmission is performed via a batman-adv unicast packet for each such + * destination node. 
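batadv_mcast_forw_tt() and the two want-all helpers above repeat one pattern: copy, then unicast, once per destination. Distilled into a single hedged sketch (the consume-on-send semantics are inferred from how the helpers use skb_copy(); this helper itself is not part of the diff):

static int example_unicast_one_copy(struct batadv_priv *bat_priv,
				    struct sk_buff *skb,
				    struct batadv_orig_node *orig_node,
				    unsigned short vid)
{
	struct sk_buff *newskb;

	/* Each recipient needs its own copy: the unicast send path
	 * consumes the skb it is handed, while the caller's original
	 * skb must survive for the remaining list entries.
	 */
	newskb = skb_copy(skb, GFP_ATOMIC);
	if (!newskb)
		return NET_XMIT_DROP;

	return batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0,
				       orig_node, vid);
}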
+ * + * Return: NET_XMIT_DROP on memory allocation failure or if the protocol family + * is neither IPv4 nor IPv6. NET_XMIT_SUCCESS otherwise. + */ +static int +batadv_mcast_forw_want_all(struct batadv_priv *bat_priv, + struct sk_buff *skb, unsigned short vid) +{ + switch (ntohs(eth_hdr(skb)->h_proto)) { + case ETH_P_IP: + return batadv_mcast_forw_want_all_ipv4(bat_priv, skb, vid); + case ETH_P_IPV6: + return batadv_mcast_forw_want_all_ipv6(bat_priv, skb, vid); + default: + /* we shouldn't be here... */ + return NET_XMIT_DROP; + } +} + +/** + * batadv_mcast_forw_send() - send packet to any detected multicast recipient + * @bat_priv: the bat priv with all the soft interface information + * @skb: the multicast packet to transmit + * @vid: the vlan identifier + * + * Sends copies of a frame with multicast destination to any node that signaled + * interest in it, that is either via the translation table or the corresponding + * want-all flags. A transmission is performed via a batman-adv unicast packet + * for each such destination node. + * + * The given skb is consumed/freed. + * + * Return: NET_XMIT_DROP on memory allocation failure or if the protocol family + * is neither IPv4 nor IPv6. NET_XMIT_SUCCESS otherwise. + */ +int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid) +{ + int ret; + + ret = batadv_mcast_forw_tt(bat_priv, skb, vid); + if (ret != NET_XMIT_SUCCESS) { + kfree_skb(skb); + return ret; + } + + ret = batadv_mcast_forw_want_all(bat_priv, skb, vid); + if (ret != NET_XMIT_SUCCESS) { + kfree_skb(skb); + return ret; + } + + consume_skb(skb); + return ret; } /** diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h index 466013fe88af..653b9b76fabe 100644 --- a/net/batman-adv/multicast.h +++ b/net/batman-adv/multicast.h @@ -2,18 +2,6 @@ /* Copyright (C) 2014-2019 B.A.T.M.A.N. contributors: * * Linus Lüssing - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>.
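Worth underlining from the kernel-doc above: batadv_mcast_forw_send() always takes ownership of the skb, freeing it with kfree_skb() on error and releasing it with consume_skb() on success. A hedged caller-side illustration (not part of the diff):

static void example_forward_some(struct batadv_priv *bat_priv,
				 struct sk_buff *skb, unsigned short vid)
{
	int ret;

	ret = batadv_mcast_forw_send(bat_priv, skb, vid);
	/* skb is gone either way at this point; dereferencing it here
	 * would be a use-after-free.
	 */
	if (ret != NET_XMIT_SUCCESS)
		pr_debug("multicast-to-unicast transmission failed\n");
}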
*/ #ifndef _NET_BATMAN_ADV_MULTICAST_H_ @@ -36,6 +24,13 @@ enum batadv_forw_mode { BATADV_FORW_ALL, /** + * @BATADV_FORW_SOME: forward the packet to some nodes (currently via + * a multicast-to-unicast conversion and the BATMAN unicast routing + * protocol) + */ + BATADV_FORW_SOME, + + /** * @BATADV_FORW_SINGLE: forward the packet to a single node (currently * via the BATMAN unicast routing protocol) */ @@ -51,6 +46,9 @@ enum batadv_forw_mode batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, struct batadv_orig_node **mcast_single_orig); +int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid); + void batadv_mcast_init(struct batadv_priv *bat_priv); int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset); @@ -73,6 +71,14 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, return BATADV_FORW_ALL; } +static inline int +batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid) +{ + kfree_skb(skb); + return NET_XMIT_DROP; +} + static inline int batadv_mcast_init(struct batadv_priv *bat_priv) { return 0; diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 67a58da2e6a0..a67720fad46c 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -2,18 +2,6 @@ /* Copyright (C) 2016-2019 B.A.T.M.A.N. contributors: * * Matthias Schiffer - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. 
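The netlink hunks that follow expose the new knob as BATADV_ATTR_MULTICAST_FANOUT, an NLA_U32 accepted by BATADV_CMD_SET_MESH. A hedged userspace sketch using libnl-3's genl API (error handling omitted; BATADV_ATTR_MESH_IFINDEX as the mesh selector is an assumption based on the family's NEED_MESH convention, not something this diff shows):

#include <stdint.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/batman_adv.h>

static void example_set_fanout(int ifindex, uint32_t fanout)
{
	struct nl_sock *sk = nl_socket_alloc();
	struct nl_msg *msg = nlmsg_alloc();
	int family;

	genl_connect(sk);
	family = genl_ctrl_resolve(sk, "batadv");

	/* version 1 matches the .version field of batadv_netlink_family */
	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
		    BATADV_CMD_SET_MESH, 1);
	nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, ifindex);
	nla_put_u32(msg, BATADV_ATTR_MULTICAST_FANOUT, fanout);

	nl_send_auto(sk, msg);
	nlmsg_free(msg);
	nl_socket_free(sk);
}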
*/ #include "netlink.h" @@ -157,6 +145,7 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { [BATADV_ATTR_HOP_PENALTY] = { .type = NLA_U8 }, [BATADV_ATTR_LOG_LEVEL] = { .type = NLA_U32 }, [BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED] = { .type = NLA_U8 }, + [BATADV_ATTR_MULTICAST_FANOUT] = { .type = NLA_U32 }, [BATADV_ATTR_NETWORK_CODING_ENABLED] = { .type = NLA_U8 }, [BATADV_ATTR_ORIG_INTERVAL] = { .type = NLA_U32 }, [BATADV_ATTR_ELP_INTERVAL] = { .type = NLA_U32 }, @@ -353,6 +342,10 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg, if (nla_put_u8(msg, BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED, !atomic_read(&bat_priv->multicast_mode))) goto nla_put_failure; + + if (nla_put_u32(msg, BATADV_ATTR_MULTICAST_FANOUT, + atomic_read(&bat_priv->multicast_fanout))) + goto nla_put_failure; #endif /* CONFIG_BATMAN_ADV_MCAST */ #ifdef CONFIG_BATMAN_ADV_NC @@ -592,6 +585,12 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info) atomic_set(&bat_priv->multicast_mode, !nla_get_u8(attr)); } + + if (info->attrs[BATADV_ATTR_MULTICAST_FANOUT]) { + attr = info->attrs[BATADV_ATTR_MULTICAST_FANOUT]; + + atomic_set(&bat_priv->multicast_fanout, nla_get_u32(attr)); + } #endif /* CONFIG_BATMAN_ADV_MCAST */ #ifdef CONFIG_BATMAN_ADV_NC @@ -1344,35 +1343,35 @@ static void batadv_post_doit(const struct genl_ops *ops, struct sk_buff *skb, static const struct genl_ops batadv_netlink_ops[] = { { .cmd = BATADV_CMD_GET_MESH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, /* can be retrieved by unprivileged users */ - .policy = batadv_netlink_policy, .doit = batadv_netlink_get_mesh, .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_TP_METER, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .doit = batadv_netlink_tp_meter_start, .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_TP_METER_CANCEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .doit = batadv_netlink_tp_meter_cancel, .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_GET_ROUTING_ALGOS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .dumpit = batadv_algo_dump, }, { .cmd = BATADV_CMD_GET_HARDIF, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, /* can be retrieved by unprivileged users */ - .policy = batadv_netlink_policy, .dumpit = batadv_netlink_dump_hardif, .doit = batadv_netlink_get_hardif, .internal_flags = BATADV_FLAG_NEED_MESH | @@ -1380,85 +1379,85 @@ static const struct genl_ops batadv_netlink_ops[] = { }, { .cmd = BATADV_CMD_GET_TRANSTABLE_LOCAL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .dumpit = batadv_tt_local_dump, }, { .cmd = BATADV_CMD_GET_TRANSTABLE_GLOBAL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .dumpit = batadv_tt_global_dump, }, { .cmd = BATADV_CMD_GET_ORIGINATORS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .dumpit = batadv_orig_dump, }, { .cmd = BATADV_CMD_GET_NEIGHBORS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .dumpit = 
batadv_hardif_neigh_dump, }, { .cmd = BATADV_CMD_GET_GATEWAYS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .dumpit = batadv_gw_dump, }, { .cmd = BATADV_CMD_GET_BLA_CLAIM, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .dumpit = batadv_bla_claim_dump, }, { .cmd = BATADV_CMD_GET_BLA_BACKBONE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .dumpit = batadv_bla_backbone_dump, }, { .cmd = BATADV_CMD_GET_DAT_CACHE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .dumpit = batadv_dat_cache_dump, }, { .cmd = BATADV_CMD_GET_MCAST_FLAGS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .dumpit = batadv_mcast_flags_dump, }, { .cmd = BATADV_CMD_SET_MESH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .doit = batadv_netlink_set_mesh, .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_SET_HARDIF, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .doit = batadv_netlink_set_hardif, .internal_flags = BATADV_FLAG_NEED_MESH | BATADV_FLAG_NEED_HARDIF, }, { .cmd = BATADV_CMD_GET_VLAN, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, /* can be retrieved by unprivileged users */ - .policy = batadv_netlink_policy, .doit = batadv_netlink_get_vlan, .internal_flags = BATADV_FLAG_NEED_MESH | BATADV_FLAG_NEED_VLAN, }, { .cmd = BATADV_CMD_SET_VLAN, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .doit = batadv_netlink_set_vlan, .internal_flags = BATADV_FLAG_NEED_MESH | BATADV_FLAG_NEED_VLAN, @@ -1470,6 +1469,7 @@ struct genl_family batadv_netlink_family __ro_after_init = { .name = BATADV_NL_NAME, .version = 1, .maxattr = BATADV_ATTR_MAX, + .policy = batadv_netlink_policy, .netnsok = true, .pre_doit = batadv_pre_doit, .post_doit = batadv_post_doit, diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h index 7273368544fc..d1e0681b8743 100644 --- a/net/batman-adv/netlink.h +++ b/net/batman-adv/netlink.h @@ -2,18 +2,6 @@ /* Copyright (C) 2016-2019 B.A.T.M.A.N. contributors: * * Matthias Schiffer - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_NETLINK_H_ diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 278762bd94c6..c5e7906045f3 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -2,18 +2,6 @@ /* Copyright (C) 2012-2019 B.A.T.M.A.N. 
contributors: * * Martin Hundebøll, Jeppe Ledet-Pedersen - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "network-coding.h" diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h index 96ef0a511fc7..74f56113a5d0 100644 --- a/net/batman-adv/network-coding.h +++ b/net/batman-adv/network-coding.h @@ -2,18 +2,6 @@ /* Copyright (C) 2012-2019 B.A.T.M.A.N. contributors: * * Martin Hundebøll, Jeppe Ledet-Pedersen - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_NETWORK_CODING_H_ diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index e5cdf89ef63c..45db798a7297 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -2,18 +2,6 @@ /* Copyright (C) 2009-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "originator.h" diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index dca1e4a34ec6..3829e26f9c5d 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -2,18 +2,6 @@ /* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ #ifndef _NET_BATMAN_ADV_ORIGINATOR_H_ diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index cae0e5dd0768..f0f864820dea 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -2,18 +2,6 @@ /* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "routing.h" diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index 0102d69d345c..b96c6d06d188 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -2,18 +2,6 @@ /* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_ROUTING_H_ diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 66a8b3e44501..3ce5f7bad369 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -2,18 +2,6 @@ /* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "send.h" diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index 1f6132922e60..5921ee4e107c 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -2,18 +2,6 @@ /* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_SEND_H_ diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 2e367230376b..a7677e1d000f 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -2,18 +2,6 @@ /* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "soft-interface.h" @@ -209,7 +197,7 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb, unsigned short vid; u32 seqno; int gw_mode; - enum batadv_forw_mode forw_mode; + enum batadv_forw_mode forw_mode = BATADV_FORW_SINGLE; struct batadv_orig_node *mcast_single_orig = NULL; int network_offset = ETH_HLEN; __be16 proto; @@ -317,7 +305,8 @@ send: if (forw_mode == BATADV_FORW_NONE) goto dropped; - if (forw_mode == BATADV_FORW_SINGLE) + if (forw_mode == BATADV_FORW_SINGLE || + forw_mode == BATADV_FORW_SOME) do_bcast = false; } } @@ -377,6 +366,8 @@ send: ret = batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST, 0, mcast_single_orig, vid); + } else if (forw_mode == BATADV_FORW_SOME) { + ret = batadv_mcast_forw_send(bat_priv, skb, vid); } else { if (batadv_dat_snoop_outgoing_arp_request(bat_priv, skb)) @@ -818,6 +809,7 @@ static int batadv_softif_init_late(struct net_device *dev) bat_priv->mcast.querier_ipv6.shadowing = false; bat_priv->mcast.flags = BATADV_NO_FLAGS; atomic_set(&bat_priv->multicast_mode, 1); + atomic_set(&bat_priv->multicast_fanout, 16); atomic_set(&bat_priv->mcast.num_want_all_unsnoopables, 0); atomic_set(&bat_priv->mcast.num_want_all_ipv4, 0); atomic_set(&bat_priv->mcast.num_want_all_ipv6, 0); diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h index 538bb661878c..275442a7acb6 100644 --- a/net/batman-adv/soft-interface.h +++ b/net/batman-adv/soft-interface.h @@ -2,18 +2,6 @@ /* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_SOFT_INTERFACE_H_ diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 208655cf6717..80fc3253c336 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -2,23 +2,12 @@ /* Copyright (C) 2010-2019 B.A.T.M.A.N. 
contributors: * * Marek Lindner - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "sysfs.h" #include "main.h" +#include <asm/current.h> #include <linux/atomic.h> #include <linux/compiler.h> #include <linux/device.h> @@ -34,6 +23,7 @@ #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/rtnetlink.h> +#include <linux/sched.h> #include <linux/slab.h> #include <linux/stddef.h> #include <linux/string.h> @@ -52,6 +42,16 @@ #include "network-coding.h" #include "soft-interface.h" +/** + * batadv_sysfs_deprecated() - Log use of deprecated batadv sysfs access + * @attr: attribute which was accessed + */ +static void batadv_sysfs_deprecated(struct attribute *attr) +{ + pr_warn_ratelimited(DEPRECATED "%s (pid %d) Use of sysfs file \"%s\".\nUse batadv genl family instead", + current->comm, task_pid_nr(current), attr->name); +} + static struct net_device *batadv_kobj_to_netdev(struct kobject *obj) { struct device *dev = container_of(obj->parent, struct device, kobj); @@ -114,22 +114,6 @@ batadv_kobj_to_vlan(struct batadv_priv *bat_priv, struct kobject *obj) return vlan; } -#define BATADV_UEV_TYPE_VAR "BATTYPE=" -#define BATADV_UEV_ACTION_VAR "BATACTION=" -#define BATADV_UEV_DATA_VAR "BATDATA=" - -static char *batadv_uev_action_str[] = { - "add", - "del", - "change", - "loopdetect", -}; - -static char *batadv_uev_type_str[] = { - "gw", - "bla", -}; - /* Use this, if you have customized show and store functions for vlan attrs */ #define BATADV_ATTR_VLAN(_name, _mode, _show, _store) \ struct batadv_attribute batadv_attr_vlan_##_name = { \ @@ -157,6 +141,7 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \ struct batadv_priv *bat_priv = netdev_priv(net_dev); \ ssize_t length; \ \ + batadv_sysfs_deprecated(attr); \ length = __batadv_store_bool_attr(buff, count, _post_func, attr,\ &bat_priv->_name, net_dev); \ \ @@ -171,6 +156,7 @@ ssize_t batadv_show_##_name(struct kobject *kobj, \ { \ struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); \ \ + batadv_sysfs_deprecated(attr); \ return sprintf(buff, "%s\n", \ atomic_read(&bat_priv->_name) == 0 ? \ "disabled" : "enabled"); \ @@ -194,6 +180,7 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \ struct batadv_priv *bat_priv = netdev_priv(net_dev); \ ssize_t length; \ \ + batadv_sysfs_deprecated(attr); \ length = __batadv_store_uint_attr(buff, count, _min, _max, \ _post_func, attr, \ &bat_priv->_var, net_dev, \ @@ -210,6 +197,7 @@ ssize_t batadv_show_##_name(struct kobject *kobj, \ { \ struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); \ \ + batadv_sysfs_deprecated(attr); \ return sprintf(buff, "%i\n", atomic_read(&bat_priv->_var)); \ } \ @@ -234,6 +222,7 @@ ssize_t batadv_store_vlan_##_name(struct kobject *kobj, \ attr, &vlan->_name, \ bat_priv->soft_iface); \ \ + batadv_sysfs_deprecated(attr); \ if (vlan->vid) \ batadv_netlink_notify_vlan(bat_priv, vlan); \ else \ @@ -254,6 +243,7 @@ ssize_t batadv_show_vlan_##_name(struct kobject *kobj, \ atomic_read(&vlan->_name) == 0 ? 
\ "disabled" : "enabled"); \ \ + batadv_sysfs_deprecated(attr); \ batadv_softif_vlan_put(vlan); \ return res; \ } @@ -275,6 +265,7 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \ struct batadv_priv *bat_priv; \ ssize_t length; \ \ + batadv_sysfs_deprecated(attr); \ hard_iface = batadv_hardif_get_by_netdev(net_dev); \ if (!hard_iface) \ return 0; \ @@ -302,6 +293,7 @@ ssize_t batadv_show_##_name(struct kobject *kobj, \ struct batadv_hard_iface *hard_iface; \ ssize_t length; \ \ + batadv_sysfs_deprecated(attr); \ hard_iface = batadv_hardif_get_by_netdev(net_dev); \ if (!hard_iface) \ return 0; \ @@ -446,6 +438,7 @@ static ssize_t batadv_show_bat_algo(struct kobject *kobj, { struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); + batadv_sysfs_deprecated(attr); return sprintf(buff, "%s\n", bat_priv->algo_ops->name); } @@ -462,6 +455,8 @@ static ssize_t batadv_show_gw_mode(struct kobject *kobj, struct attribute *attr, struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); int bytes_written; + batadv_sysfs_deprecated(attr); + /* GW mode is not available if the routing algorithm in use does not * implement the GW API */ @@ -496,6 +491,8 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj, char *curr_gw_mode_str; int gw_mode_tmp = -1; + batadv_sysfs_deprecated(attr); + /* toggling GW mode is allowed only if the routing algorithm in use * provides the GW API */ @@ -570,6 +567,8 @@ static ssize_t batadv_show_gw_sel_class(struct kobject *kobj, { struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); + batadv_sysfs_deprecated(attr); + /* GW selection class is not available if the routing algorithm in use * does not implement the GW API */ @@ -590,6 +589,8 @@ static ssize_t batadv_store_gw_sel_class(struct kobject *kobj, struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); ssize_t length; + batadv_sysfs_deprecated(attr); + /* setting the GW selection class is allowed only if the routing * algorithm in use implements the GW API */ @@ -620,6 +621,8 @@ static ssize_t batadv_show_gw_bwidth(struct kobject *kobj, struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); u32 down, up; + batadv_sysfs_deprecated(attr); + down = atomic_read(&bat_priv->gw.bandwidth_down); up = atomic_read(&bat_priv->gw.bandwidth_up); @@ -635,6 +638,8 @@ static ssize_t batadv_store_gw_bwidth(struct kobject *kobj, struct net_device *net_dev = batadv_kobj_to_netdev(kobj); ssize_t length; + batadv_sysfs_deprecated(attr); + if (buff[count - 1] == '\n') buff[count - 1] = '\0'; @@ -659,6 +664,7 @@ static ssize_t batadv_show_isolation_mark(struct kobject *kobj, { struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); + batadv_sysfs_deprecated(attr); return sprintf(buff, "%#.8x/%#.8x\n", bat_priv->isolation_mark, bat_priv->isolation_mark_mask); } @@ -682,6 +688,8 @@ static ssize_t batadv_store_isolation_mark(struct kobject *kobj, u32 mark, mask; char *mask_ptr; + batadv_sysfs_deprecated(attr); + /* parse the mask if it has been specified, otherwise assume the mask is * the biggest possible */ @@ -937,6 +945,8 @@ static ssize_t batadv_show_mesh_iface(struct kobject *kobj, ssize_t length; const char *ifname; + batadv_sysfs_deprecated(attr); + hard_iface = batadv_hardif_get_by_netdev(net_dev); if (!hard_iface) return 0; @@ -1041,6 +1051,8 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj, struct net_device *net_dev = batadv_kobj_to_netdev(kobj); struct batadv_store_mesh_work *store_work; + batadv_sysfs_deprecated(attr); + if (buff[count - 1] == '\n') buff[count - 1] = '\0'; @@ 
-1072,6 +1084,8 @@ static ssize_t batadv_show_iface_status(struct kobject *kobj, struct batadv_hard_iface *hard_iface; ssize_t length; + batadv_sysfs_deprecated(attr); + hard_iface = batadv_hardif_get_by_netdev(net_dev); if (!hard_iface) return 0; @@ -1123,6 +1137,8 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj, u32 old_tp_override; bool ret; + batadv_sysfs_deprecated(attr); + hard_iface = batadv_hardif_get_by_netdev(net_dev); if (!hard_iface) return -EINVAL; @@ -1165,6 +1181,8 @@ static ssize_t batadv_show_throughput_override(struct kobject *kobj, struct batadv_hard_iface *hard_iface; u32 tp_override; + batadv_sysfs_deprecated(attr); + hard_iface = batadv_hardif_get_by_netdev(net_dev); if (!hard_iface) return -EINVAL; @@ -1250,57 +1268,3 @@ void batadv_sysfs_del_hardif(struct kobject **hardif_obj) kobject_put(*hardif_obj); *hardif_obj = NULL; } - -/** - * batadv_throw_uevent() - Send an uevent with batman-adv specific env data - * @bat_priv: the bat priv with all the soft interface information - * @type: subsystem type of event. Stored in uevent's BATTYPE - * @action: action type of event. Stored in uevent's BATACTION - * @data: string with additional information to the event (ignored for - * BATADV_UEV_DEL). Stored in uevent's BATDATA - * - * Return: 0 on success or negative error number in case of failure - */ -int batadv_throw_uevent(struct batadv_priv *bat_priv, enum batadv_uev_type type, - enum batadv_uev_action action, const char *data) -{ - int ret = -ENOMEM; - struct kobject *bat_kobj; - char *uevent_env[4] = { NULL, NULL, NULL, NULL }; - - bat_kobj = &bat_priv->soft_iface->dev.kobj; - - uevent_env[0] = kasprintf(GFP_ATOMIC, - "%s%s", BATADV_UEV_TYPE_VAR, - batadv_uev_type_str[type]); - if (!uevent_env[0]) - goto out; - - uevent_env[1] = kasprintf(GFP_ATOMIC, - "%s%s", BATADV_UEV_ACTION_VAR, - batadv_uev_action_str[action]); - if (!uevent_env[1]) - goto out; - - /* If the event is DEL, ignore the data field */ - if (action != BATADV_UEV_DEL) { - uevent_env[2] = kasprintf(GFP_ATOMIC, - "%s%s", BATADV_UEV_DATA_VAR, data); - if (!uevent_env[2]) - goto out; - } - - ret = kobject_uevent_env(bat_kobj, KOBJ_CHANGE, uevent_env); -out: - kfree(uevent_env[0]); - kfree(uevent_env[1]); - kfree(uevent_env[2]); - - if (ret) - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Impossible to send uevent for (%s,%s,%s) event (err: %d)\n", - batadv_uev_type_str[type], - batadv_uev_action_str[action], - (action == BATADV_UEV_DEL ? "NULL" : data), ret); - return ret; -} diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h index 705ffbe763f4..83fa808b1871 100644 --- a/net/batman-adv/sysfs.h +++ b/net/batman-adv/sysfs.h @@ -2,18 +2,6 @@ /* Copyright (C) 2010-2019 B.A.T.M.A.N. contributors: * * Marek Lindner - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ #ifndef _NET_BATMAN_ADV_SYSFS_H_ @@ -57,6 +45,8 @@ struct batadv_attribute { char *buf, size_t count); }; +#ifdef CONFIG_BATMAN_ADV_SYSFS + int batadv_sysfs_add_meshif(struct net_device *dev); void batadv_sysfs_del_meshif(struct net_device *dev); int batadv_sysfs_add_hardif(struct kobject **hardif_obj, @@ -66,7 +56,39 @@ int batadv_sysfs_add_vlan(struct net_device *dev, struct batadv_softif_vlan *vlan); void batadv_sysfs_del_vlan(struct batadv_priv *bat_priv, struct batadv_softif_vlan *vlan); -int batadv_throw_uevent(struct batadv_priv *bat_priv, enum batadv_uev_type type, - enum batadv_uev_action action, const char *data); + +#else + +static inline int batadv_sysfs_add_meshif(struct net_device *dev) +{ + return 0; +} + +static inline void batadv_sysfs_del_meshif(struct net_device *dev) +{ +} + +static inline int batadv_sysfs_add_hardif(struct kobject **hardif_obj, + struct net_device *dev) +{ + return 0; +} + +static inline void batadv_sysfs_del_hardif(struct kobject **hardif_obj) +{ +} + +static inline int batadv_sysfs_add_vlan(struct net_device *dev, + struct batadv_softif_vlan *vlan) +{ + return 0; +} + +static inline void batadv_sysfs_del_vlan(struct batadv_priv *bat_priv, + struct batadv_softif_vlan *vlan) +{ +} + +#endif #endif /* _NET_BATMAN_ADV_SYSFS_H_ */ diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index 500109bbd551..820392146249 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -2,18 +2,6 @@ /* Copyright (C) 2012-2019 B.A.T.M.A.N. contributors: * * Edo Monticelli, Antonio Quartulli - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "tp_meter.h" diff --git a/net/batman-adv/tp_meter.h b/net/batman-adv/tp_meter.h index 6b4d0f733896..604b3799c972 100644 --- a/net/batman-adv/tp_meter.h +++ b/net/batman-adv/tp_meter.h @@ -2,18 +2,6 @@ /* Copyright (C) 2012-2019 B.A.T.M.A.N. contributors: * * Edo Monticelli, Antonio Quartulli - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_TP_METER_H_ diff --git a/net/batman-adv/trace.c b/net/batman-adv/trace.c index f77c917ed20d..3cedd2c36528 100644 --- a/net/batman-adv/trace.c +++ b/net/batman-adv/trace.c @@ -2,18 +2,6 @@ /* Copyright (C) 2010-2019 B.A.T.M.A.N. contributors: * * Sven Eckelmann - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #define CREATE_TRACE_POINTS diff --git a/net/batman-adv/trace.h b/net/batman-adv/trace.h index 5e5579051400..d8f764521c0b 100644 --- a/net/batman-adv/trace.h +++ b/net/batman-adv/trace.h @@ -2,18 +2,6 @@ /* Copyright (C) 2010-2019 B.A.T.M.A.N. contributors: * * Sven Eckelmann - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #if !defined(_NET_BATMAN_ADV_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 26c4e2493ddf..1ddfd5e011ee 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -2,18 +2,6 @@ /* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich, Antonio Quartulli - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "translation-table.h" @@ -205,7 +193,7 @@ batadv_tt_local_hash_find(struct batadv_priv *bat_priv, const u8 *addr, * Return: a pointer to the corresponding tt_global_entry struct if the client * is found, NULL otherwise. */ -static struct batadv_tt_global_entry * +struct batadv_tt_global_entry * batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid) { @@ -300,8 +288,7 @@ static void batadv_tt_global_entry_release(struct kref *ref) * possibly release it * @tt_global_entry: tt_global_entry to be free'd */ -static void -batadv_tt_global_entry_put(struct batadv_tt_global_entry *tt_global_entry) +void batadv_tt_global_entry_put(struct batadv_tt_global_entry *tt_global_entry) { kref_put(&tt_global_entry->common.refcount, batadv_tt_global_entry_release); diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index 61bca75e5911..c8c48d62a430 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -2,18 +2,6 @@ /* Copyright (C) 2007-2019 B.A.T.M.A.N. 
contributors: * * Marek Lindner, Simon Wunderlich, Antonio Quartulli - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ @@ -41,6 +29,10 @@ int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb); void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, s32 match_vid, const char *message); +struct batadv_tt_global_entry * +batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr, + unsigned short vid); +void batadv_tt_global_entry_put(struct batadv_tt_global_entry *tt_global_entry); int batadv_tt_global_hash_count(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid); struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv, diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c index 7e947b01919d..aae63f0d21eb 100644 --- a/net/batman-adv/tvlv.c +++ b/net/batman-adv/tvlv.c @@ -2,18 +2,6 @@ /* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "main.h" diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h index c0f033b1acb8..114ac01e06af 100644 --- a/net/batman-adv/tvlv.h +++ b/net/batman-adv/tvlv.h @@ -2,18 +2,6 @@ /* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_TVLV_H_ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index a21b34ed6548..357ca119329a 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -2,18 +2,6 @@ /* Copyright (C) 2007-2019 B.A.T.M.A.N. 
contributors: * * Marek Lindner, Simon Wunderlich - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #ifndef _NET_BATMAN_ADV_TYPES_H_ @@ -1565,6 +1553,12 @@ struct batadv_priv { * node's sender/originating side */ atomic_t multicast_mode; + + /** + * @multicast_fanout: Maximum number of packet copies to generate for a + * multicast-to-unicast conversion + */ + atomic_t multicast_fanout; #endif /** @orig_interval: OGM broadcast interval in milliseconds */ diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 8d12198eaa94..94ddf19998c7 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -521,14 +521,6 @@ int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) err = put_user(amount, (int __user *) arg); break; - case SIOCGSTAMP: - err = sock_get_timestamp(sk, (struct timeval __user *) arg); - break; - - case SIOCGSTAMPNS: - err = sock_get_timestampns(sk, (struct timespec __user *) arg); - break; - default: err = -ENOIOCTLCMD; break; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index bd4978ce8c45..3cf0764d5793 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1276,6 +1276,14 @@ int hci_conn_check_link_mode(struct hci_conn *conn) !test_bit(HCI_CONN_ENCRYPT, &conn->flags)) return 0; + /* The minimum encryption key size needs to be enforced by the + * host stack before establishing any L2CAP connections. The + * specification in theory allows a minimum of 1, but to align + * BR/EDR and LE transports, a minimum of 7 is chosen. + */ + if (conn->enc_key_size < HCI_MIN_ENC_KEY_SIZE) + return 0; + return 1; } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index d6b2540ba7f8..b81bf53c5ac4 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1460,8 +1460,6 @@ static int hci_dev_do_open(struct hci_dev *hdev) hdev->set_bdaddr) ret = hdev->set_bdaddr(hdev, &hdev->public_addr); - else - ret = -EADDRNOTAVAIL; } setup_failed: @@ -4383,6 +4381,9 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, return; } + /* If we reach this point this event matches the last command sent */ + hci_dev_clear_flag(hdev, HCI_CMD_PENDING); + /* If the command succeeded and there's still more commands in * this request the request is not yet complete. 
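Taken together with the hci_cmd_work() and event-handler hunks below, the new HCI_CMD_PENDING flag forms a small handshake; a hedged condensation of its life cycle as it appears across these hunks:

/* 1. TX, hci_cmd_work(): a command sent as part of a pending
 *    synchronous request marks itself:
 *        if (hci_req_status_pend(hdev))
 *                hci_dev_set_flag(hdev, HCI_CMD_PENDING);
 *
 * 2. RX, hci_req_cmd_complete(): a command-complete/status event that
 *    matches the last sent command clears the flag again:
 *        hci_dev_clear_flag(hdev, HCI_CMD_PENDING);
 *
 * 3. RX, hci_cmd_complete_evt()/hci_cmd_status_evt(): if the flag is
 *    still set after step 2, the event did not match the last command;
 *    it is logged as unexpected and the command queue is not kicked.
 */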
*/ @@ -4493,6 +4494,8 @@ static void hci_cmd_work(struct work_struct *work) hdev->sent_cmd = skb_clone(skb, GFP_KERNEL); if (hdev->sent_cmd) { + if (hci_req_status_pend(hdev)) + hci_dev_set_flag(hdev, HCI_CMD_PENDING); atomic_dec(&hdev->cmd_cnt); hci_send_frame(hdev, skb); if (test_bit(HCI_RESET, &hdev->flags)) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 609fd6871c5a..9e4fcf406d9c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3404,6 +3404,12 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb, hci_req_cmd_complete(hdev, *opcode, *status, req_complete, req_complete_skb); + if (hci_dev_test_flag(hdev, HCI_CMD_PENDING)) { + bt_dev_err(hdev, + "unexpected event for opcode 0x%4.4x", *opcode); + return; + } + if (atomic_read(&hdev->cmd_cnt) && !skb_queue_empty(&hdev->cmd_q)) queue_work(hdev->workqueue, &hdev->cmd_work); } @@ -3511,6 +3517,12 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb, hci_req_cmd_complete(hdev, *opcode, ev->status, req_complete, req_complete_skb); + if (hci_dev_test_flag(hdev, HCI_CMD_PENDING)) { + bt_dev_err(hdev, + "unexpected event for opcode 0x%4.4x", *opcode); + return; + } + if (atomic_read(&hdev->cmd_cnt) && !skb_queue_empty(&hdev->cmd_q)) queue_work(hdev->workqueue, &hdev->cmd_work); } @@ -5433,7 +5445,7 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb) ev->data, ev->length); } - ptr += sizeof(*ev) + ev->length + 1; + ptr += sizeof(*ev) + ev->length; } hci_dev_unlock(hdev); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index ca73d36cc149..e9a95ed65491 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -46,6 +46,11 @@ void hci_req_purge(struct hci_request *req) skb_queue_purge(&req->cmd_q); } +bool hci_req_status_pend(struct hci_dev *hdev) +{ + return hdev->req_status == HCI_REQ_PEND; +} + static int req_run(struct hci_request *req, hci_req_complete_t complete, hci_req_complete_skb_t complete_skb) { diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 692cc8b13368..55b2050cc9ff 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -37,6 +37,7 @@ struct hci_request { void hci_req_init(struct hci_request *req, struct hci_dev *hdev); void hci_req_purge(struct hci_request *req); +bool hci_req_status_pend(struct hci_dev *hdev); int hci_req_run(struct hci_request *req, hci_req_complete_t complete); int hci_req_run_skb(struct hci_request *req, hci_req_complete_skb_t complete); void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index 9f85a1943be9..2151913892ce 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -75,6 +75,7 @@ static int do_hidp_sock_ioctl(struct socket *sock, unsigned int cmd, void __user sockfd_put(csock); return err; } + ca.name[sizeof(ca.name)-1] = 0; err = hidp_connection_add(&ca, csock, isock); if (!err && copy_to_user(argp, &ca, sizeof(ca))) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index f17e393b43b4..b53acd6c9a3d 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -510,12 +510,12 @@ void l2cap_chan_set_defaults(struct l2cap_chan *chan) } EXPORT_SYMBOL_GPL(l2cap_chan_set_defaults); -static void l2cap_le_flowctl_init(struct l2cap_chan *chan) +static void l2cap_le_flowctl_init(struct l2cap_chan *chan, u16 tx_credits) { chan->sdu = NULL; chan->sdu_last_frag = 
NULL; chan->sdu_len = 0; - chan->tx_credits = 0; + chan->tx_credits = tx_credits; /* Derive MPS from connection MTU to stop HCI fragmentation */ chan->mps = min_t(u16, chan->imtu, chan->conn->mtu - L2CAP_HDR_SIZE); /* Give enough credits for a full packet */ @@ -1281,7 +1281,7 @@ static void l2cap_le_connect(struct l2cap_chan *chan) if (test_and_set_bit(FLAG_LE_CONN_REQ_SENT, &chan->flags)) return; - l2cap_le_flowctl_init(chan); + l2cap_le_flowctl_init(chan, 0); req.psm = chan->psm; req.scid = cpu_to_le16(chan->scid); @@ -5532,11 +5532,10 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, chan->dcid = scid; chan->omtu = mtu; chan->remote_mps = mps; - chan->tx_credits = __le16_to_cpu(req->credits); __l2cap_chan_add(conn, chan); - l2cap_le_flowctl_init(chan); + l2cap_le_flowctl_init(chan, __le16_to_cpu(req->credits)); dcid = chan->scid; credits = chan->rx_credits; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index a3a2cd55e23a..a7be8b59b3c2 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -791,10 +791,13 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, conn = chan->conn; - /*change security for LE channels */ + /* change security for LE channels */ if (chan->scid == L2CAP_CID_ATT) { - if (smp_conn_security(conn->hcon, sec.level)) + if (smp_conn_security(conn->hcon, sec.level)) { + err = -EINVAL; break; + } + set_bit(FLAG_PENDING_SECURITY, &chan->flags); sk->sk_state = BT_CONFIG; chan->state = BT_CONFIG; @@ -1655,6 +1658,7 @@ static const struct proto_ops l2cap_sock_ops = { .recvmsg = l2cap_sock_recvmsg, .poll = bt_sock_poll, .ioctl = bt_sock_ioctl, + .gettstamp = sock_gettstamp, .mmap = sock_no_mmap, .socketpair = sock_no_socketpair, .shutdown = l2cap_sock_shutdown, diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 2457f408d17d..150114e33b20 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2301,8 +2301,7 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, MGMT_STATUS_INVALID_PARAMS); } - expected_len = sizeof(*cp) + key_count * - sizeof(struct mgmt_link_key_info); + expected_len = struct_size(cp, keys, key_count); if (expected_len != len) { bt_dev_err(hdev, "load_link_keys: expected %u bytes, got %u bytes", expected_len, len); @@ -5030,7 +5029,7 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data, MGMT_STATUS_INVALID_PARAMS); } - expected_len = sizeof(*cp) + irk_count * sizeof(struct mgmt_irk_info); + expected_len = struct_size(cp, irks, irk_count); if (expected_len != len) { bt_dev_err(hdev, "load_irks: expected %u bytes, got %u bytes", expected_len, len); @@ -5112,8 +5111,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, MGMT_STATUS_INVALID_PARAMS); } - expected_len = sizeof(*cp) + key_count * - sizeof(struct mgmt_ltk_info); + expected_len = struct_size(cp, keys, key_count); if (expected_len != len) { bt_dev_err(hdev, "load_keys: expected %u bytes, got %u bytes", expected_len, len); @@ -5847,8 +5845,7 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data, MGMT_STATUS_INVALID_PARAMS); } - expected_len = sizeof(*cp) + param_count * - sizeof(struct mgmt_conn_param); + expected_len = struct_size(cp, params, param_count); if (expected_len != len) { bt_dev_err(hdev, "load_conn_param: expected %u bytes, got %u bytes", expected_len, len); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index b1f49fcc0478..90bb53aa4bee 100644 --- 
a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -1039,6 +1039,7 @@ static const struct proto_ops rfcomm_sock_ops = { .setsockopt = rfcomm_sock_setsockopt, .getsockopt = rfcomm_sock_getsockopt, .ioctl = rfcomm_sock_ioctl, + .gettstamp = sock_gettstamp, .poll = bt_sock_poll, .socketpair = sock_no_socketpair, .mmap = sock_no_mmap diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index d892b7c3cc42..b91d6b440fdf 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -1190,6 +1190,7 @@ static const struct proto_ops sco_sock_ops = { .recvmsg = sco_sock_recvmsg, .poll = bt_sock_poll, .ioctl = bt_sock_ioctl, + .gettstamp = sock_gettstamp, .mmap = sock_no_mmap, .socketpair = sock_no_socketpair, .shutdown = sco_sock_shutdown, diff --git a/net/bpf/Makefile b/net/bpf/Makefile index 27b2992a0692..b0ca361742e4 100644 --- a/net/bpf/Makefile +++ b/net/bpf/Makefile @@ -1 +1 @@ -obj-y := test_run.o +obj-$(CONFIG_BPF_SYSCALL) := test_run.o diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index fab142b796ef..33e0dc168c16 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -10,9 +10,13 @@ #include <linux/etherdevice.h> #include <linux/filter.h> #include <linux/sched/signal.h> +#include <net/bpf_sk_storage.h> #include <net/sock.h> #include <net/tcp.h> +#define CREATE_TRACE_POINTS +#include <trace/events/bpf_test_run.h> + static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *retval, u32 *time) { @@ -100,6 +104,7 @@ static int bpf_test_finish(const union bpf_attr *kattr, if (err != -ENOSPC) err = 0; out: + trace_bpf_test_finish(&err); return err; } @@ -123,12 +128,126 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 size, return data; } +static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size) +{ + void __user *data_in = u64_to_user_ptr(kattr->test.ctx_in); + void __user *data_out = u64_to_user_ptr(kattr->test.ctx_out); + u32 size = kattr->test.ctx_size_in; + void *data; + int err; + + if (!data_in && !data_out) + return NULL; + + data = kzalloc(max_size, GFP_USER); + if (!data) + return ERR_PTR(-ENOMEM); + + if (data_in) { + err = bpf_check_uarg_tail_zero(data_in, max_size, size); + if (err) { + kfree(data); + return ERR_PTR(err); + } + + size = min_t(u32, max_size, size); + if (copy_from_user(data, data_in, size)) { + kfree(data); + return ERR_PTR(-EFAULT); + } + } + return data; +} + +static int bpf_ctx_finish(const union bpf_attr *kattr, + union bpf_attr __user *uattr, const void *data, + u32 size) +{ + void __user *data_out = u64_to_user_ptr(kattr->test.ctx_out); + int err = -EFAULT; + u32 copy_size = size; + + if (!data || !data_out) + return 0; + + if (copy_size > kattr->test.ctx_size_out) { + copy_size = kattr->test.ctx_size_out; + err = -ENOSPC; + } + + if (copy_to_user(data_out, data, copy_size)) + goto out; + if (copy_to_user(&uattr->test.ctx_size_out, &size, sizeof(size))) + goto out; + if (err != -ENOSPC) + err = 0; +out: + return err; +} + +/** + * range_is_zero - test whether buffer is initialized + * @buf: buffer to check + * @from: check from this position + * @to: check up until (excluding) this position + * + * This function returns true if there is no non-zero byte + * in the buf in the range [from,to).
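bpf_ctx_init() and bpf_ctx_finish() above let BPF_PROG_TEST_RUN carry an optional __sk_buff context next to the packet data. A hedged userspace sketch of the raw syscall usage (prog_fd and the packet buffer are assumed to exist; only priority and cb may be non-zero on input, per convert___skb_to_skb() below):

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int example_test_run_with_ctx(int prog_fd, void *pkt, __u32 pkt_len)
{
	struct __sk_buff ctx_in = {}, ctx_out = {};
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	ctx_in.priority = 2;	/* writable; fields outside priority/cb
				 * must stay zero or the kernel returns
				 * -EINVAL */

	attr.test.prog_fd = prog_fd;
	attr.test.data_in = (__u64)(unsigned long)pkt;
	attr.test.data_size_in = pkt_len;
	attr.test.ctx_in = (__u64)(unsigned long)&ctx_in;
	attr.test.ctx_size_in = sizeof(ctx_in);
	attr.test.ctx_out = (__u64)(unsigned long)&ctx_out;
	attr.test.ctx_size_out = sizeof(ctx_out);

	/* on success, ctx_out.priority/ctx_out.cb hold the skb state
	 * after the program ran, copied back by convert_skb_to___skb() */
	return syscall(__NR_bpf, BPF_PROG_TEST_RUN, &attr, sizeof(attr));
}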
+ */ +static inline bool range_is_zero(void *buf, size_t from, size_t to) +{ + return !memchr_inv((u8 *)buf + from, 0, to - from); +} + +static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb) +{ + struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb; + + if (!__skb) + return 0; + + /* make sure the fields we don't use are zeroed */ + if (!range_is_zero(__skb, 0, offsetof(struct __sk_buff, priority))) + return -EINVAL; + + /* priority is allowed */ + + if (!range_is_zero(__skb, offsetof(struct __sk_buff, priority) + + FIELD_SIZEOF(struct __sk_buff, priority), + offsetof(struct __sk_buff, cb))) + return -EINVAL; + + /* cb is allowed */ + + if (!range_is_zero(__skb, offsetof(struct __sk_buff, cb) + + FIELD_SIZEOF(struct __sk_buff, cb), + sizeof(struct __sk_buff))) + return -EINVAL; + + skb->priority = __skb->priority; + memcpy(&cb->data, __skb->cb, QDISC_CB_PRIV_LEN); + + return 0; +} + +static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb) +{ + struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb; + + if (!__skb) + return; + + __skb->priority = skb->priority; + memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN); +} + int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { bool is_l2 = false, is_direct_pkt_access = false; u32 size = kattr->test.data_size_in; u32 repeat = kattr->test.repeat; + struct __sk_buff *ctx = NULL; u32 retval, duration; int hh_len = ETH_HLEN; struct sk_buff *skb; @@ -141,6 +260,12 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, if (IS_ERR(data)) return PTR_ERR(data); + ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff)); + if (IS_ERR(ctx)) { + kfree(data); + return PTR_ERR(ctx); + } + switch (prog->type) { case BPF_PROG_TYPE_SCHED_CLS: case BPF_PROG_TYPE_SCHED_ACT: @@ -158,6 +283,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, sk = kzalloc(sizeof(struct sock), GFP_USER); if (!sk) { kfree(data); + kfree(ctx); return -ENOMEM; } sock_net_set(sk, current->nsproxy->net_ns); @@ -166,6 +292,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, skb = build_skb(data, 0); if (!skb) { kfree(data); + kfree(ctx); kfree(sk); return -ENOMEM; } @@ -180,32 +307,38 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, __skb_push(skb, hh_len); if (is_direct_pkt_access) bpf_compute_data_pointers(skb); + ret = convert___skb_to_skb(skb, ctx); + if (ret) + goto out; ret = bpf_test_run(prog, skb, repeat, &retval, &duration); - if (ret) { - kfree_skb(skb); - kfree(sk); - return ret; - } + if (ret) + goto out; if (!is_l2) { if (skb_headroom(skb) < hh_len) { int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb)); if (pskb_expand_head(skb, nhead, 0, GFP_USER)) { - kfree_skb(skb); - kfree(sk); - return -ENOMEM; + ret = -ENOMEM; + goto out; } } memset(__skb_push(skb, hh_len), 0, hh_len); } + convert_skb_to___skb(skb, ctx); size = skb->len; /* bpf program can never convert linear skb to non-linear */ if (WARN_ON_ONCE(skb_is_nonlinear(skb))) size = skb_headlen(skb); ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration); + if (!ret) + ret = bpf_ctx_finish(kattr, uattr, ctx, + sizeof(struct __sk_buff)); +out: kfree_skb(skb); + bpf_sk_storage_free(sk); kfree(sk); + kfree(ctx); return ret; } @@ -220,6 +353,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, void *data; int ret; + if (kattr->test.ctx_in || kattr->test.ctx_out) + 
return -EINVAL; + data = bpf_test_init(kattr, size, XDP_PACKET_HEADROOM + NET_IP_ALIGN, 0); if (IS_ERR(data)) return PTR_ERR(data); @@ -249,13 +385,12 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, union bpf_attr __user *uattr) { u32 size = kattr->test.data_size_in; + struct bpf_flow_dissector ctx = {}; u32 repeat = kattr->test.repeat; struct bpf_flow_keys flow_keys; u64 time_start, time_spent = 0; - struct bpf_skb_data_end *cb; + const struct ethhdr *eth; u32 retval, duration; - struct sk_buff *skb; - struct sock *sk; void *data; int ret; u32 i; @@ -263,46 +398,31 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR) return -EINVAL; - data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN, - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); - if (IS_ERR(data)) - return PTR_ERR(data); - - sk = kzalloc(sizeof(*sk), GFP_USER); - if (!sk) { - kfree(data); - return -ENOMEM; - } - sock_net_set(sk, current->nsproxy->net_ns); - sock_init_data(NULL, sk); + if (kattr->test.ctx_in || kattr->test.ctx_out) + return -EINVAL; - skb = build_skb(data, 0); - if (!skb) { - kfree(data); - kfree(sk); - return -ENOMEM; - } - skb->sk = sk; + if (size < ETH_HLEN) + return -EINVAL; - skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); - __skb_put(skb, size); - skb->protocol = eth_type_trans(skb, - current->nsproxy->net_ns->loopback_dev); - skb_reset_network_header(skb); + data = bpf_test_init(kattr, size, 0, 0); + if (IS_ERR(data)) + return PTR_ERR(data); - cb = (struct bpf_skb_data_end *)skb->cb; - cb->qdisc_cb.flow_keys = &flow_keys; + eth = (struct ethhdr *)data; if (!repeat) repeat = 1; + ctx.flow_keys = &flow_keys; + ctx.data = data; + ctx.data_end = (__u8 *)data + size; + rcu_read_lock(); preempt_disable(); time_start = ktime_get_ns(); for (i = 0; i < repeat; i++) { - retval = __skb_flow_bpf_dissect(prog, skb, - &flow_keys_dissector, - &flow_keys); + retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN, + size); if (signal_pending(current)) { preempt_enable(); @@ -335,7 +455,6 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, retval, duration); out: - kfree_skb(skb); - kfree(sk); + kfree(data); return ret; } diff --git a/net/bridge/br.c b/net/bridge/br.c index a5174e5001d8..3c8e4b38f054 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -40,10 +40,13 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v bool changed_addr; int err; - /* register of bridge completed, add sysfs entries */ - if ((dev->priv_flags & IFF_EBRIDGE) && event == NETDEV_REGISTER) { - br_sysfs_addbr(dev); - return NOTIFY_DONE; + if (dev->priv_flags & IFF_EBRIDGE) { + if (event == NETDEV_REGISTER) { + /* register of bridge completed, add sysfs entries */ + br_sysfs_addbr(dev); + return NOTIFY_DONE; + } + br_vlan_bridge_event(dev, event, ptr); } /* not a port of a bridge */ @@ -126,6 +129,9 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v break; } + if (event != NETDEV_UNREGISTER) + br_vlan_port_event(p, event); + /* Events that may cause spanning tree to refresh */ if (!notified && (event == NETDEV_CHANGEADDR || event == NETDEV_UP || event == NETDEV_CHANGE || event == NETDEV_DOWN)) diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c index 6b78e6351719..15116752365a 100644 --- a/net/bridge/br_arp_nd_proxy.c +++ b/net/bridge/br_arp_nd_proxy.c @@ -21,6 +21,7 @@ #include <linux/if_vlan.h> #include <linux/inetdevice.h> #include <net/addrconf.h> +#include 
<net/ipv6_stubs.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ip6_checksum.h> #endif @@ -130,7 +131,7 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br, u8 *arpptr, *sha; __be32 sip, tip; - BR_INPUT_SKB_CB(skb)->proxyarp_replied = false; + BR_INPUT_SKB_CB(skb)->proxyarp_replied = 0; if ((dev->flags & IFF_NOARP) || !pskb_may_pull(skb, arp_hdr_len(dev))) @@ -160,7 +161,7 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br, return; if (ipv4_is_zeronet(sip) || sip == tip) { /* prevent flooding to neigh suppress ports */ - BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1; return; } } @@ -180,7 +181,7 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br, /* it's our local ip, so don't proxy reply * and don't forward to neigh suppress ports */ - BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1; return; } @@ -216,7 +217,7 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br, */ if (replied || br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) - BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1; } neigh_release(n); @@ -392,7 +393,7 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br, struct ipv6hdr *iphdr; struct neighbour *n; - BR_INPUT_SKB_CB(skb)->proxyarp_replied = false; + BR_INPUT_SKB_CB(skb)->proxyarp_replied = 0; if (p && (p->flags & BR_NEIGH_SUPPRESS)) return; @@ -400,7 +401,7 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br, if (msg->icmph.icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT && !msg->icmph.icmp6_solicited) { /* prevent flooding to neigh suppress ports */ - BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1; return; } @@ -413,7 +414,7 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br, if (ipv6_addr_any(saddr) || !ipv6_addr_cmp(saddr, daddr)) { /* prevent flooding to neigh suppress ports */ - BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1; return; } @@ -431,7 +432,7 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br, /* it's our own ip, so don't proxy reply * and don't forward to arp suppress ports */ - BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1; return; } @@ -464,7 +465,7 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br, */ if (replied || br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) - BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1; } neigh_release(n); } diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 00573cc46c98..b1c91f66d79c 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -33,7 +33,6 @@ static const struct rhashtable_params br_fdb_rht_params = { .key_offset = offsetof(struct net_bridge_fdb_entry, key), .key_len = sizeof(struct net_bridge_fdb_key), .automatic_shrinking = true, - .locks_mul = 1, }; static struct kmem_cache *br_fdb_cache __read_mostly; diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 48ddc60b4fbd..82225b8b54f5 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -173,6 +173,7 @@ static struct net_bridge_port *maybe_deliver( struct net_bridge_port *prev, struct net_bridge_port *p, struct sk_buff *skb, bool local_orig) { + u8 igmp_type = br_multicast_igmp_type(skb); int err; if (!should_deliver(p, skb)) @@ -184,8 +185,9 @@ 
static struct net_bridge_port *maybe_deliver( err = deliver_clone(prev, skb, local_orig); if (err) return ERR_PTR(err); - out: + br_multicast_count(p->br, p, skb, igmp_type, BR_MCAST_DIR_TX); + return p; } @@ -193,7 +195,6 @@ out: void br_flood(struct net_bridge *br, struct sk_buff *skb, enum br_pkt_type pkt_type, bool local_rcv, bool local_orig) { - u8 igmp_type = br_multicast_igmp_type(skb); struct net_bridge_port *prev = NULL; struct net_bridge_port *p; @@ -226,9 +227,6 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb, prev = maybe_deliver(prev, p, skb, local_orig); if (IS_ERR(prev)) goto out; - if (prev == p) - br_multicast_count(p->br, p, skb, igmp_type, - BR_MCAST_DIR_TX); } if (!prev) @@ -277,7 +275,6 @@ void br_multicast_flood(struct net_bridge_mdb_entry *mdst, bool local_rcv, bool local_orig) { struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; - u8 igmp_type = br_multicast_igmp_type(skb); struct net_bridge *br = netdev_priv(dev); struct net_bridge_port *prev = NULL; struct net_bridge_port_group *p; @@ -304,13 +301,9 @@ void br_multicast_flood(struct net_bridge_mdb_entry *mdst, } prev = maybe_deliver(prev, port, skb, local_orig); -delivered: if (IS_ERR(prev)) goto out; - if (prev == port) - br_multicast_count(port->br, port, skb, igmp_type, - BR_MCAST_DIR_TX); - +delivered: if ((unsigned long)lport >= (unsigned long)port) p = rcu_dereference(p->next); if ((unsigned long)rport >= (unsigned long)port) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 41f0a696a65f..4a9aaa3fac8f 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -179,7 +179,7 @@ int nbp_backup_change(struct net_bridge_port *p, ASSERT_RTNL(); if (backup_dev) { - if (!br_port_exists(backup_dev)) + if (!netif_is_bridge_port(backup_dev)) return -ENOENT; backup_p = br_port_get_rtnl(backup_dev); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index ba303ee99b9b..014af7efef25 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -16,6 +16,9 @@ #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/netfilter_bridge.h> +#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE +#include <net/netfilter/nf_queue.h> +#endif #include <linux/neighbour.h> #include <net/arp.h> #include <linux/export.h> @@ -23,10 +26,6 @@ #include "br_private.h" #include "br_private_tunnel.h" -/* Hook for brouter */ -br_should_route_hook_t __rcu *br_should_route_hook __read_mostly; -EXPORT_SYMBOL(br_should_route_hook); - static int br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -203,6 +202,59 @@ static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_bu return 1; } +static int nf_hook_bridge_pre(struct sk_buff *skb, struct sk_buff **pskb) +{ +#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE + struct nf_hook_entries *e = NULL; + struct nf_hook_state state; + unsigned int verdict, i; + struct net *net; + int ret; + + net = dev_net(skb->dev); +#ifdef HAVE_JUMP_LABEL + if (!static_key_false(&nf_hooks_needed[NFPROTO_BRIDGE][NF_BR_PRE_ROUTING])) + goto frame_finish; +#endif + + e = rcu_dereference(net->nf.hooks_bridge[NF_BR_PRE_ROUTING]); + if (!e) + goto frame_finish; + + nf_hook_state_init(&state, NF_BR_PRE_ROUTING, + NFPROTO_BRIDGE, skb->dev, NULL, NULL, + net, br_handle_frame_finish); + + for (i = 0; i < e->num_hook_entries; i++) { + verdict = nf_hook_entry_hookfn(&e->hooks[i], skb, &state); + switch (verdict & NF_VERDICT_MASK) { + case NF_ACCEPT: + if (BR_INPUT_SKB_CB(skb)->br_netfilter_broute) { + *pskb = skb; + return RX_HANDLER_PASS; + } + break; + 
case NF_DROP: + kfree_skb(skb); + return RX_HANDLER_CONSUMED; + case NF_QUEUE: + ret = nf_queue(skb, &state, e, i, verdict); + if (ret == 1) + continue; + return RX_HANDLER_CONSUMED; + default: /* STOLEN */ + return RX_HANDLER_CONSUMED; + } + } +frame_finish: + net = dev_net(skb->dev); + br_handle_frame_finish(net, NULL, skb); +#else + br_handle_frame_finish(dev_net(skb->dev), NULL, skb); +#endif + return RX_HANDLER_CONSUMED; +} + /* * Return NULL if skb is handled * note: already called with rcu_read_lock @@ -212,7 +264,6 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) struct net_bridge_port *p; struct sk_buff *skb = *pskb; const unsigned char *dest = eth_hdr(skb)->h_dest; - br_should_route_hook_t *rhook; if (unlikely(skb->pkt_type == PACKET_LOOPBACK)) return RX_HANDLER_PASS; @@ -224,6 +275,8 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) if (!skb) return RX_HANDLER_CONSUMED; + memset(skb->cb, 0, sizeof(struct br_input_skb_cb)); + p = br_port_get_rcu(skb->dev); if (p->flags & BR_VLAN_TUNNEL) { if (br_handle_ingress_vlan_tunnel(skb, p, @@ -294,23 +347,11 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) forward: switch (p->state) { case BR_STATE_FORWARDING: - rhook = rcu_dereference(br_should_route_hook); - if (rhook) { - if ((*rhook)(skb)) { - *pskb = skb; - return RX_HANDLER_PASS; - } - dest = eth_hdr(skb)->h_dest; - } - /* fall through */ case BR_STATE_LEARNING: if (ether_addr_equal(p->br->dev->dev_addr, dest)) skb->pkt_type = PACKET_HOST; - NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, - dev_net(skb->dev), NULL, skb, skb->dev, NULL, - br_handle_frame_finish); - break; + return nf_hook_bridge_pre(skb, pskb); default: drop: kfree_skb(skb); diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index f69c8d91dc81..bf6acd34234d 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -26,14 +26,14 @@ static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb, if (!br->multicast_router || hlist_empty(&br->router_list)) return 0; - nest = nla_nest_start(skb, MDBA_ROUTER); + nest = nla_nest_start_noflag(skb, MDBA_ROUTER); if (nest == NULL) return -EMSGSIZE; hlist_for_each_entry_rcu(p, &br->router_list, rlist) { if (!p) continue; - port_nest = nla_nest_start(skb, MDBA_ROUTER_PORT); + port_nest = nla_nest_start_noflag(skb, MDBA_ROUTER_PORT); if (!port_nest) goto fail; if (nla_put_nohdr(skb, sizeof(u32), &p->dev->ifindex) || @@ -86,7 +86,7 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) return 0; - nest = nla_nest_start(skb, MDBA_MDB); + nest = nla_nest_start_noflag(skb, MDBA_MDB); if (nest == NULL) return -EMSGSIZE; @@ -98,7 +98,7 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, if (idx < s_idx) goto skip; - nest2 = nla_nest_start(skb, MDBA_MDB_ENTRY); + nest2 = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY); if (!nest2) { err = -EMSGSIZE; break; @@ -124,7 +124,8 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, e.addr.u.ip6 = p->addr.u.ip6; #endif e.addr.proto = p->addr.proto; - nest_ent = nla_nest_start(skb, MDBA_MDB_ENTRY_INFO); + nest_ent = nla_nest_start_noflag(skb, + MDBA_MDB_ENTRY_INFO); if (!nest_ent) { nla_nest_cancel(skb, nest2); err = -EMSGSIZE; @@ -248,10 +249,10 @@ static int nlmsg_populate_mdb_fill(struct sk_buff *skb, memset(bpm, 0, sizeof(*bpm)); bpm->family = AF_BRIDGE; bpm->ifindex = dev->ifindex; - nest = nla_nest_start(skb, MDBA_MDB); + nest = nla_nest_start_noflag(skb, 
MDBA_MDB); if (nest == NULL) goto cancel; - nest2 = nla_nest_start(skb, MDBA_MDB_ENTRY); + nest2 = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY); if (nest2 == NULL) goto end; @@ -444,7 +445,7 @@ static int nlmsg_populate_rtr_fill(struct sk_buff *skb, memset(bpm, 0, sizeof(*bpm)); bpm->family = AF_BRIDGE; bpm->ifindex = dev->ifindex; - nest = nla_nest_start(skb, MDBA_ROUTER); + nest = nla_nest_start_noflag(skb, MDBA_ROUTER); if (!nest) goto cancel; @@ -529,8 +530,8 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh, struct net_device *dev; int err; - err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY_MAX, NULL, - NULL); + err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb, + MDBA_SET_ENTRY_MAX, NULL, NULL); if (err < 0) return err; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 45e7f4173bba..c2a30f79a9d0 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -44,7 +44,6 @@ static const struct rhashtable_params br_mdb_rht_params = { .key_offset = offsetof(struct net_bridge_mdb_entry, addr), .key_len = sizeof(struct br_ip), .automatic_shrinking = true, - .locks_mul = 1, }; static void br_multicast_start_querier(struct net_bridge *br, @@ -65,23 +64,6 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br, __u16 vid, const unsigned char *src); #endif -static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b) -{ - if (a->proto != b->proto) - return 0; - if (a->vid != b->vid) - return 0; - switch (a->proto) { - case htons(ETH_P_IP): - return a->u.ip4 == b->u.ip4; -#if IS_ENABLED(CONFIG_IPV6) - case htons(ETH_P_IPV6): - return ipv6_addr_equal(&a->u.ip6, &b->u.ip6); -#endif - } - return 0; -} - static struct net_bridge_mdb_entry *br_mdb_ip_get_rcu(struct net_bridge *br, struct br_ip *dst) { @@ -517,7 +499,7 @@ struct net_bridge_port_group *br_multicast_new_port_group( if (src) memcpy(p->eth_addr, src, ETH_ALEN); else - memset(p->eth_addr, 0xff, ETH_ALEN); + eth_broadcast_addr(p->eth_addr); return p; } @@ -2194,7 +2176,7 @@ int br_multicast_list_adjacent(struct net_device *dev, int count = 0; rcu_read_lock(); - if (!br_ip_list || !br_port_exists(dev)) + if (!br_ip_list || !netif_is_bridge_port(dev)) goto unlock; port = br_port_get_rcu(dev); @@ -2241,7 +2223,7 @@ bool br_multicast_has_querier_anywhere(struct net_device *dev, int proto) bool ret = false; rcu_read_lock(); - if (!br_port_exists(dev)) + if (!netif_is_bridge_port(dev)) goto unlock; port = br_port_get_rcu(dev); @@ -2277,7 +2259,7 @@ bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto) bool ret = false; rcu_read_lock(); - if (!br_port_exists(dev)) + if (!netif_is_bridge_port(dev)) goto unlock; port = br_port_get_rcu(dev); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 7104cf13da84..a5acad29cd4f 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -102,7 +102,7 @@ static size_t br_get_link_af_size_filtered(const struct net_device *dev, size_t vinfo_sz = 0; rcu_read_lock(); - if (br_port_exists(dev)) { + if (netif_is_bridge_port(dev)) { p = br_port_get_rcu(dev); vg = nbp_vlan_group_rcu(p); } else if (dev->priv_flags & IFF_EBRIDGE) { @@ -413,9 +413,9 @@ static int br_fill_ifinfo(struct sk_buff *skb, goto nla_put_failure; if (event == RTM_NEWLINK && port) { - struct nlattr *nest - = nla_nest_start(skb, IFLA_PROTINFO | NLA_F_NESTED); + struct nlattr *nest; + nest = nla_nest_start(skb, IFLA_PROTINFO); if (nest == NULL || br_port_fill_attrs(skb, port) < 0) goto nla_put_failure; nla_nest_end(skb, 
nest); @@ -439,7 +439,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, rcu_read_unlock(); goto done; } - af = nla_nest_start(skb, IFLA_AF_SPEC); + af = nla_nest_start_noflag(skb, IFLA_AF_SPEC); if (!af) { rcu_read_unlock(); goto nla_put_failure; @@ -880,8 +880,10 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags, if (p && protinfo) { if (protinfo->nla_type & NLA_F_NESTED) { - err = nla_parse_nested(tb, IFLA_BRPORT_MAX, protinfo, - br_port_policy, NULL); + err = nla_parse_nested_deprecated(tb, IFLA_BRPORT_MAX, + protinfo, + br_port_policy, + NULL); if (err) return err; @@ -1569,7 +1571,7 @@ static int br_fill_linkxstats(struct sk_buff *skb, return -EINVAL; } - nest = nla_nest_start(skb, LINK_XSTATS_TYPE_BRIDGE); + nest = nla_nest_start_noflag(skb, LINK_XSTATS_TYPE_BRIDGE); if (!nest) return -EMSGSIZE; diff --git a/net/bridge/br_netlink_tunnel.c b/net/bridge/br_netlink_tunnel.c index da8cb99fd259..34629d558709 100644 --- a/net/bridge/br_netlink_tunnel.c +++ b/net/bridge/br_netlink_tunnel.c @@ -97,7 +97,7 @@ static int br_fill_vlan_tinfo(struct sk_buff *skb, u16 vid, __be32 tid = tunnel_id_to_key32(tunnel_id); struct nlattr *tmap; - tmap = nla_nest_start(skb, IFLA_BRIDGE_VLAN_TUNNEL_INFO); + tmap = nla_nest_start_noflag(skb, IFLA_BRIDGE_VLAN_TUNNEL_INFO); if (!tmap) return -EMSGSIZE; if (nla_put_u32(skb, IFLA_BRIDGE_VLAN_TUNNEL_ID, @@ -230,8 +230,8 @@ int br_parse_vlan_tunnel_info(struct nlattr *attr, memset(tinfo, 0, sizeof(*tinfo)); - err = nla_parse_nested(tb, IFLA_BRIDGE_VLAN_TUNNEL_MAX, attr, - vlan_tunnel_policy, NULL); + err = nla_parse_nested_deprecated(tb, IFLA_BRIDGE_VLAN_TUNNEL_MAX, + attr, vlan_tunnel_policy, NULL); if (err < 0) return err; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 00deef7fc1f3..334a8c496b50 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -288,8 +288,6 @@ struct net_bridge_port { #define br_auto_port(p) ((p)->flags & BR_AUTO_MASK) #define br_promisc_port(p) ((p)->flags & BR_PROMISC) -#define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT) - static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev) { return rcu_dereference(dev->rx_handler_data); @@ -297,13 +295,13 @@ static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *d static inline struct net_bridge_port *br_port_get_rtnl(const struct net_device *dev) { - return br_port_exists(dev) ? + return netif_is_bridge_port(dev) ? rtnl_dereference(dev->rx_handler_data) : NULL; } static inline struct net_bridge_port *br_port_get_rtnl_rcu(const struct net_device *dev) { - return br_port_exists(dev) ? + return netif_is_bridge_port(dev) ? 
rcu_dereference_rtnl(dev->rx_handler_data) : NULL; } @@ -323,6 +321,7 @@ enum net_bridge_opts { BROPT_MTU_SET_BY_USER, BROPT_VLAN_STATS_PER_PORT, BROPT_NO_LL_LEARN, + BROPT_VLAN_BRIDGE_BINDING, }; struct net_bridge { @@ -427,15 +426,16 @@ struct br_input_skb_cb { struct net_device *brdev; #ifdef CONFIG_BRIDGE_IGMP_SNOOPING - int igmp; - int mrouters_only; + u8 igmp; + u8 mrouters_only:1; #endif - - bool proxyarp_replied; - bool src_port_isolated; - + u8 proxyarp_replied:1; + u8 src_port_isolated:1; #ifdef CONFIG_BRIDGE_VLAN_FILTERING - bool vlan_filtered; + u8 vlan_filtered:1; +#endif +#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE + u8 br_netfilter_broute:1; #endif #ifdef CONFIG_NET_SWITCHDEV @@ -896,6 +896,9 @@ int nbp_vlan_init(struct net_bridge_port *port, struct netlink_ext_ack *extack); int nbp_get_num_vlan_infos(struct net_bridge_port *p, u32 filter_mask); void br_vlan_get_stats(const struct net_bridge_vlan *v, struct br_vlan_stats *stats); +void br_vlan_port_event(struct net_bridge_port *p, unsigned long event); +void br_vlan_bridge_event(struct net_device *dev, unsigned long event, + void *ptr); static inline struct net_bridge_vlan_group *br_vlan_group( const struct net_bridge *br) @@ -1079,6 +1082,16 @@ static inline void br_vlan_get_stats(const struct net_bridge_vlan *v, struct br_vlan_stats *stats) { } + +static inline void br_vlan_port_event(struct net_bridge_port *p, + unsigned long event) +{ +} + +static inline void br_vlan_bridge_event(struct net_device *dev, + unsigned long event, void *ptr) +{ +} #endif struct nf_br_ops { diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 808e2b914015..8d65ae5210e0 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -117,7 +117,8 @@ void br_stp_disable_port(struct net_bridge_port *p) del_timer(&p->forward_delay_timer); del_timer(&p->hold_timer); - br_fdb_delete_by_port(br, p, 0, 0); + if (!rcu_access_pointer(p->backup_port)) + br_fdb_delete_by_port(br, p, 0, 0); br_multicast_disable_port(p); br_configuration_update(br); diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 96abf8feb9dc..2db63997f313 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -7,6 +7,8 @@ #include "br_private.h" #include "br_private_tunnel.h" +static void nbp_vlan_set_vlan_dev_state(struct net_bridge_port *p, u16 vid); + static inline int br_vlan_cmp(struct rhashtable_compare_arg *arg, const void *ptr) { @@ -21,7 +23,6 @@ static const struct rhashtable_params br_vlan_rht_params = { .key_offset = offsetof(struct net_bridge_vlan, vid), .key_len = sizeof(u16), .nelem_hint = 3, - .locks_mul = 1, .max_size = VLAN_N_VID, .obj_cmpfn = br_vlan_cmp, .automatic_shrinking = true, @@ -294,6 +295,9 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags, __vlan_add_list(v); __vlan_add_flags(v, flags); + + if (p) + nbp_vlan_set_vlan_dev_state(p, v->vid); out: return err; @@ -358,6 +362,7 @@ static int __vlan_del(struct net_bridge_vlan *v) rhashtable_remove_fast(&vg->vlan_hash, &v->vnode, br_vlan_rht_params); __vlan_del_list(v); + nbp_vlan_set_vlan_dev_state(p, v->vid); call_rcu(&v->rcu, nbp_vlan_rcu_free); } @@ -1265,3 +1270,211 @@ int br_vlan_get_info(const struct net_device *dev, u16 vid, return 0; } EXPORT_SYMBOL_GPL(br_vlan_get_info); + +static int br_vlan_is_bind_vlan_dev(const struct net_device *dev) +{ + return is_vlan_dev(dev) && + !!(vlan_dev_priv(dev)->flags & VLAN_FLAG_BRIDGE_BINDING); +} + +static int br_vlan_is_bind_vlan_dev_fn(struct net_device *dev, + __always_unused void *data) +{ + return 
br_vlan_is_bind_vlan_dev(dev); +} + +static bool br_vlan_has_upper_bind_vlan_dev(struct net_device *dev) +{ + int found; + + rcu_read_lock(); + found = netdev_walk_all_upper_dev_rcu(dev, br_vlan_is_bind_vlan_dev_fn, + NULL); + rcu_read_unlock(); + + return !!found; +} + +struct br_vlan_bind_walk_data { + u16 vid; + struct net_device *result; +}; + +static int br_vlan_match_bind_vlan_dev_fn(struct net_device *dev, + void *data_in) +{ + struct br_vlan_bind_walk_data *data = data_in; + int found = 0; + + if (br_vlan_is_bind_vlan_dev(dev) && + vlan_dev_priv(dev)->vlan_id == data->vid) { + data->result = dev; + found = 1; + } + + return found; +} + +static struct net_device * +br_vlan_get_upper_bind_vlan_dev(struct net_device *dev, u16 vid) +{ + struct br_vlan_bind_walk_data data = { + .vid = vid, + }; + + rcu_read_lock(); + netdev_walk_all_upper_dev_rcu(dev, br_vlan_match_bind_vlan_dev_fn, + &data); + rcu_read_unlock(); + + return data.result; +} + +static bool br_vlan_is_dev_up(const struct net_device *dev) +{ + return !!(dev->flags & IFF_UP) && netif_oper_up(dev); +} + +static void br_vlan_set_vlan_dev_state(const struct net_bridge *br, + struct net_device *vlan_dev) +{ + u16 vid = vlan_dev_priv(vlan_dev)->vlan_id; + struct net_bridge_vlan_group *vg; + struct net_bridge_port *p; + bool has_carrier = false; + + if (!netif_carrier_ok(br->dev)) { + netif_carrier_off(vlan_dev); + return; + } + + list_for_each_entry(p, &br->port_list, list) { + vg = nbp_vlan_group(p); + if (br_vlan_find(vg, vid) && br_vlan_is_dev_up(p->dev)) { + has_carrier = true; + break; + } + } + + if (has_carrier) + netif_carrier_on(vlan_dev); + else + netif_carrier_off(vlan_dev); +} + +static void br_vlan_set_all_vlan_dev_state(struct net_bridge_port *p) +{ + struct net_bridge_vlan_group *vg = nbp_vlan_group(p); + struct net_bridge_vlan *vlan; + struct net_device *vlan_dev; + + list_for_each_entry(vlan, &vg->vlan_list, vlist) { + vlan_dev = br_vlan_get_upper_bind_vlan_dev(p->br->dev, + vlan->vid); + if (vlan_dev) { + if (br_vlan_is_dev_up(p->dev)) { + if (netif_carrier_ok(p->br->dev)) + netif_carrier_on(vlan_dev); + } else { + br_vlan_set_vlan_dev_state(p->br, vlan_dev); + } + } + } +} + +static void br_vlan_upper_change(struct net_device *dev, + struct net_device *upper_dev, + bool linking) +{ + struct net_bridge *br = netdev_priv(dev); + + if (!br_vlan_is_bind_vlan_dev(upper_dev)) + return; + + if (linking) { + br_vlan_set_vlan_dev_state(br, upper_dev); + br_opt_toggle(br, BROPT_VLAN_BRIDGE_BINDING, true); + } else { + br_opt_toggle(br, BROPT_VLAN_BRIDGE_BINDING, + br_vlan_has_upper_bind_vlan_dev(dev)); + } +} + +struct br_vlan_link_state_walk_data { + struct net_bridge *br; +}; + +static int br_vlan_link_state_change_fn(struct net_device *vlan_dev, + void *data_in) +{ + struct br_vlan_link_state_walk_data *data = data_in; + + if (br_vlan_is_bind_vlan_dev(vlan_dev)) + br_vlan_set_vlan_dev_state(data->br, vlan_dev); + + return 0; +} + +static void br_vlan_link_state_change(struct net_device *dev, + struct net_bridge *br) +{ + struct br_vlan_link_state_walk_data data = { + .br = br + }; + + rcu_read_lock(); + netdev_walk_all_upper_dev_rcu(dev, br_vlan_link_state_change_fn, + &data); + rcu_read_unlock(); +} + +/* Must be protected by RTNL. 
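+ * The callers added above (__vlan_add() and __vlan_del())
+ * already run under rtnl_lock(), so no extra locking is needed.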
*/ +static void nbp_vlan_set_vlan_dev_state(struct net_bridge_port *p, u16 vid) +{ + struct net_device *vlan_dev; + + if (!br_opt_get(p->br, BROPT_VLAN_BRIDGE_BINDING)) + return; + + vlan_dev = br_vlan_get_upper_bind_vlan_dev(p->br->dev, vid); + if (vlan_dev) + br_vlan_set_vlan_dev_state(p->br, vlan_dev); +} + +/* Must be protected by RTNL. */ +void br_vlan_bridge_event(struct net_device *dev, unsigned long event, + void *ptr) +{ + struct netdev_notifier_changeupper_info *info; + struct net_bridge *br; + + switch (event) { + case NETDEV_CHANGEUPPER: + info = ptr; + br_vlan_upper_change(dev, info->upper_dev, info->linking); + break; + + case NETDEV_CHANGE: + case NETDEV_UP: + br = netdev_priv(dev); + if (!br_opt_get(br, BROPT_VLAN_BRIDGE_BINDING)) + return; + br_vlan_link_state_change(dev, br); + break; + } +} + +/* Must be protected by RTNL. */ +void br_vlan_port_event(struct net_bridge_port *p, unsigned long event) +{ + if (!br_opt_get(p->br, BROPT_VLAN_BRIDGE_BINDING)) + return; + + switch (event) { + case NETDEV_CHANGE: + case NETDEV_DOWN: + case NETDEV_UP: + br_vlan_set_all_vlan_dev_state(p); + break; + } +} diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c index 6d2c4eed2dc8..758151863669 100644 --- a/net/bridge/br_vlan_tunnel.c +++ b/net/bridge/br_vlan_tunnel.c @@ -34,7 +34,6 @@ static const struct rhashtable_params br_vlan_tunnel_rht_params = { .key_offset = offsetof(struct net_bridge_vlan, tinfo.tunnel_id), .key_len = sizeof(__be64), .nelem_hint = 3, - .locks_mul = 1, .obj_cmpfn = br_vlan_tunid_cmp, .automatic_shrinking = true, }; diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 276b60262981..ec2652a459da 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -15,6 +15,8 @@ #include <linux/module.h> #include <linux/if_bridge.h> +#include "../br_private.h" + /* EBT_ACCEPT means the frame will be bridged * EBT_DROP means the frame will be routed */ @@ -48,30 +50,63 @@ static const struct ebt_table broute_table = { .me = THIS_MODULE, }; -static int ebt_broute(struct sk_buff *skb) +static unsigned int ebt_broute(void *priv, struct sk_buff *skb, + const struct nf_hook_state *s) { + struct net_bridge_port *p = br_port_get_rcu(skb->dev); struct nf_hook_state state; + unsigned char *dest; int ret; + if (!p || p->state != BR_STATE_FORWARDING) + return NF_ACCEPT; + nf_hook_state_init(&state, NF_BR_BROUTING, - NFPROTO_BRIDGE, skb->dev, NULL, NULL, - dev_net(skb->dev), NULL); + NFPROTO_BRIDGE, s->in, NULL, NULL, + s->net, NULL); ret = ebt_do_table(skb, &state, state.net->xt.broute_table); - if (ret == NF_DROP) - return 1; /* route it */ - return 0; /* bridge it */ + + if (ret != NF_DROP) + return ret; + + /* DROP in ebtables -t broute means that the + * skb should be routed, not bridged. + * This is awkward, but can't be changed for compatibility + * reasons. + * + * We map DROP to ACCEPT and set the ->br_netfilter_broute flag. + */ + BR_INPUT_SKB_CB(skb)->br_netfilter_broute = 1; + + /* undo PACKET_HOST mangling done in br_input in case the dst + * address matches the logical bridge but not the port. 
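+ *
+ * Userspace view (illustrative): a classic brouter rule such as
+ *   ebtables -t broute -A BROUTING -p IPv4 -i eth0 -j DROP
+ * now yields NF_ACCEPT with ->br_netfilter_broute set, and
+ * br_handle_frame() returns RX_HANDLER_PASS so the frame is
+ * routed by the IP stack instead of being bridged.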
+ */ + dest = eth_hdr(skb)->h_dest; + if (skb->pkt_type == PACKET_HOST && + !ether_addr_equal(skb->dev->dev_addr, dest) && + ether_addr_equal(p->br->dev->dev_addr, dest)) + skb->pkt_type = PACKET_OTHERHOST; + + return NF_ACCEPT; } +static const struct nf_hook_ops ebt_ops_broute = { + .hook = ebt_broute, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_PRE_ROUTING, + .priority = NF_BR_PRI_FIRST, +}; + static int __net_init broute_net_init(struct net *net) { - return ebt_register_table(net, &broute_table, NULL, + return ebt_register_table(net, &broute_table, &ebt_ops_broute, + &net->xt.broute_table); } static void __net_exit broute_net_exit(struct net *net) { - ebt_unregister_table(net, net->xt.broute_table, NULL); + ebt_unregister_table(net, net->xt.broute_table, &ebt_ops_broute); } static struct pernet_operations broute_net_ops = { @@ -81,21 +116,11 @@ static struct pernet_operations broute_net_ops = { static int __init ebtable_broute_init(void) { - int ret; - - ret = register_pernet_subsys(&broute_net_ops); - if (ret < 0) - return ret; - /* see br_input.c */ - RCU_INIT_POINTER(br_should_route_hook, - (br_should_route_hook_t *)ebt_broute); - return 0; + return register_pernet_subsys(&broute_net_ops); } static void __exit ebtable_broute_fini(void) { - RCU_INIT_POINTER(br_should_route_hook, NULL); - synchronize_net(); unregister_pernet_subsys(&broute_net_ops); } diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 3cad01ac64e4..4e0091311d40 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1221,10 +1221,6 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table, mutex_unlock(&ebt_mutex); WRITE_ONCE(*res, table); - - if (!ops) - return 0; - ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks)); if (ret) { __ebt_unregister_table(net, table); @@ -1248,8 +1244,7 @@ out: void ebt_unregister_table(struct net *net, struct ebt_table *table, const struct nf_hook_ops *ops) { - if (ops) - nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); + nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); __ebt_unregister_table(net, table); } diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 711d7156efd8..6c6e01963aac 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -186,15 +186,19 @@ static int transmit(struct cflayer *layer, struct cfpkt *pkt) goto noxoff; if (likely(!netif_queue_stopped(caifd->netdev))) { + struct Qdisc *sch; + /* If we run with a TX queue, check if the queue is too long */ txq = netdev_get_tx_queue(skb->dev, 0); - qlen = qdisc_qlen(rcu_dereference_bh(txq->qdisc)); - - if (likely(qlen == 0)) + sch = rcu_dereference_bh(txq->qdisc); + if (likely(qdisc_is_empty(sch))) goto noxoff; + /* the explicit qdisc len value can be checked only for + * !NOLOCK qdiscs; always set flow off otherwise + */ high = (caifd->netdev->tx_queue_len * q_high) / 100; - if (likely(qlen < high)) + if (!(sch->flags & TCQ_F_NOLOCK) && likely(sch->q.qlen < high)) goto noxoff; } diff --git a/net/can/af_can.c b/net/can/af_can.c index 1684ba5b51eb..e8fd5dc1780a 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -89,13 +89,7 @@ static atomic_t skbcounter = ATOMIC_INIT(0); int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { - struct sock *sk = sock->sk; - switch (cmd) { - - case SIOCGSTAMP: - return sock_get_timestamp(sk, (struct timeval __user *)arg); - default: return -ENOIOCTLCMD; } diff --git a/net/can/bcm.c b/net/can/bcm.c index 79bb8afa9c0c..a34ee52f19ea 100644 --- 
a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1689,6 +1689,7 @@ static const struct proto_ops bcm_ops = { .getname = sock_no_getname, .poll = datagram_poll, .ioctl = can_ioctl, /* use can_ioctl() from af_can.c */ + .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = sock_no_setsockopt, diff --git a/net/can/gw.c b/net/can/gw.c index 53859346dc9a..5275ddf580bc 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -662,8 +662,8 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod, /* initialize modification & checksum data space */ memset(mod, 0, sizeof(*mod)); - err = nlmsg_parse(nlh, sizeof(struct rtcanmsg), tb, CGW_MAX, - cgw_policy, NULL); + err = nlmsg_parse_deprecated(nlh, sizeof(struct rtcanmsg), tb, + CGW_MAX, cgw_policy, NULL); if (err < 0) return err; diff --git a/net/can/raw.c b/net/can/raw.c index c70207537488..afcbff063a67 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -846,6 +846,7 @@ static const struct proto_ops raw_ops = { .getname = raw_getname, .poll = datagram_poll, .ioctl = can_ioctl, /* use can_ioctl() from af_can.c */ + .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = raw_setsockopt, diff --git a/net/compat.c b/net/compat.c index eeea5eb71639..a031bd333092 100644 --- a/net/compat.c +++ b/net/compat.c @@ -395,63 +395,6 @@ COMPAT_SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, return __compat_sys_setsockopt(fd, level, optname, optval, optlen); } -int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) -{ - struct compat_timeval __user *ctv; - int err; - struct timeval tv; - - if (COMPAT_USE_64BIT_TIME) - return sock_get_timestamp(sk, userstamp); - - ctv = (struct compat_timeval __user *) userstamp; - err = -ENOENT; - sock_enable_timestamp(sk, SOCK_TIMESTAMP); - tv = ktime_to_timeval(sock_read_timestamp(sk)); - - if (tv.tv_sec == -1) - return err; - if (tv.tv_sec == 0) { - ktime_t kt = ktime_get_real(); - sock_write_timestamp(sk, kt); - tv = ktime_to_timeval(kt); - } - err = 0; - if (put_user(tv.tv_sec, &ctv->tv_sec) || - put_user(tv.tv_usec, &ctv->tv_usec)) - err = -EFAULT; - return err; -} -EXPORT_SYMBOL(compat_sock_get_timestamp); - -int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp) -{ - struct compat_timespec __user *ctv; - int err; - struct timespec ts; - - if (COMPAT_USE_64BIT_TIME) - return sock_get_timestampns (sk, userstamp); - - ctv = (struct compat_timespec __user *) userstamp; - err = -ENOENT; - sock_enable_timestamp(sk, SOCK_TIMESTAMP); - ts = ktime_to_timespec(sock_read_timestamp(sk)); - if (ts.tv_sec == -1) - return err; - if (ts.tv_sec == 0) { - ktime_t kt = ktime_get_real(); - sock_write_timestamp(sk, kt); - ts = ktime_to_timespec(kt); - } - err = 0; - if (put_user(ts.tv_sec, &ctv->tv_sec) || - put_user(ts.tv_nsec, &ctv->tv_nsec)) - err = -EFAULT; - return err; -} -EXPORT_SYMBOL(compat_sock_get_timestampns); - static int __compat_sys_getsockopt(int fd, int level, int optname, char __user *optval, int __user *optlen) diff --git a/net/core/Makefile b/net/core/Makefile index f97d6254e564..a104dc8faafc 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -34,3 +34,4 @@ obj-$(CONFIG_HWBM) += hwbm.o obj-$(CONFIG_NET_DEVLINK) += devlink.o obj-$(CONFIG_GRO_CELLS) += gro_cells.o obj-$(CONFIG_FAILOVER) += failover.o +obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c new file mode 100644 index 000000000000..cc9597a87770 --- 
/dev/null +++ b/net/core/bpf_sk_storage.c @@ -0,0 +1,804 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ +#include <linux/rculist.h> +#include <linux/list.h> +#include <linux/hash.h> +#include <linux/types.h> +#include <linux/spinlock.h> +#include <linux/bpf.h> +#include <net/bpf_sk_storage.h> +#include <net/sock.h> +#include <uapi/linux/btf.h> + +static atomic_t cache_idx; + +struct bucket { + struct hlist_head list; + raw_spinlock_t lock; +}; + +/* The map is not the primary owner of a bpf_sk_storage_elem. + * Instead, the sk->sk_bpf_storage is. + * + * The map (bpf_sk_storage_map) is for two purposes + * 1. Define the size of the "sk local storage". It is + * the map's value_size. + * + * 2. Maintain a list to keep track of all elems such + * that they can be cleaned up during the map destruction. + * + * When a bpf local storage is being looked up for a + * particular sk, the "bpf_map" pointer is actually used + * as the "key" to search in the list of elem in + * sk->sk_bpf_storage. + * + * Hence, consider sk->sk_bpf_storage as the mini-map + * with the "bpf_map" pointer as the searching key. + */ +struct bpf_sk_storage_map { + struct bpf_map map; + /* Lookup elem does not require accessing the map. + * + * Updating/Deleting requires a bucket lock to + * link/unlink the elem from the map. Multiple + * buckets are used to reduce contention. + */ + struct bucket *buckets; + u32 bucket_log; + u16 elem_size; + u16 cache_idx; +}; + +struct bpf_sk_storage_data { + /* smap is used as the searching key when looking up + * from sk->sk_bpf_storage. + * + * Put it in the same cacheline as the data to minimize + * the number of cacheline accesses during the cache hit case. + */ + struct bpf_sk_storage_map __rcu *smap; + u8 data[0] __aligned(8); +}; + +/* Linked to bpf_sk_storage and bpf_sk_storage_map */ +struct bpf_sk_storage_elem { + struct hlist_node map_node; /* Linked to bpf_sk_storage_map */ + struct hlist_node snode; /* Linked to bpf_sk_storage */ + struct bpf_sk_storage __rcu *sk_storage; + struct rcu_head rcu; + /* 8 bytes hole */ + /* The data is stored in another cacheline to minimize + * the number of cacheline accesses during a cache hit. + */ + struct bpf_sk_storage_data sdata ____cacheline_aligned; +}; + +#define SELEM(_SDATA) container_of((_SDATA), struct bpf_sk_storage_elem, sdata) +#define SDATA(_SELEM) (&(_SELEM)->sdata) +#define BPF_SK_STORAGE_CACHE_SIZE 16 + +struct bpf_sk_storage { + struct bpf_sk_storage_data __rcu *cache[BPF_SK_STORAGE_CACHE_SIZE]; + struct hlist_head list; /* List of bpf_sk_storage_elem */ + struct sock *sk; /* The sk that owns the above "list" of + * bpf_sk_storage_elem. 
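+ *
+ * Ownership sketch: sk->sk_bpf_storage points here;
+ * ->list links every selem of this sk, and each selem
+ * also sits on one bucket list of its map so that map
+ * destruction can find and unlink it.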
+ */ + struct rcu_head rcu; + raw_spinlock_t lock; /* Protect adding/removing from the "list" */ +}; + +static struct bucket *select_bucket(struct bpf_sk_storage_map *smap, + struct bpf_sk_storage_elem *selem) +{ + return &smap->buckets[hash_ptr(selem, smap->bucket_log)]; +} + +static int omem_charge(struct sock *sk, unsigned int size) +{ + /* same check as in sock_kmalloc() */ + if (size <= sysctl_optmem_max && + atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) { + atomic_add(size, &sk->sk_omem_alloc); + return 0; + } + + return -ENOMEM; +} + +static bool selem_linked_to_sk(const struct bpf_sk_storage_elem *selem) +{ + return !hlist_unhashed(&selem->snode); +} + +static bool selem_linked_to_map(const struct bpf_sk_storage_elem *selem) +{ + return !hlist_unhashed(&selem->map_node); +} + +static struct bpf_sk_storage_elem *selem_alloc(struct bpf_sk_storage_map *smap, + struct sock *sk, void *value, + bool charge_omem) +{ + struct bpf_sk_storage_elem *selem; + + if (charge_omem && omem_charge(sk, smap->elem_size)) + return NULL; + + selem = kzalloc(smap->elem_size, GFP_ATOMIC | __GFP_NOWARN); + if (selem) { + if (value) + memcpy(SDATA(selem)->data, value, smap->map.value_size); + return selem; + } + + if (charge_omem) + atomic_sub(smap->elem_size, &sk->sk_omem_alloc); + + return NULL; +} + +/* sk_storage->lock must be held and selem->sk_storage == sk_storage. + * The caller must ensure selem->smap is still valid to be + * dereferenced for its smap->elem_size and smap->cache_idx. + */ +static bool __selem_unlink_sk(struct bpf_sk_storage *sk_storage, + struct bpf_sk_storage_elem *selem, + bool uncharge_omem) +{ + struct bpf_sk_storage_map *smap; + bool free_sk_storage; + struct sock *sk; + + smap = rcu_dereference(SDATA(selem)->smap); + sk = sk_storage->sk; + + /* All uncharging on sk->sk_omem_alloc must be done first. + * sk may be freed once the last selem is unlinked from sk_storage. + */ + if (uncharge_omem) + atomic_sub(smap->elem_size, &sk->sk_omem_alloc); + + free_sk_storage = hlist_is_singular_node(&selem->snode, + &sk_storage->list); + if (free_sk_storage) { + atomic_sub(sizeof(struct bpf_sk_storage), &sk->sk_omem_alloc); + sk_storage->sk = NULL; + /* After this RCU_INIT, sk may be freed and cannot be used */ + RCU_INIT_POINTER(sk->sk_bpf_storage, NULL); + + /* sk_storage is not freed now. sk_storage->lock is + * still held and raw_spin_unlock_bh(&sk_storage->lock) + * will be done by the caller. + * + * Although the unlock will be done under + * rcu_read_lock(), it is more intuitive to + * read if kfree_rcu(sk_storage, rcu) is done + * after the raw_spin_unlock_bh(&sk_storage->lock). + * + * Hence, a "bool free_sk_storage" is returned + * to the caller which then calls the kfree_rcu() + * after unlock. 
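+ *
+ * Caller pattern, as in selem_unlink_sk() below:
+ *   raw_spin_lock_bh(&sk_storage->lock);
+ *   free_sk_storage = __selem_unlink_sk(sk_storage, selem, true);
+ *   raw_spin_unlock_bh(&sk_storage->lock);
+ *   if (free_sk_storage)
+ *           kfree_rcu(sk_storage, rcu);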
+ */ + } + hlist_del_init_rcu(&selem->snode); + if (rcu_access_pointer(sk_storage->cache[smap->cache_idx]) == + SDATA(selem)) + RCU_INIT_POINTER(sk_storage->cache[smap->cache_idx], NULL); + + kfree_rcu(selem, rcu); + + return free_sk_storage; +} + +static void selem_unlink_sk(struct bpf_sk_storage_elem *selem) +{ + struct bpf_sk_storage *sk_storage; + bool free_sk_storage = false; + + if (unlikely(!selem_linked_to_sk(selem))) + /* selem has already been unlinked from sk */ + return; + + sk_storage = rcu_dereference(selem->sk_storage); + raw_spin_lock_bh(&sk_storage->lock); + if (likely(selem_linked_to_sk(selem))) + free_sk_storage = __selem_unlink_sk(sk_storage, selem, true); + raw_spin_unlock_bh(&sk_storage->lock); + + if (free_sk_storage) + kfree_rcu(sk_storage, rcu); +} + +/* sk_storage->lock must be held and sk_storage->list cannot be empty */ +static void __selem_link_sk(struct bpf_sk_storage *sk_storage, + struct bpf_sk_storage_elem *selem) +{ + RCU_INIT_POINTER(selem->sk_storage, sk_storage); + hlist_add_head(&selem->snode, &sk_storage->list); +} + +static void selem_unlink_map(struct bpf_sk_storage_elem *selem) +{ + struct bpf_sk_storage_map *smap; + struct bucket *b; + + if (unlikely(!selem_linked_to_map(selem))) + /* selem has already been unlinked from smap */ + return; + + smap = rcu_dereference(SDATA(selem)->smap); + b = select_bucket(smap, selem); + raw_spin_lock_bh(&b->lock); + if (likely(selem_linked_to_map(selem))) + hlist_del_init_rcu(&selem->map_node); + raw_spin_unlock_bh(&b->lock); +} + +static void selem_link_map(struct bpf_sk_storage_map *smap, + struct bpf_sk_storage_elem *selem) +{ + struct bucket *b = select_bucket(smap, selem); + + raw_spin_lock_bh(&b->lock); + RCU_INIT_POINTER(SDATA(selem)->smap, smap); + hlist_add_head_rcu(&selem->map_node, &b->list); + raw_spin_unlock_bh(&b->lock); +} + +static void selem_unlink(struct bpf_sk_storage_elem *selem) +{ + /* Always unlink from map before unlinking from sk_storage + * because selem will be freed after it is successfully unlinked from + * the sk_storage. + */ + selem_unlink_map(selem); + selem_unlink_sk(selem); +} + +static struct bpf_sk_storage_data * +__sk_storage_lookup(struct bpf_sk_storage *sk_storage, + struct bpf_sk_storage_map *smap, + bool cacheit_lockit) +{ + struct bpf_sk_storage_data *sdata; + struct bpf_sk_storage_elem *selem; + + /* Fast path (cache hit) */ + sdata = rcu_dereference(sk_storage->cache[smap->cache_idx]); + if (sdata && rcu_access_pointer(sdata->smap) == smap) + return sdata; + + /* Slow path (cache miss) */ + hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) + if (rcu_access_pointer(SDATA(selem)->smap) == smap) + break; + + if (!selem) + return NULL; + + sdata = SDATA(selem); + if (cacheit_lockit) { + /* spinlock is needed to avoid racing with the + * parallel delete. Otherwise, publishing an already + * deleted sdata to the cache will become a use-after-free + * problem in the next __sk_storage_lookup(). 
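+ *
+ * The race being closed, roughly:
+ *   CPU0: lookup finds selem      CPU1: selem_unlink_sk()
+ *   CPU0: publishes it to cache   CPU1: kfree_rcu(selem)
+ * Taking the lock and rechecking selem_linked_to_sk() ensures
+ * an already-unlinked selem is never published to the cache.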
+ */ + raw_spin_lock_bh(&sk_storage->lock); + if (selem_linked_to_sk(selem)) + rcu_assign_pointer(sk_storage->cache[smap->cache_idx], + sdata); + raw_spin_unlock_bh(&sk_storage->lock); + } + + return sdata; +} + +static struct bpf_sk_storage_data * +sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit) +{ + struct bpf_sk_storage *sk_storage; + struct bpf_sk_storage_map *smap; + + sk_storage = rcu_dereference(sk->sk_bpf_storage); + if (!sk_storage) + return NULL; + + smap = (struct bpf_sk_storage_map *)map; + return __sk_storage_lookup(sk_storage, smap, cacheit_lockit); +} + +static int check_flags(const struct bpf_sk_storage_data *old_sdata, + u64 map_flags) +{ + if (old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST) + /* elem already exists */ + return -EEXIST; + + if (!old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_EXIST) + /* elem doesn't exist, cannot update it */ + return -ENOENT; + + return 0; +} + +static int sk_storage_alloc(struct sock *sk, + struct bpf_sk_storage_map *smap, + struct bpf_sk_storage_elem *first_selem) +{ + struct bpf_sk_storage *prev_sk_storage, *sk_storage; + int err; + + err = omem_charge(sk, sizeof(*sk_storage)); + if (err) + return err; + + sk_storage = kzalloc(sizeof(*sk_storage), GFP_ATOMIC | __GFP_NOWARN); + if (!sk_storage) { + err = -ENOMEM; + goto uncharge; + } + INIT_HLIST_HEAD(&sk_storage->list); + raw_spin_lock_init(&sk_storage->lock); + sk_storage->sk = sk; + + __selem_link_sk(sk_storage, first_selem); + selem_link_map(smap, first_selem); + /* Publish sk_storage to sk. sk->sk_lock cannot be acquired. + * Hence, an atomic op is used to set sk->sk_bpf_storage + * from NULL to the newly allocated sk_storage ptr. + * + * From now on, the sk->sk_bpf_storage pointer is protected + * by the sk_storage->lock. Hence, when freeing + * the sk->sk_bpf_storage, the sk_storage->lock must + * be held before setting sk->sk_bpf_storage to NULL. + */ + prev_sk_storage = cmpxchg((struct bpf_sk_storage **)&sk->sk_bpf_storage, + NULL, sk_storage); + if (unlikely(prev_sk_storage)) { + selem_unlink_map(first_selem); + err = -EAGAIN; + goto uncharge; + + /* Note that even though first_selem was linked to smap's + * bucket->list, first_selem can be freed immediately + * (instead of kfree_rcu) because + * bpf_sk_storage_map_free() does a + * synchronize_rcu() before walking the bucket->list. + * Hence, no one is accessing selem from the + * bucket->list under rcu_read_lock(). + */ + } + + return 0; + +uncharge: + kfree(sk_storage); + atomic_sub(sizeof(*sk_storage), &sk->sk_omem_alloc); + return err; +} + +/* sk cannot be going away because it is linking a new elem + * to sk->sk_bpf_storage. (i.e. sk->sk_refcnt cannot be 0). + * Otherwise, it will become a leak (and other memory issues + * during map destruction). 
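+ *
+ * Callers therefore pin the sock first, as bpf_sk_storage_get()
+ * does below:
+ *   if (refcount_inc_not_zero(&sk->sk_refcnt)) {
+ *           sdata = sk_storage_update(sk, map, value, BPF_NOEXIST);
+ *           sock_put(sk);
+ *   }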
+ */ +static struct bpf_sk_storage_data *sk_storage_update(struct sock *sk, + struct bpf_map *map, + void *value, + u64 map_flags) +{ + struct bpf_sk_storage_data *old_sdata = NULL; + struct bpf_sk_storage_elem *selem; + struct bpf_sk_storage *sk_storage; + struct bpf_sk_storage_map *smap; + int err; + + /* BPF_EXIST and BPF_NOEXIST cannot both be set */ + if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST) || + /* BPF_F_LOCK can only be used in a value with spin_lock */ + unlikely((map_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map))) + return ERR_PTR(-EINVAL); + + smap = (struct bpf_sk_storage_map *)map; + sk_storage = rcu_dereference(sk->sk_bpf_storage); + if (!sk_storage || hlist_empty(&sk_storage->list)) { + /* Very first elem for this sk */ + err = check_flags(NULL, map_flags); + if (err) + return ERR_PTR(err); + + selem = selem_alloc(smap, sk, value, true); + if (!selem) + return ERR_PTR(-ENOMEM); + + err = sk_storage_alloc(sk, smap, selem); + if (err) { + kfree(selem); + atomic_sub(smap->elem_size, &sk->sk_omem_alloc); + return ERR_PTR(err); + } + + return SDATA(selem); + } + + if ((map_flags & BPF_F_LOCK) && !(map_flags & BPF_NOEXIST)) { + /* Hoping to find an old_sdata to do an inline update + * such that it can avoid taking the sk_storage->lock + * and changing the lists. + */ + old_sdata = __sk_storage_lookup(sk_storage, smap, false); + err = check_flags(old_sdata, map_flags); + if (err) + return ERR_PTR(err); + if (old_sdata && selem_linked_to_sk(SELEM(old_sdata))) { + copy_map_value_locked(map, old_sdata->data, + value, false); + return old_sdata; + } + } + + raw_spin_lock_bh(&sk_storage->lock); + + /* Recheck sk_storage->list under sk_storage->lock */ + if (unlikely(hlist_empty(&sk_storage->list))) { + /* A parallel del is happening and sk_storage is going + * away. It has just been checked before, so very + * unlikely. Return instead of retry to keep things + * simple. + */ + err = -EAGAIN; + goto unlock_err; + } + + old_sdata = __sk_storage_lookup(sk_storage, smap, false); + err = check_flags(old_sdata, map_flags); + if (err) + goto unlock_err; + + if (old_sdata && (map_flags & BPF_F_LOCK)) { + copy_map_value_locked(map, old_sdata->data, value, false); + selem = SELEM(old_sdata); + goto unlock; + } + + /* sk_storage->lock is held. Hence, we are sure + * we can unlink and uncharge the old_sdata successfully + * later. So, instead of charging the new selem now + * and then uncharging the old selem later (which may cause + * a potential but unnecessary charge failure), avoid taking + * a charge at all here (the "!old_sdata" check) and the + * old_sdata will not be uncharged later during __selem_unlink_sk(). 
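+ *
+ * Net effect on sk->sk_omem_alloc (sketch): replacing an old
+ * elem is charge-neutral; selem_alloc(.., !old_sdata) skips the
+ * charge and __selem_unlink_sk(.., false) skips the uncharge,
+ * so only a brand new elem is actually charged.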
+ */ + selem = selem_alloc(smap, sk, value, !old_sdata); + if (!selem) { + err = -ENOMEM; + goto unlock_err; + } + + /* First, link the new selem to the map */ + selem_link_map(smap, selem); + + /* Second, link (and publish) the new selem to sk_storage */ + __selem_link_sk(sk_storage, selem); + + /* Third, remove old selem, SELEM(old_sdata) */ + if (old_sdata) { + selem_unlink_map(SELEM(old_sdata)); + __selem_unlink_sk(sk_storage, SELEM(old_sdata), false); + } + +unlock: + raw_spin_unlock_bh(&sk_storage->lock); + return SDATA(selem); + +unlock_err: + raw_spin_unlock_bh(&sk_storage->lock); + return ERR_PTR(err); +} + +static int sk_storage_delete(struct sock *sk, struct bpf_map *map) +{ + struct bpf_sk_storage_data *sdata; + + sdata = sk_storage_lookup(sk, map, false); + if (!sdata) + return -ENOENT; + + selem_unlink(SELEM(sdata)); + + return 0; +} + +/* Called by __sk_destruct() */ +void bpf_sk_storage_free(struct sock *sk) +{ + struct bpf_sk_storage_elem *selem; + struct bpf_sk_storage *sk_storage; + bool free_sk_storage = false; + struct hlist_node *n; + + rcu_read_lock(); + sk_storage = rcu_dereference(sk->sk_bpf_storage); + if (!sk_storage) { + rcu_read_unlock(); + return; + } + + /* Neither the bpf_prog nor the bpf-map's syscall + * could be modifying the sk_storage->list now. + * Thus, no elem can be added to or deleted from the + * sk_storage->list by the bpf_prog or by the bpf-map's syscall. + * + * It is racing with bpf_sk_storage_map_free() alone + * when unlinking elem from the sk_storage->list and + * the map's bucket->list. + */ + raw_spin_lock_bh(&sk_storage->lock); + hlist_for_each_entry_safe(selem, n, &sk_storage->list, snode) { + /* Always unlink from map before unlinking from + * sk_storage. + */ + selem_unlink_map(selem); + free_sk_storage = __selem_unlink_sk(sk_storage, selem, true); + } + raw_spin_unlock_bh(&sk_storage->lock); + rcu_read_unlock(); + + if (free_sk_storage) + kfree_rcu(sk_storage, rcu); +} + +static void bpf_sk_storage_map_free(struct bpf_map *map) +{ + struct bpf_sk_storage_elem *selem; + struct bpf_sk_storage_map *smap; + struct bucket *b; + unsigned int i; + + smap = (struct bpf_sk_storage_map *)map; + + synchronize_rcu(); + + /* bpf prog and the userspace can no longer access this map + * now. No new selem (of this map) can be added + * to the sk->sk_bpf_storage or to the map bucket's list. + * + * The elem of this map can be cleaned up here + * or + * by bpf_sk_storage_free() during __sk_destruct(). + */ + for (i = 0; i < (1U << smap->bucket_log); i++) { + b = &smap->buckets[i]; + + rcu_read_lock(); + /* No one is adding to b->list now */ + while ((selem = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(&b->list)), + struct bpf_sk_storage_elem, + map_node))) { + selem_unlink(selem); + cond_resched_rcu(); + } + rcu_read_unlock(); + } + + /* bpf_sk_storage_free() may still need to access the map. + * e.g. bpf_sk_storage_free() has unlinked selem from the map + * which then made the above while((selem = ...)) loop + * exit immediately. + * + * However, the bpf_sk_storage_free() still needs to access + * the smap->elem_size to do the uncharging in + * __selem_unlink_sk(). + * + * Hence, wait another rcu grace period for the + * bpf_sk_storage_free() to finish. 
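+ *
+ * The overall teardown order is therefore:
+ *   synchronize_rcu();      (no new prog/syscall access)
+ *   unlink all selems;      (racing only with sk destruction)
+ *   synchronize_rcu();      (drain bpf_sk_storage_free() users)
+ *   kvfree(smap->buckets); kfree(map);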
+ */ + synchronize_rcu(); + + kvfree(smap->buckets); + kfree(map); +} + +static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr) +{ + if (attr->map_flags != BPF_F_NO_PREALLOC || attr->max_entries || + attr->key_size != sizeof(int) || !attr->value_size || + /* Enforce BTF for userspace sk dumping */ + !attr->btf_key_type_id || !attr->btf_value_type_id) + return -EINVAL; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (attr->value_size >= KMALLOC_MAX_SIZE - + MAX_BPF_STACK - sizeof(struct bpf_sk_storage_elem) || + /* U16_MAX is much more than enough for sk local storage + * considering a tcp_sock is ~2k. + */ + attr->value_size > U16_MAX - sizeof(struct bpf_sk_storage_elem)) + return -E2BIG; + + return 0; +} + +static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr) +{ + struct bpf_sk_storage_map *smap; + unsigned int i; + u32 nbuckets; + u64 cost; + + smap = kzalloc(sizeof(*smap), GFP_USER | __GFP_NOWARN); + if (!smap) + return ERR_PTR(-ENOMEM); + bpf_map_init_from_attr(&smap->map, attr); + + smap->bucket_log = ilog2(roundup_pow_of_two(num_possible_cpus())); + nbuckets = 1U << smap->bucket_log; + smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets, + GFP_USER | __GFP_NOWARN); + if (!smap->buckets) { + kfree(smap); + return ERR_PTR(-ENOMEM); + } + cost = sizeof(*smap->buckets) * nbuckets + sizeof(*smap); + + for (i = 0; i < nbuckets; i++) { + INIT_HLIST_HEAD(&smap->buckets[i].list); + raw_spin_lock_init(&smap->buckets[i].lock); + } + + smap->elem_size = sizeof(struct bpf_sk_storage_elem) + attr->value_size; + smap->cache_idx = (unsigned int)atomic_inc_return(&cache_idx) % + BPF_SK_STORAGE_CACHE_SIZE; + smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; + + return &smap->map; +} + +static int notsupp_get_next_key(struct bpf_map *map, void *key, + void *next_key) +{ + return -ENOTSUPP; +} + +static int bpf_sk_storage_map_check_btf(const struct bpf_map *map, + const struct btf *btf, + const struct btf_type *key_type, + const struct btf_type *value_type) +{ + u32 int_data; + + if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT) + return -EINVAL; + + int_data = *(u32 *)(key_type + 1); + if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data)) + return -EINVAL; + + return 0; +} + +static void *bpf_fd_sk_storage_lookup_elem(struct bpf_map *map, void *key) +{ + struct bpf_sk_storage_data *sdata; + struct socket *sock; + int fd, err; + + fd = *(int *)key; + sock = sockfd_lookup(fd, &err); + if (sock) { + sdata = sk_storage_lookup(sock->sk, map, true); + sockfd_put(sock); + return sdata ? 
sdata->data : NULL; + } + + return ERR_PTR(err); +} + +static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) +{ + struct bpf_sk_storage_data *sdata; + struct socket *sock; + int fd, err; + + fd = *(int *)key; + sock = sockfd_lookup(fd, &err); + if (sock) { + sdata = sk_storage_update(sock->sk, map, value, map_flags); + sockfd_put(sock); + return PTR_ERR_OR_ZERO(sdata); + } + + return err; +} + +static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key) +{ + struct socket *sock; + int fd, err; + + fd = *(int *)key; + sock = sockfd_lookup(fd, &err); + if (sock) { + err = sk_storage_delete(sock->sk, map); + sockfd_put(sock); + return err; + } + + return err; +} + +BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk, + void *, value, u64, flags) +{ + struct bpf_sk_storage_data *sdata; + + if (flags > BPF_SK_STORAGE_GET_F_CREATE) + return (unsigned long)NULL; + + sdata = sk_storage_lookup(sk, map, true); + if (sdata) + return (unsigned long)sdata->data; + + if (flags == BPF_SK_STORAGE_GET_F_CREATE && + /* Cannot add new elem to a going away sk. + * Otherwise, the new elem may become a leak + * (and also other memory issues during map + * destruction). + */ + refcount_inc_not_zero(&sk->sk_refcnt)) { + sdata = sk_storage_update(sk, map, value, BPF_NOEXIST); + /* sk must be a fullsock (guaranteed by verifier), + * so sock_gen_put() is unnecessary. + */ + sock_put(sk); + return IS_ERR(sdata) ? + (unsigned long)NULL : (unsigned long)sdata->data; + } + + return (unsigned long)NULL; +} + +BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk) +{ + if (refcount_inc_not_zero(&sk->sk_refcnt)) { + int err; + + err = sk_storage_delete(sk, map); + sock_put(sk); + return err; + } + + return -ENOENT; +} + +const struct bpf_map_ops sk_storage_map_ops = { + .map_alloc_check = bpf_sk_storage_map_alloc_check, + .map_alloc = bpf_sk_storage_map_alloc, + .map_free = bpf_sk_storage_map_free, + .map_get_next_key = notsupp_get_next_key, + .map_lookup_elem = bpf_fd_sk_storage_lookup_elem, + .map_update_elem = bpf_fd_sk_storage_update_elem, + .map_delete_elem = bpf_fd_sk_storage_delete_elem, + .map_check_btf = bpf_sk_storage_map_check_btf, +}; + +const struct bpf_func_proto bpf_sk_storage_get_proto = { + .func = bpf_sk_storage_get, + .gpl_only = false, + .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_SOCKET, + .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, + .arg4_type = ARG_ANYTHING, +}; + +const struct bpf_func_proto bpf_sk_storage_delete_proto = { + .func = bpf_sk_storage_delete, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_SOCKET, +}; diff --git a/net/core/datagram.c b/net/core/datagram.c index e657289db4ac..45a162ef5e02 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -61,6 +61,8 @@ #include <trace/events/skb.h> #include <net/busy_poll.h> +#include "datagram.h" + /* * Is a socket 'connection oriented' ? 
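For context, this is roughly how the new map type is consumed from the BPF side -- an illustrative sketch, not code from this commit, assuming a kernel/libbpf pair that supports BTF-defined maps and a program type that was given access to the helpers (e.g. cgroup/skb). The map definition has to satisfy the constraints enforced by bpf_sk_storage_map_alloc_check() above: BPF_F_NO_PREALLOC, a 4-byte int key, and BTF for both key and value.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* Hypothetical per-socket value type, named here for illustration. */
struct sk_stg {
	__u64 pkt_count;
};

struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);	/* mandatory for this map type */
	__type(key, int);			/* socket fd when used from userspace */
	__type(value, struct sk_stg);
} sk_pkt_cnt SEC(".maps");

SEC("cgroup/skb")
int count_pkts(struct __sk_buff *skb)
{
	struct bpf_sock *sk = skb->sk;
	struct sk_stg *stg;

	if (!sk)
		return 1;
	sk = bpf_sk_fullsock(sk);	/* helper requires a full socket */
	if (!sk)
		return 1;
	/* Creates the per-socket element on first use; internally this is
	 * the sk_storage_update(..., BPF_NOEXIST) path shown above.
	 */
	stg = bpf_sk_storage_get(&sk_pkt_cnt, sk, NULL,
				 BPF_SK_STORAGE_GET_F_CREATE);
	if (stg)
		__sync_fetch_and_add(&stg->pkt_count, 1);
	return 1;
}

The verifier guarantees a full socket at the helper call site (ARG_PTR_TO_SOCKET), which is why the kernel-side bpf_sk_storage_get() above can pair refcount_inc_not_zero() with a plain sock_put().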
*/ @@ -165,7 +167,7 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, unsigned int flags, void (*destructor)(struct sock *sk, struct sk_buff *skb), - int *peeked, int *off, int *err, + int *off, int *err, struct sk_buff **last) { bool peek_at_off = false; @@ -192,7 +194,6 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, return NULL; } } - *peeked = 1; refcount_inc(&skb->users); } else { __skb_unlink(skb, queue); @@ -210,7 +211,6 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, * @sk: socket * @flags: MSG\_ flags * @destructor: invoked under the receive lock on successful dequeue - * @peeked: returns non-zero if this packet has been seen before * @off: an offset in bytes to peek skb from. Returns an offset * within an skb where data actually starts * @err: error code returned @@ -244,7 +244,7 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, void (*destructor)(struct sock *sk, struct sk_buff *skb), - int *peeked, int *off, int *err, + int *off, int *err, struct sk_buff **last) { struct sk_buff_head *queue = &sk->sk_receive_queue; @@ -258,7 +258,6 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, if (error) goto no_packet; - *peeked = 0; do { /* Again only user level code calls this function, so nothing * interrupt level will suddenly eat the receive_queue. @@ -268,7 +267,7 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, */ spin_lock_irqsave(&queue->lock, cpu_flags); skb = __skb_try_recv_from_queue(sk, queue, flags, destructor, - peeked, off, &error, last); + off, &error, last); spin_unlock_irqrestore(&queue->lock, cpu_flags); if (error) goto no_packet; @@ -292,7 +291,7 @@ EXPORT_SYMBOL(__skb_try_recv_datagram); struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, void (*destructor)(struct sock *sk, struct sk_buff *skb), - int *peeked, int *off, int *err) + int *off, int *err) { struct sk_buff *skb, *last; long timeo; @@ -300,8 +299,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); do { - skb = __skb_try_recv_datagram(sk, flags, destructor, peeked, - off, err, &last); + skb = __skb_try_recv_datagram(sk, flags, destructor, off, err, + &last); if (skb) return skb; @@ -317,10 +316,10 @@ EXPORT_SYMBOL(__skb_recv_datagram); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags, int noblock, int *err) { - int peeked, off = 0; + int off = 0; return __skb_recv_datagram(sk, flags | (noblock ? 
MSG_DONTWAIT : 0), - NULL, &peeked, &off, err); + NULL, &off, err); } EXPORT_SYMBOL(skb_recv_datagram); @@ -408,10 +407,10 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) } EXPORT_SYMBOL(skb_kill_datagram); -int __skb_datagram_iter(const struct sk_buff *skb, int offset, - struct iov_iter *to, int len, bool fault_short, - size_t (*cb)(const void *, size_t, void *, struct iov_iter *), - void *data) +static int __skb_datagram_iter(const struct sk_buff *skb, int offset, + struct iov_iter *to, int len, bool fault_short, + size_t (*cb)(const void *, size_t, void *, + struct iov_iter *), void *data) { int start = skb_headlen(skb); int i, copy = start - offset, start_off = offset, n; diff --git a/net/core/datagram.h b/net/core/datagram.h new file mode 100644 index 000000000000..bcfb75bfa3b2 --- /dev/null +++ b/net/core/datagram.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _NET_CORE_DATAGRAM_H_ +#define _NET_CORE_DATAGRAM_H_ + +#include <linux/types.h> + +struct sock; +struct sk_buff; +struct iov_iter; + +int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb, + struct iov_iter *from, size_t length); + +#endif /* _NET_CORE_DATAGRAM_H_ */ diff --git a/net/core/dev.c b/net/core/dev.c index f409406254dd..108ac8137b9b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -131,7 +131,6 @@ #include <trace/events/napi.h> #include <trace/events/net.h> #include <trace/events/skb.h> -#include <linux/pci.h> #include <linux/inetdevice.h> #include <linux/cpu_rmap.h> #include <linux/static_key.h> @@ -146,6 +145,7 @@ #include <net/udp_tunnel.h> #include <linux/net_namespace.h> #include <linux/indirect_call_wrapper.h> +#include <net/devlink.h> #include "net-sysfs.h" @@ -3482,6 +3482,15 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { __qdisc_drop(skb, &to_free); rc = NET_XMIT_DROP; + } else if ((q->flags & TCQ_F_CAN_BYPASS) && q->empty && + qdisc_run_begin(q)) { + qdisc_bstats_cpu_update(q, skb); + + if (sch_direct_xmit(skb, q, dev, txq, NULL, true)) + __qdisc_run(q); + + qdisc_run_end(q); + rc = NET_XMIT_SUCCESS; } else { rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; qdisc_run(q); @@ -3570,9 +3579,6 @@ static void skb_update_prio(struct sk_buff *skb) #define skb_update_prio(skb) #endif -DEFINE_PER_CPU(int, xmit_recursion); -EXPORT_SYMBOL(xmit_recursion); - /** * dev_loopback_xmit - loop back @skb * @net: network namespace this loopback is happening in @@ -3703,23 +3709,21 @@ get_cpus_map: } u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { return 0; } EXPORT_SYMBOL(dev_pick_tx_zero); u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { return (u16)raw_smp_processor_id() % dev->real_num_tx_queues; } EXPORT_SYMBOL(dev_pick_tx_cpu_id); -static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev) +u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev) { struct sock *sk = skb->sk; int queue_index = sk_tx_queue_get(sk); @@ -3743,10 +3747,11 @@ static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, return queue_index; } +EXPORT_SYMBOL(netdev_pick_tx); -struct netdev_queue *netdev_pick_tx(struct net_device *dev, - struct sk_buff *skb, - struct 
net_device *sb_dev) +struct netdev_queue *netdev_core_pick_tx(struct net_device *dev, + struct sk_buff *skb, + struct net_device *sb_dev) { int queue_index = 0; @@ -3761,10 +3766,9 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev, const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_select_queue) - queue_index = ops->ndo_select_queue(dev, skb, sb_dev, - __netdev_pick_tx); + queue_index = ops->ndo_select_queue(dev, skb, sb_dev); else - queue_index = __netdev_pick_tx(dev, skb, sb_dev); + queue_index = netdev_pick_tx(dev, skb, sb_dev); queue_index = netdev_cap_txqueue(dev, queue_index); } @@ -3838,7 +3842,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) else skb_dst_force(skb); - txq = netdev_pick_tx(dev, skb, sb_dev); + txq = netdev_core_pick_tx(dev, skb, sb_dev); q = rcu_dereference_bh(txq->qdisc); trace_net_dev_queue(skb); @@ -3863,8 +3867,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) int cpu = smp_processor_id(); /* ok because BHs are off */ if (txq->xmit_lock_owner != cpu) { - if (unlikely(__this_cpu_read(xmit_recursion) > - XMIT_RECURSION_LIMIT)) + if (dev_xmit_recursion()) goto recursion_alert; skb = validate_xmit_skb(skb, dev, &again); @@ -3874,9 +3877,9 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) HARD_TX_LOCK(dev, txq, cpu); if (!netif_xmit_stopped(txq)) { - __this_cpu_inc(xmit_recursion); + dev_xmit_recursion_inc(); skb = dev_hard_start_xmit(skb, dev, txq, &rc); - __this_cpu_dec(xmit_recursion); + dev_xmit_recursion_dec(); if (dev_xmit_complete(rc)) { HARD_TX_UNLOCK(dev, txq); goto out; @@ -3989,9 +3992,9 @@ EXPORT_SYMBOL(rps_sock_flow_table); u32 rps_cpu_mask __read_mostly; EXPORT_SYMBOL(rps_cpu_mask); -struct static_key rps_needed __read_mostly; +struct static_key_false rps_needed __read_mostly; EXPORT_SYMBOL(rps_needed); -struct static_key rfs_needed __read_mostly; +struct static_key_false rfs_needed __read_mostly; EXPORT_SYMBOL(rfs_needed); static struct rps_dev_flow * @@ -4443,7 +4446,7 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) bool free_skb = true; int cpu, rc; - txq = netdev_pick_tx(dev, skb, NULL); + txq = netdev_core_pick_tx(dev, skb, NULL); cpu = smp_processor_id(); HARD_TX_LOCK(dev, txq, cpu); if (!netif_xmit_stopped(txq)) { @@ -4517,7 +4520,7 @@ static int netif_rx_internal(struct sk_buff *skb) } #ifdef CONFIG_RPS - if (static_key_false(&rps_needed)) { + if (static_branch_unlikely(&rps_needed)) { struct rps_dev_flow voidflow, *rflow = &voidflow; int cpu; @@ -4984,7 +4987,8 @@ static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc) ret = __netif_receive_skb_core(skb, pfmemalloc, &pt_prev); if (pt_prev) - ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); + ret = INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb, + skb->dev, pt_prev, orig_dev); return ret; } @@ -5030,7 +5034,8 @@ static inline void __netif_receive_skb_list_ptype(struct list_head *head, else list_for_each_entry_safe(skb, next, head, list) { skb_list_del_init(skb); - pt_prev->func(skb, skb->dev, pt_prev, orig_dev); + INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb, + skb->dev, pt_prev, orig_dev); } } @@ -5188,7 +5193,7 @@ static int netif_receive_skb_internal(struct sk_buff *skb) rcu_read_lock(); #ifdef CONFIG_RPS - if (static_key_false(&rps_needed)) { + if (static_branch_unlikely(&rps_needed)) { struct rps_dev_flow voidflow, *rflow = &voidflow; int cpu = get_rps_cpu(skb->dev, skb, &rflow); @@ -5236,7 +5241,7 @@ 
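The dev_xmit_recursion*() helpers used in the hunks above replace the open-coded xmit_recursion per-cpu variable that this patch deletes from dev.c. Roughly, they wrap a per-cpu counter along these lines -- a sketch of the idea, not necessarily the exact header definition:

static inline bool dev_xmit_recursion(void)
{
	return unlikely(__this_cpu_read(softnet_data.xmit.recursion) >
			XMIT_RECURSION_LIMIT);
}

static inline void dev_xmit_recursion_inc(void)
{
	__this_cpu_inc(softnet_data.xmit.recursion);
}

static inline void dev_xmit_recursion_dec(void)
{
	__this_cpu_dec(softnet_data.xmit.recursion);
}

Folding the counter into per-cpu state the transmit path already touches means one less cache line dirtied per packet than with a standalone exported per-cpu variable.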
static void netif_receive_skb_list_internal(struct list_head *head) rcu_read_lock(); #ifdef CONFIG_RPS - if (static_key_false(&rps_needed)) { + if (static_branch_unlikely(&rps_needed)) { list_for_each_entry_safe(skb, next, head, list) { struct rps_dev_flow voidflow, *rflow = &voidflow; int cpu = get_rps_cpu(skb->dev, skb, &rflow); @@ -7886,10 +7891,14 @@ int dev_get_phys_port_name(struct net_device *dev, char *name, size_t len) { const struct net_device_ops *ops = dev->netdev_ops; + int err; - if (!ops->ndo_get_phys_port_name) - return -EOPNOTSUPP; - return ops->ndo_get_phys_port_name(dev, name, len); + if (ops->ndo_get_phys_port_name) { + err = ops->ndo_get_phys_port_name(dev, name, len); + if (err != -EOPNOTSUPP) + return err; + } + return devlink_compat_phys_port_name_get(dev, name, len); } EXPORT_SYMBOL(dev_get_phys_port_name); @@ -7909,14 +7918,21 @@ int dev_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id first = { }; struct net_device *lower_dev; struct list_head *iter; - int err = -EOPNOTSUPP; + int err; - if (ops->ndo_get_port_parent_id) - return ops->ndo_get_port_parent_id(dev, ppid); + if (ops->ndo_get_port_parent_id) { + err = ops->ndo_get_port_parent_id(dev, ppid); + if (err != -EOPNOTSUPP) + return err; + } - if (!recurse) + err = devlink_compat_switch_id_get(dev, ppid); + if (!err || err != -EOPNOTSUPP) return err; + if (!recurse) + return -EOPNOTSUPP; + netdev_for_each_lower_dev(dev, lower_dev, iter) { err = dev_get_port_parent_id(lower_dev, ppid, recurse); if (err) diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 31380fd5a4e2..5163d900bb4f 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -366,7 +366,8 @@ EXPORT_SYMBOL(dev_load); * dev_ioctl - network device ioctl * @net: the applicable net namespace * @cmd: command to issue - * @arg: pointer to a struct ifreq in user space + * @ifr: pointer to a struct ifreq in user space + * @need_copyout: whether or not copy_to_user() should be called * * Issue ioctl functions to devices. 
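The rps_needed/rfs_needed changes in these hunks move from the old struct static_key to the type-safe static branch API, so the disabled case compiles down to a patchable unconditional jump. The general pattern (illustrative only, using placeholder names; the API lives in linux/jump_label.h) is:

DEFINE_STATIC_KEY_FALSE(my_feature_needed);

void my_fast_path(struct sk_buff *skb)
{
	/* No-op branch until the key is enabled at runtime. */
	if (static_branch_unlikely(&my_feature_needed))
		my_slow_feature(skb);
}

/* control path: */
static_branch_inc(&my_feature_needed);	/* enable */
static_branch_dec(&my_feature_needed);	/* disable */

static_key_false() and bare struct static_key are the deprecated spellings this series is migrating away from.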
This is normally called by the * user space syscall interfaces but can sometimes be useful for diff --git a/net/core/devlink.c b/net/core/devlink.c index da0a29f30885..d43bc52b8840 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -19,6 +19,8 @@ #include <linux/device.h> #include <linux/list.h> #include <linux/netdevice.h> +#include <linux/spinlock.h> +#include <linux/refcount.h> #include <rdma/ib_verbs.h> #include <net/netlink.h> #include <net/genetlink.h> @@ -543,12 +545,14 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, goto nla_put_failure; if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index)) goto nla_put_failure; + + spin_lock(&devlink_port->type_lock); if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type)) - goto nla_put_failure; + goto nla_put_failure_type_locked; if (devlink_port->desired_type != DEVLINK_PORT_TYPE_NOTSET && nla_put_u16(msg, DEVLINK_ATTR_PORT_DESIRED_TYPE, devlink_port->desired_type)) - goto nla_put_failure; + goto nla_put_failure_type_locked; if (devlink_port->type == DEVLINK_PORT_TYPE_ETH) { struct net_device *netdev = devlink_port->type_dev; @@ -557,7 +561,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, netdev->ifindex) || nla_put_string(msg, DEVLINK_ATTR_PORT_NETDEV_NAME, netdev->name))) - goto nla_put_failure; + goto nla_put_failure_type_locked; } if (devlink_port->type == DEVLINK_PORT_TYPE_IB) { struct ib_device *ibdev = devlink_port->type_dev; @@ -565,14 +569,17 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, if (ibdev && nla_put_string(msg, DEVLINK_ATTR_PORT_IBDEV_NAME, ibdev->name)) - goto nla_put_failure; + goto nla_put_failure_type_locked; } + spin_unlock(&devlink_port->type_lock); if (devlink_nl_port_attrs_put(msg, devlink_port)) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; +nla_put_failure_type_locked: + spin_unlock(&devlink_port->type_lock); nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; @@ -1041,14 +1048,15 @@ out: static int devlink_sb_pool_set(struct devlink *devlink, unsigned int sb_index, u16 pool_index, u32 size, - enum devlink_sb_threshold_type threshold_type) + enum devlink_sb_threshold_type threshold_type, + struct netlink_ext_ack *extack) { const struct devlink_ops *ops = devlink->ops; if (ops->sb_pool_set) return ops->sb_pool_set(devlink, sb_index, pool_index, - size, threshold_type); + size, threshold_type, extack); return -EOPNOTSUPP; } @@ -1076,7 +1084,8 @@ static int devlink_nl_cmd_sb_pool_set_doit(struct sk_buff *skb, size = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_POOL_SIZE]); return devlink_sb_pool_set(devlink, devlink_sb->index, - pool_index, size, threshold_type); + pool_index, size, threshold_type, + info->extack); } static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg, @@ -1237,14 +1246,15 @@ out: static int devlink_sb_port_pool_set(struct devlink_port *devlink_port, unsigned int sb_index, u16 pool_index, - u32 threshold) + u32 threshold, + struct netlink_ext_ack *extack) { const struct devlink_ops *ops = devlink_port->devlink->ops; if (ops->sb_port_pool_set) return ops->sb_port_pool_set(devlink_port, sb_index, - pool_index, threshold); + pool_index, threshold, extack); return -EOPNOTSUPP; } @@ -1267,7 +1277,7 @@ static int devlink_nl_cmd_sb_port_pool_set_doit(struct sk_buff *skb, threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]); return devlink_sb_port_pool_set(devlink_port, devlink_sb->index, - pool_index, threshold); + pool_index, threshold, 
info->extack); } static int @@ -1466,7 +1476,8 @@ out: static int devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port, unsigned int sb_index, u16 tc_index, enum devlink_sb_pool_type pool_type, - u16 pool_index, u32 threshold) + u16 pool_index, u32 threshold, + struct netlink_ext_ack *extack) { const struct devlink_ops *ops = devlink_port->devlink->ops; @@ -1474,7 +1485,7 @@ static int devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port, if (ops->sb_tc_pool_bind_set) return ops->sb_tc_pool_bind_set(devlink_port, sb_index, tc_index, pool_type, - pool_index, threshold); + pool_index, threshold, extack); return -EOPNOTSUPP; } @@ -1509,7 +1520,7 @@ static int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb, threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]); return devlink_sb_tc_pool_bind_set(devlink_port, devlink_sb->index, tc_index, pool_type, - pool_index, threshold); + pool_index, threshold, info->extack); } static int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb, @@ -1661,7 +1672,7 @@ int devlink_dpipe_match_put(struct sk_buff *skb, struct devlink_dpipe_field *field = &header->fields[match->field_id]; struct nlattr *match_attr; - match_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_MATCH); + match_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_MATCH); if (!match_attr) return -EMSGSIZE; @@ -1686,7 +1697,8 @@ static int devlink_dpipe_matches_put(struct devlink_dpipe_table *table, { struct nlattr *matches_attr; - matches_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE_MATCHES); + matches_attr = nla_nest_start_noflag(skb, + DEVLINK_ATTR_DPIPE_TABLE_MATCHES); if (!matches_attr) return -EMSGSIZE; @@ -1708,7 +1720,7 @@ int devlink_dpipe_action_put(struct sk_buff *skb, struct devlink_dpipe_field *field = &header->fields[action->field_id]; struct nlattr *action_attr; - action_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_ACTION); + action_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_ACTION); if (!action_attr) return -EMSGSIZE; @@ -1733,7 +1745,8 @@ static int devlink_dpipe_actions_put(struct devlink_dpipe_table *table, { struct nlattr *actions_attr; - actions_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE_ACTIONS); + actions_attr = nla_nest_start_noflag(skb, + DEVLINK_ATTR_DPIPE_TABLE_ACTIONS); if (!actions_attr) return -EMSGSIZE; @@ -1755,7 +1768,7 @@ static int devlink_dpipe_table_put(struct sk_buff *skb, u64 table_size; table_size = table->table_ops->size_get(table->priv); - table_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE); + table_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_TABLE); if (!table_attr) return -EMSGSIZE; @@ -1835,7 +1848,7 @@ start_again: if (devlink_nl_put_handle(skb, devlink)) goto nla_put_failure; - tables_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLES); + tables_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_TABLES); if (!tables_attr) goto nla_put_failure; @@ -1936,8 +1949,8 @@ static int devlink_dpipe_action_values_put(struct sk_buff *skb, int err; for (i = 0; i < values_count; i++) { - action_attr = nla_nest_start(skb, - DEVLINK_ATTR_DPIPE_ACTION_VALUE); + action_attr = nla_nest_start_noflag(skb, + DEVLINK_ATTR_DPIPE_ACTION_VALUE); if (!action_attr) return -EMSGSIZE; err = devlink_dpipe_action_value_put(skb, &values[i]); @@ -1973,8 +1986,8 @@ static int devlink_dpipe_match_values_put(struct sk_buff *skb, int err; for (i = 0; i < values_count; i++) { - match_attr = nla_nest_start(skb, - DEVLINK_ATTR_DPIPE_MATCH_VALUE); + match_attr = nla_nest_start_noflag(skb, + 
DEVLINK_ATTR_DPIPE_MATCH_VALUE); if (!match_attr) return -EMSGSIZE; err = devlink_dpipe_match_value_put(skb, &values[i]); @@ -1995,7 +2008,7 @@ static int devlink_dpipe_entry_put(struct sk_buff *skb, struct nlattr *entry_attr, *matches_attr, *actions_attr; int err; - entry_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_ENTRY); + entry_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_ENTRY); if (!entry_attr) return -EMSGSIZE; @@ -2007,8 +2020,8 @@ static int devlink_dpipe_entry_put(struct sk_buff *skb, entry->counter, DEVLINK_ATTR_PAD)) goto nla_put_failure; - matches_attr = nla_nest_start(skb, - DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES); + matches_attr = nla_nest_start_noflag(skb, + DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES); if (!matches_attr) goto nla_put_failure; @@ -2020,8 +2033,8 @@ static int devlink_dpipe_entry_put(struct sk_buff *skb, } nla_nest_end(skb, matches_attr); - actions_attr = nla_nest_start(skb, - DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES); + actions_attr = nla_nest_start_noflag(skb, + DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES); if (!actions_attr) goto nla_put_failure; @@ -2078,8 +2091,8 @@ int devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx) devlink = dump_ctx->info->user_ptr[0]; if (devlink_nl_put_handle(dump_ctx->skb, devlink)) goto nla_put_failure; - dump_ctx->nest = nla_nest_start(dump_ctx->skb, - DEVLINK_ATTR_DPIPE_ENTRIES); + dump_ctx->nest = nla_nest_start_noflag(dump_ctx->skb, + DEVLINK_ATTR_DPIPE_ENTRIES); if (!dump_ctx->nest) goto nla_put_failure; return 0; @@ -2189,7 +2202,8 @@ static int devlink_dpipe_fields_put(struct sk_buff *skb, for (i = 0; i < header->fields_count; i++) { field = &header->fields[i]; - field_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_FIELD); + field_attr = nla_nest_start_noflag(skb, + DEVLINK_ATTR_DPIPE_FIELD); if (!field_attr) return -EMSGSIZE; if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_FIELD_NAME, field->name) || @@ -2212,7 +2226,7 @@ static int devlink_dpipe_header_put(struct sk_buff *skb, struct nlattr *fields_attr, *header_attr; int err; - header_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADER); + header_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_HEADER); if (!header_attr) return -EMSGSIZE; @@ -2221,7 +2235,8 @@ static int devlink_dpipe_header_put(struct sk_buff *skb, nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global)) goto nla_put_failure; - fields_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADER_FIELDS); + fields_attr = nla_nest_start_noflag(skb, + DEVLINK_ATTR_DPIPE_HEADER_FIELDS); if (!fields_attr) goto nla_put_failure; @@ -2268,7 +2283,7 @@ start_again: if (devlink_nl_put_handle(skb, devlink)) goto nla_put_failure; - headers_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADERS); + headers_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_HEADERS); if (!headers_attr) goto nla_put_failure; @@ -2492,7 +2507,7 @@ static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb, struct nlattr *child_resource_attr; struct nlattr *resource_attr; - resource_attr = nla_nest_start(skb, DEVLINK_ATTR_RESOURCE); + resource_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_RESOURCE); if (!resource_attr) return -EMSGSIZE; @@ -2516,7 +2531,8 @@ static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb, resource->size_valid)) goto nla_put_failure; - child_resource_attr = nla_nest_start(skb, DEVLINK_ATTR_RESOURCE_LIST); + child_resource_attr = nla_nest_start_noflag(skb, + DEVLINK_ATTR_RESOURCE_LIST); if (!child_resource_attr) goto nla_put_failure; @@ -2567,7 
+2583,8 @@ start_again: if (devlink_nl_put_handle(skb, devlink)) goto nla_put_failure; - resources_attr = nla_nest_start(skb, DEVLINK_ATTR_RESOURCE_LIST); + resources_attr = nla_nest_start_noflag(skb, + DEVLINK_ATTR_RESOURCE_LIST); if (!resources_attr) goto nla_put_failure; @@ -2821,7 +2838,8 @@ devlink_nl_param_value_fill_one(struct sk_buff *msg, { struct nlattr *param_value_attr; - param_value_attr = nla_nest_start(msg, DEVLINK_ATTR_PARAM_VALUE); + param_value_attr = nla_nest_start_noflag(msg, + DEVLINK_ATTR_PARAM_VALUE); if (!param_value_attr) goto nla_put_failure; @@ -2912,7 +2930,7 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink, if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, port_index)) goto genlmsg_cancel; - param_attr = nla_nest_start(msg, DEVLINK_ATTR_PARAM); + param_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_PARAM); if (!param_attr) goto genlmsg_cancel; if (nla_put_string(msg, DEVLINK_ATTR_PARAM_NAME, param->name)) @@ -2926,7 +2944,8 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink, if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_TYPE, nla_type)) goto param_nest_cancel; - param_values_list = nla_nest_start(msg, DEVLINK_ATTR_PARAM_VALUES_LIST); + param_values_list = nla_nest_start_noflag(msg, + DEVLINK_ATTR_PARAM_VALUES_LIST); if (!param_values_list) goto param_nest_cancel; @@ -3326,7 +3345,7 @@ static int devlink_nl_region_snapshot_id_put(struct sk_buff *msg, struct nlattr *snap_attr; int err; - snap_attr = nla_nest_start(msg, DEVLINK_ATTR_REGION_SNAPSHOT); + snap_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_REGION_SNAPSHOT); if (!snap_attr) return -EINVAL; @@ -3350,7 +3369,8 @@ static int devlink_nl_region_snapshots_id_put(struct sk_buff *msg, struct nlattr *snapshots_attr; int err; - snapshots_attr = nla_nest_start(msg, DEVLINK_ATTR_REGION_SNAPSHOTS); + snapshots_attr = nla_nest_start_noflag(msg, + DEVLINK_ATTR_REGION_SNAPSHOTS); if (!snapshots_attr) return -EINVAL; @@ -3566,7 +3586,7 @@ static int devlink_nl_cmd_region_read_chunk_fill(struct sk_buff *msg, struct nlattr *chunk_attr; int err; - chunk_attr = nla_nest_start(msg, DEVLINK_ATTR_REGION_CHUNK); + chunk_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_REGION_CHUNK); if (!chunk_attr) return -EINVAL; @@ -3640,7 +3660,6 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { u64 ret_offset, start_offset, end_offset = 0; - const struct genl_ops *ops = cb->data; struct devlink_region *region; struct nlattr *chunks_attr; const char *region_name; @@ -3656,8 +3675,10 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, if (!attrs) return -ENOMEM; - err = nlmsg_parse(cb->nlh, GENL_HDRLEN + devlink_nl_family.hdrsize, - attrs, DEVLINK_ATTR_MAX, ops->policy, cb->extack); + err = nlmsg_parse_deprecated(cb->nlh, + GENL_HDRLEN + devlink_nl_family.hdrsize, + attrs, DEVLINK_ATTR_MAX, + devlink_nl_family.policy, cb->extack); if (err) goto out_free; @@ -3699,7 +3720,7 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, if (err) goto nla_put_failure; - chunks_attr = nla_nest_start(skb, DEVLINK_ATTR_REGION_CHUNKS); + chunks_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_REGION_CHUNKS); if (!chunks_attr) { err = -EMSGSIZE; goto nla_put_failure; @@ -3775,7 +3796,7 @@ static int devlink_info_version_put(struct devlink_info_req *req, int attr, struct nlattr *nest; int err; - nest = nla_nest_start(req->msg, attr); + nest = nla_nest_start_noflag(req->msg, attr); if (!nest) return -EMSGSIZE; @@ -4303,7 +4324,7 @@ 
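The blanket nla_nest_start() -> nla_nest_start_noflag() rename running through this file comes from the strict netlink validation series: nla_nest_start() now sets NLA_F_NESTED on the attribute type, and the old behaviour is kept under the _noflag name. Condensed (a sketch; MY_ATTR_NESTED is a placeholder, not a devlink attribute):

struct nlattr *nest;

/* new nla_nest_start(): attribute type carries NLA_F_NESTED */
nest = nla_nest_start(msg, MY_ATTR_NESTED);

/* old behaviour, retained for existing families such as devlink */
nest = nla_nest_start_noflag(msg, MY_ATTR_NESTED);

nla_nest_end(msg, nest);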
devlink_fmsg_prepare_skb(struct devlink_fmsg *fmsg, struct sk_buff *skb, int i = 0; int err; - fmsg_nlattr = nla_nest_start(skb, DEVLINK_ATTR_FMSG); + fmsg_nlattr = nla_nest_start_noflag(skb, DEVLINK_ATTR_FMSG); if (!fmsg_nlattr) return -EMSGSIZE; @@ -4412,6 +4433,7 @@ struct devlink_health_reporter { u64 error_count; u64 recovery_count; u64 last_recovery_ts; + refcount_t refcount; }; void * @@ -4427,6 +4449,7 @@ devlink_health_reporter_find_by_name(struct devlink *devlink, { struct devlink_health_reporter *reporter; + lockdep_assert_held(&devlink->reporters_lock); list_for_each_entry(reporter, &devlink->reporter_list, list) if (!strcmp(reporter->ops->name, reporter_name)) return reporter; @@ -4450,7 +4473,7 @@ devlink_health_reporter_create(struct devlink *devlink, { struct devlink_health_reporter *reporter; - mutex_lock(&devlink->lock); + mutex_lock(&devlink->reporters_lock); if (devlink_health_reporter_find_by_name(devlink, ops->name)) { reporter = ERR_PTR(-EEXIST); goto unlock; @@ -4474,9 +4497,10 @@ devlink_health_reporter_create(struct devlink *devlink, reporter->graceful_period = graceful_period; reporter->auto_recover = auto_recover; mutex_init(&reporter->dump_lock); + refcount_set(&reporter->refcount, 1); list_add_tail(&reporter->list, &devlink->reporter_list); unlock: - mutex_unlock(&devlink->lock); + mutex_unlock(&devlink->reporters_lock); return reporter; } EXPORT_SYMBOL_GPL(devlink_health_reporter_create); @@ -4489,9 +4513,12 @@ EXPORT_SYMBOL_GPL(devlink_health_reporter_create); void devlink_health_reporter_destroy(struct devlink_health_reporter *reporter) { - mutex_lock(&reporter->devlink->lock); + mutex_lock(&reporter->devlink->reporters_lock); list_del(&reporter->list); - mutex_unlock(&reporter->devlink->lock); + mutex_unlock(&reporter->devlink->reporters_lock); + while (refcount_read(&reporter->refcount) > 1) + msleep(100); + mutex_destroy(&reporter->dump_lock); if (reporter->dump_fmsg) devlink_fmsg_free(reporter->dump_fmsg); kfree(reporter); @@ -4627,6 +4654,7 @@ static struct devlink_health_reporter * devlink_health_reporter_get_from_info(struct devlink *devlink, struct genl_info *info) { + struct devlink_health_reporter *reporter; char *reporter_name; if (!info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]) @@ -4634,7 +4662,18 @@ devlink_health_reporter_get_from_info(struct devlink *devlink, reporter_name = nla_data(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]); - return devlink_health_reporter_find_by_name(devlink, reporter_name); + mutex_lock(&devlink->reporters_lock); + reporter = devlink_health_reporter_find_by_name(devlink, reporter_name); + if (reporter) + refcount_inc(&reporter->refcount); + mutex_unlock(&devlink->reporters_lock); + return reporter; +} + +static void +devlink_health_reporter_put(struct devlink_health_reporter *reporter) +{ + refcount_dec(&reporter->refcount); } static int @@ -4654,7 +4693,8 @@ devlink_nl_health_reporter_fill(struct sk_buff *msg, if (devlink_nl_put_handle(msg, devlink)) goto genlmsg_cancel; - reporter_attr = nla_nest_start(msg, DEVLINK_ATTR_HEALTH_REPORTER); + reporter_attr = nla_nest_start_noflag(msg, + DEVLINK_ATTR_HEALTH_REPORTER); if (!reporter_attr) goto genlmsg_cancel; if (nla_put_string(msg, DEVLINK_ATTR_HEALTH_REPORTER_NAME, @@ -4708,8 +4748,10 @@ static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb, return -EINVAL; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return -ENOMEM; + if (!msg) { + err = -ENOMEM; + goto out; + } err = devlink_nl_health_reporter_fill(msg, devlink, reporter, 
DEVLINK_CMD_HEALTH_REPORTER_GET, @@ -4717,10 +4759,13 @@ static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb, 0); if (err) { nlmsg_free(msg); - return err; + goto out; } - return genlmsg_reply(msg, info); + err = genlmsg_reply(msg, info); +out: + devlink_health_reporter_put(reporter); + return err; } static int @@ -4737,7 +4782,7 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, list_for_each_entry(devlink, &devlink_list, list) { if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) continue; - mutex_lock(&devlink->lock); + mutex_lock(&devlink->reporters_lock); list_for_each_entry(reporter, &devlink->reporter_list, list) { if (idx < start) { @@ -4751,12 +4796,12 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, cb->nlh->nlmsg_seq, NLM_F_MULTI); if (err) { - mutex_unlock(&devlink->lock); + mutex_unlock(&devlink->reporters_lock); goto out; } idx++; } - mutex_unlock(&devlink->lock); + mutex_unlock(&devlink->reporters_lock); } out: mutex_unlock(&devlink_mutex); @@ -4771,6 +4816,7 @@ devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb, { struct devlink *devlink = info->user_ptr[0]; struct devlink_health_reporter *reporter; + int err; reporter = devlink_health_reporter_get_from_info(devlink, info); if (!reporter) @@ -4778,8 +4824,10 @@ devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb, if (!reporter->ops->recover && (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] || - info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER])) - return -EOPNOTSUPP; + info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER])) { + err = -EOPNOTSUPP; + goto out; + } if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]) reporter->graceful_period = @@ -4789,7 +4837,11 @@ devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb, reporter->auto_recover = nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]); + devlink_health_reporter_put(reporter); return 0; +out: + devlink_health_reporter_put(reporter); + return err; } static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb, @@ -4797,12 +4849,16 @@ static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb, { struct devlink *devlink = info->user_ptr[0]; struct devlink_health_reporter *reporter; + int err; reporter = devlink_health_reporter_get_from_info(devlink, info); if (!reporter) return -EINVAL; - return devlink_health_reporter_recover(reporter, NULL); + err = devlink_health_reporter_recover(reporter, NULL); + + devlink_health_reporter_put(reporter); + return err; } static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb, @@ -4817,12 +4873,16 @@ static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb, if (!reporter) return -EINVAL; - if (!reporter->ops->diagnose) + if (!reporter->ops->diagnose) { + devlink_health_reporter_put(reporter); return -EOPNOTSUPP; + } fmsg = devlink_fmsg_alloc(); - if (!fmsg) + if (!fmsg) { + devlink_health_reporter_put(reporter); return -ENOMEM; + } err = devlink_fmsg_obj_nest_start(fmsg); if (err) @@ -4841,6 +4901,7 @@ static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb, out: devlink_fmsg_free(fmsg); + devlink_health_reporter_put(reporter); return err; } @@ -4855,8 +4916,10 @@ static int devlink_nl_cmd_health_reporter_dump_get_doit(struct sk_buff *skb, if (!reporter) return -EINVAL; - if (!reporter->ops->dump) + if (!reporter->ops->dump) { + devlink_health_reporter_put(reporter); return -EOPNOTSUPP; + } mutex_lock(&reporter->dump_lock); err 
= devlink_health_do_dump(reporter, NULL); @@ -4868,6 +4931,7 @@ static int devlink_nl_cmd_health_reporter_dump_get_doit(struct sk_buff *skb, out: mutex_unlock(&reporter->dump_lock); + devlink_health_reporter_put(reporter); return err; } @@ -4882,12 +4946,15 @@ devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb, if (!reporter) return -EINVAL; - if (!reporter->ops->dump) + if (!reporter->ops->dump) { + devlink_health_reporter_put(reporter); return -EOPNOTSUPP; + } mutex_lock(&reporter->dump_lock); devlink_health_dump_clear(reporter); mutex_unlock(&reporter->dump_lock); + devlink_health_reporter_put(reporter); return 0; } @@ -4926,293 +4993,297 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { static const struct genl_ops devlink_nl_ops[] = { { .cmd = DEVLINK_CMD_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_get_doit, .dumpit = devlink_nl_cmd_get_dumpit, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_PORT_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_port_get_doit, .dumpit = devlink_nl_cmd_port_get_dumpit, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_PORT_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_port_set_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, }, { .cmd = DEVLINK_CMD_PORT_SPLIT, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_port_split_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_PORT_UNSPLIT, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_port_unsplit_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_SB_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_get_doit, .dumpit = devlink_nl_cmd_sb_get_dumpit, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NEED_SB, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_SB_POOL_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_pool_get_doit, .dumpit = devlink_nl_cmd_sb_pool_get_dumpit, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NEED_SB, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_SB_POOL_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_pool_set_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NEED_SB, }, { .cmd = DEVLINK_CMD_SB_PORT_POOL_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_port_pool_get_doit, .dumpit = devlink_nl_cmd_sb_port_pool_get_dumpit, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | DEVLINK_NL_FLAG_NEED_SB, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_SB_PORT_POOL_SET, + .validate = GENL_DONT_VALIDATE_STRICT | 
GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_port_pool_set_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | DEVLINK_NL_FLAG_NEED_SB, }, { .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_tc_pool_bind_get_doit, .dumpit = devlink_nl_cmd_sb_tc_pool_bind_get_dumpit, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | DEVLINK_NL_FLAG_NEED_SB, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_tc_pool_bind_set_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | DEVLINK_NL_FLAG_NEED_SB, }, { .cmd = DEVLINK_CMD_SB_OCC_SNAPSHOT, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_occ_snapshot_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NEED_SB, }, { .cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_occ_max_clear_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NEED_SB, }, { .cmd = DEVLINK_CMD_ESWITCH_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_eswitch_get_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_ESWITCH_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_eswitch_set_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_DPIPE_TABLE_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_dpipe_table_get, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_dpipe_entries_get, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_dpipe_headers_get, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_dpipe_table_counters_set, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_RESOURCE_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_resource_set, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_RESOURCE_DUMP, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_resource_dump, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ 
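Every entry in this ops array gets the same two-line treatment: the per-op .policy pointer is dropped in favour of a single family-wide policy, and pre-existing commands explicitly opt out of the new strict validation. Condensed to a minimal family -- a sketch with placeholder names, mirroring the devlink changes here and in the genl_family update further down:

static const struct genl_ops my_ops[] = {
	{
		.cmd = MY_CMD_GET,
		/* legacy command: keep old, lenient parsing */
		.validate = GENL_DONT_VALIDATE_STRICT |
			    GENL_DONT_VALIDATE_DUMP,
		.doit = my_cmd_get_doit,
	},
};

static struct genl_family my_family __ro_after_init = {
	.name = "my_family",
	.version = 1,
	.maxattr = MY_ATTR_MAX,
	.policy = my_nl_policy,	/* was per-op before this series */
	.ops = my_ops,
	.n_ops = ARRAY_SIZE(my_ops),
	.module = THIS_MODULE,
};

Commands added after this point would leave .validate at zero and get strict attribute checking by default.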
}, { .cmd = DEVLINK_CMD_RELOAD, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_reload, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_PARAM_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_param_get_doit, .dumpit = devlink_nl_cmd_param_get_dumpit, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_PARAM_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_param_set_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_PORT_PARAM_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_port_param_get_doit, .dumpit = devlink_nl_cmd_port_param_get_dumpit, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_PORT_PARAM_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_port_param_set_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, }, { .cmd = DEVLINK_CMD_REGION_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_region_get_doit, .dumpit = devlink_nl_cmd_region_get_dumpit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_REGION_DEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_region_del, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_REGION_READ, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = devlink_nl_cmd_region_read_dumpit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_INFO_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_info_get_doit, .dumpit = devlink_nl_cmd_info_get_dumpit, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_get_doit, .dumpit = devlink_nl_cmd_health_reporter_get_dumpit, - .policy = devlink_nl_policy, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NO_LOCK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_set_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_recover_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + .internal_flags = 
DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_diagnose_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_dump_get_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_dump_clear_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_FLASH_UPDATE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_flash_update, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, @@ -5222,6 +5293,7 @@ static struct genl_family devlink_nl_family __ro_after_init = { .name = DEVLINK_GENL_NAME, .version = DEVLINK_GENL_VERSION, .maxattr = DEVLINK_ATTR_MAX, + .policy = devlink_nl_policy, .netnsok = true, .pre_doit = devlink_nl_pre_doit, .post_doit = devlink_nl_post_doit, @@ -5261,6 +5333,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) INIT_LIST_HEAD(&devlink->region_list); INIT_LIST_HEAD(&devlink->reporter_list); mutex_init(&devlink->lock); + mutex_init(&devlink->reporters_lock); return devlink; } EXPORT_SYMBOL_GPL(devlink_alloc); @@ -5303,6 +5376,8 @@ EXPORT_SYMBOL_GPL(devlink_unregister); */ void devlink_free(struct devlink *devlink) { + mutex_destroy(&devlink->reporters_lock); + mutex_destroy(&devlink->lock); WARN_ON(!list_empty(&devlink->reporter_list)); WARN_ON(!list_empty(&devlink->region_list)); WARN_ON(!list_empty(&devlink->param_list)); @@ -5340,6 +5415,7 @@ int devlink_port_register(struct devlink *devlink, devlink_port->devlink = devlink; devlink_port->index = port_index; devlink_port->registered = true; + spin_lock_init(&devlink_port->type_lock); list_add_tail(&devlink_port->list, &devlink->port_list); INIT_LIST_HEAD(&devlink_port->param_list); mutex_unlock(&devlink->lock); @@ -5368,8 +5444,12 @@ static void __devlink_port_type_set(struct devlink_port *devlink_port, enum devlink_port_type type, void *type_dev) { + if (WARN_ON(!devlink_port->registered)) + return; + spin_lock(&devlink_port->type_lock); devlink_port->type = type; devlink_port->type_dev = type_dev; + spin_unlock(&devlink_port->type_lock); devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); } @@ -5382,8 +5462,39 @@ static void __devlink_port_type_set(struct devlink_port *devlink_port, void devlink_port_type_eth_set(struct devlink_port *devlink_port, struct net_device *netdev) { - return __devlink_port_type_set(devlink_port, - DEVLINK_PORT_TYPE_ETH, netdev); + const struct net_device_ops *ops = netdev->netdev_ops; + + /* If driver registers devlink port, it should set devlink port + * attributes accordingly so the compat functions are called + * and the original ops are not used. 
+ */ + if (ops->ndo_get_phys_port_name) { + /* Some drivers use the same set of ndos for netdevs + * that have devlink_port registered and also for + * those who don't. Make sure that ndo_get_phys_port_name + * returns -EOPNOTSUPP here in case it is defined. + * Warn if not. + */ + char name[IFNAMSIZ]; + int err; + + err = ops->ndo_get_phys_port_name(netdev, name, sizeof(name)); + WARN_ON(err != -EOPNOTSUPP); + } + if (ops->ndo_get_port_parent_id) { + /* Some drivers use the same set of ndos for netdevs + * that have devlink_port registered and also for + * those who don't. Make sure that ndo_get_port_parent_id + * returns -EOPNOTSUPP here in case it is defined. + * Warn if not. + */ + struct netdev_phys_item_id ppid; + int err; + + err = ops->ndo_get_port_parent_id(netdev, &ppid); + WARN_ON(err != -EOPNOTSUPP); + } + __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH, netdev); } EXPORT_SYMBOL_GPL(devlink_port_type_eth_set); @@ -5396,8 +5507,7 @@ EXPORT_SYMBOL_GPL(devlink_port_type_eth_set); void devlink_port_type_ib_set(struct devlink_port *devlink_port, struct ib_device *ibdev) { - return __devlink_port_type_set(devlink_port, - DEVLINK_PORT_TYPE_IB, ibdev); + __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_IB, ibdev); } EXPORT_SYMBOL_GPL(devlink_port_type_ib_set); @@ -5408,8 +5518,7 @@ EXPORT_SYMBOL_GPL(devlink_port_type_ib_set); */ void devlink_port_type_clear(struct devlink_port *devlink_port) { - return __devlink_port_type_set(devlink_port, - DEVLINK_PORT_TYPE_NOTSET, NULL); + __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET, NULL); } EXPORT_SYMBOL_GPL(devlink_port_type_clear); @@ -5423,25 +5532,40 @@ EXPORT_SYMBOL_GPL(devlink_port_type_clear); * @split: indicates if this is split port * @split_subport_number: if the port is split, this is the number * of subport. 
+ * @switch_id: if the port is part of a switch, this is a buffer with the ID, + * otherwise this is NULL + * @switch_id_len: length of the switch_id buffer */ void devlink_port_attrs_set(struct devlink_port *devlink_port, enum devlink_port_flavour flavour, u32 port_number, bool split, - u32 split_subport_number) + u32 split_subport_number, + const unsigned char *switch_id, + unsigned char switch_id_len) { struct devlink_port_attrs *attrs = &devlink_port->attrs; + if (WARN_ON(devlink_port->registered)) + return; attrs->set = true; attrs->flavour = flavour; attrs->port_number = port_number; attrs->split = split; attrs->split_subport_number = split_subport_number; - devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); + if (switch_id) { + attrs->switch_port = true; + if (WARN_ON(switch_id_len > MAX_PHYS_ITEM_ID_LEN)) + switch_id_len = MAX_PHYS_ITEM_ID_LEN; + memcpy(attrs->switch_id.id, switch_id, switch_id_len); + attrs->switch_id.id_len = switch_id_len; + } else { + attrs->switch_port = false; + } } EXPORT_SYMBOL_GPL(devlink_port_attrs_set); -int devlink_port_get_phys_port_name(struct devlink_port *devlink_port, - char *name, size_t len) +static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, + char *name, size_t len) { struct devlink_port_attrs *attrs = &devlink_port->attrs; int n = 0; @@ -5471,7 +5595,6 @@ int devlink_port_get_phys_port_name(struct devlink_port *devlink_port, return 0; } -EXPORT_SYMBOL_GPL(devlink_port_get_phys_port_name); int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, @@ -6447,17 +6570,15 @@ void devlink_compat_running_version(struct net_device *dev, dev_hold(dev); rtnl_unlock(); - mutex_lock(&devlink_mutex); devlink = netdev_to_devlink(dev); if (!devlink || !devlink->ops->info_get) - goto unlock_list; + goto out; mutex_lock(&devlink->lock); __devlink_compat_running_version(devlink, buf, len); mutex_unlock(&devlink->lock); -unlock_list: - mutex_unlock(&devlink_mutex); +out: rtnl_lock(); dev_put(dev); } @@ -6465,28 +6586,65 @@ unlock_list: int devlink_compat_flash_update(struct net_device *dev, const char *file_name) { struct devlink *devlink; - int ret = -EOPNOTSUPP; + int ret; dev_hold(dev); rtnl_unlock(); - mutex_lock(&devlink_mutex); devlink = netdev_to_devlink(dev); - if (!devlink || !devlink->ops->flash_update) - goto unlock_list; + if (!devlink || !devlink->ops->flash_update) { + ret = -EOPNOTSUPP; + goto out; + } mutex_lock(&devlink->lock); ret = devlink->ops->flash_update(devlink, file_name, NULL, NULL); mutex_unlock(&devlink->lock); -unlock_list: - mutex_unlock(&devlink_mutex); +out: rtnl_lock(); dev_put(dev); return ret; } +int devlink_compat_phys_port_name_get(struct net_device *dev, + char *name, size_t len) +{ + struct devlink_port *devlink_port; + + /* RTNL mutex is held here which ensures that devlink_port + * instance cannot disappear in the middle. No need to take + * any devlink lock as only permanent values are accessed. + */ + ASSERT_RTNL(); + + devlink_port = netdev_to_devlink_port(dev); + if (!devlink_port) + return -EOPNOTSUPP; + + return __devlink_port_phys_port_name_get(devlink_port, name, len); +} + +int devlink_compat_switch_id_get(struct net_device *dev, + struct netdev_phys_item_id *ppid) +{ + struct devlink_port *devlink_port; + + /* RTNL mutex is held here which ensures that devlink_port + * instance cannot disappear in the middle. No need to take + * any devlink lock as only permanent values are accessed. 
+ */ + ASSERT_RTNL(); + devlink_port = netdev_to_devlink_port(dev); + if (!devlink_port || !devlink_port->attrs.switch_port) + return -EOPNOTSUPP; + + memcpy(ppid, &devlink_port->attrs.switch_id, sizeof(*ppid)); + + return 0; +} + static int __init devlink_init(void) { return genl_register_family(&devlink_nl_family); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index c7785efeea57..d4ce0542acfa 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -355,14 +355,17 @@ out: static const struct genl_ops dropmon_ops[] = { { .cmd = NET_DM_CMD_CONFIG, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = net_dm_cmd_config, }, { .cmd = NET_DM_CMD_START, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = net_dm_cmd_trace, }, { .cmd = NET_DM_CMD_STOP, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = net_dm_cmd_trace, }, }; diff --git a/net/core/dst.c b/net/core/dst.c index a263309df115..1f13d90cd0e4 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -26,23 +26,6 @@ #include <net/dst.h> #include <net/dst_metadata.h> -/* - * Theory of operations: - * 1) We use a list, protected by a spinlock, to add - * new entries from both BH and non-BH context. - * 2) In order to keep spinlock held for a small delay, - * we use a second list where are stored long lived - * entries, that are handled by the garbage collect thread - * fired by a workqueue. - * 3) This list is guarded by a mutex, - * so that the gc_task and dst_dev_event() can be synchronized. - */ - -/* - * We want to keep lock & list close together - * to dirty as few cache lines as possible in __dst_free(). - * As this is not a very strong hint, we dont force an alignment on SMP. - */ int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb) { kfree_skb(skb); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 36ed619faf36..4a593853cbf2 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -136,6 +136,7 @@ static const char phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN] = { [ETHTOOL_ID_UNSPEC] = "Unspec", [ETHTOOL_PHY_DOWNSHIFT] = "phy-downshift", + [ETHTOOL_PHY_FAST_LINK_DOWN] = "phy-fast-link-down", }; static int ethtool_get_features(struct net_device *dev, void __user *useraddr) @@ -2446,6 +2447,7 @@ static int ethtool_phy_tunable_valid(const struct ethtool_tunable *tuna) { switch (tuna->id) { case ETHTOOL_PHY_DOWNSHIFT: + case ETHTOOL_PHY_FAST_LINK_DOWN: if (tuna->len != sizeof(u8) || tuna->type_id != ETHTOOL_TUNABLE_U8) return -EINVAL; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index ffbb827723a2..18f8dd8329ed 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -746,7 +746,8 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, goto errout; } - err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*frh), tb, FRA_MAX, + ops->policy, extack); if (err < 0) { NL_SET_ERR_MSG(extack, "Error parsing msg"); goto errout; @@ -853,7 +854,8 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, goto errout; } - err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*frh), tb, FRA_MAX, + ops->policy, extack); if (err < 0) { NL_SET_ERR_MSG(extack, "Error parsing msg"); goto errout; diff --git a/net/core/filter.c b/net/core/filter.c index 27e61ffd9039..55bfc941d17a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -74,6 +74,8 @@ 
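[Editor's note] The new ETHTOOL_PHY_FAST_LINK_DOWN tunable registered above is reachable from userspace through the classic SIOCETHTOOL ioctl. A sketch under stated assumptions (the meaning of the u8 payload is PHY-driver specific, and error handling is trimmed); note the .len/.type_id pair matches exactly what ethtool_phy_tunable_valid() above accepts.

#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int set_fast_link_down(int fd, const char *ifname, __u8 val)
{
	struct {
		struct ethtool_tunable hdr;
		__u8 data;	/* payload lands right after the header */
	} req = {
		.hdr = {
			.cmd	 = ETHTOOL_PHY_STUNABLE,
			.id	 = ETHTOOL_PHY_FAST_LINK_DOWN,
			.type_id = ETHTOOL_TUNABLE_U8,
			.len	 = sizeof(__u8),
		},
		.data = val,
	};
	struct ifreq ifr = {0};

	strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
	ifr.ifr_data = (void *)&req;

	/* fd is any AF_INET datagram socket, e.g.
	 * socket(AF_INET, SOCK_DGRAM, 0).
	 */
	return ioctl(fd, SIOCETHTOOL, &ifr);
}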
#include <net/seg6.h> #include <net/seg6_local.h> #include <net/lwtunnel.h> +#include <net/ipv6_stubs.h> +#include <net/bpf_sk_storage.h> /** * sk_filter_trim_cap - run a packet through a socket filter @@ -1729,6 +1731,40 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = { .arg4_type = ARG_CONST_SIZE, }; +BPF_CALL_4(bpf_flow_dissector_load_bytes, + const struct bpf_flow_dissector *, ctx, u32, offset, + void *, to, u32, len) +{ + void *ptr; + + if (unlikely(offset > 0xffff)) + goto err_clear; + + if (unlikely(!ctx->skb)) + goto err_clear; + + ptr = skb_header_pointer(ctx->skb, offset, len, to); + if (unlikely(!ptr)) + goto err_clear; + if (ptr != to) + memcpy(to, ptr, len); + + return 0; +err_clear: + memset(to, 0, len); + return -EFAULT; +} + +static const struct bpf_func_proto bpf_flow_dissector_load_bytes_proto = { + .func = bpf_flow_dissector_load_bytes, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_UNINIT_MEM, + .arg4_type = ARG_CONST_SIZE, +}; + BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb, u32, offset, void *, to, u32, len, u32, start_header) { @@ -2015,7 +2051,7 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) { int ret; - if (unlikely(__this_cpu_read(xmit_recursion) > XMIT_RECURSION_LIMIT)) { + if (dev_xmit_recursion()) { net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n"); kfree_skb(skb); return -ENETDOWN; @@ -2023,9 +2059,9 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) skb->dev = dev; - __this_cpu_inc(xmit_recursion); + dev_xmit_recursion_inc(); ret = dev_queue_xmit(skb); - __this_cpu_dec(xmit_recursion); + dev_xmit_recursion_dec(); return ret; } @@ -2963,42 +2999,128 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb) } } -static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff) +#define BPF_F_ADJ_ROOM_ENCAP_L3_MASK (BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 | \ + BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) + +#define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO | \ + BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \ + BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \ + BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \ + BPF_F_ADJ_ROOM_ENCAP_L2( \ + BPF_ADJ_ROOM_ENCAP_L2_MASK)) + +static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, + u64 flags) { - u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); + u8 inner_mac_len = flags >> BPF_ADJ_ROOM_ENCAP_L2_SHIFT; + bool encap = flags & BPF_F_ADJ_ROOM_ENCAP_L3_MASK; + u16 mac_len = 0, inner_net = 0, inner_trans = 0; + unsigned int gso_type = SKB_GSO_DODGY; int ret; - if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) - return -ENOTSUPP; + if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) { + /* udp gso_size delineates datagrams, only allow if fixed */ + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) || + !(flags & BPF_F_ADJ_ROOM_FIXED_GSO)) + return -ENOTSUPP; + } - ret = skb_cow(skb, len_diff); + ret = skb_cow_head(skb, len_diff); if (unlikely(ret < 0)) return ret; + if (encap) { + if (skb->protocol != htons(ETH_P_IP) && + skb->protocol != htons(ETH_P_IPV6)) + return -ENOTSUPP; + + if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 && + flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) + return -EINVAL; + + if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE && + flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) + return -EINVAL; + + if (skb->encapsulation) + return -EALREADY; + + mac_len = skb->network_header - skb->mac_header; + inner_net = skb->network_header; + if (inner_mac_len > len_diff) + 
return -EINVAL; + inner_trans = skb->transport_header; + } + ret = bpf_skb_net_hdr_push(skb, off, len_diff); if (unlikely(ret < 0)) return ret; + if (encap) { + skb->inner_mac_header = inner_net - inner_mac_len; + skb->inner_network_header = inner_net; + skb->inner_transport_header = inner_trans; + skb_set_inner_protocol(skb, skb->protocol); + + skb->encapsulation = 1; + skb_set_network_header(skb, mac_len); + + if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) + gso_type |= SKB_GSO_UDP_TUNNEL; + else if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE) + gso_type |= SKB_GSO_GRE; + else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) + gso_type |= SKB_GSO_IPXIP6; + else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4) + gso_type |= SKB_GSO_IPXIP4; + + if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE || + flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) { + int nh_len = flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 ? + sizeof(struct ipv6hdr) : + sizeof(struct iphdr); + + skb_set_transport_header(skb, mac_len + nh_len); + } + + /* Match skb->protocol to new outer l3 protocol */ + if (skb->protocol == htons(ETH_P_IP) && + flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) + skb->protocol = htons(ETH_P_IPV6); + else if (skb->protocol == htons(ETH_P_IPV6) && + flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4) + skb->protocol = htons(ETH_P_IP); + } + if (skb_is_gso(skb)) { struct skb_shared_info *shinfo = skb_shinfo(skb); /* Due to header grow, MSS needs to be downgraded. */ - skb_decrease_gso_size(shinfo, len_diff); + if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO)) + skb_decrease_gso_size(shinfo, len_diff); + /* Header must be checked, and gso_segs recomputed. */ - shinfo->gso_type |= SKB_GSO_DODGY; + shinfo->gso_type |= gso_type; shinfo->gso_segs = 0; } return 0; } -static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff) +static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff, + u64 flags) { - u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); int ret; - if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) - return -ENOTSUPP; + if (flags & ~BPF_F_ADJ_ROOM_FIXED_GSO) + return -EINVAL; + + if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) { + /* udp gso_size delineates datagrams, only allow if fixed */ + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) || + !(flags & BPF_F_ADJ_ROOM_FIXED_GSO)) + return -ENOTSUPP; + } ret = skb_unclone(skb, GFP_ATOMIC); if (unlikely(ret < 0)) @@ -3012,7 +3134,9 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff) struct skb_shared_info *shinfo = skb_shinfo(skb); /* Due to header shrink, MSS can be upgraded. */ - skb_increase_gso_size(shinfo, len_diff); + if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO)) + skb_increase_gso_size(shinfo, len_diff); + /* Header must be checked, and gso_segs recomputed. 
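[Editor's note] Putting the new flags together, a restricted-C sketch of an egress classifier that grows MAC-layer room for an outer IPv4+GRE header (assumptions: clang/libbpf build; writing the actual header bytes with bpf_skb_store_bytes() is elided). Per the else-if chain in bpf_skb_net_grow() above, this combination marks the skb with SKB_GSO_GRE so GSO keeps working.

#include <linux/bpf.h>
#include <linux/ip.h>
#include <linux/if_tunnel.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

SEC("classifier")
int gre_encap(struct __sk_buff *skb)
{
	__u32 room = sizeof(struct iphdr) + sizeof(struct gre_base_hdr);

	/* BPF_ADJ_ROOM_MAC opens the gap between the MAC and network
	 * headers; adding BPF_F_ADJ_ROOM_FIXED_GSO would pin the MSS
	 * instead of shrinking it by len_diff.
	 */
	if (bpf_skb_adjust_room(skb, room, BPF_ADJ_ROOM_MAC,
				BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 |
				BPF_F_ADJ_ROOM_ENCAP_L4_GRE))
		return TC_ACT_SHOT;

	/* ... now bpf_skb_store_bytes() the outer iphdr + GRE header ... */
	return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";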
*/ shinfo->gso_type |= SKB_GSO_DODGY; shinfo->gso_segs = 0; @@ -3027,49 +3151,50 @@ static u32 __bpf_skb_max_len(const struct sk_buff *skb) SKB_MAX_ALLOC; } -static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff) +BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff, + u32, mode, u64, flags) { - bool trans_same = skb->transport_header == skb->network_header; u32 len_cur, len_diff_abs = abs(len_diff); u32 len_min = bpf_skb_net_base_len(skb); u32 len_max = __bpf_skb_max_len(skb); __be16 proto = skb->protocol; bool shrink = len_diff < 0; + u32 off; int ret; + if (unlikely(flags & ~BPF_F_ADJ_ROOM_MASK)) + return -EINVAL; if (unlikely(len_diff_abs > 0xfffU)) return -EFAULT; if (unlikely(proto != htons(ETH_P_IP) && proto != htons(ETH_P_IPV6))) return -ENOTSUPP; + off = skb_mac_header_len(skb); + switch (mode) { + case BPF_ADJ_ROOM_NET: + off += bpf_skb_net_base_len(skb); + break; + case BPF_ADJ_ROOM_MAC: + break; + default: + return -ENOTSUPP; + } + len_cur = skb->len - skb_network_offset(skb); - if (skb_transport_header_was_set(skb) && !trans_same) - len_cur = skb_network_header_len(skb); if ((shrink && (len_diff_abs >= len_cur || len_cur - len_diff_abs < len_min)) || (!shrink && (skb->len + len_diff_abs > len_max && !skb_is_gso(skb)))) return -ENOTSUPP; - ret = shrink ? bpf_skb_net_shrink(skb, len_diff_abs) : - bpf_skb_net_grow(skb, len_diff_abs); + ret = shrink ? bpf_skb_net_shrink(skb, off, len_diff_abs, flags) : + bpf_skb_net_grow(skb, off, len_diff_abs, flags); bpf_compute_data_pointers(skb); return ret; } -BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff, - u32, mode, u64, flags) -{ - if (unlikely(flags)) - return -EINVAL; - if (likely(mode == BPF_ADJ_ROOM_NET)) - return bpf_skb_adjust_net(skb, len_diff); - - return -ENOTSUPP; -} - static const struct bpf_func_proto bpf_skb_adjust_room_proto = { .func = bpf_skb_adjust_room, .gpl_only = false, @@ -4355,8 +4480,7 @@ BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock, if (!IS_ENABLED(CONFIG_INET) || !sk_fullsock(sk)) return -EINVAL; - if (val) - tcp_sk(sk)->bpf_sock_ops_cb_flags = val; + tcp_sk(sk)->bpf_sock_ops_cb_flags = val; return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS); } @@ -4482,11 +4606,11 @@ static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params, static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, u32 flags, bool check_mtu) { + struct fib_nh_common *nhc; struct in_device *in_dev; struct neighbour *neigh; struct net_device *dev; struct fib_result res; - struct fib_nh *nh; struct flowi4 fl4; int err; u32 mtu; @@ -4559,22 +4683,33 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, return BPF_FIB_LKUP_RET_FRAG_NEEDED; } - nh = &res.fi->fib_nh[res.nh_sel]; + nhc = res.nhc; /* do not handle lwt encaps right now */ - if (nh->nh_lwtstate) + if (nhc->nhc_lwtstate) return BPF_FIB_LKUP_RET_UNSUPP_LWT; - dev = nh->nh_dev; - if (nh->nh_gw) - params->ipv4_dst = nh->nh_gw; + dev = nhc->nhc_dev; params->rt_metric = res.fi->fib_priority; /* xdp and cls_bpf programs are run in RCU-bh so * rcu_read_lock_bh is not needed here */ - neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst); + if (likely(nhc->nhc_gw_family != AF_INET6)) { + if (nhc->nhc_gw_family) + params->ipv4_dst = nhc->nhc_gw.ipv4; + + neigh = __ipv4_neigh_lookup_noref(dev, + (__force u32)params->ipv4_dst); + } else { + struct in6_addr *dst = (struct in6_addr *)params->ipv6_dst; + + params->family = AF_INET6; + *dst = nhc->nhc_gw.ipv6; + 
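[Editor's note] One behavioural note on the bpf_sock_ops_cb_flags_set() hunk above: dropping the old `if (val)` guard means a zero argument now actually clears previously requested callbacks instead of being silently ignored. A minimal sockops sketch of the round trip (assumes a clang/libbpf build):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("sockops")
int cb_flags(struct bpf_sock_ops *skops)
{
	switch (skops->op) {
	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
		/* Ask for retransmission-timeout callbacks. */
		bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_RTO_CB_FLAG);
		break;
	case BPF_SOCK_OPS_RTO_CB:
		/* Done watching: with the fix above, 0 really
		 * unsubscribes.
		 */
		bpf_sock_ops_cb_flags_set(skops, 0);
		break;
	}
	return 1;
}

char _license[] SEC("license") = "GPL";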
neigh = __ipv6_neigh_lookup_noref_stub(dev, dst); + } + if (!neigh) return BPF_FIB_LKUP_RET_NO_NEIGH; @@ -4588,13 +4723,13 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, { struct in6_addr *src = (struct in6_addr *) params->ipv6_src; struct in6_addr *dst = (struct in6_addr *) params->ipv6_dst; + struct fib6_result res = {}; struct neighbour *neigh; struct net_device *dev; struct inet6_dev *idev; - struct fib6_info *f6i; struct flowi6 fl6; int strict = 0; - int oif; + int oif, err; u32 mtu; /* link local addresses are never forwarded */ @@ -4636,61 +4771,57 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, if (unlikely(!tb)) return BPF_FIB_LKUP_RET_NOT_FWDED; - f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, strict); + err = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, &res, + strict); } else { fl6.flowi6_mark = 0; fl6.flowi6_secid = 0; fl6.flowi6_tun_key.tun_id = 0; fl6.flowi6_uid = sock_net_uid(net, NULL); - f6i = ipv6_stub->fib6_lookup(net, oif, &fl6, strict); + err = ipv6_stub->fib6_lookup(net, oif, &fl6, &res, strict); } - if (unlikely(IS_ERR_OR_NULL(f6i) || f6i == net->ipv6.fib6_null_entry)) + if (unlikely(err || IS_ERR_OR_NULL(res.f6i) || + res.f6i == net->ipv6.fib6_null_entry)) return BPF_FIB_LKUP_RET_NOT_FWDED; - if (unlikely(f6i->fib6_flags & RTF_REJECT)) { - switch (f6i->fib6_type) { - case RTN_BLACKHOLE: - return BPF_FIB_LKUP_RET_BLACKHOLE; - case RTN_UNREACHABLE: - return BPF_FIB_LKUP_RET_UNREACHABLE; - case RTN_PROHIBIT: - return BPF_FIB_LKUP_RET_PROHIBIT; - default: - return BPF_FIB_LKUP_RET_NOT_FWDED; - } - } - - if (f6i->fib6_type != RTN_UNICAST) + switch (res.fib6_type) { + /* only unicast is forwarded */ + case RTN_UNICAST: + break; + case RTN_BLACKHOLE: + return BPF_FIB_LKUP_RET_BLACKHOLE; + case RTN_UNREACHABLE: + return BPF_FIB_LKUP_RET_UNREACHABLE; + case RTN_PROHIBIT: + return BPF_FIB_LKUP_RET_PROHIBIT; + default: return BPF_FIB_LKUP_RET_NOT_FWDED; + } - if (f6i->fib6_nsiblings && fl6.flowi6_oif == 0) - f6i = ipv6_stub->fib6_multipath_select(net, f6i, &fl6, - fl6.flowi6_oif, NULL, - strict); + ipv6_stub->fib6_select_path(net, &res, &fl6, fl6.flowi6_oif, + fl6.flowi6_oif != 0, NULL, strict); if (check_mtu) { - mtu = ipv6_stub->ip6_mtu_from_fib6(f6i, dst, src); + mtu = ipv6_stub->ip6_mtu_from_fib6(&res, dst, src); if (params->tot_len > mtu) return BPF_FIB_LKUP_RET_FRAG_NEEDED; } - if (f6i->fib6_nh.nh_lwtstate) + if (res.nh->fib_nh_lws) return BPF_FIB_LKUP_RET_UNSUPP_LWT; - if (f6i->fib6_flags & RTF_GATEWAY) - *dst = f6i->fib6_nh.nh_gw; + if (res.nh->fib_nh_gw_family) + *dst = res.nh->fib_nh_gw6; - dev = f6i->fib6_nh.nh_dev; - params->rt_metric = f6i->fib6_metric; + dev = res.nh->fib_nh_dev; + params->rt_metric = res.f6i->fib6_metric; /* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is - * not needed here. Can not use __ipv6_neigh_lookup_noref here - * because we need to get nd_tbl via the stub + * not needed here. */ - neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128, - ndisc_hashfn, dst, dev); + neigh = __ipv6_neigh_lookup_noref_stub(dev, dst); if (!neigh) return BPF_FIB_LKUP_RET_NO_NEIGH; @@ -5158,15 +5289,15 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, return sk; } -/* bpf_sk_lookup performs the core lookup for different types of sockets, +/* bpf_skc_lookup performs the core lookup for different types of sockets, * taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE. 
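[Editor's note] The fib_nh_common/fib6_result conversion above is invisible to BPF programs; the usual bpf_fib_lookup() pattern keeps working, with the twist that an IPv4 route via an IPv6 gateway can now legitimately come back with params->family switched to AF_INET6 and the gateway in ipv6_dst. A sketch of the common XDP forwarding pattern (assumptions: clang/libbpf; IPv4 only, no VLAN or MTU handling):

#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

#ifndef AF_INET
#define AF_INET 2	/* normally from <sys/socket.h> */
#endif

SEC("xdp")
int xdp_fwd(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	struct iphdr *iph = data + sizeof(*eth);
	struct bpf_fib_lookup fib = {};

	if ((void *)(iph + 1) > data_end ||
	    eth->h_proto != bpf_htons(ETH_P_IP))
		return XDP_PASS;

	fib.family	= AF_INET;
	fib.tos		= iph->tos;
	fib.l4_protocol	= iph->protocol;
	fib.tot_len	= bpf_ntohs(iph->tot_len);
	fib.ipv4_src	= iph->saddr;
	fib.ipv4_dst	= iph->daddr;
	fib.ifindex	= ctx->ingress_ifindex;

	if (bpf_fib_lookup(ctx, &fib, sizeof(fib), 0) !=
	    BPF_FIB_LKUP_RET_SUCCESS)
		return XDP_PASS;

	/* Lookup filled in the neighbour MACs for us. */
	__builtin_memcpy(eth->h_dest, fib.dmac, ETH_ALEN);
	__builtin_memcpy(eth->h_source, fib.smac, ETH_ALEN);
	return bpf_redirect(fib.ifindex, 0);
}

char _license[] SEC("license") = "GPL";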
* Returns the socket as an 'unsigned long' to simplify the casting in the * callers to satisfy BPF_CALL declarations. */ -static unsigned long -__bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, - struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id, - u64 flags) +static struct sock * +__bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, + struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id, + u64 flags) { struct sock *sk = NULL; u8 family = AF_UNSPEC; @@ -5194,15 +5325,27 @@ __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, put_net(net); } +out: + return sk; +} + +static struct sock * +__bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, + struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id, + u64 flags) +{ + struct sock *sk = __bpf_skc_lookup(skb, tuple, len, caller_net, + ifindex, proto, netns_id, flags); + if (sk) sk = sk_to_full_sk(sk); -out: - return (unsigned long) sk; + + return sk; } -static unsigned long -bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, - u8 proto, u64 netns_id, u64 flags) +static struct sock * +bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, + u8 proto, u64 netns_id, u64 flags) { struct net *caller_net; int ifindex; @@ -5215,14 +5358,47 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, ifindex = 0; } - return __bpf_sk_lookup(skb, tuple, len, caller_net, ifindex, - proto, netns_id, flags); + return __bpf_skc_lookup(skb, tuple, len, caller_net, ifindex, proto, + netns_id, flags); } +static struct sock * +bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, + u8 proto, u64 netns_id, u64 flags) +{ + struct sock *sk = bpf_skc_lookup(skb, tuple, len, proto, netns_id, + flags); + + if (sk) + sk = sk_to_full_sk(sk); + + return sk; +} + +BPF_CALL_5(bpf_skc_lookup_tcp, struct sk_buff *, skb, + struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) +{ + return (unsigned long)bpf_skc_lookup(skb, tuple, len, IPPROTO_TCP, + netns_id, flags); +} + +static const struct bpf_func_proto bpf_skc_lookup_tcp_proto = { + .func = bpf_skc_lookup_tcp, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +}; + BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb, struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) { - return bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP, netns_id, flags); + return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP, + netns_id, flags); } static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = { @@ -5240,7 +5416,8 @@ static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = { BPF_CALL_5(bpf_sk_lookup_udp, struct sk_buff *, skb, struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) { - return bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP, netns_id, flags); + return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP, + netns_id, flags); } static const struct bpf_func_proto bpf_sk_lookup_udp_proto = { @@ -5275,8 +5452,9 @@ BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx, struct net *caller_net = dev_net(ctx->rxq->dev); int ifindex = ctx->rxq->dev->ifindex; - return __bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex, - IPPROTO_UDP, netns_id, flags); + return (unsigned 
long)__bpf_sk_lookup(NULL, tuple, len, caller_net, + ifindex, IPPROTO_UDP, netns_id, + flags); } static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = { @@ -5291,14 +5469,38 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = { .arg5_type = ARG_ANYTHING, }; +BPF_CALL_5(bpf_xdp_skc_lookup_tcp, struct xdp_buff *, ctx, + struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags) +{ + struct net *caller_net = dev_net(ctx->rxq->dev); + int ifindex = ctx->rxq->dev->ifindex; + + return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, caller_net, + ifindex, IPPROTO_TCP, netns_id, + flags); +} + +static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = { + .func = bpf_xdp_skc_lookup_tcp, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +}; + BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx, struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags) { struct net *caller_net = dev_net(ctx->rxq->dev); int ifindex = ctx->rxq->dev->ifindex; - return __bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex, - IPPROTO_TCP, netns_id, flags); + return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net, + ifindex, IPPROTO_TCP, netns_id, + flags); } static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = { @@ -5313,11 +5515,31 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = { .arg5_type = ARG_ANYTHING, }; +BPF_CALL_5(bpf_sock_addr_skc_lookup_tcp, struct bpf_sock_addr_kern *, ctx, + struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) +{ + return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, + sock_net(ctx->sk), 0, + IPPROTO_TCP, netns_id, flags); +} + +static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = { + .func = bpf_sock_addr_skc_lookup_tcp, + .gpl_only = false, + .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +}; + BPF_CALL_5(bpf_sock_addr_sk_lookup_tcp, struct bpf_sock_addr_kern *, ctx, struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) { - return __bpf_sk_lookup(NULL, tuple, len, sock_net(ctx->sk), 0, - IPPROTO_TCP, netns_id, flags); + return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, + sock_net(ctx->sk), 0, IPPROTO_TCP, + netns_id, flags); } static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = { @@ -5334,8 +5556,9 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = { BPF_CALL_5(bpf_sock_addr_sk_lookup_udp, struct bpf_sock_addr_kern *, ctx, struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) { - return __bpf_sk_lookup(NULL, tuple, len, sock_net(ctx->sk), 0, - IPPROTO_UDP, netns_id, flags); + return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, + sock_net(ctx->sk), 0, IPPROTO_UDP, + netns_id, flags); } static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = { @@ -5463,6 +5686,74 @@ static const struct bpf_func_proto bpf_skb_ecn_set_ce_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, }; + +BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len, + struct tcphdr *, th, u32, th_len) +{ +#ifdef CONFIG_SYN_COOKIES + u32 cookie; + int ret; + + if (unlikely(th_len < sizeof(*th))) + return -EINVAL; + + /* 
sk_listener() allows TCP_NEW_SYN_RECV, which makes no sense here. */ + if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN) + return -EINVAL; + + if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies) + return -EINVAL; + + if (!th->ack || th->rst || th->syn) + return -ENOENT; + + if (tcp_synq_no_recent_overflow(sk)) + return -ENOENT; + + cookie = ntohl(th->ack_seq) - 1; + + switch (sk->sk_family) { + case AF_INET: + if (unlikely(iph_len < sizeof(struct iphdr))) + return -EINVAL; + + ret = __cookie_v4_check((struct iphdr *)iph, th, cookie); + break; + +#if IS_BUILTIN(CONFIG_IPV6) + case AF_INET6: + if (unlikely(iph_len < sizeof(struct ipv6hdr))) + return -EINVAL; + + ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie); + break; +#endif /* CONFIG_IPV6 */ + + default: + return -EPROTONOSUPPORT; + } + + if (ret > 0) + return 0; + + return -ENOENT; +#else + return -ENOTSUPP; +#endif +} + +static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = { + .func = bpf_tcp_check_syncookie, + .gpl_only = true, + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_SOCK_COMMON, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_PTR_TO_MEM, + .arg5_type = ARG_CONST_SIZE, +}; + #endif /* CONFIG_INET */ bool bpf_helper_changes_pkt_data(void *func) @@ -5588,6 +5879,8 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sock_addr_sk_lookup_udp_proto; case BPF_FUNC_sk_release: return &bpf_sk_release_proto; + case BPF_FUNC_skc_lookup_tcp: + return &bpf_sock_addr_skc_lookup_tcp_proto; #endif /* CONFIG_INET */ default: return bpf_base_func_proto(func_id); @@ -5611,6 +5904,9 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) } } +const struct bpf_func_proto bpf_sk_storage_get_proto __weak; +const struct bpf_func_proto bpf_sk_storage_delete_proto __weak; + static const struct bpf_func_proto * cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -5619,6 +5915,10 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_local_storage_proto; case BPF_FUNC_sk_fullsock: return &bpf_sk_fullsock_proto; + case BPF_FUNC_sk_storage_get: + return &bpf_sk_storage_get_proto; + case BPF_FUNC_sk_storage_delete: + return &bpf_sk_storage_delete_proto; #ifdef CONFIG_INET case BPF_FUNC_tcp_sock: return &bpf_tcp_sock_proto; @@ -5700,6 +6000,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_skb_fib_lookup_proto; case BPF_FUNC_sk_fullsock: return &bpf_sk_fullsock_proto; + case BPF_FUNC_sk_storage_get: + return &bpf_sk_storage_get_proto; + case BPF_FUNC_sk_storage_delete: + return &bpf_sk_storage_delete_proto; #ifdef CONFIG_XFRM case BPF_FUNC_skb_get_xfrm_state: return &bpf_skb_get_xfrm_state_proto; @@ -5721,6 +6025,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_tcp_sock_proto; case BPF_FUNC_get_listener_sock: return &bpf_get_listener_sock_proto; + case BPF_FUNC_skc_lookup_tcp: + return &bpf_skc_lookup_tcp_proto; + case BPF_FUNC_tcp_check_syncookie: + return &bpf_tcp_check_syncookie_proto; + case BPF_FUNC_skb_ecn_set_ce: + return &bpf_skb_ecn_set_ce_proto; #endif default: return bpf_base_func_proto(func_id); @@ -5756,6 +6066,10 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_xdp_sk_lookup_tcp_proto; case BPF_FUNC_sk_release: return &bpf_sk_release_proto; + case BPF_FUNC_skc_lookup_tcp: + return 
&bpf_xdp_skc_lookup_tcp_proto; + case BPF_FUNC_tcp_check_syncookie: + return &bpf_tcp_check_syncookie_proto; #endif default: return bpf_base_func_proto(func_id); @@ -5848,6 +6162,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_lookup_udp_proto; case BPF_FUNC_sk_release: return &bpf_sk_release_proto; + case BPF_FUNC_skc_lookup_tcp: + return &bpf_skc_lookup_tcp_proto; #endif default: return bpf_base_func_proto(func_id); @@ -5859,7 +6175,7 @@ flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_skb_load_bytes: - return &bpf_skb_load_bytes_proto; + return &bpf_flow_dissector_load_bytes_proto; default: return bpf_base_func_proto(func_id); } @@ -5986,9 +6302,7 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type return false; break; case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): - if (size != sizeof(__u64)) - return false; - break; + return false; case bpf_ctx_range(struct __sk_buff, tstamp): if (size != sizeof(__u64)) return false; @@ -6023,7 +6337,6 @@ static bool sk_filter_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data): case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, data_end): - case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): case bpf_ctx_range_till(struct __sk_buff, family, local_port): case bpf_ctx_range(struct __sk_buff, tstamp): case bpf_ctx_range(struct __sk_buff, wire_len): @@ -6050,7 +6363,6 @@ static bool cg_skb_is_valid_access(int off, int size, switch (off) { case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, data_meta): - case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): case bpf_ctx_range(struct __sk_buff, wire_len): return false; case bpf_ctx_range(struct __sk_buff, data): @@ -6096,7 +6408,6 @@ static bool lwt_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range_till(struct __sk_buff, family, local_port): case bpf_ctx_range(struct __sk_buff, data_meta): - case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): case bpf_ctx_range(struct __sk_buff, tstamp): case bpf_ctx_range(struct __sk_buff, wire_len): return false; @@ -6339,7 +6650,6 @@ static bool tc_cls_act_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data_end): info->reg_type = PTR_TO_PACKET_END; break; - case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): case bpf_ctx_range_till(struct __sk_buff, family, local_port): return false; } @@ -6541,7 +6851,6 @@ static bool sk_skb_is_valid_access(int off, int size, switch (off) { case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, data_meta): - case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): case bpf_ctx_range(struct __sk_buff, tstamp): case bpf_ctx_range(struct __sk_buff, wire_len): return false; @@ -6615,24 +6924,65 @@ static bool flow_dissector_is_valid_access(int off, int size, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { + const int size_default = sizeof(__u32); + + if (off < 0 || off >= sizeof(struct __sk_buff)) + return false; + if (type == BPF_WRITE) return false; switch (off) { case bpf_ctx_range(struct __sk_buff, data): + if (size != size_default) + return false; info->reg_type = PTR_TO_PACKET; - break; + return true; case bpf_ctx_range(struct __sk_buff, data_end): + if (size != size_default) + return false; info->reg_type = PTR_TO_PACKET_END; - break; + return true; case 
bpf_ctx_range_ptr(struct __sk_buff, flow_keys): + if (size != sizeof(__u64)) + return false; info->reg_type = PTR_TO_FLOW_KEYS; - break; + return true; default: return false; } +} - return bpf_skb_is_valid_access(off, size, type, prog, info); +static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, + u32 *target_size) + +{ + struct bpf_insn *insn = insn_buf; + + switch (si->off) { + case offsetof(struct __sk_buff, data): + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data), + si->dst_reg, si->src_reg, + offsetof(struct bpf_flow_dissector, data)); + break; + + case offsetof(struct __sk_buff, data_end): + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data_end), + si->dst_reg, si->src_reg, + offsetof(struct bpf_flow_dissector, data_end)); + break; + + case offsetof(struct __sk_buff, flow_keys): + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, flow_keys), + si->dst_reg, si->src_reg, + offsetof(struct bpf_flow_dissector, flow_keys)); + break; + } + + return insn - insn_buf; } static u32 bpf_convert_ctx_access(enum bpf_access_type type, @@ -6939,15 +7289,6 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, skc_num, 2, target_size)); break; - case offsetof(struct __sk_buff, flow_keys): - off = si->off; - off -= offsetof(struct __sk_buff, flow_keys); - off += offsetof(struct sk_buff, cb); - off += offsetof(struct qdisc_skb_cb, flow_keys); - *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, - si->src_reg, off); - break; - case offsetof(struct __sk_buff, tstamp): BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tstamp) != 8); @@ -7952,7 +8293,7 @@ const struct bpf_prog_ops sk_msg_prog_ops = { const struct bpf_verifier_ops flow_dissector_verifier_ops = { .get_func_proto = flow_dissector_func_proto, .is_valid_access = flow_dissector_is_valid_access, - .convert_ctx_access = bpf_convert_ctx_access, + .convert_ctx_access = flow_dissector_convert_ctx_access, }; const struct bpf_prog_ops flow_dissector_prog_ops = { diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 94a450b2191a..9ca784c592ac 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -65,6 +65,45 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector, } EXPORT_SYMBOL(skb_flow_dissector_init); +int skb_flow_dissector_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); + u32 prog_id, prog_cnt = 0, flags = 0; + struct bpf_prog *attached; + struct net *net; + + if (attr->query.query_flags) + return -EINVAL; + + net = get_net_ns_by_fd(attr->query.target_fd); + if (IS_ERR(net)) + return PTR_ERR(net); + + rcu_read_lock(); + attached = rcu_dereference(net->flow_dissector_prog); + if (attached) { + prog_cnt = 1; + prog_id = attached->aux->id; + } + rcu_read_unlock(); + + put_net(net); + + if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags))) + return -EFAULT; + if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt))) + return -EFAULT; + + if (!attr->query.prog_cnt || !prog_ids || !prog_cnt) + return 0; + + if (copy_to_user(prog_ids, &prog_id, sizeof(u32))) + return -EFAULT; + + return 0; +} + int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) { @@ -683,50 +722,30 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, } } -bool 
__skb_flow_bpf_dissect(struct bpf_prog *prog, - const struct sk_buff *skb, - struct flow_dissector *flow_dissector, - struct bpf_flow_keys *flow_keys) +bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, + __be16 proto, int nhoff, int hlen) { - struct bpf_skb_data_end cb_saved; - struct bpf_skb_data_end *cb; + struct bpf_flow_keys *flow_keys = ctx->flow_keys; u32 result; - /* Note that even though the const qualifier is discarded - * throughout the execution of the BPF program, all changes(the - * control block) are reverted after the BPF program returns. - * Therefore, __skb_flow_dissect does not alter the skb. - */ - - cb = (struct bpf_skb_data_end *)skb->cb; - - /* Save Control Block */ - memcpy(&cb_saved, cb, sizeof(cb_saved)); - memset(cb, 0, sizeof(*cb)); - /* Pass parameters to the BPF program */ memset(flow_keys, 0, sizeof(*flow_keys)); - cb->qdisc_cb.flow_keys = flow_keys; - flow_keys->n_proto = skb->protocol; - flow_keys->nhoff = skb_network_offset(skb); + flow_keys->n_proto = proto; + flow_keys->nhoff = nhoff; flow_keys->thoff = flow_keys->nhoff; - bpf_compute_data_pointers((struct sk_buff *)skb); - result = BPF_PROG_RUN(prog, skb); + result = BPF_PROG_RUN(prog, ctx); - /* Restore state */ - memcpy(cb, &cb_saved, sizeof(cb_saved)); - - flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, - skb_network_offset(skb), skb->len); + flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, nhoff, hlen); flow_keys->thoff = clamp_t(u16, flow_keys->thoff, - flow_keys->nhoff, skb->len); + flow_keys->nhoff, hlen); return result == BPF_OK; } /** * __skb_flow_dissect - extract the flow_keys struct and return it + * @net: associated network namespace, derived from @skb if NULL * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified * @flow_dissector: list of keys to dissect * @target_container: target structure to put dissected values into @@ -734,6 +753,8 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog, * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb) * @hlen: packet header length, if @data is NULL use skb_headlen(skb) + * @flags: flags that control the dissection process, e.g. + * FLOW_DISSECTOR_F_STOP_AT_L3. * * The function will try to retrieve individual keys into target specified * by flow_dissector from either the skbuff or a raw buffer specified by the @@ -741,7 +762,8 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog, * * Caller must take care of zeroing target container memory. 
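[Editor's note] On the program side, the move from struct sk_buff to struct bpf_flow_dissector is transparent: data, data_end and flow_keys accesses are rewritten by flow_dissector_convert_ctx_access() above, and bpf_skb_load_bytes() is quietly remapped to the dissector-specific helper. A deliberately minimal dissector sketch (assumptions: clang/libbpf; fragments, IP options beyond ihl, and non-IPv4 traffic are not handled):

#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <bpf/bpf_helpers.h>

SEC("flow_dissector")
int dissect_ipv4(struct __sk_buff *skb)
{
	struct bpf_flow_keys *keys = skb->flow_keys;
	struct iphdr iph;

	/* Resolves to bpf_flow_dissector_load_bytes(), which reads via
	 * skb_header_pointer() and fails cleanly when there is no skb.
	 */
	if (bpf_skb_load_bytes(skb, keys->nhoff, &iph, sizeof(iph)))
		return BPF_DROP;

	keys->addr_proto = ETH_P_IP;
	keys->ip_proto	 = iph.protocol;
	keys->ipv4_src	 = iph.saddr;
	keys->ipv4_dst	 = iph.daddr;
	keys->thoff	 = keys->nhoff + (iph.ihl << 2);
	return BPF_OK;
}

char _license[] SEC("license") = "GPL";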
*/ -bool __skb_flow_dissect(const struct sk_buff *skb, +bool __skb_flow_dissect(const struct net *net, + const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, void *data, __be16 proto, int nhoff, int hlen, @@ -754,6 +776,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector_key_icmp *key_icmp; struct flow_dissector_key_tags *key_tags; struct flow_dissector_key_vlan *key_vlan; + struct bpf_prog *attached = NULL; enum flow_dissect_ret fdret; enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX; int num_hdrs = 0; @@ -796,22 +819,39 @@ bool __skb_flow_dissect(const struct sk_buff *skb, target_container); if (skb) { - struct bpf_flow_keys flow_keys; - struct bpf_prog *attached = NULL; + if (!net) { + if (skb->dev) + net = dev_net(skb->dev); + else if (skb->sk) + net = sock_net(skb->sk); + } + } + WARN_ON_ONCE(!net); + if (net) { rcu_read_lock(); - - if (skb->dev) - attached = rcu_dereference(dev_net(skb->dev)->flow_dissector_prog); - else if (skb->sk) - attached = rcu_dereference(sock_net(skb->sk)->flow_dissector_prog); - else - WARN_ON_ONCE(1); + attached = rcu_dereference(net->flow_dissector_prog); if (attached) { - ret = __skb_flow_bpf_dissect(attached, skb, - flow_dissector, - &flow_keys); + struct bpf_flow_keys flow_keys; + struct bpf_flow_dissector ctx = { + .flow_keys = &flow_keys, + .data = data, + .data_end = data + hlen, + }; + __be16 n_proto = proto; + + if (skb) { + ctx.skb = skb; + /* we can't use 'proto' in the skb case + * because it might be set to skb->vlan_proto + * which has been pulled from the data + */ + n_proto = skb->protocol; + } + + ret = bpf_flow_dissect(attached, &ctx, n_proto, nhoff, + hlen); __skb_flow_bpf_to_target(&flow_keys, flow_dissector, target_container); rcu_read_unlock(); @@ -1408,8 +1448,8 @@ u32 __skb_get_hash_symmetric(const struct sk_buff *skb) __flow_hash_secret_init(); memset(&keys, 0, sizeof(keys)); - __skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys, - NULL, 0, 0, 0, + __skb_flow_dissect(NULL, skb, &flow_keys_dissector_symmetric, + &keys, NULL, 0, 0, 0, FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); return __flow_hash_from_keys(&keys, hashrnd); @@ -1510,7 +1550,8 @@ u32 skb_get_poff(const struct sk_buff *skb) { struct flow_keys_basic keys; - if (!skb_flow_dissect_flow_keys_basic(skb, &keys, NULL, 0, 0, 0, 0)) + if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, + NULL, 0, 0, 0, 0)) return 0; return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb)); diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index ac679f74ba47..9bf1b9ad1780 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -291,6 +291,7 @@ __gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats, for_each_possible_cpu(i) { const struct gnet_stats_queue *qcpu = per_cpu_ptr(q, i); + qstats->qlen = 0; qstats->backlog += qcpu->backlog; qstats->drops += qcpu->drops; qstats->requeues += qcpu->requeues; @@ -306,6 +307,7 @@ void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats, if (cpu) { __gnet_stats_copy_queue_cpu(qstats, cpu); } else { + qstats->qlen = q->qlen; qstats->backlog = q->backlog; qstats->drops = q->drops; qstats->requeues = q->requeues; diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index 126d31ff5ee3..1c94f529f4a1 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -18,6 +18,7 @@ #include <net/lwtunnel.h> #include <net/gre.h> #include <net/ip6_route.h> +#include <net/ipv6_stubs.h> struct bpf_lwt_prog { struct bpf_prog *prog; @@ -342,8 +343,8 @@ static int 
bpf_parse_prog(struct nlattr *attr, struct bpf_lwt_prog *prog, int ret; u32 fd; - ret = nla_parse_nested(tb, LWT_BPF_PROG_MAX, attr, bpf_prog_policy, - NULL); + ret = nla_parse_nested_deprecated(tb, LWT_BPF_PROG_MAX, attr, + bpf_prog_policy, NULL); if (ret < 0) return ret; @@ -384,7 +385,8 @@ static int bpf_build_state(struct nlattr *nla, if (family != AF_INET && family != AF_INET6) return -EAFNOSUPPORT; - ret = nla_parse_nested(tb, LWT_BPF_MAX, nla, bpf_nl_policy, extack); + ret = nla_parse_nested_deprecated(tb, LWT_BPF_MAX, nla, bpf_nl_policy, + extack); if (ret < 0) return ret; @@ -452,7 +454,7 @@ static int bpf_fill_lwt_prog(struct sk_buff *skb, int attr, if (!prog->prog) return 0; - nest = nla_nest_start(skb, attr); + nest = nla_nest_start_noflag(skb, attr); if (!nest) return -EMSGSIZE; diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 19b557bd294b..69e249fbc02f 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -26,7 +26,7 @@ #include <net/lwtunnel.h> #include <net/rtnetlink.h> #include <net/ip6_fib.h> -#include <net/nexthop.h> +#include <net/rtnh.h> #ifdef CONFIG_MODULES @@ -223,7 +223,8 @@ void lwtstate_free(struct lwtunnel_state *lws) } EXPORT_SYMBOL_GPL(lwtstate_free); -int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate) +int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate, + int encap_attr, int encap_type_attr) { const struct lwtunnel_encap_ops *ops; struct nlattr *nest; @@ -236,7 +237,7 @@ int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate) lwtstate->type > LWTUNNEL_ENCAP_MAX) return 0; - nest = nla_nest_start(skb, RTA_ENCAP); + nest = nla_nest_start_noflag(skb, encap_attr); if (!nest) return -EMSGSIZE; @@ -250,7 +251,7 @@ int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate) if (ret) goto nla_put_failure; nla_nest_end(skb, nest); - ret = nla_put_u16(skb, RTA_ENCAP_TYPE, lwtstate->type); + ret = nla_put_u16(skb, encap_type_attr, lwtstate->type); if (ret) goto nla_put_failure; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 30f6fd8f68e0..dfa871061f14 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -31,6 +31,7 @@ #include <linux/times.h> #include <net/net_namespace.h> #include <net/neighbour.h> +#include <net/arp.h> #include <net/dst.h> #include <net/sock.h> #include <net/netevent.h> @@ -663,6 +664,8 @@ out: out_tbl_unlock: write_unlock_bh(&tbl->lock); out_neigh_release: + if (!exempt_from_gc) + atomic_dec(&tbl->gc_entries); neigh_release(n); goto out; } @@ -1862,7 +1865,8 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, int err; ASSERT_RTNL(); - err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, nda_policy, extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX, + nda_policy, extack); if (err < 0) goto out; @@ -1920,6 +1924,11 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; } + if (tbl->allow_add && !tbl->allow_add(dev, extack)) { + err = -EINVAL; + goto out; + } + neigh = neigh_lookup(tbl, dst, dev); if (neigh == NULL) { bool exempt_from_gc; @@ -1974,7 +1983,7 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) { struct nlattr *nest; - nest = nla_nest_start(skb, NDTA_PARMS); + nest = nla_nest_start_noflag(skb, NDTA_PARMS); if (nest == NULL) return -ENOBUFS; @@ -2176,8 +2185,8 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, bool found = false; int err, tidx; - err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX, - 
nl_neightbl_policy, extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX, + nl_neightbl_policy, extack); if (err < 0) goto errout; @@ -2214,8 +2223,9 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, struct neigh_parms *p; int i, ifindex = 0; - err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS], - nl_ntbl_parm_policy, extack); + err = nla_parse_nested_deprecated(tbp, NDTPA_MAX, + tb[NDTA_PARMS], + nl_ntbl_parm_policy, extack); if (err < 0) goto errout_tbl_lock; @@ -2655,11 +2665,12 @@ static int neigh_valid_dump_req(const struct nlmsghdr *nlh, return -EINVAL; } - err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX, - nda_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), + tb, NDA_MAX, nda_policy, + extack); } else { - err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, - nda_policy, extack); + err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb, + NDA_MAX, nda_policy, extack); } if (err < 0) return err; @@ -2759,8 +2770,8 @@ static int neigh_valid_get_req(const struct nlmsghdr *nlh, return -EINVAL; } - err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX, - nda_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb, + NDA_MAX, nda_policy, extack); if (err < 0) return err; @@ -2982,7 +2993,13 @@ int neigh_xmit(int index, struct net_device *dev, if (!tbl) goto out; rcu_read_lock_bh(); - neigh = __neigh_lookup_noref(tbl, addr, dev); + if (index == NEIGH_ARP_TABLE) { + u32 key = *((u32 *)addr); + + neigh = __ipv4_neigh_lookup_noref(dev, key); + } else { + neigh = __neigh_lookup_noref(tbl, addr, dev); + } if (!neigh) neigh = __neigh_create(tbl, addr, dev, false); err = PTR_ERR(neigh); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 530e5b04b97d..d9c4360257ce 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -754,9 +754,9 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue, rcu_assign_pointer(queue->rps_map, map); if (map) - static_key_slow_inc(&rps_needed); + static_branch_inc(&rps_needed); if (old_map) - static_key_slow_dec(&rps_needed); + static_branch_dec(&rps_needed); mutex_unlock(&rps_map_mutex); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 7e6dcc625701..711b161505ac 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -682,8 +682,8 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *peer; int nsid, err; - err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, - rtnl_net_policy, extack); + err = nlmsg_parse_deprecated(nlh, sizeof(struct rtgenmsg), tb, + NETNSA_MAX, rtnl_net_policy, extack); if (err < 0) return err; if (!tb[NETNSA_NSID]) { @@ -787,11 +787,13 @@ static int rtnl_net_valid_getid_req(struct sk_buff *skb, int i, err; if (!netlink_strict_get_check(skb)) - return nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, - rtnl_net_policy, extack); + return nlmsg_parse_deprecated(nlh, sizeof(struct rtgenmsg), + tb, NETNSA_MAX, rtnl_net_policy, + extack); - err = nlmsg_parse_strict(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, - rtnl_net_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct rtgenmsg), tb, + NETNSA_MAX, rtnl_net_policy, + extack); if (err) return err; @@ -839,7 +841,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh, peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD])); nla = tb[NETNSA_FD]; } else if (tb[NETNSA_NSID]) { - peer = get_net_ns_by_id(net, 
nla_get_u32(tb[NETNSA_NSID])); + peer = get_net_ns_by_id(net, nla_get_s32(tb[NETNSA_NSID])); if (!peer) peer = ERR_PTR(-ENOENT); nla = tb[NETNSA_NSID]; @@ -929,8 +931,9 @@ static int rtnl_valid_dump_net_req(const struct nlmsghdr *nlh, struct sock *sk, struct nlattr *tb[NETNSA_MAX + 1]; int err, i; - err = nlmsg_parse_strict(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, - rtnl_net_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct rtgenmsg), tb, + NETNSA_MAX, rtnl_net_policy, + extack); if (err < 0) return err; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index bf5446192d6a..a0f05416657b 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -323,7 +323,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) { struct netdev_queue *txq; - txq = netdev_pick_tx(dev, skb, NULL); + txq = netdev_core_pick_tx(dev, skb, NULL); /* try until next clock tick */ for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index b9057478d69c..7e3d0d99dfae 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -301,6 +301,4 @@ static int __init init_cgroup_netprio(void) register_netdevice_notifier(&netprio_device_notifier); return 0; } - subsys_initcall(init_cgroup_netprio); -MODULE_LICENSE("GPL v2"); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index f3f5a78cd062..319ad5490fb3 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2521,7 +2521,7 @@ static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev) skb->_skb_refdst = (unsigned long)&pkt_dev->xdst.u.dst | SKB_DST_NOREF; rcu_read_lock_bh(); - err = x->outer_mode->output(x, skb); + err = pktgen_xfrm_outer_mode_output(x, skb); rcu_read_unlock_bh(); if (err) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 220c56e93659..2bd12afb9297 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -634,7 +634,7 @@ static int rtnl_link_slave_info_fill(struct sk_buff *skb, if (nla_put_string(skb, IFLA_INFO_SLAVE_KIND, ops->kind) < 0) return -EMSGSIZE; if (ops->fill_slave_info) { - slave_data = nla_nest_start(skb, IFLA_INFO_SLAVE_DATA); + slave_data = nla_nest_start_noflag(skb, IFLA_INFO_SLAVE_DATA); if (!slave_data) return -EMSGSIZE; err = ops->fill_slave_info(skb, master_dev, dev); @@ -666,7 +666,7 @@ static int rtnl_link_info_fill(struct sk_buff *skb, return err; } if (ops->fill_info) { - data = nla_nest_start(skb, IFLA_INFO_DATA); + data = nla_nest_start_noflag(skb, IFLA_INFO_DATA); if (data == NULL) return -EMSGSIZE; err = ops->fill_info(skb, dev); @@ -686,7 +686,7 @@ static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev) struct nlattr *linkinfo; int err = -EMSGSIZE; - linkinfo = nla_nest_start(skb, IFLA_LINKINFO); + linkinfo = nla_nest_start_noflag(skb, IFLA_LINKINFO); if (linkinfo == NULL) goto out; @@ -755,7 +755,7 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) struct nlattr *mx; int i, valid = 0; - mx = nla_nest_start(skb, RTA_METRICS); + mx = nla_nest_start_noflag(skb, RTA_METRICS); if (mx == NULL) return -ENOBUFS; @@ -1036,12 +1036,12 @@ static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev) int vf; int err; - vf_ports = nla_nest_start(skb, IFLA_VF_PORTS); + vf_ports = nla_nest_start_noflag(skb, IFLA_VF_PORTS); if (!vf_ports) return -EMSGSIZE; for (vf = 0; vf < dev_num_vf(dev->dev.parent); vf++) 
{ - vf_port = nla_nest_start(skb, IFLA_VF_PORT); + vf_port = nla_nest_start_noflag(skb, IFLA_VF_PORT); if (!vf_port) goto nla_put_failure; if (nla_put_u32(skb, IFLA_PORT_VF, vf)) @@ -1070,7 +1070,7 @@ static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev) struct nlattr *port_self; int err; - port_self = nla_nest_start(skb, IFLA_PORT_SELF); + port_self = nla_nest_start_noflag(skb, IFLA_PORT_SELF); if (!port_self) return -EMSGSIZE; @@ -1247,7 +1247,7 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, vf_linkstate.link_state = ivi.linkstate; vf_rss_query_en.setting = ivi.rss_query_en; vf_trust.setting = ivi.trusted; - vf = nla_nest_start(skb, IFLA_VF_INFO); + vf = nla_nest_start_noflag(skb, IFLA_VF_INFO); if (!vf) goto nla_put_vfinfo_failure; if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) || @@ -1266,7 +1266,7 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, nla_put(skb, IFLA_VF_TRUST, sizeof(vf_trust), &vf_trust)) goto nla_put_vf_failure; - vfvlanlist = nla_nest_start(skb, IFLA_VF_VLAN_LIST); + vfvlanlist = nla_nest_start_noflag(skb, IFLA_VF_VLAN_LIST); if (!vfvlanlist) goto nla_put_vf_failure; if (nla_put(skb, IFLA_VF_VLAN_INFO, sizeof(vf_vlan_info), @@ -1279,7 +1279,7 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, if (dev->netdev_ops->ndo_get_vf_stats) dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num, &vf_stats); - vfstats = nla_nest_start(skb, IFLA_VF_STATS); + vfstats = nla_nest_start_noflag(skb, IFLA_VF_STATS); if (!vfstats) goto nla_put_vf_failure; if (nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_PACKETS, @@ -1329,7 +1329,7 @@ static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb, if (!dev->netdev_ops->ndo_get_vf_config) return 0; - vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST); + vfinfo = nla_nest_start_noflag(skb, IFLA_VFINFO_LIST); if (!vfinfo) return -EMSGSIZE; @@ -1414,7 +1414,7 @@ static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev) int err; u8 mode; - xdp = nla_nest_start(skb, IFLA_XDP); + xdp = nla_nest_start_noflag(skb, IFLA_XDP); if (!xdp) return -EMSGSIZE; @@ -1541,7 +1541,7 @@ static int rtnl_fill_link_af(struct sk_buff *skb, const struct rtnl_af_ops *af_ops; struct nlattr *af_spec; - af_spec = nla_nest_start(skb, IFLA_AF_SPEC); + af_spec = nla_nest_start_noflag(skb, IFLA_AF_SPEC); if (!af_spec) return -EMSGSIZE; @@ -1552,7 +1552,7 @@ static int rtnl_fill_link_af(struct sk_buff *skb, if (!af_ops->fill_link_af) continue; - af = nla_nest_start(skb, af_ops->family); + af = nla_nest_start_noflag(skb, af_ops->family); if (!af) return -EMSGSIZE; @@ -1797,8 +1797,7 @@ static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla const struct rtnl_link_ops *ops = NULL; struct nlattr *linfo[IFLA_INFO_MAX + 1]; - if (nla_parse_nested(linfo, IFLA_INFO_MAX, nla, - ifla_info_policy, NULL) < 0) + if (nla_parse_nested_deprecated(linfo, IFLA_INFO_MAX, nla, ifla_info_policy, NULL) < 0) return NULL; if (linfo[IFLA_INFO_KIND]) { @@ -1897,8 +1896,9 @@ static int rtnl_valid_dump_ifinfo_req(const struct nlmsghdr *nlh, return -EINVAL; } - return nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFLA_MAX, - ifla_policy, extack); + return nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, + IFLA_MAX, ifla_policy, + extack); } /* A hack to preserve kernel<->userspace interface. @@ -1911,7 +1911,8 @@ static int rtnl_valid_dump_ifinfo_req(const struct nlmsghdr *nlh, hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ? 
sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg); - return nlmsg_parse(nlh, hdrlen, tb, IFLA_MAX, ifla_policy, extack); + return nlmsg_parse_deprecated(nlh, hdrlen, tb, IFLA_MAX, ifla_policy, + extack); } static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) @@ -2019,7 +2020,8 @@ out_err: int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len, struct netlink_ext_ack *exterr) { - return nla_parse(tb, IFLA_MAX, head, len, ifla_policy, exterr); + return nla_parse_deprecated(tb, IFLA_MAX, head, len, ifla_policy, + exterr); } EXPORT_SYMBOL(rtnl_nla_parse_ifla); @@ -2564,8 +2566,10 @@ static int do_setlink(const struct sk_buff *skb, err = -EINVAL; goto errout; } - err = nla_parse_nested(vfinfo, IFLA_VF_MAX, attr, - ifla_vf_policy, NULL); + err = nla_parse_nested_deprecated(vfinfo, IFLA_VF_MAX, + attr, + ifla_vf_policy, + NULL); if (err < 0) goto errout; err = do_setvfinfo(dev, vfinfo); @@ -2592,8 +2596,10 @@ static int do_setlink(const struct sk_buff *skb, err = -EINVAL; goto errout; } - err = nla_parse_nested(port, IFLA_PORT_MAX, attr, - ifla_port_policy, NULL); + err = nla_parse_nested_deprecated(port, IFLA_PORT_MAX, + attr, + ifla_port_policy, + NULL); if (err < 0) goto errout; if (!port[IFLA_PORT_VF]) { @@ -2612,9 +2618,9 @@ static int do_setlink(const struct sk_buff *skb, if (tb[IFLA_PORT_SELF]) { struct nlattr *port[IFLA_PORT_MAX+1]; - err = nla_parse_nested(port, IFLA_PORT_MAX, - tb[IFLA_PORT_SELF], ifla_port_policy, - NULL); + err = nla_parse_nested_deprecated(port, IFLA_PORT_MAX, + tb[IFLA_PORT_SELF], + ifla_port_policy, NULL); if (err < 0) goto errout; @@ -2661,8 +2667,9 @@ static int do_setlink(const struct sk_buff *skb, struct nlattr *xdp[IFLA_XDP_MAX + 1]; u32 xdp_flags = 0; - err = nla_parse_nested(xdp, IFLA_XDP_MAX, tb[IFLA_XDP], - ifla_xdp_policy, NULL); + err = nla_parse_nested_deprecated(xdp, IFLA_XDP_MAX, + tb[IFLA_XDP], + ifla_xdp_policy, NULL); if (err < 0) goto errout; @@ -2716,8 +2723,8 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr *tb[IFLA_MAX+1]; char ifname[IFNAMSIZ]; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, - extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX, + ifla_policy, extack); if (err < 0) goto errout; @@ -2813,7 +2820,8 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, int err; int netnsid = -1; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX, + ifla_policy, extack); if (err < 0) return err; @@ -2990,7 +2998,8 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, #ifdef CONFIG_MODULES replay: #endif - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX, + ifla_policy, extack); if (err < 0) return err; @@ -3024,9 +3033,9 @@ replay: return err; if (tb[IFLA_LINKINFO]) { - err = nla_parse_nested(linkinfo, IFLA_INFO_MAX, - tb[IFLA_LINKINFO], ifla_info_policy, - NULL); + err = nla_parse_nested_deprecated(linkinfo, IFLA_INFO_MAX, + tb[IFLA_LINKINFO], + ifla_info_policy, NULL); if (err < 0) return err; } else @@ -3046,9 +3055,9 @@ replay: return -EINVAL; if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) { - err = nla_parse_nested(attr, ops->maxtype, - linkinfo[IFLA_INFO_DATA], - ops->policy, extack); + err = nla_parse_nested_deprecated(attr, ops->maxtype, + linkinfo[IFLA_INFO_DATA], + ops->policy, extack); if (err < 0) return err; 
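[Editor's note] The _deprecated/_noflag churn throughout this file follows one rule (summarizing the include/net/netlink.h side of this series, which is not part of this hunk): old identifiers keep their old liberal behaviour under new names, while the short names become strict. In particular, nla_nest_start_noflag() is the old nla_nest_start(), and nla_nest_start() now sets NLA_F_NESTED itself, which is why the IFLA_PROTINFO caller below can drop the explicit flag. A hypothetical fill callback using the legacy layout:

#include <net/netlink.h>
#include <linux/if_link.h>

#define FOO_ATTR_VALUE 1	/* placeholder attribute type */

static int foo_fill_info(struct sk_buff *skb)
{
	struct nlattr *nest;

	/* Legacy nests stay un-flagged to preserve the on-the-wire
	 * format old userspace expects.
	 */
	nest = nla_nest_start_noflag(skb, IFLA_INFO_DATA);
	if (!nest)
		return -EMSGSIZE;

	if (nla_put_u32(skb, FOO_ATTR_VALUE, 42))
		goto nla_put_failure;

	nla_nest_end(skb, nest);
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}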
data = attr; @@ -3067,9 +3076,11 @@ replay: if (m_ops->slave_maxtype && linkinfo[IFLA_INFO_SLAVE_DATA]) { - err = nla_parse_nested(slave_attr, m_ops->slave_maxtype, - linkinfo[IFLA_INFO_SLAVE_DATA], - m_ops->slave_policy, extack); + err = nla_parse_nested_deprecated(slave_attr, + m_ops->slave_maxtype, + linkinfo[IFLA_INFO_SLAVE_DATA], + m_ops->slave_policy, + extack); if (err < 0) return err; slave_data = slave_attr; @@ -3250,8 +3261,8 @@ static int rtnl_valid_getlink_req(struct sk_buff *skb, } if (!netlink_strict_get_check(skb)) - return nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, - extack); + return nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX, + ifla_policy, extack); ifm = nlmsg_data(nlh); if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags || @@ -3260,8 +3271,8 @@ static int rtnl_valid_getlink_req(struct sk_buff *skb, return -EINVAL; } - err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, - extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFLA_MAX, + ifla_policy, extack); if (err) return err; @@ -3366,7 +3377,7 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ? sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg); - if (nlmsg_parse(nlh, hdrlen, tb, IFLA_MAX, ifla_policy, NULL) >= 0) { + if (nlmsg_parse_deprecated(nlh, hdrlen, tb, IFLA_MAX, ifla_policy, NULL) >= 0) { if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); } @@ -3569,7 +3580,7 @@ errout: rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); } -/** +/* * ndo_dflt_fdb_add - default netdevice operation to add an FDB entry */ int ndo_dflt_fdb_add(struct ndmsg *ndm, @@ -3639,7 +3650,8 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, u16 vid; int err; - err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, + extack); if (err < 0) return err; @@ -3708,7 +3720,7 @@ out: return err; } -/** +/* * ndo_dflt_fdb_del - default netdevice operation to delete an FDB entry */ int ndo_dflt_fdb_del(struct ndmsg *ndm, @@ -3749,7 +3761,8 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, + extack); if (err < 0) return err; @@ -3847,8 +3860,11 @@ skip: /** * ndo_dflt_fdb_dump - default netdevice operation to dump an FDB table. - * @nlh: netlink message header + * @skb: socket buffer to store message in + * @cb: netlink callback * @dev: netdevice + * @filter_dev: ignored + * @idx: the number of FDB table entries dumped is added to *@idx * * Default netdevice operation to dump the existing unicast address list. * Returns number of addresses from list put in skb. 
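The companion rename on the fill side is nla_nest_start() to nla_nest_start_noflag(): the unsuffixed helper now ORs NLA_F_NESTED into the attribute type for the caller (which is why the IFLA_PROTINFO | NLA_F_NESTED hunk below drops the explicit flag), while the _noflag variant preserves the old wire format. A sketch of the legacy-format idiom, modelled on ndo_dflt_bridge_getlink:

	#include <linux/if_bridge.h>
	#include <net/netlink.h>

	static int demo_fill_af_spec(struct sk_buff *skb)
	{
		struct nlattr *nest;

		/* keep NLA_F_NESTED clear on the wire, as before */
		nest = nla_nest_start_noflag(skb, IFLA_AF_SPEC);
		if (!nest)
			return -EMSGSIZE;

		if (nla_put_u16(skb, IFLA_BRIDGE_FLAGS, BRIDGE_FLAGS_SELF)) {
			nla_nest_cancel(skb, nest);
			return -EMSGSIZE;
		}

		nla_nest_end(skb, nest);
		return 0;
	}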
@@ -3895,8 +3911,8 @@ static int valid_fdb_dump_strict(const struct nlmsghdr *nlh, return -EINVAL; } - err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX, - NULL, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb, + NDA_MAX, NULL, extack); if (err < 0) return err; @@ -3948,8 +3964,9 @@ static int valid_fdb_dump_legacy(const struct nlmsghdr *nlh, nla_attr_size(sizeof(u32)))) { struct ifinfomsg *ifm; - err = nlmsg_parse(nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, - ifla_policy, extack); + err = nlmsg_parse_deprecated(nlh, sizeof(struct ifinfomsg), + tb, IFLA_MAX, ifla_policy, + extack); if (err < 0) { return -EINVAL; } else if (err == 0) { @@ -4088,8 +4105,8 @@ static int valid_fdb_get_strict(const struct nlmsghdr *nlh, return -EINVAL; } - err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX, - nda_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb, + NDA_MAX, nda_policy, extack); if (err < 0) return err; @@ -4270,7 +4287,7 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev)))) goto nla_put_failure; - br_afspec = nla_nest_start(skb, IFLA_AF_SPEC); + br_afspec = nla_nest_start_noflag(skb, IFLA_AF_SPEC); if (!br_afspec) goto nla_put_failure; @@ -4294,7 +4311,7 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, } nla_nest_end(skb, br_afspec); - protinfo = nla_nest_start(skb, IFLA_PROTINFO | NLA_F_NESTED); + protinfo = nla_nest_start(skb, IFLA_PROTINFO); if (!protinfo) goto nla_put_failure; @@ -4351,11 +4368,14 @@ static int valid_bridge_getlink_req(const struct nlmsghdr *nlh, return -EINVAL; } - err = nlmsg_parse_strict(nlh, sizeof(struct ifinfomsg), tb, - IFLA_MAX, ifla_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, + sizeof(struct ifinfomsg), + tb, IFLA_MAX, ifla_policy, + extack); } else { - err = nlmsg_parse(nlh, sizeof(struct ifinfomsg), tb, - IFLA_MAX, ifla_policy, extack); + err = nlmsg_parse_deprecated(nlh, sizeof(struct ifinfomsg), + tb, IFLA_MAX, ifla_policy, + extack); } if (err < 0) return err; @@ -4773,8 +4793,8 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, if (ops && ops->fill_linkxstats) { *idxattr = IFLA_STATS_LINK_XSTATS; - attr = nla_nest_start(skb, - IFLA_STATS_LINK_XSTATS); + attr = nla_nest_start_noflag(skb, + IFLA_STATS_LINK_XSTATS); if (!attr) goto nla_put_failure; @@ -4796,8 +4816,8 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, ops = master->rtnl_link_ops; if (ops && ops->fill_linkxstats) { *idxattr = IFLA_STATS_LINK_XSTATS_SLAVE; - attr = nla_nest_start(skb, - IFLA_STATS_LINK_XSTATS_SLAVE); + attr = nla_nest_start_noflag(skb, + IFLA_STATS_LINK_XSTATS_SLAVE); if (!attr) goto nla_put_failure; @@ -4812,7 +4832,8 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, *idxattr)) { *idxattr = IFLA_STATS_LINK_OFFLOAD_XSTATS; - attr = nla_nest_start(skb, IFLA_STATS_LINK_OFFLOAD_XSTATS); + attr = nla_nest_start_noflag(skb, + IFLA_STATS_LINK_OFFLOAD_XSTATS); if (!attr) goto nla_put_failure; @@ -4831,7 +4852,7 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, struct rtnl_af_ops *af_ops; *idxattr = IFLA_STATS_AF_SPEC; - attr = nla_nest_start(skb, IFLA_STATS_AF_SPEC); + attr = nla_nest_start_noflag(skb, IFLA_STATS_AF_SPEC); if (!attr) goto nla_put_failure; @@ -4841,7 +4862,8 @@ static int rtnl_fill_statsinfo(struct 
sk_buff *skb, struct net_device *dev, struct nlattr *af; int err; - af = nla_nest_start(skb, af_ops->family); + af = nla_nest_start_noflag(skb, + af_ops->family); if (!af) { rcu_read_unlock(); goto nla_put_failure; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 40796b8bf820..e89be6282693 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -77,6 +77,8 @@ #include <linux/capability.h> #include <linux/user_namespace.h> +#include "datagram.h" + struct kmem_cache *skbuff_head_cache __ro_after_init; static struct kmem_cache *skbuff_fclone_cache __ro_after_init; #ifdef CONFIG_SKB_EXTENSIONS @@ -256,6 +258,33 @@ nodata: } EXPORT_SYMBOL(__alloc_skb); +/* Caller must provide SKB that is memset cleared */ +static struct sk_buff *__build_skb_around(struct sk_buff *skb, + void *data, unsigned int frag_size) +{ + struct skb_shared_info *shinfo; + unsigned int size = frag_size ? : ksize(data); + + size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + + /* Assumes caller memset cleared SKB */ + skb->truesize = SKB_TRUESIZE(size); + refcount_set(&skb->users, 1); + skb->head = data; + skb->data = data; + skb_reset_tail_pointer(skb); + skb->end = skb->tail + size; + skb->mac_header = (typeof(skb->mac_header))~0U; + skb->transport_header = (typeof(skb->transport_header))~0U; + + /* make sure we initialize shinfo sequentially */ + shinfo = skb_shinfo(skb); + memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); + atomic_set(&shinfo->dataref, 1); + + return skb; +} + /** * __build_skb - build a network buffer * @data: data buffer provided by caller @@ -277,32 +306,15 @@ EXPORT_SYMBOL(__alloc_skb); */ struct sk_buff *__build_skb(void *data, unsigned int frag_size) { - struct skb_shared_info *shinfo; struct sk_buff *skb; - unsigned int size = frag_size ? 
: ksize(data);
 
 	skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
-	if (!skb)
+	if (unlikely(!skb))
 		return NULL;
 
-	size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-
 	memset(skb, 0, offsetof(struct sk_buff, tail));
-	skb->truesize = SKB_TRUESIZE(size);
-	refcount_set(&skb->users, 1);
-	skb->head = data;
-	skb->data = data;
-	skb_reset_tail_pointer(skb);
-	skb->end = skb->tail + size;
-	skb->mac_header = (typeof(skb->mac_header))~0U;
-	skb->transport_header = (typeof(skb->transport_header))~0U;
-
-	/* make sure we initialize shinfo sequentially */
-	shinfo = skb_shinfo(skb);
-	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
-	atomic_set(&shinfo->dataref, 1);
-
-	return skb;
+	return __build_skb_around(skb, data, frag_size);
 }
 
 /* build_skb() is wrapper over __build_skb(), that specifically
@@ -323,6 +335,29 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
 }
 EXPORT_SYMBOL(build_skb);
 
+/**
+ * build_skb_around - build a network buffer around provided skb
+ * @skb: sk_buff provide by caller, must be memset cleared
+ * @data: data buffer provided by caller
+ * @frag_size: size of data, or 0 if head was kmalloced
+ */
+struct sk_buff *build_skb_around(struct sk_buff *skb,
+				 void *data, unsigned int frag_size)
+{
+	if (unlikely(!skb))
+		return NULL;
+
+	skb = __build_skb_around(skb, data, frag_size);
+
+	if (skb && frag_size) {
+		skb->head_frag = 1;
+		if (page_is_pfmemalloc(virt_to_head_page(data)))
+			skb->pfmemalloc = 1;
+	}
+	return skb;
+}
+EXPORT_SYMBOL(build_skb_around);
+
 #define NAPI_SKB_CACHE_SIZE	64
 
 struct napi_alloc_cache {
@@ -1105,9 +1140,6 @@ void sock_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref)
 }
 EXPORT_SYMBOL_GPL(sock_zerocopy_put_abort);
 
-extern int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
-				   struct iov_iter *from, size_t length);
-
 int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len)
 {
 	return __zerocopy_sg_from_iter(skb->sk, skb, &msg->msg_iter, len);
diff --git a/net/core/sock.c b/net/core/sock.c
index 067878a1e4c5..75b1c950b49f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -137,6 +137,7 @@
 
 #include <linux/filter.h>
 #include <net/sock_reuseport.h>
+#include <net/bpf_sk_storage.h>
 
 #include <trace/events/sock.h>
 
@@ -1709,6 +1710,10 @@ static void __sk_destruct(struct rcu_head *head)
 
 	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
 
+#ifdef CONFIG_BPF_SYSCALL
+	bpf_sk_storage_free(sk);
+#endif
+
 	if (atomic_read(&sk->sk_omem_alloc))
 		pr_debug("%s: optmem leakage (%d bytes) detected\n",
 			 __func__, atomic_read(&sk->sk_omem_alloc));
@@ -2977,39 +2982,44 @@ bool lock_sock_fast(struct sock *sk)
 }
 EXPORT_SYMBOL(lock_sock_fast);
 
-int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
+int sock_gettstamp(struct socket *sock, void __user *userstamp,
+		   bool timeval, bool time32)
 {
-	struct timeval tv;
+	struct sock *sk = sock->sk;
+	struct timespec64 ts;
 
 	sock_enable_timestamp(sk, SOCK_TIMESTAMP);
-	tv = ktime_to_timeval(sock_read_timestamp(sk));
-	if (tv.tv_sec == -1)
+	ts = ktime_to_timespec64(sock_read_timestamp(sk));
+	if (ts.tv_sec == -1)
 		return -ENOENT;
-	if (tv.tv_sec == 0) {
+	if (ts.tv_sec == 0) {
 		ktime_t kt = ktime_get_real();
 
-		sock_write_timestamp(sk, kt);
-		tv = ktime_to_timeval(kt);
+		sock_write_timestamp(sk, kt);
+		ts = ktime_to_timespec64(kt);
 	}
-	return copy_to_user(userstamp, &tv, sizeof(tv)) ?
-EFAULT : 0; -} -EXPORT_SYMBOL(sock_get_timestamp); -int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp) -{ - struct timespec ts; + if (timeval) + ts.tv_nsec /= 1000; - sock_enable_timestamp(sk, SOCK_TIMESTAMP); - ts = ktime_to_timespec(sock_read_timestamp(sk)); - if (ts.tv_sec == -1) - return -ENOENT; - if (ts.tv_sec == 0) { - ktime_t kt = ktime_get_real(); - sock_write_timestamp(sk, kt); - ts = ktime_to_timespec(sk->sk_stamp); +#ifdef CONFIG_COMPAT_32BIT_TIME + if (time32) + return put_old_timespec32(&ts, userstamp); +#endif +#ifdef CONFIG_SPARC64 + /* beware of padding in sparc64 timeval */ + if (timeval && !in_compat_syscall()) { + struct __kernel_old_timeval __user tv = { + .tv_sec = ts.tv_sec, + .tv_usec = ts.tv_nsec, + }; + if (copy_to_user(userstamp, &tv, sizeof(tv))) + return -EFAULT; + return 0; } - return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0; +#endif + return put_timespec64(&ts, userstamp); } -EXPORT_SYMBOL(sock_get_timestampns); +EXPORT_SYMBOL(sock_gettstamp); void sock_enable_timestamp(struct sock *sk, int flag) { diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index d8fe3e549373..dc4aefdf2a08 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -144,6 +144,8 @@ static void reuseport_free_rcu(struct rcu_head *head) * reuseport_add_sock - Add a socket to the reuseport group of another. * @sk: New socket to add to the group. * @sk2: Socket belonging to the existing reuseport group. + * @bind_inany: Whether or not the group is bound to a local INANY address. + * * May return ENOMEM and not add socket to group under memory pressure. */ int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 84bf2861f45f..1a2685694abd 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -95,12 +95,12 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write, if (sock_table != orig_sock_table) { rcu_assign_pointer(rps_sock_flow_table, sock_table); if (sock_table) { - static_key_slow_inc(&rps_needed); - static_key_slow_inc(&rfs_needed); + static_branch_inc(&rps_needed); + static_branch_inc(&rfs_needed); } if (orig_sock_table) { - static_key_slow_dec(&rps_needed); - static_key_slow_dec(&rfs_needed); + static_branch_dec(&rps_needed); + static_branch_dec(&rfs_needed); synchronize_rcu(); vfree(orig_sock_table); } diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index a556cd708885..ceff9d22deea 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -241,12 +241,13 @@ static int dcbnl_getpfccfg(struct net_device *netdev, struct nlmsghdr *nlh, if (!netdev->dcbnl_ops->getpfccfg) return -EOPNOTSUPP; - ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX, - tb[DCB_ATTR_PFC_CFG], dcbnl_pfc_up_nest, NULL); + ret = nla_parse_nested_deprecated(data, DCB_PFC_UP_ATTR_MAX, + tb[DCB_ATTR_PFC_CFG], + dcbnl_pfc_up_nest, NULL); if (ret) return ret; - nest = nla_nest_start(skb, DCB_ATTR_PFC_CFG); + nest = nla_nest_start_noflag(skb, DCB_ATTR_PFC_CFG); if (!nest) return -EMSGSIZE; @@ -299,12 +300,13 @@ static int dcbnl_getcap(struct net_device *netdev, struct nlmsghdr *nlh, if (!netdev->dcbnl_ops->getcap) return -EOPNOTSUPP; - ret = nla_parse_nested(data, DCB_CAP_ATTR_MAX, tb[DCB_ATTR_CAP], - dcbnl_cap_nest, NULL); + ret = nla_parse_nested_deprecated(data, DCB_CAP_ATTR_MAX, + tb[DCB_ATTR_CAP], dcbnl_cap_nest, + NULL); if (ret) return ret; - nest = nla_nest_start(skb, DCB_ATTR_CAP); + nest = nla_nest_start_noflag(skb, 
DCB_ATTR_CAP); if (!nest) return -EMSGSIZE; @@ -343,12 +345,13 @@ static int dcbnl_getnumtcs(struct net_device *netdev, struct nlmsghdr *nlh, if (!netdev->dcbnl_ops->getnumtcs) return -EOPNOTSUPP; - ret = nla_parse_nested(data, DCB_NUMTCS_ATTR_MAX, tb[DCB_ATTR_NUMTCS], - dcbnl_numtcs_nest, NULL); + ret = nla_parse_nested_deprecated(data, DCB_NUMTCS_ATTR_MAX, + tb[DCB_ATTR_NUMTCS], + dcbnl_numtcs_nest, NULL); if (ret) return ret; - nest = nla_nest_start(skb, DCB_ATTR_NUMTCS); + nest = nla_nest_start_noflag(skb, DCB_ATTR_NUMTCS); if (!nest) return -EMSGSIZE; @@ -388,8 +391,9 @@ static int dcbnl_setnumtcs(struct net_device *netdev, struct nlmsghdr *nlh, if (!netdev->dcbnl_ops->setnumtcs) return -EOPNOTSUPP; - ret = nla_parse_nested(data, DCB_NUMTCS_ATTR_MAX, tb[DCB_ATTR_NUMTCS], - dcbnl_numtcs_nest, NULL); + ret = nla_parse_nested_deprecated(data, DCB_NUMTCS_ATTR_MAX, + tb[DCB_ATTR_NUMTCS], + dcbnl_numtcs_nest, NULL); if (ret) return ret; @@ -447,8 +451,9 @@ static int dcbnl_getapp(struct net_device *netdev, struct nlmsghdr *nlh, if (!tb[DCB_ATTR_APP]) return -EINVAL; - ret = nla_parse_nested(app_tb, DCB_APP_ATTR_MAX, tb[DCB_ATTR_APP], - dcbnl_app_nest, NULL); + ret = nla_parse_nested_deprecated(app_tb, DCB_APP_ATTR_MAX, + tb[DCB_ATTR_APP], dcbnl_app_nest, + NULL); if (ret) return ret; @@ -479,7 +484,7 @@ static int dcbnl_getapp(struct net_device *netdev, struct nlmsghdr *nlh, up = dcb_getapp(netdev, &app); } - app_nest = nla_nest_start(skb, DCB_ATTR_APP); + app_nest = nla_nest_start_noflag(skb, DCB_ATTR_APP); if (!app_nest) return -EMSGSIZE; @@ -515,8 +520,9 @@ static int dcbnl_setapp(struct net_device *netdev, struct nlmsghdr *nlh, if (!tb[DCB_ATTR_APP]) return -EINVAL; - ret = nla_parse_nested(app_tb, DCB_APP_ATTR_MAX, tb[DCB_ATTR_APP], - dcbnl_app_nest, NULL); + ret = nla_parse_nested_deprecated(app_tb, DCB_APP_ATTR_MAX, + tb[DCB_ATTR_APP], dcbnl_app_nest, + NULL); if (ret) return ret; @@ -573,12 +579,13 @@ static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlmsghdr *nlh, !netdev->dcbnl_ops->getpgbwgcfgrx) return -EOPNOTSUPP; - ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX, tb[DCB_ATTR_PG_CFG], - dcbnl_pg_nest, NULL); + ret = nla_parse_nested_deprecated(pg_tb, DCB_PG_ATTR_MAX, + tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest, + NULL); if (ret) return ret; - pg_nest = nla_nest_start(skb, DCB_ATTR_PG_CFG); + pg_nest = nla_nest_start_noflag(skb, DCB_ATTR_PG_CFG); if (!pg_nest) return -EMSGSIZE; @@ -593,12 +600,13 @@ static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlmsghdr *nlh, data = pg_tb[DCB_PG_ATTR_TC_ALL]; else data = pg_tb[i]; - ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX, data, - dcbnl_tc_param_nest, NULL); + ret = nla_parse_nested_deprecated(param_tb, + DCB_TC_ATTR_PARAM_MAX, data, + dcbnl_tc_param_nest, NULL); if (ret) goto err_pg; - param_nest = nla_nest_start(skb, i); + param_nest = nla_nest_start_noflag(skb, i); if (!param_nest) goto err_pg; @@ -730,8 +738,9 @@ static int dcbnl_setpfccfg(struct net_device *netdev, struct nlmsghdr *nlh, if (!netdev->dcbnl_ops->setpfccfg) return -EOPNOTSUPP; - ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX, - tb[DCB_ATTR_PFC_CFG], dcbnl_pfc_up_nest, NULL); + ret = nla_parse_nested_deprecated(data, DCB_PFC_UP_ATTR_MAX, + tb[DCB_ATTR_PFC_CFG], + dcbnl_pfc_up_nest, NULL); if (ret) return ret; @@ -786,8 +795,9 @@ static int __dcbnl_pg_setcfg(struct net_device *netdev, struct nlmsghdr *nlh, !netdev->dcbnl_ops->setpgbwgcfgrx) return -EOPNOTSUPP; - ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX, tb[DCB_ATTR_PG_CFG], - 
dcbnl_pg_nest, NULL); + ret = nla_parse_nested_deprecated(pg_tb, DCB_PG_ATTR_MAX, + tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest, + NULL); if (ret) return ret; @@ -795,8 +805,10 @@ static int __dcbnl_pg_setcfg(struct net_device *netdev, struct nlmsghdr *nlh, if (!pg_tb[i]) continue; - ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX, - pg_tb[i], dcbnl_tc_param_nest, NULL); + ret = nla_parse_nested_deprecated(param_tb, + DCB_TC_ATTR_PARAM_MAX, + pg_tb[i], + dcbnl_tc_param_nest, NULL); if (ret) return ret; @@ -884,12 +896,13 @@ static int dcbnl_bcn_getcfg(struct net_device *netdev, struct nlmsghdr *nlh, !netdev->dcbnl_ops->getbcncfg) return -EOPNOTSUPP; - ret = nla_parse_nested(bcn_tb, DCB_BCN_ATTR_MAX, tb[DCB_ATTR_BCN], - dcbnl_bcn_nest, NULL); + ret = nla_parse_nested_deprecated(bcn_tb, DCB_BCN_ATTR_MAX, + tb[DCB_ATTR_BCN], dcbnl_bcn_nest, + NULL); if (ret) return ret; - bcn_nest = nla_nest_start(skb, DCB_ATTR_BCN); + bcn_nest = nla_nest_start_noflag(skb, DCB_ATTR_BCN); if (!bcn_nest) return -EMSGSIZE; @@ -943,8 +956,9 @@ static int dcbnl_bcn_setcfg(struct net_device *netdev, struct nlmsghdr *nlh, !netdev->dcbnl_ops->setbcnrp) return -EOPNOTSUPP; - ret = nla_parse_nested(data, DCB_BCN_ATTR_MAX, tb[DCB_ATTR_BCN], - dcbnl_pfc_up_nest, NULL); + ret = nla_parse_nested_deprecated(data, DCB_BCN_ATTR_MAX, + tb[DCB_ATTR_BCN], dcbnl_pfc_up_nest, + NULL); if (ret) return ret; @@ -1002,7 +1016,7 @@ static int dcbnl_build_peer_app(struct net_device *netdev, struct sk_buff* skb, */ err = -EMSGSIZE; - app = nla_nest_start(skb, app_nested_type); + app = nla_nest_start_noflag(skb, app_nested_type); if (!app) goto nla_put_failure; @@ -1036,7 +1050,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) if (nla_put_string(skb, DCB_ATTR_IFNAME, netdev->name)) return -EMSGSIZE; - ieee = nla_nest_start(skb, DCB_ATTR_IEEE); + ieee = nla_nest_start_noflag(skb, DCB_ATTR_IEEE); if (!ieee) return -EMSGSIZE; @@ -1106,7 +1120,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) return -EMSGSIZE; } - app = nla_nest_start(skb, DCB_ATTR_IEEE_APP_TABLE); + app = nla_nest_start_noflag(skb, DCB_ATTR_IEEE_APP_TABLE); if (!app) return -EMSGSIZE; @@ -1174,13 +1188,13 @@ static int dcbnl_cee_pg_fill(struct sk_buff *skb, struct net_device *dev, u8 pgid, up_map, prio, tc_pct; const struct dcbnl_rtnl_ops *ops = dev->dcbnl_ops; int i = dir ? 
DCB_ATTR_CEE_TX_PG : DCB_ATTR_CEE_RX_PG; - struct nlattr *pg = nla_nest_start(skb, i); + struct nlattr *pg = nla_nest_start_noflag(skb, i); if (!pg) return -EMSGSIZE; for (i = DCB_PG_ATTR_TC_0; i <= DCB_PG_ATTR_TC_7; i++) { - struct nlattr *tc_nest = nla_nest_start(skb, i); + struct nlattr *tc_nest = nla_nest_start_noflag(skb, i); if (!tc_nest) return -EMSGSIZE; @@ -1231,7 +1245,7 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev) if (nla_put_string(skb, DCB_ATTR_IFNAME, netdev->name)) goto nla_put_failure; - cee = nla_nest_start(skb, DCB_ATTR_CEE); + cee = nla_nest_start_noflag(skb, DCB_ATTR_CEE); if (!cee) goto nla_put_failure; @@ -1250,7 +1264,8 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev) /* local pfc */ if (ops->getpfccfg) { - struct nlattr *pfc_nest = nla_nest_start(skb, DCB_ATTR_CEE_PFC); + struct nlattr *pfc_nest = nla_nest_start_noflag(skb, + DCB_ATTR_CEE_PFC); if (!pfc_nest) goto nla_put_failure; @@ -1265,14 +1280,14 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev) /* local app */ spin_lock_bh(&dcb_lock); - app = nla_nest_start(skb, DCB_ATTR_CEE_APP_TABLE); + app = nla_nest_start_noflag(skb, DCB_ATTR_CEE_APP_TABLE); if (!app) goto dcb_unlock; list_for_each_entry(itr, &dcb_app_list, list) { if (itr->ifindex == netdev->ifindex) { - struct nlattr *app_nest = nla_nest_start(skb, - DCB_ATTR_APP); + struct nlattr *app_nest = nla_nest_start_noflag(skb, + DCB_ATTR_APP); if (!app_nest) goto dcb_unlock; @@ -1305,7 +1320,8 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev) /* features flags */ if (ops->getfeatcfg) { - struct nlattr *feat = nla_nest_start(skb, DCB_ATTR_CEE_FEAT); + struct nlattr *feat = nla_nest_start_noflag(skb, + DCB_ATTR_CEE_FEAT); if (!feat) goto nla_put_failure; @@ -1429,8 +1445,9 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh, if (!tb[DCB_ATTR_IEEE]) return -EINVAL; - err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX, tb[DCB_ATTR_IEEE], - dcbnl_ieee_policy, NULL); + err = nla_parse_nested_deprecated(ieee, DCB_ATTR_IEEE_MAX, + tb[DCB_ATTR_IEEE], + dcbnl_ieee_policy, NULL); if (err) return err; @@ -1529,8 +1546,9 @@ static int dcbnl_ieee_del(struct net_device *netdev, struct nlmsghdr *nlh, if (!tb[DCB_ATTR_IEEE]) return -EINVAL; - err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX, tb[DCB_ATTR_IEEE], - dcbnl_ieee_policy, NULL); + err = nla_parse_nested_deprecated(ieee, DCB_ATTR_IEEE_MAX, + tb[DCB_ATTR_IEEE], + dcbnl_ieee_policy, NULL); if (err) return err; @@ -1602,12 +1620,13 @@ static int dcbnl_getfeatcfg(struct net_device *netdev, struct nlmsghdr *nlh, if (!tb[DCB_ATTR_FEATCFG]) return -EINVAL; - ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX, - tb[DCB_ATTR_FEATCFG], dcbnl_featcfg_nest, NULL); + ret = nla_parse_nested_deprecated(data, DCB_FEATCFG_ATTR_MAX, + tb[DCB_ATTR_FEATCFG], + dcbnl_featcfg_nest, NULL); if (ret) return ret; - nest = nla_nest_start(skb, DCB_ATTR_FEATCFG); + nest = nla_nest_start_noflag(skb, DCB_ATTR_FEATCFG); if (!nest) return -EMSGSIZE; @@ -1646,8 +1665,9 @@ static int dcbnl_setfeatcfg(struct net_device *netdev, struct nlmsghdr *nlh, if (!tb[DCB_ATTR_FEATCFG]) return -EINVAL; - ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX, - tb[DCB_ATTR_FEATCFG], dcbnl_featcfg_nest, NULL); + ret = nla_parse_nested_deprecated(data, DCB_FEATCFG_ATTR_MAX, + tb[DCB_ATTR_FEATCFG], + dcbnl_featcfg_nest, NULL); if (ret) goto err; @@ -1736,8 +1756,8 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, if 
((nlh->nlmsg_type == RTM_SETDCB) && !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; - ret = nlmsg_parse(nlh, sizeof(*dcb), tb, DCB_ATTR_MAX, - dcbnl_rtnl_policy, extack); + ret = nlmsg_parse_deprecated(nlh, sizeof(*dcb), tb, DCB_ATTR_MAX, + dcbnl_rtnl_policy, extack); if (ret < 0) return ret; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 26a21d97b6b0..004535e4c070 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -991,6 +991,7 @@ static const struct proto_ops inet_dccp_ops = { /* FIXME: work on tcp_poll to rename it to inet_csk_poll */ .poll = dccp_poll, .ioctl = inet_ioctl, + .gettstamp = sock_gettstamp, /* FIXME: work on inet_listen to rename it to sock_common_listen */ .listen = inet_dccp_listen, .shutdown = inet_shutdown, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 57d84e9b7b6f..c4e4d1301062 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1075,6 +1075,7 @@ static const struct proto_ops inet6_dccp_ops = { .getname = inet6_getname, .poll = dccp_poll, .ioctl = inet6_ioctl, + .gettstamp = sock_gettstamp, .listen = inet_dccp_listen, .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index bdccc46a2921..c1fa4785c4c2 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -444,7 +444,7 @@ static void dn_destruct(struct sock *sk) skb_queue_purge(&scp->other_xmit_queue); skb_queue_purge(&scp->other_receive_queue); - dst_release(rcu_dereference_check(sk->sk_dst_cache, 1)); + dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1)); } static unsigned long dn_memory_pressure; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 0962f9201baa..cca7ae712995 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -583,8 +583,8 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, if (!net_eq(net, &init_net)) goto errout; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy, - extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, + dn_ifa_policy, extack); if (err < 0) goto errout; @@ -629,8 +629,8 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, if (!net_eq(net, &init_net)) return -EINVAL; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy, - extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, + dn_ifa_policy, extack); if (err < 0) return err; diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 6cd3737593a6..77fbf8e9df4b 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -42,7 +42,7 @@ #include <net/dn_fib.h> #include <net/dn_neigh.h> #include <net/dn_dev.h> -#include <net/nexthop.h> +#include <net/rtnh.h> #define RT_MIN_TABLE 1 @@ -517,8 +517,8 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, if (!net_eq(net, &init_net)) return -EINVAL; - err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy, - extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*r), attrs, RTA_MAX, + rtm_dn_policy, extack); if (err < 0) return err; @@ -544,8 +544,8 @@ static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, if (!net_eq(net, &init_net)) return -EINVAL; - err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy, - extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*r), attrs, RTA_MAX, + rtm_dn_policy, extack); if (err < 0) return err; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 950613ee7881..664584763c36 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1651,8 
+1651,8 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 	if (!net_eq(net, &init_net))
 		return -EINVAL;
 
-	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_dn_policy,
-			  extack);
+	err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
+				     rtm_dn_policy, extack);
 	if (err < 0)
 		return err;
 
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index f0710b5d037d..33fefb0aebca 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -348,7 +348,8 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 		struct rtnexthop *nhp;
 		struct nlattr *mp_head;
 
-		if (!(mp_head = nla_nest_start(skb, RTA_MULTIPATH)))
+		mp_head = nla_nest_start_noflag(skb, RTA_MULTIPATH);
+		if (!mp_head)
 			goto errout;
 
 		for_nexthops(fi) {
diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c
index 76338c38738a..19aa32fc1802 100644
--- a/net/dns_resolver/dns_query.c
+++ b/net/dns_resolver/dns_query.c
@@ -94,8 +94,6 @@ int dns_query(const char *type, const char *name, size_t namelen,
 		desclen += typelen + 1;
 	}
 
-	if (!namelen)
-		namelen = strnlen(name, 256);
 	if (namelen < 3 || namelen > 255)
 		return -EINVAL;
 	desclen += namelen + 1;
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index fab49132345f..cf855352a440 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -4,60 +4,117 @@ config HAVE_NET_DSA
 
 # Drivers must select NET_DSA and the appropriate tagging format
 
-config NET_DSA
+menuconfig NET_DSA
 	tristate "Distributed Switch Architecture"
 	depends on HAVE_NET_DSA
 	depends on BRIDGE || BRIDGE=n
 	select NET_SWITCHDEV
 	select PHYLINK
+	select NET_DEVLINK
 	---help---
 	  Say Y if you want to enable support for the hardware switches
 	  supported by the Distributed Switch Architecture.
 
 if NET_DSA
 
-config NET_DSA_LEGACY
-	bool "Support for older platform device and Device Tree registration"
-	default y
-	---help---
-	  Say Y if you want to enable support for the older platform device and
-	  deprecated Device Tree binding registration.
+# tagging formats
+config NET_DSA_TAG_8021Q
+	tristate "Tag driver for switches using custom 802.1Q VLAN headers"
+	select VLAN_8021Q
+	help
+	  Unlike the other tagging protocols, the 802.1Q config option simply
+	  provides helpers for other tagging implementations that might rely on
+	  VLAN in one way or another. It is not a complete solution.
 
-	  This feature is scheduled for removal in 4.17.
+	  Drivers which use these helpers should select this as dependency.
+
+config NET_DSA_TAG_BRCM_COMMON
+	tristate
+	default n
 
-# tagging formats
 config NET_DSA_TAG_BRCM
-	bool
+	tristate "Tag driver for Broadcom switches using in-frame headers"
+	select NET_DSA_TAG_BRCM_COMMON
+	help
+	  Say Y if you want to enable support for tagging frames for the
+	  Broadcom switches which place the tag after the MAC source address.
+
 config NET_DSA_TAG_BRCM_PREPEND
-	bool
+	tristate "Tag driver for Broadcom switches using prepended headers"
+	select NET_DSA_TAG_BRCM_COMMON
+	help
+	  Say Y if you want to enable support for tagging frames for the
+	  Broadcom switches which place the tag before the Ethernet header
+	  (prepended).
+
+config NET_DSA_TAG_GSWIP
+	tristate "Tag driver for Lantiq / Intel GSWIP switches"
+	help
+	  Say Y or M if you want to enable support for tagging frames for the
+	  Lantiq / Intel GSWIP switches.
 
 config NET_DSA_TAG_DSA
-	bool
+	tristate "Tag driver for Marvell switches using DSA headers"
+	help
+	  Say Y or M if you want to enable support for tagging frames for the
+	  Marvell switches which use DSA headers.
 
 config NET_DSA_TAG_EDSA
-	bool
+	tristate "Tag driver for Marvell switches using EtherType DSA headers"
+	help
+	  Say Y or M if you want to enable support for tagging frames for the
+	  Marvell switches which use EtherType DSA headers.
 
-config NET_DSA_TAG_GSWIP
-	bool
+config NET_DSA_TAG_MTK
+	tristate "Tag driver for Mediatek switches"
+	help
+	  Say Y or M if you want to enable support for tagging frames for
+	  Mediatek switches.
+
+config NET_DSA_TAG_KSZ_COMMON
+	tristate
+	default n
 
 config NET_DSA_TAG_KSZ
-	bool
+	tristate "Tag driver for Microchip 9893 family of switches"
+	select NET_DSA_TAG_KSZ_COMMON
+	help
+	  Say Y if you want to enable support for tagging frames for the
+	  Microchip 9893 family of switches.
 
 config NET_DSA_TAG_KSZ9477
-	bool
-	select NET_DSA_TAG_KSZ
+	tristate "Tag driver for Microchip 9477 family of switches"
+	select NET_DSA_TAG_KSZ_COMMON
+	help
+	  Say Y if you want to enable support for tagging frames for the
+	  Microchip 9477 family of switches.
 
-config NET_DSA_TAG_LAN9303
-	bool
+config NET_DSA_TAG_QCA
+	tristate "Tag driver for Qualcomm Atheros QCA8K switches"
+	help
+	  Say Y or M if you want to enable support for tagging frames for
+	  the Qualcomm Atheros QCA8K switches.
 
-config NET_DSA_TAG_MTK
-	bool
+config NET_DSA_TAG_LAN9303
+	tristate "Tag driver for SMSC/Microchip LAN9303 family of switches"
+	help
+	  Say Y or M if you want to enable support for tagging frames for the
+	  SMSC/Microchip LAN9303 family of switches.
+
+config NET_DSA_TAG_SJA1105
+	tristate "Tag driver for NXP SJA1105 switches"
+	select NET_DSA_TAG_8021Q
+	help
+	  Say Y or M if you want to enable support for tagging frames with the
+	  NXP SJA1105 switch family. Both the native tagging protocol (which
+	  is only for link-local traffic) as well as non-native tagging (based
+	  on a custom 802.1Q VLAN header) are available.
 
 config NET_DSA_TAG_TRAILER
-	bool
-
-config NET_DSA_TAG_QCA
-	bool
+	tristate "Tag driver for switches using a trailer tag"
+	help
+	  Say Y or M if you want to enable support for tagging frames with a
+	  trailer tag, e.g. the Marvell 88E6060.
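With every tagging format now tristate, a tagger built as a module carries its own registration boilerplate instead of being listed in a static array. A sketch of what such a module ends with, modelled on the reworked tag_trailer.c (the function bodies are reduced to no-ops here):

	#include <linux/module.h>
	#include <net/dsa.h>

	static struct sk_buff *demo_xmit(struct sk_buff *skb, struct net_device *dev)
	{
		return skb;	/* a real tagger would append its tag here */
	}

	static struct sk_buff *demo_rcv(struct sk_buff *skb, struct net_device *dev,
					struct packet_type *pt)
	{
		return skb;	/* ...and strip/parse it here */
	}

	static const struct dsa_device_ops demo_netdev_ops = {
		.name	= "trailer",
		.proto	= DSA_TAG_PROTO_TRAILER,
		.xmit	= demo_xmit,
		.rcv	= demo_rcv,
		.overhead = 4,
	};

	MODULE_LICENSE("GPL");
	MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_TRAILER);

	module_dsa_tag_driver(demo_netdev_ops);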
endif diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 6e721f7a2947..c342f54715ba 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -2,16 +2,16 @@ # the core obj-$(CONFIG_NET_DSA) += dsa_core.o dsa_core-y += dsa.o dsa2.o master.o port.o slave.o switch.o -dsa_core-$(CONFIG_NET_DSA_LEGACY) += legacy.o # tagging formats -dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o -dsa_core-$(CONFIG_NET_DSA_TAG_BRCM_PREPEND) += tag_brcm.o -dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o -dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o -dsa_core-$(CONFIG_NET_DSA_TAG_GSWIP) += tag_gswip.o -dsa_core-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o -dsa_core-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o -dsa_core-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o -dsa_core-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o -dsa_core-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o +obj-$(CONFIG_NET_DSA_TAG_8021Q) += tag_8021q.o +obj-$(CONFIG_NET_DSA_TAG_BRCM_COMMON) += tag_brcm.o +obj-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o +obj-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o +obj-$(CONFIG_NET_DSA_TAG_GSWIP) += tag_gswip.o +obj-$(CONFIG_NET_DSA_TAG_KSZ_COMMON) += tag_ksz.o +obj-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o +obj-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o +obj-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o +obj-$(CONFIG_NET_DSA_TAG_SJA1105) += tag_sja1105.o +obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 36de4f2a3366..1fc782fab393 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -27,6 +27,9 @@ #include "dsa_priv.h" +static LIST_HEAD(dsa_tag_drivers_list); +static DEFINE_MUTEX(dsa_tag_drivers_lock); + static struct sk_buff *dsa_slave_notag_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -35,106 +38,103 @@ static struct sk_buff *dsa_slave_notag_xmit(struct sk_buff *skb, } static const struct dsa_device_ops none_ops = { + .name = "none", + .proto = DSA_TAG_PROTO_NONE, .xmit = dsa_slave_notag_xmit, .rcv = NULL, }; -const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = { -#ifdef CONFIG_NET_DSA_TAG_BRCM - [DSA_TAG_PROTO_BRCM] = &brcm_netdev_ops, -#endif -#ifdef CONFIG_NET_DSA_TAG_BRCM_PREPEND - [DSA_TAG_PROTO_BRCM_PREPEND] = &brcm_prepend_netdev_ops, -#endif -#ifdef CONFIG_NET_DSA_TAG_DSA - [DSA_TAG_PROTO_DSA] = &dsa_netdev_ops, -#endif -#ifdef CONFIG_NET_DSA_TAG_EDSA - [DSA_TAG_PROTO_EDSA] = &edsa_netdev_ops, -#endif -#ifdef CONFIG_NET_DSA_TAG_GSWIP - [DSA_TAG_PROTO_GSWIP] = &gswip_netdev_ops, -#endif -#ifdef CONFIG_NET_DSA_TAG_KSZ9477 - [DSA_TAG_PROTO_KSZ9477] = &ksz9477_netdev_ops, - [DSA_TAG_PROTO_KSZ9893] = &ksz9893_netdev_ops, -#endif -#ifdef CONFIG_NET_DSA_TAG_LAN9303 - [DSA_TAG_PROTO_LAN9303] = &lan9303_netdev_ops, -#endif -#ifdef CONFIG_NET_DSA_TAG_MTK - [DSA_TAG_PROTO_MTK] = &mtk_netdev_ops, -#endif -#ifdef CONFIG_NET_DSA_TAG_QCA - [DSA_TAG_PROTO_QCA] = &qca_netdev_ops, -#endif -#ifdef CONFIG_NET_DSA_TAG_TRAILER - [DSA_TAG_PROTO_TRAILER] = &trailer_netdev_ops, -#endif - [DSA_TAG_PROTO_NONE] = &none_ops, -}; +DSA_TAG_DRIVER(none_ops); -const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops) +static void dsa_tag_driver_register(struct dsa_tag_driver *dsa_tag_driver, + struct module *owner) +{ + dsa_tag_driver->owner = owner; + + mutex_lock(&dsa_tag_drivers_lock); + list_add_tail(&dsa_tag_driver->list, &dsa_tag_drivers_list); + mutex_unlock(&dsa_tag_drivers_lock); +} + +void dsa_tag_drivers_register(struct dsa_tag_driver *dsa_tag_driver_array[], + unsigned int count, struct module *owner) { - const char *protocol_name[DSA_TAG_LAST] = { -#ifdef 
CONFIG_NET_DSA_TAG_BRCM - [DSA_TAG_PROTO_BRCM] = "brcm", -#endif -#ifdef CONFIG_NET_DSA_TAG_BRCM_PREPEND - [DSA_TAG_PROTO_BRCM_PREPEND] = "brcm-prepend", -#endif -#ifdef CONFIG_NET_DSA_TAG_DSA - [DSA_TAG_PROTO_DSA] = "dsa", -#endif -#ifdef CONFIG_NET_DSA_TAG_EDSA - [DSA_TAG_PROTO_EDSA] = "edsa", -#endif -#ifdef CONFIG_NET_DSA_TAG_GSWIP - [DSA_TAG_PROTO_GSWIP] = "gswip", -#endif -#ifdef CONFIG_NET_DSA_TAG_KSZ9477 - [DSA_TAG_PROTO_KSZ9477] = "ksz9477", - [DSA_TAG_PROTO_KSZ9893] = "ksz9893", -#endif -#ifdef CONFIG_NET_DSA_TAG_LAN9303 - [DSA_TAG_PROTO_LAN9303] = "lan9303", -#endif -#ifdef CONFIG_NET_DSA_TAG_MTK - [DSA_TAG_PROTO_MTK] = "mtk", -#endif -#ifdef CONFIG_NET_DSA_TAG_QCA - [DSA_TAG_PROTO_QCA] = "qca", -#endif -#ifdef CONFIG_NET_DSA_TAG_TRAILER - [DSA_TAG_PROTO_TRAILER] = "trailer", -#endif - [DSA_TAG_PROTO_NONE] = "none", - }; unsigned int i; - BUILD_BUG_ON(ARRAY_SIZE(protocol_name) != DSA_TAG_LAST); + for (i = 0; i < count; i++) + dsa_tag_driver_register(dsa_tag_driver_array[i], owner); +} - for (i = 0; i < ARRAY_SIZE(dsa_device_ops); i++) - if (ops == dsa_device_ops[i]) - return protocol_name[i]; +static void dsa_tag_driver_unregister(struct dsa_tag_driver *dsa_tag_driver) +{ + mutex_lock(&dsa_tag_drivers_lock); + list_del(&dsa_tag_driver->list); + mutex_unlock(&dsa_tag_drivers_lock); +} +EXPORT_SYMBOL_GPL(dsa_tag_drivers_register); - return protocol_name[DSA_TAG_PROTO_NONE]; +void dsa_tag_drivers_unregister(struct dsa_tag_driver *dsa_tag_driver_array[], + unsigned int count) +{ + unsigned int i; + + for (i = 0; i < count; i++) + dsa_tag_driver_unregister(dsa_tag_driver_array[i]); +} +EXPORT_SYMBOL_GPL(dsa_tag_drivers_unregister); + +const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops) +{ + return ops->name; }; -const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol) +const struct dsa_device_ops *dsa_tag_driver_get(int tag_protocol) { + struct dsa_tag_driver *dsa_tag_driver; const struct dsa_device_ops *ops; + char module_name[128]; + bool found = false; + + snprintf(module_name, 127, "%s%d", DSA_TAG_DRIVER_ALIAS, + tag_protocol); + + request_module(module_name); + + mutex_lock(&dsa_tag_drivers_lock); + list_for_each_entry(dsa_tag_driver, &dsa_tag_drivers_list, list) { + ops = dsa_tag_driver->ops; + if (ops->proto == tag_protocol) { + found = true; + break; + } + } - if (tag_protocol >= DSA_TAG_LAST) - return ERR_PTR(-EINVAL); - ops = dsa_device_ops[tag_protocol]; + if (found) { + if (!try_module_get(dsa_tag_driver->owner)) + ops = ERR_PTR(-ENOPROTOOPT); + } else { + ops = ERR_PTR(-ENOPROTOOPT); + } - if (!ops) - return ERR_PTR(-ENOPROTOOPT); + mutex_unlock(&dsa_tag_drivers_lock); return ops; } +void dsa_tag_driver_put(const struct dsa_device_ops *ops) +{ + struct dsa_tag_driver *dsa_tag_driver; + + mutex_lock(&dsa_tag_drivers_lock); + list_for_each_entry(dsa_tag_driver, &dsa_tag_drivers_list, list) { + if (dsa_tag_driver->ops == ops) { + module_put(dsa_tag_driver->owner); + break; + } + } + mutex_unlock(&dsa_tag_drivers_lock); +} + static int dev_is_class(struct device *dev, void *class) { if (dev->class != NULL && !strcmp(dev->class->name, class)) @@ -344,23 +344,28 @@ static int __init dsa_init_module(void) rc = dsa_slave_register_notifier(); if (rc) - return rc; - - rc = dsa_legacy_register(); - if (rc) - return rc; + goto register_notifier_fail; dev_add_pack(&dsa_pack_type); + dsa_tag_driver_register(&DSA_TAG_DRIVER_NAME(none_ops), + THIS_MODULE); + return 0; + +register_notifier_fail: + destroy_workqueue(dsa_owq); + + return rc; } 
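The consumer side of the new registry (see the dsa2.c hunk further down) resolves a protocol number to a tagger at CPU-port setup time: dsa_tag_driver_get() issues request_module("dsa_tag-<N>") and takes a reference on the owning module, dropped later by dsa_tag_driver_put(). A sketch under the assumption that dsa_priv.h is in scope; the helper name is hypothetical and mirrors what dsa_port_parse_cpu() now does:

	static int demo_bind_tagger(struct dsa_port *cpu_dp, int tag_protocol)
	{
		const struct dsa_device_ops *tag_ops;

		tag_ops = dsa_tag_driver_get(tag_protocol);
		if (IS_ERR(tag_ops))
			return PTR_ERR(tag_ops);	/* -ENOPROTOOPT if no tagger */

		cpu_dp->tag_ops = tag_ops;
		cpu_dp->rcv = tag_ops->rcv;
		return 0;
	}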
module_init(dsa_init_module); static void __exit dsa_cleanup_module(void) { + dsa_tag_driver_unregister(&DSA_TAG_DRIVER_NAME(none_ops)); + dsa_slave_unregister_notifier(); dev_remove_pack(&dsa_pack_type); - dsa_legacy_unregister(); destroy_workqueue(dsa_owq); } module_exit(dsa_cleanup_module); diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index c00ee464afc7..3b5f434cad3f 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -18,6 +18,7 @@ #include <linux/rtnetlink.h> #include <linux/of.h> #include <linux/of_net.h> +#include <net/devlink.h> #include "dsa_priv.h" @@ -257,14 +258,39 @@ static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst) static int dsa_port_setup(struct dsa_port *dp) { + enum devlink_port_flavour flavour; struct dsa_switch *ds = dp->ds; - int err = 0; + struct dsa_switch_tree *dst = ds->dst; + int err; + + if (dp->type == DSA_PORT_TYPE_UNUSED) + return 0; memset(&dp->devlink_port, 0, sizeof(dp->devlink_port)); + dp->mac = of_get_mac_address(dp->dn); - if (dp->type != DSA_PORT_TYPE_UNUSED) - err = devlink_port_register(ds->devlink, &dp->devlink_port, - dp->index); + switch (dp->type) { + case DSA_PORT_TYPE_CPU: + flavour = DEVLINK_PORT_FLAVOUR_CPU; + break; + case DSA_PORT_TYPE_DSA: + flavour = DEVLINK_PORT_FLAVOUR_DSA; + break; + case DSA_PORT_TYPE_USER: /* fall-through */ + default: + flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; + break; + } + + /* dp->index is used now as port_number. However + * CPU and DSA ports should have separate numbering + * independent from front panel port numbers. + */ + devlink_port_attrs_set(&dp->devlink_port, flavour, + dp->index, false, 0, + (const char *) &dst->index, sizeof(dst->index)); + err = devlink_port_register(ds->devlink, &dp->devlink_port, + dp->index); if (err) return err; @@ -272,13 +298,6 @@ static int dsa_port_setup(struct dsa_port *dp) case DSA_PORT_TYPE_UNUSED: break; case DSA_PORT_TYPE_CPU: - /* dp->index is used now as port_number. However - * CPU ports should have separate numbering - * independent from front panel port numbers. - */ - devlink_port_attrs_set(&dp->devlink_port, - DEVLINK_PORT_FLAVOUR_CPU, - dp->index, false, 0); err = dsa_port_link_register_of(dp); if (err) { dev_err(ds->dev, "failed to setup link for port %d.%d\n", @@ -287,13 +306,6 @@ static int dsa_port_setup(struct dsa_port *dp) } break; case DSA_PORT_TYPE_DSA: - /* dp->index is used now as port_number. However - * DSA ports should have separate numbering - * independent from front panel port numbers. 
- */ - devlink_port_attrs_set(&dp->devlink_port, - DEVLINK_PORT_FLAVOUR_DSA, - dp->index, false, 0); err = dsa_port_link_register_of(dp); if (err) { dev_err(ds->dev, "failed to setup link for port %d.%d\n", @@ -302,9 +314,6 @@ static int dsa_port_setup(struct dsa_port *dp) } break; case DSA_PORT_TYPE_USER: - devlink_port_attrs_set(&dp->devlink_port, - DEVLINK_PORT_FLAVOUR_PHYSICAL, - dp->index, false, 0); err = dsa_slave_create(dp); if (err) dev_err(ds->dev, "failed to create slave for port %d.%d\n", @@ -326,6 +335,8 @@ static void dsa_port_teardown(struct dsa_port *dp) case DSA_PORT_TYPE_UNUSED: break; case DSA_PORT_TYPE_CPU: + dsa_tag_driver_put(dp->tag_ops); + /* fall-through */ case DSA_PORT_TYPE_DSA: dsa_port_link_unregister_of(dp); break; @@ -360,14 +371,14 @@ static int dsa_switch_setup(struct dsa_switch *ds) if (err) return err; - err = ds->ops->setup(ds); - if (err < 0) - return err; - err = dsa_switch_register_notifier(ds); if (err) return err; + err = ds->ops->setup(ds); + if (err < 0) + return err; + if (!ds->slave_mii_bus && ds->ops->phy_read) { ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev); if (!ds->slave_mii_bus) @@ -568,13 +579,14 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master) enum dsa_tag_protocol tag_protocol; tag_protocol = ds->ops->get_tag_protocol(ds, dp->index); - tag_ops = dsa_resolve_tag_protocol(tag_protocol); + tag_ops = dsa_tag_driver_get(tag_protocol); if (IS_ERR(tag_ops)) { dev_warn(ds->dev, "No tagger for this switch\n"); return PTR_ERR(tag_ops); } dp->type = DSA_PORT_TYPE_CPU; + dp->filter = tag_ops->filter; dp->rcv = tag_ops->rcv; dp->tag_ops = tag_ops; dp->master = master; diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 093b7d145eb1..8f1222324646 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -84,22 +84,12 @@ struct dsa_slave_priv { }; /* dsa.c */ -const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol); +const struct dsa_device_ops *dsa_tag_driver_get(int tag_protocol); +void dsa_tag_driver_put(const struct dsa_device_ops *ops); + bool dsa_schedule_work(struct work_struct *work); const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops); -/* legacy.c */ -#if IS_ENABLED(CONFIG_NET_DSA_LEGACY) -int dsa_legacy_register(void); -void dsa_legacy_unregister(void); -#else -static inline int dsa_legacy_register(void) -{ - return 0; -} - -static inline void dsa_legacy_unregister(void) { } -#endif int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, @@ -169,6 +159,8 @@ int dsa_port_vlan_add(struct dsa_port *dp, struct switchdev_trans *trans); int dsa_port_vlan_del(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan); +int dsa_port_vid_add(struct dsa_port *dp, u16 vid, u16 flags); +int dsa_port_vid_del(struct dsa_port *dp, u16 vid); int dsa_port_link_register_of(struct dsa_port *dp); void dsa_port_link_unregister_of(struct dsa_port *dp); @@ -182,6 +174,8 @@ int dsa_slave_resume(struct net_device *slave_dev); int dsa_slave_register_notifier(void); void dsa_slave_unregister_notifier(void); +void *dsa_defer_xmit(struct sk_buff *skb, struct net_device *dev); + static inline struct dsa_port *dsa_slave_to_port(const struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); @@ -200,34 +194,4 @@ dsa_slave_to_master(const struct net_device *dev) /* switch.c */ int dsa_switch_register_notifier(struct dsa_switch *ds); void dsa_switch_unregister_notifier(struct dsa_switch *ds); - -/* tag_brcm.c */ 
-extern const struct dsa_device_ops brcm_netdev_ops; -extern const struct dsa_device_ops brcm_prepend_netdev_ops; - -/* tag_dsa.c */ -extern const struct dsa_device_ops dsa_netdev_ops; - -/* tag_edsa.c */ -extern const struct dsa_device_ops edsa_netdev_ops; - -/* tag_gswip.c */ -extern const struct dsa_device_ops gswip_netdev_ops; - -/* tag_ksz.c */ -extern const struct dsa_device_ops ksz9477_netdev_ops; -extern const struct dsa_device_ops ksz9893_netdev_ops; - -/* tag_lan9303.c */ -extern const struct dsa_device_ops lan9303_netdev_ops; - -/* tag_mtk.c */ -extern const struct dsa_device_ops mtk_netdev_ops; - -/* tag_qca.c */ -extern const struct dsa_device_ops qca_netdev_ops; - -/* tag_trailer.c */ -extern const struct dsa_device_ops trailer_netdev_ops; - #endif diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c deleted file mode 100644 index cb42939db776..000000000000 --- a/net/dsa/legacy.c +++ /dev/null @@ -1,745 +0,0 @@ -/* - * net/dsa/legacy.c - Hardware switch handling - * Copyright (c) 2008-2009 Marvell Semiconductor - * Copyright (c) 2013 Florian Fainelli <florian@openwrt.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include <linux/device.h> -#include <linux/list.h> -#include <linux/platform_device.h> -#include <linux/slab.h> -#include <linux/module.h> -#include <linux/of.h> -#include <linux/of_mdio.h> -#include <linux/of_platform.h> -#include <linux/of_net.h> -#include <linux/netdevice.h> -#include <linux/sysfs.h> -#include <linux/phy_fixed.h> -#include <linux/etherdevice.h> - -#include "dsa_priv.h" - -/* switch driver registration ***********************************************/ -static DEFINE_MUTEX(dsa_switch_drivers_mutex); -static LIST_HEAD(dsa_switch_drivers); - -void register_switch_driver(struct dsa_switch_driver *drv) -{ - mutex_lock(&dsa_switch_drivers_mutex); - list_add_tail(&drv->list, &dsa_switch_drivers); - mutex_unlock(&dsa_switch_drivers_mutex); -} -EXPORT_SYMBOL_GPL(register_switch_driver); - -void unregister_switch_driver(struct dsa_switch_driver *drv) -{ - mutex_lock(&dsa_switch_drivers_mutex); - list_del_init(&drv->list); - mutex_unlock(&dsa_switch_drivers_mutex); -} -EXPORT_SYMBOL_GPL(unregister_switch_driver); - -static const struct dsa_switch_ops * -dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr, - const char **_name, void **priv) -{ - const struct dsa_switch_ops *ret; - struct list_head *list; - const char *name; - - ret = NULL; - name = NULL; - - mutex_lock(&dsa_switch_drivers_mutex); - list_for_each(list, &dsa_switch_drivers) { - const struct dsa_switch_ops *ops; - struct dsa_switch_driver *drv; - - drv = list_entry(list, struct dsa_switch_driver, list); - ops = drv->ops; - - name = ops->probe(parent, host_dev, sw_addr, priv); - if (name != NULL) { - ret = ops; - break; - } - } - mutex_unlock(&dsa_switch_drivers_mutex); - - *_name = name; - - return ret; -} - -/* basic switch operations **************************************************/ -static int dsa_cpu_dsa_setups(struct dsa_switch *ds) -{ - int ret, port; - - for (port = 0; port < ds->num_ports; port++) { - if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) - continue; - - ret = dsa_port_link_register_of(&ds->ports[port]); - if (ret) - return ret; - } - return 0; -} - -static int dsa_switch_setup_one(struct dsa_switch *ds, - struct 
net_device *master) -{ - const struct dsa_switch_ops *ops = ds->ops; - struct dsa_switch_tree *dst = ds->dst; - struct dsa_chip_data *cd = ds->cd; - bool valid_name_found = false; - int index = ds->index; - struct dsa_port *dp; - int i, ret; - - /* - * Validate supplied switch configuration. - */ - for (i = 0; i < ds->num_ports; i++) { - char *name; - - dp = &ds->ports[i]; - - name = cd->port_names[i]; - if (name == NULL) - continue; - dp->name = name; - - if (!strcmp(name, "cpu")) { - if (dst->cpu_dp) { - netdev_err(master, - "multiple cpu ports?!\n"); - return -EINVAL; - } - dst->cpu_dp = &ds->ports[i]; - dst->cpu_dp->master = master; - dp->type = DSA_PORT_TYPE_CPU; - } else if (!strcmp(name, "dsa")) { - dp->type = DSA_PORT_TYPE_DSA; - } else { - dp->type = DSA_PORT_TYPE_USER; - } - valid_name_found = true; - } - - if (!valid_name_found && i == ds->num_ports) - return -EINVAL; - - /* Make the built-in MII bus mask match the number of ports, - * switch drivers can override this later - */ - ds->phys_mii_mask |= dsa_user_ports(ds); - - /* - * If the CPU connects to this switch, set the switch tree - * tagging protocol to the preferred tagging format of this - * switch. - */ - if (dst->cpu_dp->ds == ds) { - const struct dsa_device_ops *tag_ops; - enum dsa_tag_protocol tag_protocol; - - tag_protocol = ops->get_tag_protocol(ds, dst->cpu_dp->index); - tag_ops = dsa_resolve_tag_protocol(tag_protocol); - if (IS_ERR(tag_ops)) - return PTR_ERR(tag_ops); - - dst->cpu_dp->tag_ops = tag_ops; - - /* Few copies for faster access in master receive hot path */ - dst->cpu_dp->rcv = dst->cpu_dp->tag_ops->rcv; - dst->cpu_dp->dst = dst; - } - - memcpy(ds->rtable, cd->rtable, sizeof(ds->rtable)); - - /* - * Do basic register setup. - */ - ret = ops->setup(ds); - if (ret < 0) - return ret; - - ret = dsa_switch_register_notifier(ds); - if (ret) - return ret; - - if (!ds->slave_mii_bus && ops->phy_read) { - ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev); - if (!ds->slave_mii_bus) - return -ENOMEM; - dsa_slave_mii_bus_init(ds); - - ret = mdiobus_register(ds->slave_mii_bus); - if (ret < 0) - return ret; - } - - /* - * Create network devices for physical switch ports. - */ - for (i = 0; i < ds->num_ports; i++) { - ds->ports[i].dn = cd->port_dn[i]; - ds->ports[i].cpu_dp = dst->cpu_dp; - - if (!dsa_is_user_port(ds, i)) - continue; - - ret = dsa_slave_create(&ds->ports[i]); - if (ret < 0) - netdev_err(master, "[%d]: can't create dsa slave device for port %d(%s): %d\n", - index, i, cd->port_names[i], ret); - } - - /* Perform configuration of the CPU and DSA ports */ - ret = dsa_cpu_dsa_setups(ds); - if (ret < 0) - netdev_err(master, "[%d] : can't configure CPU and DSA ports\n", - index); - - return 0; -} - -static struct dsa_switch * -dsa_switch_setup(struct dsa_switch_tree *dst, struct net_device *master, - int index, struct device *parent, struct device *host_dev) -{ - struct dsa_chip_data *cd = dst->pd->chip + index; - const struct dsa_switch_ops *ops; - struct dsa_switch *ds; - int ret; - const char *name; - void *priv; - - /* - * Probe for switch model. - */ - ops = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv); - if (!ops) { - netdev_err(master, "[%d]: could not detect attached switch\n", - index); - return ERR_PTR(-EINVAL); - } - netdev_info(master, "[%d]: detected a %s switch\n", - index, name); - - - /* - * Allocate and initialise switch state. 
- */ - ds = dsa_switch_alloc(parent, DSA_MAX_PORTS); - if (!ds) - return ERR_PTR(-ENOMEM); - - ds->dst = dst; - ds->index = index; - ds->cd = cd; - ds->ops = ops; - ds->priv = priv; - - ret = dsa_switch_setup_one(ds, master); - if (ret) - return ERR_PTR(ret); - - return ds; -} - -static void dsa_switch_destroy(struct dsa_switch *ds) -{ - int port; - - /* Destroy network devices for physical switch ports. */ - for (port = 0; port < ds->num_ports; port++) { - if (!dsa_is_user_port(ds, port)) - continue; - - if (!ds->ports[port].slave) - continue; - - dsa_slave_destroy(ds->ports[port].slave); - } - - /* Disable configuration of the CPU and DSA ports */ - for (port = 0; port < ds->num_ports; port++) { - if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) - continue; - dsa_port_link_unregister_of(&ds->ports[port]); - } - - if (ds->slave_mii_bus && ds->ops->phy_read) - mdiobus_unregister(ds->slave_mii_bus); - - dsa_switch_unregister_notifier(ds); -} - -/* platform driver init and cleanup *****************************************/ -static int dev_is_class(struct device *dev, void *class) -{ - if (dev->class != NULL && !strcmp(dev->class->name, class)) - return 1; - - return 0; -} - -static struct device *dev_find_class(struct device *parent, char *class) -{ - if (dev_is_class(parent, class)) { - get_device(parent); - return parent; - } - - return device_find_child(parent, class, dev_is_class); -} - -struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev) -{ - struct device *d; - - d = dev_find_class(dev, "mdio_bus"); - if (d != NULL) { - struct mii_bus *bus; - - bus = to_mii_bus(d); - put_device(d); - - return bus; - } - - return NULL; -} -EXPORT_SYMBOL_GPL(dsa_host_dev_to_mii_bus); - -#ifdef CONFIG_OF -static int dsa_of_setup_routing_table(struct dsa_platform_data *pd, - struct dsa_chip_data *cd, - int chip_index, int port_index, - struct device_node *link) -{ - const __be32 *reg; - int link_sw_addr; - struct device_node *parent_sw; - int len; - - parent_sw = of_get_parent(link); - if (!parent_sw) - return -EINVAL; - - reg = of_get_property(parent_sw, "reg", &len); - if (!reg || (len != sizeof(*reg) * 2)) - return -EINVAL; - - /* - * Get the destination switch number from the second field of its 'reg' - * property, i.e. for "reg = <0x19 1>" sw_addr is '1'. 
- */ - link_sw_addr = be32_to_cpup(reg + 1); - - if (link_sw_addr >= pd->nr_chips) - return -EINVAL; - - cd->rtable[link_sw_addr] = port_index; - - return 0; -} - -static int dsa_of_probe_links(struct dsa_platform_data *pd, - struct dsa_chip_data *cd, - int chip_index, int port_index, - struct device_node *port, - const char *port_name) -{ - struct device_node *link; - int link_index; - int ret; - - for (link_index = 0;; link_index++) { - link = of_parse_phandle(port, "link", link_index); - if (!link) - break; - - if (!strcmp(port_name, "dsa") && pd->nr_chips > 1) { - ret = dsa_of_setup_routing_table(pd, cd, chip_index, - port_index, link); - if (ret) - return ret; - } - } - return 0; -} - -static void dsa_of_free_platform_data(struct dsa_platform_data *pd) -{ - int i; - int port_index; - - for (i = 0; i < pd->nr_chips; i++) { - port_index = 0; - while (port_index < DSA_MAX_PORTS) { - kfree(pd->chip[i].port_names[port_index]); - port_index++; - } - - /* Drop our reference to the MDIO bus device */ - put_device(pd->chip[i].host_dev); - } - kfree(pd->chip); -} - -static int dsa_of_probe(struct device *dev) -{ - struct device_node *np = dev->of_node; - struct device_node *child, *mdio, *ethernet, *port; - struct mii_bus *mdio_bus, *mdio_bus_switch; - struct net_device *ethernet_dev; - struct dsa_platform_data *pd; - struct dsa_chip_data *cd; - const char *port_name; - int chip_index, port_index; - const unsigned int *sw_addr, *port_reg; - u32 eeprom_len; - int ret; - - mdio = of_parse_phandle(np, "dsa,mii-bus", 0); - if (!mdio) - return -EINVAL; - - mdio_bus = of_mdio_find_bus(mdio); - if (!mdio_bus) - return -EPROBE_DEFER; - - ethernet = of_parse_phandle(np, "dsa,ethernet", 0); - if (!ethernet) { - ret = -EINVAL; - goto out_put_mdio; - } - - ethernet_dev = of_find_net_device_by_node(ethernet); - if (!ethernet_dev) { - ret = -EPROBE_DEFER; - goto out_put_mdio; - } - - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) { - ret = -ENOMEM; - goto out_put_ethernet; - } - - dev->platform_data = pd; - pd->of_netdev = ethernet_dev; - pd->nr_chips = of_get_available_child_count(np); - if (pd->nr_chips > DSA_MAX_SWITCHES) - pd->nr_chips = DSA_MAX_SWITCHES; - - pd->chip = kcalloc(pd->nr_chips, sizeof(struct dsa_chip_data), - GFP_KERNEL); - if (!pd->chip) { - ret = -ENOMEM; - goto out_free; - } - - chip_index = -1; - for_each_available_child_of_node(np, child) { - int i; - - chip_index++; - cd = &pd->chip[chip_index]; - - cd->of_node = child; - - /* Initialize the routing table */ - for (i = 0; i < DSA_MAX_SWITCHES; ++i) - cd->rtable[i] = DSA_RTABLE_NONE; - - /* When assigning the host device, increment its refcount */ - cd->host_dev = get_device(&mdio_bus->dev); - - sw_addr = of_get_property(child, "reg", NULL); - if (!sw_addr) - continue; - - cd->sw_addr = be32_to_cpup(sw_addr); - if (cd->sw_addr >= PHY_MAX_ADDR) - continue; - - if (!of_property_read_u32(child, "eeprom-length", &eeprom_len)) - cd->eeprom_len = eeprom_len; - - mdio = of_parse_phandle(child, "mii-bus", 0); - if (mdio) { - mdio_bus_switch = of_mdio_find_bus(mdio); - if (!mdio_bus_switch) { - ret = -EPROBE_DEFER; - goto out_free_chip; - } - - /* Drop the mdio_bus device ref, replacing the host - * device with the mdio_bus_switch device, keeping - * the refcount from of_mdio_find_bus() above. 
- */ - put_device(cd->host_dev); - cd->host_dev = &mdio_bus_switch->dev; - } - - for_each_available_child_of_node(child, port) { - port_reg = of_get_property(port, "reg", NULL); - if (!port_reg) - continue; - - port_index = be32_to_cpup(port_reg); - if (port_index >= DSA_MAX_PORTS) - break; - - port_name = of_get_property(port, "label", NULL); - if (!port_name) - continue; - - cd->port_dn[port_index] = port; - - cd->port_names[port_index] = kstrdup(port_name, - GFP_KERNEL); - if (!cd->port_names[port_index]) { - ret = -ENOMEM; - goto out_free_chip; - } - - ret = dsa_of_probe_links(pd, cd, chip_index, - port_index, port, port_name); - if (ret) - goto out_free_chip; - - } - } - - /* The individual chips hold their own refcount on the mdio bus, - * so drop ours */ - put_device(&mdio_bus->dev); - - return 0; - -out_free_chip: - dsa_of_free_platform_data(pd); -out_free: - kfree(pd); - dev->platform_data = NULL; -out_put_ethernet: - put_device(&ethernet_dev->dev); -out_put_mdio: - put_device(&mdio_bus->dev); - return ret; -} - -static void dsa_of_remove(struct device *dev) -{ - struct dsa_platform_data *pd = dev->platform_data; - - if (!dev->of_node) - return; - - dsa_of_free_platform_data(pd); - put_device(&pd->of_netdev->dev); - kfree(pd); -} -#else -static inline int dsa_of_probe(struct device *dev) -{ - return 0; -} - -static inline void dsa_of_remove(struct device *dev) -{ -} -#endif - -static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, - struct device *parent, struct dsa_platform_data *pd) -{ - int i; - unsigned configured = 0; - - dst->pd = pd; - - for (i = 0; i < pd->nr_chips; i++) { - struct dsa_switch *ds; - - ds = dsa_switch_setup(dst, dev, i, parent, pd->chip[i].host_dev); - if (IS_ERR(ds)) { - netdev_err(dev, "[%d]: couldn't create dsa switch instance (error %ld)\n", - i, PTR_ERR(ds)); - continue; - } - - dst->ds[i] = ds; - - ++configured; - } - - /* - * If no switch was found, exit cleanly - */ - if (!configured) - return -EPROBE_DEFER; - - return dsa_master_setup(dst->cpu_dp->master, dst->cpu_dp); -} - -static int dsa_probe(struct platform_device *pdev) -{ - struct dsa_platform_data *pd = pdev->dev.platform_data; - struct net_device *dev; - struct dsa_switch_tree *dst; - int ret; - - if (pdev->dev.of_node) { - ret = dsa_of_probe(&pdev->dev); - if (ret) - return ret; - - pd = pdev->dev.platform_data; - } - - if (pd == NULL || (pd->netdev == NULL && pd->of_netdev == NULL)) - return -EINVAL; - - if (pd->of_netdev) { - dev = pd->of_netdev; - dev_hold(dev); - } else { - dev = dsa_dev_to_net_device(pd->netdev); - } - if (dev == NULL) { - ret = -EPROBE_DEFER; - goto out; - } - - if (dev->dsa_ptr != NULL) { - dev_put(dev); - ret = -EEXIST; - goto out; - } - - dst = devm_kzalloc(&pdev->dev, sizeof(*dst), GFP_KERNEL); - if (dst == NULL) { - dev_put(dev); - ret = -ENOMEM; - goto out; - } - - platform_set_drvdata(pdev, dst); - - ret = dsa_setup_dst(dst, dev, &pdev->dev, pd); - if (ret) { - dev_put(dev); - goto out; - } - - return 0; - -out: - dsa_of_remove(&pdev->dev); - - return ret; -} - -static void dsa_remove_dst(struct dsa_switch_tree *dst) -{ - int i; - - dsa_master_teardown(dst->cpu_dp->master); - - for (i = 0; i < dst->pd->nr_chips; i++) { - struct dsa_switch *ds = dst->ds[i]; - - if (ds) - dsa_switch_destroy(ds); - } - - dev_put(dst->cpu_dp->master); -} - -static int dsa_remove(struct platform_device *pdev) -{ - struct dsa_switch_tree *dst = platform_get_drvdata(pdev); - - dsa_remove_dst(dst); - dsa_of_remove(&pdev->dev); - - return 0; -} - -static void 
dsa_shutdown(struct platform_device *pdev) -{ -} - -#ifdef CONFIG_PM_SLEEP -static int dsa_suspend(struct device *d) -{ - struct dsa_switch_tree *dst = dev_get_drvdata(d); - int i, ret = 0; - - for (i = 0; i < dst->pd->nr_chips; i++) { - struct dsa_switch *ds = dst->ds[i]; - - if (ds != NULL) - ret = dsa_switch_suspend(ds); - } - - return ret; -} - -static int dsa_resume(struct device *d) -{ - struct dsa_switch_tree *dst = dev_get_drvdata(d); - int i, ret = 0; - - for (i = 0; i < dst->pd->nr_chips; i++) { - struct dsa_switch *ds = dst->ds[i]; - - if (ds != NULL) - ret = dsa_switch_resume(ds); - } - - return ret; -} -#endif - -static SIMPLE_DEV_PM_OPS(dsa_pm_ops, dsa_suspend, dsa_resume); - -static const struct of_device_id dsa_of_match_table[] = { - { .compatible = "marvell,dsa", }, - {} -}; -MODULE_DEVICE_TABLE(of, dsa_of_match_table); - -static struct platform_driver dsa_driver = { - .probe = dsa_probe, - .remove = dsa_remove, - .shutdown = dsa_shutdown, - .driver = { - .name = "dsa", - .of_match_table = dsa_of_match_table, - .pm = &dsa_pm_ops, - }, -}; - -int dsa_legacy_register(void) -{ - return platform_driver_register(&dsa_driver); -} - -void dsa_legacy_unregister(void) -{ - platform_driver_unregister(&dsa_driver); -} diff --git a/net/dsa/port.c b/net/dsa/port.c index caeef4c99dc0..ed8ba9daa3ba 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -154,19 +154,67 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br) dsa_port_set_state_now(dp, BR_STATE_FORWARDING); } +static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp, + bool vlan_filtering) +{ + struct dsa_switch *ds = dp->ds; + int i; + + if (!ds->vlan_filtering_is_global) + return true; + + /* For cases where enabling/disabling VLAN awareness is global to the + * switch, we need to handle the case where multiple bridges span + * different ports of the same switch device and one of them has a + * different setting than what is being requested. 
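For a concrete picture of the conflict this guards against (port names hypothetical): on a switch whose VLAN awareness is global, swp0 already sits in br0 with vlan_filtering=1 while swp1 is being joined to br1 with vlan_filtering=0. The loop below walks every other port, finds br0 behind swp0, sees br_vlan_enabled(br0) disagree with the requested setting, and refuses the change instead of silently flipping swp0's semantics.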
+ */ + for (i = 0; i < ds->num_ports; i++) { + struct net_device *other_bridge; + + other_bridge = dsa_to_port(ds, i)->bridge_dev; + if (!other_bridge) + continue; + /* If it's the same bridge, it also has same + * vlan_filtering setting => no need to check + */ + if (other_bridge == dp->bridge_dev) + continue; + if (br_vlan_enabled(other_bridge) != vlan_filtering) { + dev_err(ds->dev, "VLAN filtering is a global setting\n"); + return false; + } + } + return true; +} + int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, struct switchdev_trans *trans) { struct dsa_switch *ds = dp->ds; + int err; /* bridge skips -EOPNOTSUPP, so skip the prepare phase */ if (switchdev_trans_ph_prepare(trans)) return 0; - if (ds->ops->port_vlan_filtering) - return ds->ops->port_vlan_filtering(ds, dp->index, - vlan_filtering); + if (!ds->ops->port_vlan_filtering) + return 0; + + if (!dsa_port_can_apply_vlan_filtering(dp, vlan_filtering)) + return -EINVAL; + + if (dsa_port_is_vlan_filtering(dp) == vlan_filtering) + return 0; + + err = ds->ops->port_vlan_filtering(ds, dp->index, + vlan_filtering); + if (err) + return err; + if (ds->vlan_filtering_is_global) + ds->vlan_filtering = vlan_filtering; + else + dp->vlan_filtering = vlan_filtering; return 0; } @@ -322,6 +370,39 @@ int dsa_port_vlan_del(struct dsa_port *dp, return 0; } +int dsa_port_vid_add(struct dsa_port *dp, u16 vid, u16 flags) +{ + struct switchdev_obj_port_vlan vlan = { + .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, + .flags = flags, + .vid_begin = vid, + .vid_end = vid, + }; + struct switchdev_trans trans; + int err; + + trans.ph_prepare = true; + err = dsa_port_vlan_add(dp, &vlan, &trans); + if (err == -EOPNOTSUPP) + return 0; + + trans.ph_prepare = false; + return dsa_port_vlan_add(dp, &vlan, &trans); +} +EXPORT_SYMBOL(dsa_port_vid_add); + +int dsa_port_vid_del(struct dsa_port *dp, u16 vid) +{ + struct switchdev_obj_port_vlan vlan = { + .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, + .vid_begin = vid, + .vid_end = vid, + }; + + return dsa_port_vlan_del(dp, &vlan); +} +EXPORT_SYMBOL(dsa_port_vid_del); + static struct phy_device *dsa_port_get_phy_device(struct dsa_port *dp) { struct device_node *phy_dn; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 093eef6f2599..fe7b6a62e8f1 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -120,6 +120,9 @@ static int dsa_slave_close(struct net_device *dev) struct net_device *master = dsa_slave_to_master(dev); struct dsa_port *dp = dsa_slave_to_port(dev); + cancel_work_sync(&dp->xmit_work); + skb_queue_purge(&dp->xmit_queue); + phylink_stop(dp->pl); dsa_port_disable(dp); @@ -379,6 +382,13 @@ static int dsa_slave_get_port_parent_id(struct net_device *dev, struct dsa_switch *ds = dp->ds; struct dsa_switch_tree *dst = ds->dst; + /* For non-legacy ports, devlink is used and it takes + * care of the name generation. This ndo implementation + * should be removed with legacy support. 
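The dsa_port_vid_add()/dsa_port_vid_del() helpers added above fold the two-phase switchdev transaction (prepare, then commit) into a single call that programs one VID, which both the VLAN RX filter path and the new tag_8021q code rely on. A minimal sketch of a caller, assuming a valid struct dsa_port *dp (the function name is hypothetical):

#include <linux/if_bridge.h>
#include "dsa_priv.h"

static int example_install_pvid(struct dsa_port *dp)
{
	/* Install VID 100 as the port's PVID, egress-untagged;
	 * dsa_port_vid_del(dp, 100) would undo it. A driver without
	 * .port_vlan_add sees this succeed as a no-op, since the
	 * prepare phase's -EOPNOTSUPP is swallowed.
	 */
	return dsa_port_vid_add(dp, 100, BRIDGE_VLAN_INFO_PVID |
					 BRIDGE_VLAN_INFO_UNTAGGED);
}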
+ */ + if (dp->ds->devlink) + return -EOPNOTSUPP; + ppid->id_len = sizeof(dst->index); memcpy(&ppid->id, &dst->index, ppid->id_len); @@ -423,6 +433,24 @@ static void dsa_skb_tx_timestamp(struct dsa_slave_priv *p, kfree_skb(clone); } +netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev) +{ + /* SKB for netpoll still need to be mangled with the protocol-specific + * tag to be successfully transmitted + */ + if (unlikely(netpoll_tx_running(dev))) + return dsa_slave_netpoll_send_skb(dev, skb); + + /* Queue the SKB for transmission on the parent interface, but + * do not modify its EtherType + */ + skb->dev = dsa_slave_to_master(dev); + dev_queue_xmit(skb); + + return NETDEV_TX_OK; +} +EXPORT_SYMBOL_GPL(dsa_enqueue_skb); + static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); @@ -445,23 +473,37 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) */ nskb = p->xmit(skb, dev); if (!nskb) { - kfree_skb(skb); + if (!DSA_SKB_CB(skb)->deferred_xmit) + kfree_skb(skb); return NETDEV_TX_OK; } - /* SKB for netpoll still need to be mangled with the protocol-specific - * tag to be successfully transmitted - */ - if (unlikely(netpoll_tx_running(dev))) - return dsa_slave_netpoll_send_skb(dev, nskb); + return dsa_enqueue_skb(nskb, dev); +} - /* Queue the SKB for transmission on the parent interface, but - * do not modify its EtherType - */ - nskb->dev = dsa_slave_to_master(dev); - dev_queue_xmit(nskb); +void *dsa_defer_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); - return NETDEV_TX_OK; + DSA_SKB_CB(skb)->deferred_xmit = true; + + skb_queue_tail(&dp->xmit_queue, skb); + schedule_work(&dp->xmit_work); + return NULL; +} +EXPORT_SYMBOL_GPL(dsa_defer_xmit); + +static void dsa_port_xmit_work(struct work_struct *work) +{ + struct dsa_port *dp = container_of(work, struct dsa_port, xmit_work); + struct dsa_switch *ds = dp->ds; + struct sk_buff *skb; + + if (unlikely(!ds->ops->port_deferred_xmit)) + return; + + while ((skb = skb_dequeue(&dp->xmit_queue)) != NULL) + ds->ops->port_deferred_xmit(ds, dp->index, skb); } /* ethtool operations *******************************************************/ @@ -736,6 +778,13 @@ static int dsa_slave_get_phys_port_name(struct net_device *dev, { struct dsa_port *dp = dsa_slave_to_port(dev); + /* For non-legacy ports, devlink is used and it takes + * care of the name generation. This ndo implementation + * should be removed with legacy support. 
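The deferred xmit machinery above exists for frames a tagger cannot steer with a tag alone (for sja1105 further down, link-local management frames that need SPI-installed routes): the tagger queues the skb and returns NULL, and dsa_port_xmit_work() later feeds each queued skb to the driver's port_deferred_xmit callback in process context. A minimal sketch of the tagger side, with foo_is_management() and foo_tag_push() as hypothetical stand-ins:

static struct sk_buff *foo_xmit(struct sk_buff *skb, struct net_device *netdev)
{
	/* dsa_defer_xmit() marks DSA_SKB_CB(skb)->deferred_xmit, queues
	 * the skb on dp->xmit_queue and returns NULL; dsa_slave_xmit()
	 * now recognizes that NULL as "ownership transferred", not as a
	 * drop to be freed.
	 */
	if (foo_is_management(skb))
		return dsa_defer_xmit(skb, netdev);

	return foo_tag_push(skb, netdev);	/* normal tagging path */
}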
+ */ + if (dp->ds->devlink) + return -EOPNOTSUPP; + if (snprintf(name, len, "p%d", dp->index) >= len) return -EINVAL; @@ -764,27 +813,25 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev, struct dsa_mall_tc_entry *mall_tc_entry; __be16 protocol = cls->common.protocol; struct dsa_switch *ds = dp->ds; - struct net_device *to_dev; - const struct tc_action *a; + struct flow_action_entry *act; struct dsa_port *to_dp; int err = -EOPNOTSUPP; if (!ds->ops->port_mirror_add) return err; - if (!tcf_exts_has_one_action(cls->exts)) + if (!flow_offload_has_one_action(&cls->rule->action)) return err; - a = tcf_exts_first_action(cls->exts); + act = &cls->rule->action.entries[0]; - if (is_tcf_mirred_egress_mirror(a) && protocol == htons(ETH_P_ALL)) { + if (act->id == FLOW_ACTION_MIRRED && protocol == htons(ETH_P_ALL)) { struct dsa_mall_mirror_tc_entry *mirror; - to_dev = tcf_mirred_dev(a); - if (!to_dev) + if (!act->dev) return -EINVAL; - if (!dsa_slave_dev_check(to_dev)) + if (!dsa_slave_dev_check(act->dev)) return -EOPNOTSUPP; mall_tc_entry = kzalloc(sizeof(*mall_tc_entry), GFP_KERNEL); @@ -795,7 +842,7 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev, mall_tc_entry->type = DSA_PORT_MALL_MIRROR; mirror = &mall_tc_entry->mirror; - to_dp = dsa_slave_to_port(to_dev); + to_dp = dsa_slave_to_port(act->dev); mirror->to_local_port = to_dp->index; mirror->ingress = ingress; @@ -987,13 +1034,6 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { struct dsa_port *dp = dsa_slave_to_port(dev); - struct switchdev_obj_port_vlan vlan = { - .vid_begin = vid, - .vid_end = vid, - /* This API only allows programming tagged, non-PVID VIDs */ - .flags = 0, - }; - struct switchdev_trans trans; struct bridge_vlan_info info; int ret; @@ -1010,25 +1050,14 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto, return -EBUSY; } - trans.ph_prepare = true; - ret = dsa_port_vlan_add(dp, &vlan, &trans); - if (ret == -EOPNOTSUPP) - return 0; - - trans.ph_prepare = false; - return dsa_port_vlan_add(dp, &vlan, &trans); + /* This API only allows programming tagged, non-PVID VIDs */ + return dsa_port_vid_add(dp, vid, 0); } static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) { struct dsa_port *dp = dsa_slave_to_port(dev); - struct switchdev_obj_port_vlan vlan = { - .vid_begin = vid, - .vid_end = vid, - /* This API only allows programming tagged, non-PVID VIDs */ - .flags = 0, - }; struct bridge_vlan_info info; int ret; @@ -1045,7 +1074,7 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, return -EBUSY; } - ret = dsa_port_vlan_del(dp, &vlan); + ret = dsa_port_vid_del(dp, vid); if (ret == -EOPNOTSUPP) ret = 0; @@ -1096,6 +1125,13 @@ int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], return dsa_port_fdb_del(dp, addr, vid); } +static struct devlink_port *dsa_slave_get_devlink_port(struct net_device *dev) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + + return dp->ds->devlink ? 
&dp->devlink_port : NULL; +} + static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_open = dsa_slave_open, .ndo_stop = dsa_slave_close, @@ -1119,6 +1155,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_get_port_parent_id = dsa_slave_get_port_parent_id, .ndo_vlan_rx_add_vid = dsa_slave_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = dsa_slave_vlan_rx_kill_vid, + .ndo_get_devlink_port = dsa_slave_get_devlink_port, }; static struct device_type dsa_type = { @@ -1283,9 +1320,9 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev) phy_flags = ds->ops->get_phy_flags(ds, dp->index); ret = phylink_of_phy_connect(dp->pl, port_dn, phy_flags); - if (ret == -ENODEV) { - /* We could not connect to a designated PHY or SFP, so use the - * switch internal MDIO bus instead + if (ret == -ENODEV && ds->slave_mii_bus) { + /* We could not connect to a designated PHY or SFP, so try to + * use the switch internal MDIO bus instead */ ret = dsa_slave_phy_connect(slave_dev, dp->index); if (ret) { @@ -1297,7 +1334,7 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev) } } - return 0; + return ret; } static struct lock_class_key dsa_slave_netdev_xmit_lock_key; @@ -1316,6 +1353,9 @@ int dsa_slave_suspend(struct net_device *slave_dev) if (!netif_running(slave_dev)) return 0; + cancel_work_sync(&dp->xmit_work); + skb_queue_purge(&dp->xmit_queue); + netif_device_detach(slave_dev); rtnl_lock(); @@ -1378,7 +1418,10 @@ int dsa_slave_create(struct dsa_port *port) NETIF_F_HW_VLAN_CTAG_FILTER; slave_dev->hw_features |= NETIF_F_HW_TC; slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; - eth_hw_addr_inherit(slave_dev, master); + if (!IS_ERR_OR_NULL(port->mac)) + ether_addr_copy(slave_dev->dev_addr, port->mac); + else + eth_hw_addr_inherit(slave_dev, master); slave_dev->priv_flags |= IFF_NO_QUEUE; slave_dev->netdev_ops = &dsa_slave_netdev_ops; slave_dev->min_mtu = 0; @@ -1400,6 +1443,8 @@ int dsa_slave_create(struct dsa_port *port) } p->dp = port; INIT_LIST_HEAD(&p->mall_tc_list); + INIT_WORK(&port->xmit_work, dsa_port_xmit_work); + skb_queue_head_init(&port->xmit_queue); p->xmit = cpu_dp->tag_ops->xmit; port->slave = slave_dev; diff --git a/net/dsa/switch.c b/net/dsa/switch.c index e1fae969aa73..7d8cd9bc0ecc 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -10,6 +10,7 @@ * (at your option) any later version. */ +#include <linux/if_bridge.h> #include <linux/netdevice.h> #include <linux/notifier.h> #include <linux/if_vlan.h> @@ -71,6 +72,9 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds, static int dsa_switch_bridge_leave(struct dsa_switch *ds, struct dsa_notifier_bridge_info *info) { + bool unset_vlan_filtering = br_vlan_enabled(info->br); + int err, i; + if (ds->index == info->sw_index && ds->ops->port_bridge_leave) ds->ops->port_bridge_leave(ds, info->port, info->br); @@ -78,6 +82,31 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, ds->ops->crosschip_bridge_leave(ds, info->sw_index, info->port, info->br); + /* If the bridge was vlan_filtering, the bridge core doesn't trigger an + * event for changing vlan_filtering setting upon slave ports leaving + * it. That is a good thing, because that lets us handle it and also + * handle the case where the switch's vlan_filtering setting is global + * (not per port). When that happens, the correct moment to trigger the + * vlan_filtering callback is only when the last port left this bridge. 
+ */ + if (unset_vlan_filtering && ds->vlan_filtering_is_global) { + for (i = 0; i < ds->num_ports; i++) { + if (i == info->port) + continue; + if (dsa_to_port(ds, i)->bridge_dev == info->br) { + unset_vlan_filtering = false; + break; + } + } + } + if (unset_vlan_filtering) { + struct switchdev_trans trans = {0}; + + err = dsa_port_vlan_filtering(&ds->ports[info->port], + false, &trans); + if (err && err != EOPNOTSUPP) + return err; + } return 0; } @@ -196,7 +225,7 @@ static int dsa_port_vlan_check(struct dsa_switch *ds, int port, if (!dp->bridge_dev) return err; - /* dsa_slave_vlan_rx_{add,kill}_vid() cannot use the prepare pharse and + /* dsa_slave_vlan_rx_{add,kill}_vid() cannot use the prepare phase and * already checks whether there is an overlapping bridge VLAN entry * with the same VID, so here we only need to check that if we are * adding a bridge VLAN entry there is not an overlapping VLAN device diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c new file mode 100644 index 000000000000..8ae48c7e1e76 --- /dev/null +++ b/net/dsa/tag_8021q.c @@ -0,0 +1,222 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com> + * + * This module is not a complete tagger implementation. It only provides + * primitives for taggers that rely on 802.1Q VLAN tags to use. The + * dsa_8021q_netdev_ops is registered for API compliance and not used + * directly by callers. + */ +#include <linux/if_bridge.h> +#include <linux/if_vlan.h> + +#include "dsa_priv.h" + +/* Allocating two VLAN tags per port - one for the RX VID and + * the other for the TX VID - see below + */ +#define DSA_8021Q_VID_RANGE (DSA_MAX_SWITCHES * DSA_MAX_PORTS) +#define DSA_8021Q_VID_BASE (VLAN_N_VID - 2 * DSA_8021Q_VID_RANGE - 1) +#define DSA_8021Q_RX_VID_BASE (DSA_8021Q_VID_BASE) +#define DSA_8021Q_TX_VID_BASE (DSA_8021Q_VID_BASE + DSA_8021Q_VID_RANGE) + +/* Returns the VID to be inserted into the frame from xmit for switch steering + * instructions on egress. Encodes switch ID and port ID. + */ +u16 dsa_8021q_tx_vid(struct dsa_switch *ds, int port) +{ + return DSA_8021Q_TX_VID_BASE + (DSA_MAX_PORTS * ds->index) + port; +} +EXPORT_SYMBOL_GPL(dsa_8021q_tx_vid); + +/* Returns the VID that will be installed as pvid for this switch port, sent as + * tagged egress towards the CPU port and decoded by the rcv function. + */ +u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port) +{ + return DSA_8021Q_RX_VID_BASE + (DSA_MAX_PORTS * ds->index) + port; +} +EXPORT_SYMBOL_GPL(dsa_8021q_rx_vid); + +/* Returns the decoded switch ID from the RX VID. */ +int dsa_8021q_rx_switch_id(u16 vid) +{ + return ((vid - DSA_8021Q_RX_VID_BASE) / DSA_MAX_PORTS); +} +EXPORT_SYMBOL_GPL(dsa_8021q_rx_switch_id); + +/* Returns the decoded port ID from the RX VID. */ +int dsa_8021q_rx_source_port(u16 vid) +{ + return ((vid - DSA_8021Q_RX_VID_BASE) % DSA_MAX_PORTS); +} +EXPORT_SYMBOL_GPL(dsa_8021q_rx_source_port); + +/* RX VLAN tagging (left) and TX VLAN tagging (right) setup shown for a single + * front-panel switch port (here swp0). + * + * Port identification through VLAN (802.1Q) tags has different requirements + * for it to work effectively: + * - On RX (ingress from network): each front-panel port must have a pvid + * that uniquely identifies it, and the egress of this pvid must be tagged + * towards the CPU port, so that software can recover the source port based + * on the VID in the frame. 
But this would only work for standalone ports; + * if bridged, this VLAN setup would break autonomous forwarding and would + * force all switched traffic to pass through the CPU. So we must also make + * the other front-panel ports members of this VID we're adding, albeit + * we're not making it their PVID (they'll still have their own). + * By the way - just because we're installing the same VID in multiple + * switch ports doesn't mean that they'll start to talk to one another, even + * while not bridged: the final forwarding decision is still an AND between + * the L2 forwarding information (which is limiting forwarding in this case) + * and the VLAN-based restrictions (of which there are none in this case, + * since all ports are members). + * - On TX (ingress from CPU and towards network) we are faced with a problem. + * If we were to tag traffic (from within DSA) with the port's pvid, all + * would be well, assuming the switch ports were standalone. Frames would + * have no choice but to be directed towards the correct front-panel port. + * But because we also want the RX VLAN to not break bridging, then + * inevitably that means that we have to give them a choice (of what + * front-panel port to go out on), and therefore we cannot steer traffic + * based on the RX VID. So what we do is simply install one more VID on the + * front-panel and CPU ports, and profit off of the fact that steering will + * work just by virtue of the fact that there is only one other port that's + * a member of the VID we're tagging the traffic with - the desired one. + * + * So at the end, each front-panel port will have one RX VID (also the PVID), + * the RX VID of all other front-panel ports, and one TX VID. Whereas the CPU + * port will have the RX and TX VIDs of all front-panel ports, and on top of + * that, is also tagged-input and tagged-output (VLAN trunk). + * + * CPU port CPU port + * +-------------+-----+-------------+ +-------------+-----+-------------+ + * | RX VID | | | | TX VID | | | + * | of swp0 | | | | of swp0 | | | + * | +-----+ | | +-----+ | + * | ^ T | | | Tagged | + * | | | | | ingress | + * | +-------+---+---+-------+ | | +-----------+ | + * | | | | | | | | Untagged | + * | | U v U v U v | | v egress | + * | +-----+ +-----+ +-----+ +-----+ | | +-----+ +-----+ +-----+ +-----+ | + * | | | | | | | | | | | | | | | | | | | | + * | |PVID | | | | | | | | | | | | | | | | | | + * +-+-----+-+-----+-+-----+-+-----+-+ +-+-----+-+-----+-+-----+-+-----+-+ + * swp0 swp1 swp2 swp3 swp0 swp1 swp2 swp3 + */ +int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int port, bool enabled) +{ + int upstream = dsa_upstream_port(ds, port); + struct dsa_port *dp = &ds->ports[port]; + struct dsa_port *upstream_dp = &ds->ports[upstream]; + u16 rx_vid = dsa_8021q_rx_vid(ds, port); + u16 tx_vid = dsa_8021q_tx_vid(ds, port); + int i, err; + + /* The CPU port is implicitly configured by + * configuring the front-panel ports + */ + if (!dsa_is_user_port(ds, port)) + return 0; + + /* Add this user port's RX VID to the membership list of all others + * (including itself). This is so that bridging will not be hindered. + * L2 forwarding rules still take precedence when there are no VLAN + * restrictions, so there are no concerns about leaking traffic. + */ + for (i = 0; i < ds->num_ports; i++) { + struct dsa_port *other_dp = &ds->ports[i]; + u16 flags; + + if (i == upstream) + /* CPU port needs to see this port's RX VID + * as tagged egress. 
+ */ + flags = 0; + else if (i == port) + /* The RX VID is pvid on this port */ + flags = BRIDGE_VLAN_INFO_UNTAGGED | + BRIDGE_VLAN_INFO_PVID; + else + /* The RX VID is a regular VLAN on all others */ + flags = BRIDGE_VLAN_INFO_UNTAGGED; + + if (enabled) + err = dsa_port_vid_add(other_dp, rx_vid, flags); + else + err = dsa_port_vid_del(other_dp, rx_vid); + if (err) { + dev_err(ds->dev, "Failed to apply RX VID %d to port %d: %d\n", + rx_vid, port, err); + return err; + } + } + /* Finally apply the TX VID on this port and on the CPU port */ + if (enabled) + err = dsa_port_vid_add(dp, tx_vid, BRIDGE_VLAN_INFO_UNTAGGED); + else + err = dsa_port_vid_del(dp, tx_vid); + if (err) { + dev_err(ds->dev, "Failed to apply TX VID %d on port %d: %d\n", + tx_vid, port, err); + return err; + } + if (enabled) + err = dsa_port_vid_add(upstream_dp, tx_vid, 0); + else + err = dsa_port_vid_del(upstream_dp, tx_vid); + if (err) { + dev_err(ds->dev, "Failed to apply TX VID %d on port %d: %d\n", + tx_vid, upstream, err); + return err; + } + + return 0; +} +EXPORT_SYMBOL_GPL(dsa_port_setup_8021q_tagging); + +struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, + u16 tpid, u16 tci) +{ + /* skb->data points at skb_mac_header, which + * is fine for vlan_insert_tag. + */ + return vlan_insert_tag(skb, htons(tpid), tci); +} +EXPORT_SYMBOL_GPL(dsa_8021q_xmit); + +struct sk_buff *dsa_8021q_rcv(struct sk_buff *skb, struct net_device *netdev, + struct packet_type *pt, u16 *tpid, u16 *tci) +{ + struct vlan_ethhdr *tag; + + if (unlikely(!pskb_may_pull(skb, VLAN_HLEN))) + return NULL; + + tag = vlan_eth_hdr(skb); + *tpid = ntohs(tag->h_vlan_proto); + *tci = ntohs(tag->h_vlan_TCI); + + /* skb->data points in the middle of the VLAN tag, + * after tpid and before tci. This is because so far, + * ETH_HLEN (DMAC, SMAC, EtherType) bytes were pulled. + * There are 2 bytes of VLAN tag left in skb->data, and upper + * layers expect the 'real' EtherType to be consumed as well. + * Coincidentally, a VLAN header is also of the same size as + * the number of bytes that need to be pulled. + */ + skb_pull_rcsum(skb, VLAN_HLEN); + + return skb; +} +EXPORT_SYMBOL_GPL(dsa_8021q_rcv); + +static const struct dsa_device_ops dsa_8021q_netdev_ops = { + .name = "8021q", + .proto = DSA_TAG_PROTO_8021Q, + .overhead = VLAN_HLEN, +}; + +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_8021Q); + +module_dsa_tag_driver(dsa_8021q_netdev_ops); diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index 4aa1d368a5ae..d52db5f2c721 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Broadcom tag support * * Copyright (C) 2014 Broadcom Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
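To make the VID map above concrete (assuming the tree-wide constants DSA_MAX_SWITCHES = 4 and DSA_MAX_PORTS = 12, hence DSA_8021Q_VID_RANGE = 48): DSA_8021Q_VID_BASE = 4096 - 2 * 48 - 1 = 3999, so RX VIDs occupy 3999-4046 and TX VIDs 4047-4094, leaving the reserved VID 4095 untouched. For ds->index = 1, port = 2 this yields rx_vid = 3999 + 12 * 1 + 2 = 4013 and tx_vid = 4047 + 12 * 1 + 2 = 4061, and the receive path recovers switch (4013 - 3999) / 12 = 1 and port (4013 - 3999) % 12 = 2.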
*/ #include <linux/etherdevice.h> @@ -59,6 +55,9 @@ #define BRCM_EG_TC_MASK 0x7 #define BRCM_EG_PID_MASK 0x1f +#if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM) || \ + IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_PREPEND) + static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb, struct net_device *dev, unsigned int offset) @@ -143,8 +142,9 @@ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb, return skb; } +#endif -#ifdef CONFIG_NET_DSA_TAG_BRCM +#if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM) static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -171,14 +171,19 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, return nskb; } -const struct dsa_device_ops brcm_netdev_ops = { +static const struct dsa_device_ops brcm_netdev_ops = { + .name = "brcm", + .proto = DSA_TAG_PROTO_BRCM, .xmit = brcm_tag_xmit, .rcv = brcm_tag_rcv, .overhead = BRCM_TAG_LEN, }; + +DSA_TAG_DRIVER(brcm_netdev_ops); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_BRCM); #endif -#ifdef CONFIG_NET_DSA_TAG_BRCM_PREPEND +#if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_PREPEND) static struct sk_buff *brcm_tag_xmit_prepend(struct sk_buff *skb, struct net_device *dev) { @@ -194,9 +199,27 @@ static struct sk_buff *brcm_tag_rcv_prepend(struct sk_buff *skb, return brcm_tag_rcv_ll(skb, dev, pt, ETH_HLEN); } -const struct dsa_device_ops brcm_prepend_netdev_ops = { +static const struct dsa_device_ops brcm_prepend_netdev_ops = { + .name = "brcm-prepend", + .proto = DSA_TAG_PROTO_BRCM_PREPEND, .xmit = brcm_tag_xmit_prepend, .rcv = brcm_tag_rcv_prepend, .overhead = BRCM_TAG_LEN, }; #endif + +DSA_TAG_DRIVER(brcm_prepend_netdev_ops); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_BRCM_PREPEND); + +static struct dsa_tag_driver *dsa_tag_driver_array[] = { +#if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM) + &DSA_TAG_DRIVER_NAME(brcm_netdev_ops), +#endif +#if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_PREPEND) + &DSA_TAG_DRIVER_NAME(brcm_prepend_netdev_ops), +#endif +}; + +module_dsa_tag_drivers(dsa_tag_driver_array); + +MODULE_LICENSE("GPL"); diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index 67ff3fae18d8..7ddec9794477 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * net/dsa/tag_dsa.c - (Non-ethertype) DSA tagging * Copyright (c) 2008-2009 Marvell Semiconductor - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
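The conversion being applied to each tagger in this series is mechanical: the ops structure goes static, gains .name and .proto, and is registered through the new module macros so the tag driver can be built and loaded as a module. As a sketch, a minimal single-protocol tagger now ends like this (names hypothetical; DSA_TAG_PROTO_FOO assumed to exist in the dsa_tag_protocol enum):

static const struct dsa_device_ops foo_netdev_ops = {
	.name		= "foo",
	.proto		= DSA_TAG_PROTO_FOO,
	.xmit		= foo_tag_xmit,
	.rcv		= foo_tag_rcv,
	.overhead	= FOO_TAG_LEN,
};

MODULE_LICENSE("GPL");
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_FOO);

module_dsa_tag_driver(foo_netdev_ops);

Files that host more than one protocol (tag_brcm.c above, tag_ksz.c below) instead wrap each ops in DSA_TAG_DRIVER() and hand the resulting array to module_dsa_tag_drivers().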
*/ #include <linux/etherdevice.h> @@ -154,9 +150,16 @@ static int dsa_tag_flow_dissect(const struct sk_buff *skb, __be16 *proto, return 0; } -const struct dsa_device_ops dsa_netdev_ops = { +static const struct dsa_device_ops dsa_netdev_ops = { + .name = "dsa", + .proto = DSA_TAG_PROTO_DSA, .xmit = dsa_xmit, .rcv = dsa_rcv, .flow_dissect = dsa_tag_flow_dissect, .overhead = DSA_HLEN, }; + +MODULE_LICENSE("GPL"); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_DSA); + +module_dsa_tag_driver(dsa_netdev_ops); diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c index 234585ec116e..e8eaa804ccb9 100644 --- a/net/dsa/tag_edsa.c +++ b/net/dsa/tag_edsa.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * net/dsa/tag_edsa.c - Ethertype DSA tagging * Copyright (c) 2008-2009 Marvell Semiconductor - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #include <linux/etherdevice.h> @@ -173,9 +169,16 @@ static int edsa_tag_flow_dissect(const struct sk_buff *skb, __be16 *proto, return 0; } -const struct dsa_device_ops edsa_netdev_ops = { +static const struct dsa_device_ops edsa_netdev_ops = { + .name = "edsa", + .proto = DSA_TAG_PROTO_EDSA, .xmit = edsa_xmit, .rcv = edsa_rcv, .flow_dissect = edsa_tag_flow_dissect, .overhead = EDSA_HLEN, }; + +MODULE_LICENSE("GPL"); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_EDSA); + +module_dsa_tag_driver(edsa_netdev_ops); diff --git a/net/dsa/tag_gswip.c b/net/dsa/tag_gswip.c index cb6f82ffe5eb..b678160bbd66 100644 --- a/net/dsa/tag_gswip.c +++ b/net/dsa/tag_gswip.c @@ -103,8 +103,15 @@ static struct sk_buff *gswip_tag_rcv(struct sk_buff *skb, return skb; } -const struct dsa_device_ops gswip_netdev_ops = { +static const struct dsa_device_ops gswip_netdev_ops = { + .name = "gwsip", + .proto = DSA_TAG_PROTO_GSWIP, .xmit = gswip_tag_xmit, .rcv = gswip_tag_rcv, .overhead = GSWIP_RX_HEADER_LEN, }; + +MODULE_LICENSE("GPL"); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_GSWIP); + +module_dsa_tag_driver(gswip_netdev_ops); diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index de246c93d3bb..b4872b87d4a6 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * net/dsa/tag_ksz.c - Microchip KSZ Switch tag format handling * Copyright (c) 2017 Microchip Technology - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
*/ #include <linux/etherdevice.h> @@ -137,12 +133,17 @@ static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev, return ksz_common_rcv(skb, dev, port, len); } -const struct dsa_device_ops ksz9477_netdev_ops = { +static const struct dsa_device_ops ksz9477_netdev_ops = { + .name = "ksz9477", + .proto = DSA_TAG_PROTO_KSZ9477, .xmit = ksz9477_xmit, .rcv = ksz9477_rcv, .overhead = KSZ9477_INGRESS_TAG_LEN, }; +DSA_TAG_DRIVER(ksz9477_netdev_ops); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ9477); + #define KSZ9893_TAIL_TAG_OVERRIDE BIT(5) #define KSZ9893_TAIL_TAG_LOOKUP BIT(6) @@ -170,8 +171,22 @@ static struct sk_buff *ksz9893_xmit(struct sk_buff *skb, return nskb; } -const struct dsa_device_ops ksz9893_netdev_ops = { +static const struct dsa_device_ops ksz9893_netdev_ops = { + .name = "ksz9893", + .proto = DSA_TAG_PROTO_KSZ9893, .xmit = ksz9893_xmit, .rcv = ksz9477_rcv, .overhead = KSZ_INGRESS_TAG_LEN, }; + +DSA_TAG_DRIVER(ksz9893_netdev_ops); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ9893); + +static struct dsa_tag_driver *dsa_tag_driver_array[] = { + &DSA_TAG_DRIVER_NAME(ksz9477_netdev_ops), + &DSA_TAG_DRIVER_NAME(ksz9893_netdev_ops), +}; + +module_dsa_tag_drivers(dsa_tag_driver_array); + +MODULE_LICENSE("GPL"); diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c index f48889e46ff7..eb0e7a32e53d 100644 --- a/net/dsa/tag_lan9303.c +++ b/net/dsa/tag_lan9303.c @@ -1,15 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2017 Pengutronix, Juergen Borleis <jbe@pengutronix.de> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * */ #include <linux/dsa/lan9303.h> #include <linux/etherdevice.h> @@ -137,8 +128,15 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, return skb; } -const struct dsa_device_ops lan9303_netdev_ops = { +static const struct dsa_device_ops lan9303_netdev_ops = { + .name = "lan9303", + .proto = DSA_TAG_PROTO_LAN9303, .xmit = lan9303_xmit, .rcv = lan9303_rcv, .overhead = LAN9303_TAG_LEN, }; + +MODULE_LICENSE("GPL"); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_LAN9303); + +module_dsa_tag_driver(lan9303_netdev_ops); diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c index f39f4dfeda34..b5705cba8318 100644 --- a/net/dsa/tag_mtk.c +++ b/net/dsa/tag_mtk.c @@ -1,15 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Mediatek DSA Tag support * Copyright (C) 2017 Landen Chao <landen.chao@mediatek.com> * Sean Wang <sean.wang@mediatek.com> - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 and - * only version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
*/ #include <linux/etherdevice.h> @@ -105,9 +98,16 @@ static int mtk_tag_flow_dissect(const struct sk_buff *skb, __be16 *proto, return 0; } -const struct dsa_device_ops mtk_netdev_ops = { +static const struct dsa_device_ops mtk_netdev_ops = { + .name = "mtk", + .proto = DSA_TAG_PROTO_MTK, .xmit = mtk_tag_xmit, .rcv = mtk_tag_rcv, .flow_dissect = mtk_tag_flow_dissect, .overhead = MTK_HDR_LEN, }; + +MODULE_LICENSE("GPL"); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_MTK); + +module_dsa_tag_driver(mtk_netdev_ops); diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index 85c22ada4744..c95885215525 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -1,14 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2015, The Linux Foundation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 and - * only version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #include <linux/etherdevice.h> @@ -107,9 +99,16 @@ static int qca_tag_flow_dissect(const struct sk_buff *skb, __be16 *proto, return 0; } -const struct dsa_device_ops qca_netdev_ops = { +static const struct dsa_device_ops qca_netdev_ops = { + .name = "qca", + .proto = DSA_TAG_PROTO_QCA, .xmit = qca_tag_xmit, .rcv = qca_tag_rcv, .flow_dissect = qca_tag_flow_dissect, .overhead = QCA_HDR_LEN, }; + +MODULE_LICENSE("GPL"); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_QCA); + +module_dsa_tag_driver(qca_netdev_ops); diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c new file mode 100644 index 000000000000..969402c7dbf1 --- /dev/null +++ b/net/dsa/tag_sja1105.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com> + */ +#include <linux/if_vlan.h> +#include <linux/dsa/sja1105.h> +#include <linux/dsa/8021q.h> +#include <linux/packing.h> +#include "dsa_priv.h" + +/* Similar to is_link_local_ether_addr(hdr->h_dest) but also covers PTP */ +static inline bool sja1105_is_link_local(const struct sk_buff *skb) +{ + const struct ethhdr *hdr = eth_hdr(skb); + u64 dmac = ether_addr_to_u64(hdr->h_dest); + + if ((dmac & SJA1105_LINKLOCAL_FILTER_A_MASK) == + SJA1105_LINKLOCAL_FILTER_A) + return true; + if ((dmac & SJA1105_LINKLOCAL_FILTER_B_MASK) == + SJA1105_LINKLOCAL_FILTER_B) + return true; + return false; +} + +/* This is the first time the tagger sees the frame on RX. + * Figure out if we can decode it, and if we can, annotate skb->cb with how we + * plan to do that, so we don't need to check again in the rcv function. 
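sja1105_is_link_local() folds the destination MAC into a u64 with ether_addr_to_u64() and compares it under two OUI-wide masks. Assuming the filter constants in include/linux/dsa/sja1105.h carry the standard values, filter A matches the IEEE 802.1 link-local range 01:80:C2:xx:xx:xx (STP, LLDP and friends) and filter B the PTP-over-Ethernet range 01:1B:19:xx:xx:xx, which is what the "also covers PTP" remark refers to.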
+ */ +static bool sja1105_filter(const struct sk_buff *skb, struct net_device *dev) +{ + if (sja1105_is_link_local(skb)) { + SJA1105_SKB_CB(skb)->type = SJA1105_FRAME_TYPE_LINK_LOCAL; + return true; + } + if (!dsa_port_is_vlan_filtering(dev->dsa_ptr)) { + SJA1105_SKB_CB(skb)->type = SJA1105_FRAME_TYPE_NORMAL; + return true; + } + return false; +} + +static struct sk_buff *sja1105_xmit(struct sk_buff *skb, + struct net_device *netdev) +{ + struct dsa_port *dp = dsa_slave_to_port(netdev); + struct dsa_switch *ds = dp->ds; + u16 tx_vid = dsa_8021q_tx_vid(ds, dp->index); + u8 pcp = skb->priority; + + /* Transmitting management traffic does not rely upon switch tagging, + * but instead SPI-installed management routes. Part 2 of this + * is the .port_deferred_xmit driver callback. + */ + if (unlikely(sja1105_is_link_local(skb))) + return dsa_defer_xmit(skb, netdev); + + /* If we are under a vlan_filtering bridge, IP termination on + * switch ports based on 802.1Q tags is simply too brittle to + * be passable. So just defer to the dsa_slave_notag_xmit + * implementation. + */ + if (dsa_port_is_vlan_filtering(dp)) + return skb; + + return dsa_8021q_xmit(skb, netdev, ETH_P_SJA1105, + ((pcp << VLAN_PRIO_SHIFT) | tx_vid)); +} + +static struct sk_buff *sja1105_rcv(struct sk_buff *skb, + struct net_device *netdev, + struct packet_type *pt) +{ + struct ethhdr *hdr = eth_hdr(skb); + u64 source_port, switch_id; + struct sk_buff *nskb; + u16 tpid, vid, tci; + bool is_tagged; + + nskb = dsa_8021q_rcv(skb, netdev, pt, &tpid, &tci); + is_tagged = (nskb && tpid == ETH_P_SJA1105); + + skb->priority = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; + vid = tci & VLAN_VID_MASK; + + skb->offload_fwd_mark = 1; + + if (SJA1105_SKB_CB(skb)->type == SJA1105_FRAME_TYPE_LINK_LOCAL) { + /* Management traffic path. Switch embeds the switch ID and + * port ID into bytes of the destination MAC, courtesy of + * the incl_srcpt options. + */ + source_port = hdr->h_dest[3]; + switch_id = hdr->h_dest[4]; + /* Clear the DMAC bytes that were mangled by the switch */ + hdr->h_dest[3] = 0; + hdr->h_dest[4] = 0; + } else { + /* Normal traffic path. */ + source_port = dsa_8021q_rx_source_port(vid); + switch_id = dsa_8021q_rx_switch_id(vid); + } + + skb->dev = dsa_master_find_slave(netdev, switch_id, source_port); + if (!skb->dev) { + netdev_warn(netdev, "Couldn't decode source port\n"); + return NULL; + } + + /* Delete/overwrite fake VLAN header, DSA expects to not find + * it there, see dsa_switch_rcv: skb_push(skb, ETH_HLEN). 
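When the frame arrived VLAN-tagged, dsa_8021q_rcv() has already consumed the TCI and the real EtherType, so the memmove below slides the leading header bytes forward by VLAN_HLEN, overwriting the tag in place; once dsa_switch_rcv() re-pushes ETH_HLEN, eth_type_trans() on the slave parses the header as if no tag had ever been present.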
+ */ + if (is_tagged) + memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - VLAN_HLEN, + ETH_HLEN - VLAN_HLEN); + + return skb; +} + +static struct dsa_device_ops sja1105_netdev_ops = { + .name = "sja1105", + .proto = DSA_TAG_PROTO_SJA1105, + .xmit = sja1105_xmit, + .rcv = sja1105_rcv, + .filter = sja1105_filter, + .overhead = VLAN_HLEN, +}; + +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_SJA1105); + +module_dsa_tag_driver(sja1105_netdev_ops); diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index b40756ed6e57..4f8ab62f0208 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * net/dsa/tag_trailer.c - Trailer tag format handling * Copyright (c) 2008-2009 Marvell Semiconductor - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #include <linux/etherdevice.h> @@ -81,8 +77,15 @@ static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev, return skb; } -const struct dsa_device_ops trailer_netdev_ops = { +static const struct dsa_device_ops trailer_netdev_ops = { + .name = "trailer", + .proto = DSA_TAG_PROTO_TRAILER, .xmit = trailer_xmit, .rcv = trailer_rcv, .overhead = 4, }; + +MODULE_LICENSE("GPL"); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_TRAILER); + +module_dsa_tag_driver(trailer_netdev_ops); diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index f7a3d7a171c7..4b2b222377ac 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -119,13 +119,14 @@ EXPORT_SYMBOL(eth_header); /** * eth_get_headlen - determine the length of header for an ethernet frame + * @dev: pointer to network device * @data: pointer to start of frame * @len: total length of frame * * Make a best effort attempt to pull the length for all of the headers for * a given frame in a linear buffer. */ -u32 eth_get_headlen(void *data, unsigned int len) +u32 eth_get_headlen(const struct net_device *dev, void *data, unsigned int len) { const unsigned int flags = FLOW_DISSECTOR_F_PARSE_1ST_FRAG; const struct ethhdr *eth = (const struct ethhdr *)data; @@ -136,8 +137,9 @@ u32 eth_get_headlen(void *data, unsigned int len) return len; /* parse any remaining L2/L3 headers, check for L4 */ - if (!skb_flow_dissect_flow_keys_basic(NULL, &keys, data, eth->h_proto, - sizeof(*eth), len, flags)) + if (!skb_flow_dissect_flow_keys_basic(dev_net(dev), NULL, &keys, data, + eth->h_proto, sizeof(*eth), + len, flags)) return max_t(u32, keys.control.thoff, sizeof(*eth)); /* parse for any L4 headers */ @@ -183,8 +185,12 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) * at all, so we check here whether one of those tagging * variants has been configured on the receiving interface, * and if so, set skb->protocol without looking at the packet. + * The DSA tagging protocol may be able to decode some but not all + * traffic (for example only for management). In that case give it the + * option to filter the packets from which it can decode source port + * information. 
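dsa_can_decode(), used in the hunk below, is the consumer of the new .filter hook: a tagger that can only decode a subset of traffic (sja1105 above) now gets to veto the ETH_P_XDSA diversion per skb, letting everything else flow up the master's regular receive path. From the rest of this series, the helper amounts to roughly the following (a sketch, not verbatim):

static inline bool dsa_can_decode(const struct sk_buff *skb,
				  struct net_device *dev)
{
#if IS_ENABLED(CONFIG_NET_DSA)
	/* No filter hook registered means the tagger decodes everything */
	return !dev->dsa_ptr->filter || dev->dsa_ptr->filter(skb, dev);
#endif
	return false;
}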
*/ - if (unlikely(netdev_uses_dsa(dev))) + if (unlikely(netdev_uses_dsa(dev)) && dsa_can_decode(skb, dev)) return htons(ETH_P_XDSA); if (likely(eth_proto_is_802_3(eth->h_proto))) @@ -554,7 +560,7 @@ int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr) addr = NULL; if (dp) addr = of_get_mac_address(dp); - if (!addr) + if (IS_ERR_OR_NULL(addr)) addr = arch_get_platform_mac_address(); if (!addr) diff --git a/net/hsr/Makefile b/net/hsr/Makefile index 9ae972a820f4..e45757fc477f 100644 --- a/net/hsr/Makefile +++ b/net/hsr/Makefile @@ -6,3 +6,4 @@ obj-$(CONFIG_HSR) += hsr.o hsr-y := hsr_main.o hsr_framereg.o hsr_device.o \ hsr_netlink.o hsr_slave.o hsr_forward.o +hsr-$(CONFIG_DEBUG_FS) += hsr_debugfs.o diff --git a/net/hsr/hsr_debugfs.c b/net/hsr/hsr_debugfs.c new file mode 100644 index 000000000000..94447974a3c0 --- /dev/null +++ b/net/hsr/hsr_debugfs.c @@ -0,0 +1,119 @@ +/* + * hsr_debugfs code + * Copyright (C) 2019 Texas Instruments Incorporated + * + * Author(s): + * Murali Karicheri <m-karicheri2@ti.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation version 2. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/debugfs.h> +#include "hsr_main.h" +#include "hsr_framereg.h" + +static void print_mac_address(struct seq_file *sfp, unsigned char *mac) +{ + seq_printf(sfp, "%02x:%02x:%02x:%02x:%02x:%02x:", + mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); +} + +/* hsr_node_table_show - Formats and prints node_table entries */ +static int +hsr_node_table_show(struct seq_file *sfp, void *data) +{ + struct hsr_priv *priv = (struct hsr_priv *)sfp->private; + struct hsr_node *node; + + seq_puts(sfp, "Node Table entries\n"); + seq_puts(sfp, "MAC-Address-A, MAC-Address-B, time_in[A], "); + seq_puts(sfp, "time_in[B], Address-B port\n"); + rcu_read_lock(); + list_for_each_entry_rcu(node, &priv->node_db, mac_list) { + /* skip self node */ + if (hsr_addr_is_self(priv, node->macaddress_A)) + continue; + print_mac_address(sfp, &node->macaddress_A[0]); + seq_puts(sfp, " "); + print_mac_address(sfp, &node->macaddress_B[0]); + seq_printf(sfp, "0x%lx, ", node->time_in[HSR_PT_SLAVE_A]); + seq_printf(sfp, "0x%lx ", node->time_in[HSR_PT_SLAVE_B]); + seq_printf(sfp, "0x%x\n", node->addr_B_port); + } + rcu_read_unlock(); + return 0; +} + +/* hsr_node_table_open - Open the node_table file + * + * Description: + * This routine opens a debugfs file node_table of specific hsr device + */ +static int +hsr_node_table_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, hsr_node_table_show, inode->i_private); +} + +static const struct file_operations hsr_fops = { + .owner = THIS_MODULE, + .open = hsr_node_table_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/* hsr_debugfs_init - create hsr node_table file for dumping + * the node table + * + * Description: + * When debugfs is configured this routine sets up the node_table file per + * hsr device for dumping the node_table entries + */ +int hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev) +{ + int rc = -1; + struct dentry *de = NULL; + + de = 
debugfs_create_dir(hsr_dev->name, NULL); + if (!de) { + pr_err("Cannot create hsr debugfs root\n"); + return rc; + } + + priv->node_tbl_root = de; + + de = debugfs_create_file("node_table", S_IFREG | 0444, + priv->node_tbl_root, priv, + &hsr_fops); + if (!de) { + pr_err("Cannot create hsr node_table directory\n"); + return rc; + } + priv->node_tbl_file = de; + + return 0; +} + +/* hsr_debugfs_term - Tear down debugfs intrastructure + * + * Description: + * When Debufs is configured this routine removes debugfs file system + * elements that are specific to hsr + */ +void +hsr_debugfs_term(struct hsr_priv *priv) +{ + debugfs_remove(priv->node_tbl_file); + priv->node_tbl_file = NULL; + debugfs_remove(priv->node_tbl_root); + priv->node_tbl_root = NULL; +} diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index a97bf326b231..15c72065df79 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -1,10 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright 2011-2014 Autronica Fire and Security AS * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * * Author(s): * 2011-2014 Arvid Brodin, arvid.brodin@alten.se * @@ -23,7 +19,6 @@ #include "hsr_main.h" #include "hsr_forward.h" - static bool is_admin_up(struct net_device *dev) { return dev && (dev->flags & IFF_UP); @@ -68,7 +63,7 @@ static bool hsr_check_carrier(struct hsr_port *master) rcu_read_lock(); hsr_for_each_port(master->hsr, port) - if ((port->type != HSR_PT_MASTER) && is_slave_up(port->dev)) { + if (port->type != HSR_PT_MASTER && is_slave_up(port->dev)) { has_carrier = true; break; } @@ -82,7 +77,6 @@ static bool hsr_check_carrier(struct hsr_port *master) return has_carrier; } - static void hsr_check_announce(struct net_device *hsr_dev, unsigned char old_operstate) { @@ -90,15 +84,14 @@ static void hsr_check_announce(struct net_device *hsr_dev, hsr = netdev_priv(hsr_dev); - if ((hsr_dev->operstate == IF_OPER_UP) - && (old_operstate != IF_OPER_UP)) { + if (hsr_dev->operstate == IF_OPER_UP && old_operstate != IF_OPER_UP) { /* Went up */ hsr->announce_count = 0; mod_timer(&hsr->announce_timer, jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL)); } - if ((hsr_dev->operstate != IF_OPER_UP) && (old_operstate == IF_OPER_UP)) + if (hsr_dev->operstate != IF_OPER_UP && old_operstate == IF_OPER_UP) /* Went down */ del_timer(&hsr->announce_timer); } @@ -136,7 +129,6 @@ int hsr_get_max_mtu(struct hsr_priv *hsr) return mtu_max - HSR_HLEN; } - static int hsr_dev_change_mtu(struct net_device *dev, int new_mtu) { struct hsr_priv *hsr; @@ -191,14 +183,12 @@ static int hsr_dev_open(struct net_device *dev) return 0; } - static int hsr_dev_close(struct net_device *dev) { /* Nothing to do here. 
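With debugfs mounted, the file added above appears per HSR device, e.g. /sys/kernel/debug/hsr0/node_table for a hypothetical hsr0 master; each row dumps a peer node's MAC-Address-A and MAC-Address-B, the time_in[A]/time_in[B] jiffies stamps, and the slave port behind which address B was last seen.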
*/ return 0; } - static netdev_features_t hsr_features_recompute(struct hsr_priv *hsr, netdev_features_t features) { @@ -231,7 +221,6 @@ static netdev_features_t hsr_fix_features(struct net_device *dev, return hsr_features_recompute(hsr, features); } - static int hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev) { struct hsr_priv *hsr = netdev_priv(dev); @@ -244,14 +233,13 @@ static int hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } - static const struct header_ops hsr_header_ops = { .create = eth_header, .parse = eth_header_parse, }; static void send_hsr_supervision_frame(struct hsr_port *master, - u8 type, u8 hsrVer) + u8 type, u8 hsr_ver) { struct sk_buff *skb; int hlen, tlen; @@ -262,39 +250,38 @@ static void send_hsr_supervision_frame(struct hsr_port *master, hlen = LL_RESERVED_SPACE(master->dev); tlen = master->dev->needed_tailroom; - skb = dev_alloc_skb( - sizeof(struct hsr_tag) + - sizeof(struct hsr_sup_tag) + - sizeof(struct hsr_sup_payload) + hlen + tlen); + skb = dev_alloc_skb(sizeof(struct hsr_tag) + + sizeof(struct hsr_sup_tag) + + sizeof(struct hsr_sup_payload) + hlen + tlen); - if (skb == NULL) + if (!skb) return; skb_reserve(skb, hlen); skb->dev = master->dev; - skb->protocol = htons(hsrVer ? ETH_P_HSR : ETH_P_PRP); + skb->protocol = htons(hsr_ver ? ETH_P_HSR : ETH_P_PRP); skb->priority = TC_PRIO_CONTROL; - if (dev_hard_header(skb, skb->dev, (hsrVer ? ETH_P_HSR : ETH_P_PRP), + if (dev_hard_header(skb, skb->dev, (hsr_ver ? ETH_P_HSR : ETH_P_PRP), master->hsr->sup_multicast_addr, skb->dev->dev_addr, skb->len) <= 0) goto out; skb_reset_mac_header(skb); - if (hsrVer > 0) { + if (hsr_ver > 0) { hsr_tag = skb_put(skb, sizeof(struct hsr_tag)); hsr_tag->encap_proto = htons(ETH_P_PRP); set_hsr_tag_LSDU_size(hsr_tag, HSR_V1_SUP_LSDUSIZE); } hsr_stag = skb_put(skb, sizeof(struct hsr_sup_tag)); - set_hsr_stag_path(hsr_stag, (hsrVer ? 0x0 : 0xf)); - set_hsr_stag_HSR_Ver(hsr_stag, hsrVer); + set_hsr_stag_path(hsr_stag, (hsr_ver ? 0x0 : 0xf)); + set_hsr_stag_HSR_ver(hsr_stag, hsr_ver); /* From HSRv1 on we have separate supervision sequence numbers. */ spin_lock_irqsave(&master->hsr->seqnr_lock, irqflags); - if (hsrVer > 0) { + if (hsr_ver > 0) { hsr_stag->sequence_nr = htons(master->hsr->sup_sequence_nr); hsr_tag->sequence_nr = htons(master->hsr->sequence_nr); master->hsr->sup_sequence_nr++; @@ -305,13 +292,14 @@ static void send_hsr_supervision_frame(struct hsr_port *master, } spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags); - hsr_stag->HSR_TLV_Type = type; + hsr_stag->HSR_TLV_type = type; /* TODO: Why 12 in HSRv0? */ - hsr_stag->HSR_TLV_Length = hsrVer ? sizeof(struct hsr_sup_payload) : 12; + hsr_stag->HSR_TLV_length = + hsr_ver ? 
sizeof(struct hsr_sup_payload) : 12; /* Payload: MacAddressA */ hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload)); - ether_addr_copy(hsr_sp->MacAddressA, master->dev->dev_addr); + ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr); if (skb_put_padto(skb, ETH_ZLEN + HSR_HLEN)) return; @@ -324,7 +312,6 @@ out: kfree_skb(skb); } - /* Announce (supervision frame) timer function */ static void hsr_announce(struct timer_list *t) @@ -338,15 +325,15 @@ static void hsr_announce(struct timer_list *t) rcu_read_lock(); master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); - if (hsr->announce_count < 3 && hsr->protVersion == 0) { + if (hsr->announce_count < 3 && hsr->prot_version == 0) { send_hsr_supervision_frame(master, HSR_TLV_ANNOUNCE, - hsr->protVersion); + hsr->prot_version); hsr->announce_count++; interval = msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); } else { send_hsr_supervision_frame(master, HSR_TLV_LIFE_CHECK, - hsr->protVersion); + hsr->prot_version); interval = msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL); } @@ -357,7 +344,6 @@ static void hsr_announce(struct timer_list *t) rcu_read_unlock(); } - /* According to comments in the declaration of struct net_device, this function * is "Called from unregister, can be used to call free_netdev". Ok then... */ @@ -368,6 +354,8 @@ static void hsr_dev_destroy(struct net_device *hsr_dev) hsr = netdev_priv(hsr_dev); + hsr_debugfs_term(hsr); + rtnl_lock(); hsr_for_each_port(hsr, port) hsr_del_port(port); @@ -423,7 +411,6 @@ void hsr_dev_setup(struct net_device *dev) dev->features |= NETIF_F_NETNS_LOCAL; } - /* Return true if dev is a HSR master; return false otherwise. */ inline bool is_hsr_master(struct net_device *dev) @@ -467,7 +454,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], ether_addr_copy(hsr->sup_multicast_addr, def_multicast_addr); hsr->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec; - hsr->protVersion = protocol_version; + hsr->prot_version = protocol_version; /* FIXME: should I modify the value of these? * @@ -498,6 +485,9 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], goto fail; mod_timer(&hsr->prune_timer, jiffies + msecs_to_jiffies(PRUNE_PERIOD)); + res = hsr_debugfs_init(hsr, hsr_dev); + if (res) + goto fail; return 0; diff --git a/net/hsr/hsr_device.h b/net/hsr/hsr_device.h index 9975e31bbb82..6d7759c4f5f9 100644 --- a/net/hsr/hsr_device.h +++ b/net/hsr/hsr_device.h @@ -1,10 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright 2011-2014 Autronica Fire and Security AS * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * * Author(s): * 2011-2014 Arvid Brodin, arvid.brodin@alten.se */ diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 04b5450c5a55..ddd9605bad04 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -1,10 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright 2011-2014 Autronica Fire and Security AS * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. 
- * * Author(s): * 2011-2014 Arvid Brodin, arvid.brodin@alten.se */ @@ -17,7 +13,6 @@ #include "hsr_main.h" #include "hsr_framereg.h" - struct hsr_node; struct hsr_frame_info { @@ -32,7 +27,6 @@ struct hsr_frame_info { bool is_local_exclusive; }; - /* The uses I can see for these HSR supervision frames are: * 1) Use the frames that are sent after node initialization ("HSR_TLV.Type = * 22") to reset any sequence_nr counters belonging to that node. Useful if * @@ -50,46 +44,45 @@ */ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb) { - struct ethhdr *ethHdr; - struct hsr_sup_tag *hsrSupTag; - struct hsrv1_ethhdr_sp *hsrV1Hdr; + struct ethhdr *eth_hdr; + struct hsr_sup_tag *hsr_sup_tag; + struct hsrv1_ethhdr_sp *hsr_V1_hdr; WARN_ON_ONCE(!skb_mac_header_was_set(skb)); - ethHdr = (struct ethhdr *) skb_mac_header(skb); + eth_hdr = (struct ethhdr *)skb_mac_header(skb); /* Correct addr? */ - if (!ether_addr_equal(ethHdr->h_dest, + if (!ether_addr_equal(eth_hdr->h_dest, hsr->sup_multicast_addr)) return false; /* Correct ether type? */ - if (!(ethHdr->h_proto == htons(ETH_P_PRP) - || ethHdr->h_proto == htons(ETH_P_HSR))) + if (!(eth_hdr->h_proto == htons(ETH_P_PRP) || + eth_hdr->h_proto == htons(ETH_P_HSR))) return false; /* Get the supervision header from correct location. */ - if (ethHdr->h_proto == htons(ETH_P_HSR)) { /* Okay HSRv1. */ - hsrV1Hdr = (struct hsrv1_ethhdr_sp *) skb_mac_header(skb); - if (hsrV1Hdr->hsr.encap_proto != htons(ETH_P_PRP)) + if (eth_hdr->h_proto == htons(ETH_P_HSR)) { /* Okay HSRv1. */ + hsr_V1_hdr = (struct hsrv1_ethhdr_sp *)skb_mac_header(skb); + if (hsr_V1_hdr->hsr.encap_proto != htons(ETH_P_PRP)) return false; - hsrSupTag = &hsrV1Hdr->hsr_sup; + hsr_sup_tag = &hsr_V1_hdr->hsr_sup; } else { - hsrSupTag = &((struct hsrv0_ethhdr_sp *) skb_mac_header(skb))->hsr_sup; + hsr_sup_tag = + &((struct hsrv0_ethhdr_sp *)skb_mac_header(skb))->hsr_sup; } - if ((hsrSupTag->HSR_TLV_Type != HSR_TLV_ANNOUNCE) && - (hsrSupTag->HSR_TLV_Type != HSR_TLV_LIFE_CHECK)) + if (hsr_sup_tag->HSR_TLV_type != HSR_TLV_ANNOUNCE && + hsr_sup_tag->HSR_TLV_type != HSR_TLV_LIFE_CHECK) return false; - if ((hsrSupTag->HSR_TLV_Length != 12) && - (hsrSupTag->HSR_TLV_Length != - sizeof(struct hsr_sup_payload))) + if (hsr_sup_tag->HSR_TLV_length != 12 && + hsr_sup_tag->HSR_TLV_length != sizeof(struct hsr_sup_payload)) return false; return true; } - static struct sk_buff *create_stripped_skb(struct sk_buff *skb_in, struct hsr_frame_info *frame) { @@ -100,7 +93,7 @@ static struct sk_buff *create_stripped_skb(struct sk_buff *skb_in, skb_pull(skb_in, HSR_HLEN); skb = __pskb_copy(skb_in, skb_headroom(skb_in) - HSR_HLEN, GFP_ATOMIC); skb_push(skb_in, HSR_HLEN); - if (skb == NULL) + if (!skb) return NULL; skb_reset_mac_header(skb); @@ -108,7 +101,7 @@ static struct sk_buff *create_stripped_skb(struct sk_buff *skb_in, if (skb->ip_summed == CHECKSUM_PARTIAL) skb->csum_start -= HSR_HLEN; - copylen = 2*ETH_ALEN; + copylen = 2 * ETH_ALEN; if (frame->is_vlan) copylen += VLAN_HLEN; src = skb_mac_header(skb_in); @@ -127,9 +120,8 @@ static struct sk_buff *frame_get_stripped_skb(struct hsr_frame_info *frame, return skb_clone(frame->skb_std, GFP_ATOMIC); } - static void hsr_fill_tag(struct sk_buff *skb, struct hsr_frame_info *frame, - struct hsr_port *port, u8 protoVersion) + struct hsr_port *port, u8 proto_version) { struct hsr_ethhdr *hsr_ethhdr; int lane_id; @@ -144,13 +136,13 @@ static void hsr_fill_tag(struct sk_buff *skb, struct hsr_frame_info *frame, if (frame->is_vlan)
lsdu_size -= 4; - hsr_ethhdr = (struct hsr_ethhdr *) skb_mac_header(skb); + hsr_ethhdr = (struct hsr_ethhdr *)skb_mac_header(skb); set_hsr_tag_path(&hsr_ethhdr->hsr_tag, lane_id); set_hsr_tag_LSDU_size(&hsr_ethhdr->hsr_tag, lsdu_size); hsr_ethhdr->hsr_tag.sequence_nr = htons(frame->sequence_nr); hsr_ethhdr->hsr_tag.encap_proto = hsr_ethhdr->ethhdr.h_proto; - hsr_ethhdr->ethhdr.h_proto = htons(protoVersion ? + hsr_ethhdr->ethhdr.h_proto = htons(proto_version ? ETH_P_HSR : ETH_P_PRP); } @@ -164,7 +156,7 @@ static struct sk_buff *create_tagged_skb(struct sk_buff *skb_o, /* Create the new skb with enough headroom to fit the HSR tag */ skb = __pskb_copy(skb_o, skb_headroom(skb_o) + HSR_HLEN, GFP_ATOMIC); - if (skb == NULL) + if (!skb) return NULL; skb_reset_mac_header(skb); @@ -180,7 +172,7 @@ static struct sk_buff *create_tagged_skb(struct sk_buff *skb_o, memmove(dst, src, movelen); skb_reset_mac_header(skb); - hsr_fill_tag(skb, frame, port, port->hsr->protVersion); + hsr_fill_tag(skb, frame, port, port->hsr->prot_version); return skb; } @@ -194,7 +186,7 @@ static struct sk_buff *frame_get_tagged_skb(struct hsr_frame_info *frame, if (frame->skb_hsr) return skb_clone(frame->skb_hsr, GFP_ATOMIC); - if ((port->type != HSR_PT_SLAVE_A) && (port->type != HSR_PT_SLAVE_B)) { + if (port->type != HSR_PT_SLAVE_A && port->type != HSR_PT_SLAVE_B) { WARN_ONCE(1, "HSR: Bug: trying to create a tagged frame for a non-ring port"); return NULL; } @@ -202,7 +194,6 @@ static struct sk_buff *frame_get_tagged_skb(struct hsr_frame_info *frame, return create_tagged_skb(frame->skb_std, frame, port); } - static void hsr_deliver_master(struct sk_buff *skb, struct net_device *dev, struct hsr_node *node_src) { @@ -237,7 +228,6 @@ static int hsr_xmit(struct sk_buff *skb, struct hsr_port *port, return dev_queue_xmit(skb); } - /* Forward the frame through all devices except: * - Back through the receiving device * - If it's a HSR frame: through a device where it has passed before @@ -260,11 +250,11 @@ static void hsr_forward_do(struct hsr_frame_info *frame) continue; /* Don't deliver locally unless we should */ - if ((port->type == HSR_PT_MASTER) && !frame->is_local_dest) + if (port->type == HSR_PT_MASTER && !frame->is_local_dest) continue; /* Deliver frames directly addressed to us to master only */ - if ((port->type != HSR_PT_MASTER) && frame->is_local_exclusive) + if (port->type != HSR_PT_MASTER && frame->is_local_exclusive) continue; /* Don't send frame over port where it has been sent before */ @@ -272,7 +262,7 @@ static void hsr_forward_do(struct hsr_frame_info *frame) frame->sequence_nr)) continue; - if (frame->is_supervision && (port->type == HSR_PT_MASTER)) { + if (frame->is_supervision && port->type == HSR_PT_MASTER) { hsr_handle_sup_frame(frame->skb_hsr, frame->node_src, frame->port_rcv); @@ -283,7 +273,7 @@ static void hsr_forward_do(struct hsr_frame_info *frame) skb = frame_get_tagged_skb(frame, port); else skb = frame_get_stripped_skb(frame, port); - if (skb == NULL) { + if (!skb) { /* FIXME: Record the dropped frame? 
*/ continue; } @@ -296,7 +286,6 @@ static void hsr_forward_do(struct hsr_frame_info *frame) } } - static void check_local_dest(struct hsr_priv *hsr, struct sk_buff *skb, struct hsr_frame_info *frame) { @@ -307,16 +296,15 @@ static void check_local_dest(struct hsr_priv *hsr, struct sk_buff *skb, frame->is_local_exclusive = false; } - if ((skb->pkt_type == PACKET_HOST) || - (skb->pkt_type == PACKET_MULTICAST) || - (skb->pkt_type == PACKET_BROADCAST)) { + if (skb->pkt_type == PACKET_HOST || + skb->pkt_type == PACKET_MULTICAST || + skb->pkt_type == PACKET_BROADCAST) { frame->is_local_dest = true; } else { frame->is_local_dest = false; } } - static int hsr_fill_frame_info(struct hsr_frame_info *frame, struct sk_buff *skb, struct hsr_port *port) { @@ -325,18 +313,18 @@ static int hsr_fill_frame_info(struct hsr_frame_info *frame, frame->is_supervision = is_supervision_frame(port->hsr, skb); frame->node_src = hsr_get_node(port, skb, frame->is_supervision); - if (frame->node_src == NULL) + if (!frame->node_src) return -1; /* Unknown node and !is_supervision, or no mem */ - ethhdr = (struct ethhdr *) skb_mac_header(skb); + ethhdr = (struct ethhdr *)skb_mac_header(skb); frame->is_vlan = false; if (ethhdr->h_proto == htons(ETH_P_8021Q)) { frame->is_vlan = true; /* FIXME: */ WARN_ONCE(1, "HSR: VLAN not yet supported"); } - if (ethhdr->h_proto == htons(ETH_P_PRP) - || ethhdr->h_proto == htons(ETH_P_HSR)) { + if (ethhdr->h_proto == htons(ETH_P_PRP) || + ethhdr->h_proto == htons(ETH_P_HSR)) { frame->skb_std = NULL; frame->skb_hsr = skb; frame->sequence_nr = hsr_get_skb_sequence_nr(skb); @@ -371,10 +359,17 @@ void hsr_forward_skb(struct sk_buff *skb, struct hsr_port *port) goto out_drop; hsr_register_frame_in(frame.node_src, port, frame.sequence_nr); hsr_forward_do(&frame); + /* Gets called for ingress frames as well as egress from master port. + * So check and increment stats for master port only here. + */ + if (port->type == HSR_PT_MASTER) { + port->dev->stats.tx_packets++; + port->dev->stats.tx_bytes += skb->len; + } - if (frame.skb_hsr != NULL) + if (frame.skb_hsr) kfree_skb(frame.skb_hsr); - if (frame.skb_std != NULL) + if (frame.skb_std) kfree_skb(frame.skb_std); return; diff --git a/net/hsr/hsr_forward.h b/net/hsr/hsr_forward.h index 5c5bc4b6b75f..51a69295566c 100644 --- a/net/hsr/hsr_forward.h +++ b/net/hsr/hsr_forward.h @@ -1,10 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright 2011-2014 Autronica Fire and Security AS * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * * Author(s): * 2011-2014 Arvid Brodin, arvid.brodin@alten.se */ diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 9af16cb68f76..9fa9abd83018 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -1,10 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright 2011-2014 Autronica Fire and Security AS * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. 
- * * Author(s): * 2011-2014 Arvid Brodin, arvid.brodin@alten.se * @@ -22,23 +18,8 @@ #include "hsr_framereg.h" #include "hsr_netlink.h" - -struct hsr_node { - struct list_head mac_list; - unsigned char MacAddressA[ETH_ALEN]; - unsigned char MacAddressB[ETH_ALEN]; - /* Local slave through which AddrB frames are received from this node */ - enum hsr_port_type AddrB_port; - unsigned long time_in[HSR_PT_PORTS]; - bool time_in_stale[HSR_PT_PORTS]; - u16 seq_out[HSR_PT_PORTS]; - struct rcu_head rcu_head; -}; - - /* TODO: use hash lists for mac addresses (linux/jhash.h)? */ - /* seq_nr_after(a, b) - return true if a is after (higher in sequence than) b, * false otherwise. */ @@ -47,16 +28,16 @@ static bool seq_nr_after(u16 a, u16 b) /* Remove inconsistency where * seq_nr_after(a, b) == seq_nr_before(a, b) */ - if ((int) b - a == 32768) + if ((int)b - a == 32768) return false; - return (((s16) (b - a)) < 0); + return (((s16)(b - a)) < 0); } + #define seq_nr_before(a, b) seq_nr_after((b), (a)) #define seq_nr_after_or_eq(a, b) (!seq_nr_before((a), (b))) #define seq_nr_before_or_eq(a, b) (!seq_nr_after((a), (b))) - bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr) { struct hsr_node *node; @@ -68,9 +49,9 @@ bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr) return false; } - if (ether_addr_equal(addr, node->MacAddressA)) + if (ether_addr_equal(addr, node->macaddress_A)) return true; - if (ether_addr_equal(addr, node->MacAddressB)) + if (ether_addr_equal(addr, node->macaddress_B)) return true; return false; @@ -78,20 +59,19 @@ /* Search for mac entry. Caller must hold rcu read lock. */ -static struct hsr_node *find_node_by_AddrA(struct list_head *node_db, - const unsigned char addr[ETH_ALEN]) +static struct hsr_node *find_node_by_addr_A(struct list_head *node_db, + const unsigned char addr[ETH_ALEN]) { struct hsr_node *node; list_for_each_entry_rcu(node, node_db, mac_list) { - if (ether_addr_equal(node->MacAddressA, addr)) + if (ether_addr_equal(node->macaddress_A, addr)) return node; } return NULL; } - /* Helper for device init; the self_node_db is used in hsr_rcv() to recognize * frames from self that have been looped over the HSR ring. */ @@ -105,12 +85,12 @@ int hsr_create_self_node(struct list_head *self_node_db, if (!node) return -ENOMEM; - ether_addr_copy(node->MacAddressA, addr_a); - ether_addr_copy(node->MacAddressB, addr_b); + ether_addr_copy(node->macaddress_A, addr_a); + ether_addr_copy(node->macaddress_B, addr_b); rcu_read_lock(); oldnode = list_first_or_null_rcu(self_node_db, - struct hsr_node, mac_list); + struct hsr_node, mac_list); if (oldnode) { list_replace_rcu(&oldnode->mac_list, &node->mac_list); rcu_read_unlock(); @@ -137,7 +117,7 @@ void hsr_del_node(struct list_head *self_node_db) } } -/* Allocate an hsr_node and add it to node_db. 'addr' is the node's AddressA; +/* Allocate an hsr_node and add it to node_db. 'addr' is the node's address_A; * seq_out is used to initialize filtering of outgoing duplicate frames * originating from the newly added node. */ @@ -152,7 +132,7 @@ struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[], if (!node) return NULL; - ether_addr_copy(node->MacAddressA, addr); + ether_addr_copy(node->macaddress_A, addr); /* We are only interested in time diffs here, so use current jiffies * as initialization. (0 could trigger a spurious ring error warning).
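The seq_nr_after() helper above is serial-number arithmetic over u16: "a is after b" when the forward distance from b to a is less than half the 16-bit sequence space. A standalone userspace sketch of the same logic (my own rendering for illustration, not part of the patch) shows how the comparison wraps and why the exact half-way distance is special-cased:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

static bool seq_nr_after(uint16_t a, uint16_t b)
{
	/* Pin the ambiguous half-way pair (b exactly 32768 ahead of a),
	 * so seq_nr_after(a, b) and seq_nr_before(a, b) can never both
	 * be true for the same pair.
	 */
	if ((int)b - (int)a == 32768)
		return false;
	/* Two's-complement trick: a negative 16-bit difference means
	 * a is ahead of b, even across the 65535 -> 0 wrap.
	 */
	return (int16_t)(b - a) < 0;
}

int main(void)
{
	printf("%d\n", seq_nr_after(5, 65535));  /* 1: 5 follows 65535 across the wrap */
	printf("%d\n", seq_nr_after(7, 7));      /* 0: equal is not "after" */
	printf("%d\n", seq_nr_after(0, 32768));  /* 0: pinned by the special case */
	printf("%d\n", seq_nr_after(32768, 0));  /* 1: the opposite direction wins */
	return 0;
}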
@@ -181,19 +161,19 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb, if (!skb_mac_header_was_set(skb)) return NULL; - ethhdr = (struct ethhdr *) skb_mac_header(skb); + ethhdr = (struct ethhdr *)skb_mac_header(skb); list_for_each_entry_rcu(node, node_db, mac_list) { - if (ether_addr_equal(node->MacAddressA, ethhdr->h_source)) + if (ether_addr_equal(node->macaddress_A, ethhdr->h_source)) return node; - if (ether_addr_equal(node->MacAddressB, ethhdr->h_source)) + if (ether_addr_equal(node->macaddress_B, ethhdr->h_source)) return node; } /* Any node connected to an HSR device may create a node entry. */ - if (ethhdr->h_proto == htons(ETH_P_PRP) - || ethhdr->h_proto == htons(ETH_P_HSR)) { + if (ethhdr->h_proto == htons(ETH_P_PRP) || + ethhdr->h_proto == htons(ETH_P_HSR)) { /* Use the existing sequence_nr from the tag as starting point * for filtering duplicate frames. */ @@ -210,8 +190,8 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb, return hsr_add_node(node_db, ethhdr->h_source, seq_out); } -/* Use the Supervision frame's info about an eventual MacAddressB for merging - * nodes that has previously had their MacAddressB registered as a separate +/* Use the Supervision frame's info about a possible macaddress_B for merging + * nodes that have previously had their macaddress_B registered as a separate * node. */ void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr, @@ -223,7 +203,7 @@ void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr, struct list_head *node_db; int i; - ethhdr = (struct ethhdr *) skb_mac_header(skb); + ethhdr = (struct ethhdr *)skb_mac_header(skb); /* Leave the ethernet header. */ skb_pull(skb, sizeof(struct ethhdr)); @@ -235,14 +215,14 @@ void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr, /* And leave the HSR sup tag. 
*/ skb_pull(skb, sizeof(struct hsr_sup_tag)); - hsr_sp = (struct hsr_sup_payload *) skb->data; + hsr_sp = (struct hsr_sup_payload *)skb->data; - /* Merge node_curr (registered on MacAddressB) into node_real */ + /* Merge node_curr (registered on macaddress_B) into node_real */ node_db = &port_rcv->hsr->node_db; - node_real = find_node_by_AddrA(node_db, hsr_sp->MacAddressA); + node_real = find_node_by_addr_A(node_db, hsr_sp->macaddress_A); if (!node_real) /* No frame received from AddrA of this node yet */ - node_real = hsr_add_node(node_db, hsr_sp->MacAddressA, + node_real = hsr_add_node(node_db, hsr_sp->macaddress_A, HSR_SEQNR_START - 1); if (!node_real) goto done; /* No mem */ @@ -250,17 +230,18 @@ void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr, /* Node has already been merged */ goto done; - ether_addr_copy(node_real->MacAddressB, ethhdr->h_source); + ether_addr_copy(node_real->macaddress_B, ethhdr->h_source); for (i = 0; i < HSR_PT_PORTS; i++) { if (!node_curr->time_in_stale[i] && time_after(node_curr->time_in[i], node_real->time_in[i])) { node_real->time_in[i] = node_curr->time_in[i]; - node_real->time_in_stale[i] = node_curr->time_in_stale[i]; + node_real->time_in_stale[i] = + node_curr->time_in_stale[i]; } if (seq_nr_after(node_curr->seq_out[i], node_real->seq_out[i])) node_real->seq_out[i] = node_curr->seq_out[i]; } - node_real->AddrB_port = port_rcv->type; + node_real->addr_B_port = port_rcv->type; list_del_rcu(&node_curr->mac_list); kfree_rcu(node_curr, rcu_head); @@ -269,11 +250,10 @@ done: skb_push(skb, sizeof(struct hsrv1_ethhdr_sp)); } - /* 'skb' is a frame meant for this host, that is to be passed to upper layers. * * If the frame was sent by a node's B interface, replace the source - * address with that node's "official" address (MacAddressA) so that upper + * address with that node's "official" address (macaddress_A) so that upper * layers recognize where it came from. */ void hsr_addr_subst_source(struct hsr_node *node, struct sk_buff *skb) @@ -283,7 +263,7 @@ void hsr_addr_subst_source(struct hsr_node *node, struct sk_buff *skb) return; } - memcpy(ð_hdr(skb)->h_source, node->MacAddressA, ETH_ALEN); + memcpy(ð_hdr(skb)->h_source, node->macaddress_A, ETH_ALEN); } /* 'skb' is a frame meant for another host. @@ -308,18 +288,18 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb, if (!is_unicast_ether_addr(eth_hdr(skb)->h_dest)) return; - node_dst = find_node_by_AddrA(&port->hsr->node_db, eth_hdr(skb)->h_dest); + node_dst = find_node_by_addr_A(&port->hsr->node_db, + eth_hdr(skb)->h_dest); if (!node_dst) { WARN_ONCE(1, "%s: Unknown node\n", __func__); return; } - if (port->type != node_dst->AddrB_port) + if (port->type != node_dst->addr_B_port) return; - ether_addr_copy(eth_hdr(skb)->h_dest, node_dst->MacAddressB); + ether_addr_copy(eth_hdr(skb)->h_dest, node_dst->macaddress_B); } - void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port, u16 sequence_nr) { @@ -352,7 +332,6 @@ int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node, return 0; } - static struct hsr_port *get_late_port(struct hsr_priv *hsr, struct hsr_node *node) { @@ -373,7 +352,6 @@ static struct hsr_port *get_late_port(struct hsr_priv *hsr, return NULL; } - /* Remove stale sequence_nr records. Called by timer every * HSR_LIFE_CHECK_INTERVAL (two seconds or so). 
*/ @@ -392,9 +370,9 @@ void hsr_prune_nodes(struct timer_list *t) time_b = node->time_in[HSR_PT_SLAVE_B]; /* Check for timestamps old enough to risk wrap-around */ - if (time_after(jiffies, time_a + MAX_JIFFY_OFFSET/2)) + if (time_after(jiffies, time_a + MAX_JIFFY_OFFSET / 2)) node->time_in_stale[HSR_PT_SLAVE_A] = true; - if (time_after(jiffies, time_b + MAX_JIFFY_OFFSET/2)) + if (time_after(jiffies, time_b + MAX_JIFFY_OFFSET / 2)) node->time_in_stale[HSR_PT_SLAVE_B] = true; /* Get age of newest frame from node. @@ -409,26 +387,29 @@ void hsr_prune_nodes(struct timer_list *t) /* Warn of ring error only as long as we get frames at all */ if (time_is_after_jiffies(timestamp + - msecs_to_jiffies(1.5*MAX_SLAVE_DIFF))) { + msecs_to_jiffies(1.5 * MAX_SLAVE_DIFF))) { rcu_read_lock(); port = get_late_port(hsr, node); - if (port != NULL) - hsr_nl_ringerror(hsr, node->MacAddressA, port); + if (port) + hsr_nl_ringerror(hsr, node->macaddress_A, port); rcu_read_unlock(); } /* Prune old entries */ if (time_is_before_jiffies(timestamp + - msecs_to_jiffies(HSR_NODE_FORGET_TIME))) { - hsr_nl_nodedown(hsr, node->MacAddressA); + msecs_to_jiffies(HSR_NODE_FORGET_TIME))) { + hsr_nl_nodedown(hsr, node->macaddress_A); list_del_rcu(&node->mac_list); /* Note that we need to free this entry later: */ kfree_rcu(node, rcu_head); } } rcu_read_unlock(); -} + /* Restart timer */ + mod_timer(&hsr->prune_timer, + jiffies + msecs_to_jiffies(PRUNE_PERIOD)); +} void *hsr_get_next_node(struct hsr_priv *hsr, void *_pos, unsigned char addr[ETH_ALEN]) @@ -439,20 +420,19 @@ void *hsr_get_next_node(struct hsr_priv *hsr, void *_pos, node = list_first_or_null_rcu(&hsr->node_db, struct hsr_node, mac_list); if (node) - ether_addr_copy(addr, node->MacAddressA); + ether_addr_copy(addr, node->macaddress_A); return node; } node = _pos; list_for_each_entry_continue_rcu(node, &hsr->node_db, mac_list) { - ether_addr_copy(addr, node->MacAddressA); + ether_addr_copy(addr, node->macaddress_A); return node; } return NULL; } - int hsr_get_node_data(struct hsr_priv *hsr, const unsigned char *addr, unsigned char addr_b[ETH_ALEN], @@ -466,15 +446,14 @@ int hsr_get_node_data(struct hsr_priv *hsr, struct hsr_port *port; unsigned long tdiff; - rcu_read_lock(); - node = find_node_by_AddrA(&hsr->node_db, addr); + node = find_node_by_addr_A(&hsr->node_db, addr); if (!node) { rcu_read_unlock(); return -ENOENT; /* No such entry */ } - ether_addr_copy(addr_b, node->MacAddressB); + ether_addr_copy(addr_b, node->macaddress_B); tdiff = jiffies - node->time_in[HSR_PT_SLAVE_A]; if (node->time_in_stale[HSR_PT_SLAVE_A]) @@ -500,8 +479,8 @@ int hsr_get_node_data(struct hsr_priv *hsr, *if1_seq = node->seq_out[HSR_PT_SLAVE_B]; *if2_seq = node->seq_out[HSR_PT_SLAVE_A]; - if (node->AddrB_port != HSR_PT_NONE) { - port = hsr_port_get_hsr(hsr, node->AddrB_port); + if (node->addr_B_port != HSR_PT_NONE) { + port = hsr_port_get_hsr(hsr, node->addr_B_port); *addr_b_ifindex = port->dev->ifindex; } else { *addr_b_ifindex = -1; diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h index 531fd3dfcac1..a3bdcdab469d 100644 --- a/net/hsr/hsr_framereg.h +++ b/net/hsr/hsr_framereg.h @@ -1,10 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright 2011-2014 Autronica Fire and Security AS * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. 
- * * Author(s): * 2011-2014 Arvid Brodin, arvid.brodin@alten.se */ @@ -52,4 +48,16 @@ int hsr_get_node_data(struct hsr_priv *hsr, int *if2_age, u16 *if2_seq); +struct hsr_node { + struct list_head mac_list; + unsigned char macaddress_A[ETH_ALEN]; + unsigned char macaddress_B[ETH_ALEN]; + /* Local slave through which AddrB frames are received from this node */ + enum hsr_port_type addr_B_port; + unsigned long time_in[HSR_PT_PORTS]; + bool time_in_stale[HSR_PT_PORTS]; + u16 seq_out[HSR_PT_PORTS]; + struct rcu_head rcu_head; +}; + #endif /* __HSR_FRAMEREG_H */ diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c index cd37d0011b42..b9988a662ee1 100644 --- a/net/hsr/hsr_main.c +++ b/net/hsr/hsr_main.c @@ -1,10 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright 2011-2014 Autronica Fire and Security AS * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * * Author(s): * 2011-2014 Arvid Brodin, arvid.brodin@alten.se */ @@ -19,7 +15,6 @@ #include "hsr_framereg.h" #include "hsr_slave.h" - static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event, void *ptr) { @@ -31,12 +26,12 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event, dev = netdev_notifier_info_to_dev(ptr); port = hsr_port_get_rtnl(dev); - if (port == NULL) { + if (!port) { if (!is_hsr_master(dev)) return NOTIFY_DONE; /* Not an HSR device */ hsr = netdev_priv(dev); port = hsr_port_get_hsr(hsr, HSR_PT_MASTER); - if (port == NULL) { + if (!port) { /* Resend of notification concerning removed device? */ return NOTIFY_DONE; } @@ -63,7 +58,8 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event, if (port->type == HSR_PT_SLAVE_A) { ether_addr_copy(master->dev->dev_addr, dev->dev_addr); - call_netdevice_notifiers(NETDEV_CHANGEADDR, master->dev); + call_netdevice_notifiers(NETDEV_CHANGEADDR, + master->dev); } /* Make sure we recognize frames from ourselves in hsr_rcv() */ @@ -97,7 +93,6 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event, return NOTIFY_DONE; } - struct hsr_port *hsr_port_get_hsr(struct hsr_priv *hsr, enum hsr_port_type pt) { struct hsr_port *port; @@ -112,7 +107,6 @@ static struct notifier_block hsr_nb = { .notifier_call = hsr_netdev_notify, /* Slave event notifications */ }; - static int __init hsr_init(void) { int res; diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index 9b9909e89e9e..96fac696a1e1 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -1,10 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright 2011-2014 Autronica Fire and Security AS * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * * Author(s): * 2011-2014 Arvid Brodin, arvid.brodin@alten.se */ @@ -15,7 +11,6 @@ #include <linux/netdevice.h> #include <linux/list.h> - /* Time constants as specified in the HSR specification (IEC-62439-3 2010) * Table 8. * All values in milliseconds. @@ -24,7 +19,6 @@ #define HSR_NODE_FORGET_TIME 60000 /* ms */ #define HSR_ANNOUNCE_INTERVAL 100 /* ms */ - /* By how much may slave1 and slave2 timestamps of latest received frame from * each node differ before we notify of communication problem? 
*/ @@ -32,17 +26,14 @@ #define HSR_SEQNR_START (USHRT_MAX - 1024) #define HSR_SUP_SEQNR_START (HSR_SEQNR_START / 2) - /* How often shall we check for broken ring and remove node entries older than * HSR_NODE_FORGET_TIME? */ #define PRUNE_PERIOD 3000 /* ms */ - #define HSR_TLV_ANNOUNCE 22 #define HSR_TLV_LIFE_CHECK 23 - /* HSR Tag. * As defined in IEC-62439-3:2010, the HSR tag is really { ethertype = 0x88FB, * path, LSDU_size, sequence Nr }. But we let eth_header() create { h_dest, @@ -83,15 +74,14 @@ static inline u16 get_hsr_tag_LSDU_size(struct hsr_tag *ht) static inline void set_hsr_tag_path(struct hsr_tag *ht, u16 path) { - ht->path_and_LSDU_size = htons( - (ntohs(ht->path_and_LSDU_size) & 0x0FFF) | (path << 12)); + ht->path_and_LSDU_size = + htons((ntohs(ht->path_and_LSDU_size) & 0x0FFF) | (path << 12)); } static inline void set_hsr_tag_LSDU_size(struct hsr_tag *ht, u16 LSDU_size) { - ht->path_and_LSDU_size = htons( - (ntohs(ht->path_and_LSDU_size) & 0xF000) | - (LSDU_size & 0x0FFF)); + ht->path_and_LSDU_size = htons((ntohs(ht->path_and_LSDU_size) & + 0xF000) | (LSDU_size & 0x0FFF)); } struct hsr_ethhdr { @@ -99,39 +89,38 @@ struct hsr_ethhdr { struct hsr_tag hsr_tag; } __packed; - /* HSR Supervision Frame data types. * Field names as defined in the IEC:2010 standard for HSR. */ struct hsr_sup_tag { - __be16 path_and_HSR_Ver; + __be16 path_and_HSR_ver; __be16 sequence_nr; - __u8 HSR_TLV_Type; - __u8 HSR_TLV_Length; + __u8 HSR_TLV_type; + __u8 HSR_TLV_length; } __packed; struct hsr_sup_payload { - unsigned char MacAddressA[ETH_ALEN]; + unsigned char macaddress_A[ETH_ALEN]; } __packed; static inline u16 get_hsr_stag_path(struct hsr_sup_tag *hst) { - return get_hsr_tag_path((struct hsr_tag *) hst); + return get_hsr_tag_path((struct hsr_tag *)hst); } static inline u16 get_hsr_stag_HSR_ver(struct hsr_sup_tag *hst) { - return get_hsr_tag_LSDU_size((struct hsr_tag *) hst); + return get_hsr_tag_LSDU_size((struct hsr_tag *)hst); } static inline void set_hsr_stag_path(struct hsr_sup_tag *hst, u16 path) { - set_hsr_tag_path((struct hsr_tag *) hst, path); + set_hsr_tag_path((struct hsr_tag *)hst, path); } -static inline void set_hsr_stag_HSR_Ver(struct hsr_sup_tag *hst, u16 HSR_Ver) +static inline void set_hsr_stag_HSR_ver(struct hsr_sup_tag *hst, u16 HSR_ver) { - set_hsr_tag_LSDU_size((struct hsr_tag *) hst, HSR_Ver); + set_hsr_tag_LSDU_size((struct hsr_tag *)hst, HSR_ver); } struct hsrv0_ethhdr_sp { @@ -145,7 +134,6 @@ struct hsrv1_ethhdr_sp { struct hsr_sup_tag hsr_sup; } __packed; - enum hsr_port_type { HSR_PT_NONE = 0, /* Must be 0, used by framereg */ HSR_PT_SLAVE_A, @@ -171,10 +159,14 @@ struct hsr_priv { struct timer_list prune_timer; int announce_count; u16 sequence_nr; - u16 sup_sequence_nr; /* For HSRv1 separate seq_nr for supervision */ - u8 protVersion; /* Indicate if HSRv0 or HSRv1. */ + u16 sup_sequence_nr; /* For HSRv1 separate seq_nr for supervision */ + u8 prot_version; /* Indicate if HSRv0 or HSRv1. 
*/ spinlock_t seqnr_lock; /* locking for sequence_nr */ unsigned char sup_multicast_addr[ETH_ALEN]; +#ifdef CONFIG_DEBUG_FS + struct dentry *node_tbl_root; + struct dentry *node_tbl_file; +#endif }; #define hsr_for_each_port(hsr, port) \ @@ -187,8 +179,22 @@ static inline u16 hsr_get_skb_sequence_nr(struct sk_buff *skb) { struct hsr_ethhdr *hsr_ethhdr; - hsr_ethhdr = (struct hsr_ethhdr *) skb_mac_header(skb); + hsr_ethhdr = (struct hsr_ethhdr *)skb_mac_header(skb); return ntohs(hsr_ethhdr->hsr_tag.sequence_nr); } +#if IS_ENABLED(CONFIG_DEBUG_FS) +int hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev); +void hsr_debugfs_term(struct hsr_priv *priv); +#else +static inline int hsr_debugfs_init(struct hsr_priv *priv, + struct net_device *hsr_dev) +{ + return 0; +} + +static inline void hsr_debugfs_term(struct hsr_priv *priv) +{} +#endif + #endif /* __HSR_PRIVATE_H */ diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index b9cce0fd5696..8f8337f893ba 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -1,10 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright 2011-2014 Autronica Fire and Security AS * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * * Author(s): * 2011-2014 Arvid Brodin, arvid.brodin@alten.se * @@ -28,7 +24,6 @@ static const struct nla_policy hsr_policy[IFLA_HSR_MAX + 1] = { [IFLA_HSR_SEQ_NR] = { .type = NLA_U16 }, }; - /* Here, it seems a netdevice has already been allocated for us, and the * hsr_dev_setup routine has been executed. Nice! */ @@ -47,12 +42,14 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, netdev_info(dev, "HSR: Slave1 device not specified\n"); return -EINVAL; } - link[0] = __dev_get_by_index(src_net, nla_get_u32(data[IFLA_HSR_SLAVE1])); + link[0] = __dev_get_by_index(src_net, + nla_get_u32(data[IFLA_HSR_SLAVE1])); if (!data[IFLA_HSR_SLAVE2]) { netdev_info(dev, "HSR: Slave2 device not specified\n"); return -EINVAL; } - link[1] = __dev_get_by_index(src_net, nla_get_u32(data[IFLA_HSR_SLAVE2])); + link[1] = __dev_get_by_index(src_net, + nla_get_u32(data[IFLA_HSR_SLAVE2])); if (!link[0] || !link[1]) return -ENODEV; @@ -119,8 +116,6 @@ static struct rtnl_link_ops hsr_link_ops __read_mostly = { .fill_info = hsr_fill_info, }; - - /* attribute policy */ static const struct nla_policy hsr_genl_policy[HSR_A_MAX + 1] = { [HSR_A_NODE_ADDR] = { .len = ETH_ALEN }, @@ -138,8 +133,6 @@ static const struct genl_multicast_group hsr_mcgrps[] = { { .name = "hsr-network", }, }; - - /* This is called if for some node with MAC address addr, we only get frames * over one of the slave interfaces. This would indicate an open network ring * (i.e. a link has failed somewhere). 
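The netlink hunks that follow (and the nl802154 ones further down) drop the per-operation .policy pointers and attach a single policy to the genl_family instead, so the generic netlink core can validate attributes itself; ops that predate strict validation opt out per-op via .validate flags. A sketch of the resulting registration layout, using a hypothetical family (all demo_* names are illustrative, not from the patch):

#include <net/genetlink.h>

/* Hypothetical mini-family showing the new layout: one family-wide
 * nla_policy, no per-op .policy, and a legacy op opting out of the
 * new strict checks via .validate.
 */
enum {
	DEMO_ATTR_UNSPEC,
	DEMO_ATTR_IFINDEX,
	__DEMO_ATTR_MAX,
};
#define DEMO_ATTR_MAX (__DEMO_ATTR_MAX - 1)

enum {
	DEMO_CMD_UNSPEC,
	DEMO_CMD_GET,
};

static const struct nla_policy demo_policy[DEMO_ATTR_MAX + 1] = {
	[DEMO_ATTR_IFINDEX] = { .type = NLA_U32 },
};

static int demo_get_doit(struct sk_buff *skb, struct genl_info *info);

static const struct genl_ops demo_ops[] = {
	{
		.cmd = DEMO_CMD_GET,
		/* legacy command: skip the new strict validation */
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = demo_get_doit,
		/* note: no per-op .policy any more */
	},
};

static struct genl_family demo_family __ro_after_init = {
	.name = "demo",
	.version = 1,
	.maxattr = DEMO_ATTR_MAX,
	.policy = demo_policy,	/* validated once, family-wide */
	.module = THIS_MODULE,
	.ops = demo_ops,
	.n_ops = ARRAY_SIZE(demo_ops),
};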
@@ -156,7 +149,8 @@ void hsr_nl_ringerror(struct hsr_priv *hsr, unsigned char addr[ETH_ALEN], if (!skb) goto fail; - msg_head = genlmsg_put(skb, 0, 0, &hsr_genl_family, 0, HSR_C_RING_ERROR); + msg_head = genlmsg_put(skb, 0, 0, &hsr_genl_family, 0, + HSR_C_RING_ERROR); if (!msg_head) goto nla_put_failure; @@ -201,7 +195,6 @@ void hsr_nl_nodedown(struct hsr_priv *hsr, unsigned char addr[ETH_ALEN]) if (!msg_head) goto nla_put_failure; - res = nla_put(skb, HSR_A_NODE_ADDR, ETH_ALEN, addr); if (res < 0) goto nla_put_failure; @@ -221,7 +214,6 @@ fail: rcu_read_unlock(); } - /* HSR_C_GET_NODE_STATUS lets userspace query the internal HSR node table * about the status of a specific node in the network, defined by its MAC * address. @@ -260,15 +252,13 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info) goto invalid; hsr_dev = __dev_get_by_index(genl_info_net(info), - nla_get_u32(info->attrs[HSR_A_IFINDEX])); + nla_get_u32(info->attrs[HSR_A_IFINDEX])); if (!hsr_dev) goto invalid; if (!is_hsr_master(hsr_dev)) goto invalid; - /* Send reply */ - skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb_out) { res = -ENOMEM; @@ -276,8 +266,8 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info) } msg_head = genlmsg_put(skb_out, NETLINK_CB(skb_in).portid, - info->snd_seq, &hsr_genl_family, 0, - HSR_C_SET_NODE_STATUS); + info->snd_seq, &hsr_genl_family, 0, + HSR_C_SET_NODE_STATUS); if (!msg_head) { res = -ENOMEM; goto nla_put_failure; @@ -289,28 +279,30 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info) hsr = netdev_priv(hsr_dev); res = hsr_get_node_data(hsr, - (unsigned char *) nla_data(info->attrs[HSR_A_NODE_ADDR]), - hsr_node_addr_b, - &addr_b_ifindex, - &hsr_node_if1_age, - &hsr_node_if1_seq, - &hsr_node_if2_age, - &hsr_node_if2_seq); + (unsigned char *) + nla_data(info->attrs[HSR_A_NODE_ADDR]), + hsr_node_addr_b, + &addr_b_ifindex, + &hsr_node_if1_age, + &hsr_node_if1_seq, + &hsr_node_if2_age, + &hsr_node_if2_seq); if (res < 0) goto nla_put_failure; res = nla_put(skb_out, HSR_A_NODE_ADDR, ETH_ALEN, - nla_data(info->attrs[HSR_A_NODE_ADDR])); + nla_data(info->attrs[HSR_A_NODE_ADDR])); if (res < 0) goto nla_put_failure; if (addr_b_ifindex > -1) { res = nla_put(skb_out, HSR_A_NODE_ADDR_B, ETH_ALEN, - hsr_node_addr_b); + hsr_node_addr_b); if (res < 0) goto nla_put_failure; - res = nla_put_u32(skb_out, HSR_A_ADDR_B_IFINDEX, addr_b_ifindex); + res = nla_put_u32(skb_out, HSR_A_ADDR_B_IFINDEX, + addr_b_ifindex); if (res < 0) goto nla_put_failure; } @@ -392,9 +384,7 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info) if (!is_hsr_master(hsr_dev)) goto invalid; - /* Send reply */ - skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb_out) { res = -ENOMEM; @@ -402,8 +392,8 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info) } msg_head = genlmsg_put(skb_out, NETLINK_CB(skb_in).portid, - info->snd_seq, &hsr_genl_family, 0, - HSR_C_SET_NODE_LIST); + info->snd_seq, &hsr_genl_family, 0, + HSR_C_SET_NODE_LIST); if (!msg_head) { res = -ENOMEM; goto nla_put_failure; @@ -444,19 +434,18 @@ fail: return res; } - static const struct genl_ops hsr_ops[] = { { .cmd = HSR_C_GET_NODE_STATUS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, - .policy = hsr_genl_policy, .doit = hsr_get_node_status, .dumpit = NULL, }, { .cmd = HSR_C_GET_NODE_LIST, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, - .policy = hsr_genl_policy, 
.doit = hsr_get_node_list, .dumpit = NULL, }, @@ -467,6 +456,7 @@ static struct genl_family hsr_genl_family __ro_after_init = { .name = "HSR", .version = 1, .maxattr = HSR_A_MAX, + .policy = hsr_genl_policy, .module = THIS_MODULE, .ops = hsr_ops, .n_ops = ARRAY_SIZE(hsr_ops), diff --git a/net/hsr/hsr_netlink.h b/net/hsr/hsr_netlink.h index 3f6b95b5b6b8..1121bb192a18 100644 --- a/net/hsr/hsr_netlink.h +++ b/net/hsr/hsr_netlink.h @@ -1,10 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright 2011-2014 Autronica Fire and Security AS * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * * Author(s): * 2011-2014 Arvid Brodin, arvid.brodin@alten.se */ diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index 56080da4aa77..88b6705ded83 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -1,10 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* Copyright 2011-2014 Autronica Fire and Security AS * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * * Author(s): * 2011-2014 Arvid Brodin, arvid.brodin@alten.se */ @@ -18,7 +14,6 @@ #include "hsr_forward.h" #include "hsr_framereg.h" - static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; @@ -61,12 +56,11 @@ bool hsr_port_exists(const struct net_device *dev) return rcu_access_pointer(dev->rx_handler) == hsr_handle_frame; } - static int hsr_check_dev_ok(struct net_device *dev) { /* Don't allow HSR on non-ethernet like devices */ - if ((dev->flags & IFF_LOOPBACK) || (dev->type != ARPHRD_ETHER) || - (dev->addr_len != ETH_ALEN)) { + if ((dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER || + dev->addr_len != ETH_ALEN) { netdev_info(dev, "Cannot use loopback or non-ethernet device as HSR slave.\n"); return -EINVAL; } @@ -99,7 +93,6 @@ static int hsr_check_dev_ok(struct net_device *dev) return 0; } - /* Setup device to be added to the HSR bridge. */ static int hsr_portdev_setup(struct net_device *dev, struct hsr_port *port) { @@ -143,11 +136,11 @@ int hsr_add_port(struct hsr_priv *hsr, struct net_device *dev, } port = hsr_port_get_hsr(hsr, type); - if (port != NULL) + if (port) return -EBUSY; /* This port already exists */ port = kzalloc(sizeof(*port), GFP_KERNEL); - if (port == NULL) + if (!port) return -ENOMEM; if (type != HSR_PT_MASTER) { @@ -184,7 +177,7 @@ void hsr_del_port(struct hsr_port *port) list_del_rcu(&port->port_list); if (port != master) { - if (master != NULL) { + if (master) { netdev_update_features(master->dev); dev_set_mtu(master->dev, hsr_get_max_mtu(hsr)); } diff --git a/net/hsr/hsr_slave.h b/net/hsr/hsr_slave.h index 3ccfbf71c92e..64b549529592 100644 --- a/net/hsr/hsr_slave.h +++ b/net/hsr/hsr_slave.h @@ -1,11 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Copyright 2011-2014 Autronica Fire and Security AS * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. 
- * - * Author(s): * 2011-2014 Arvid Brodin, arvid.brodin@alten.se */ diff --git a/net/ieee802154/ieee802154.h b/net/ieee802154/ieee802154.h index a5d7515b7f62..bc147bc8e36a 100644 --- a/net/ieee802154/ieee802154.h +++ b/net/ieee802154/ieee802154.h @@ -20,7 +20,6 @@ void ieee802154_nl_exit(void); #define IEEE802154_OP(_cmd, _func) \ { \ .cmd = _cmd, \ - .policy = ieee802154_policy, \ .doit = _func, \ .dumpit = NULL, \ .flags = GENL_ADMIN_PERM, \ @@ -29,7 +28,6 @@ void ieee802154_nl_exit(void); #define IEEE802154_DUMP(_cmd, _func, _dump) \ { \ .cmd = _cmd, \ - .policy = ieee802154_policy, \ .doit = _func, \ .dumpit = _dump, \ } diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 96636e3b7aa9..098d67439b6d 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -136,6 +136,7 @@ struct genl_family nl802154_family __ro_after_init = { .name = IEEE802154_NL_NAME, .version = 1, .maxattr = IEEE802154_ATTR_MAX, + .policy = ieee802154_policy, .module = THIS_MODULE, .ops = ieee802154_ops, .n_ops = ARRAY_SIZE(ieee802154_ops), diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 99f6c254ea77..e4c4174f9efb 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -247,9 +247,11 @@ nl802154_prepare_wpan_dev_dump(struct sk_buff *skb, rtnl_lock(); if (!cb->args[0]) { - err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize, - genl_family_attrbuf(&nl802154_fam), - nl802154_fam.maxattr, nl802154_policy, NULL); + err = nlmsg_parse_deprecated(cb->nlh, + GENL_HDRLEN + nl802154_fam.hdrsize, + genl_family_attrbuf(&nl802154_fam), + nl802154_fam.maxattr, + nl802154_policy, NULL); if (err) goto out_unlock; @@ -312,7 +314,7 @@ static inline void *nl802154hdr_put(struct sk_buff *skb, u32 portid, u32 seq, static int nl802154_put_flags(struct sk_buff *msg, int attr, u32 mask) { - struct nlattr *nl_flags = nla_nest_start(msg, attr); + struct nlattr *nl_flags = nla_nest_start_noflag(msg, attr); int i; if (!nl_flags) @@ -338,7 +340,7 @@ nl802154_send_wpan_phy_channels(struct cfg802154_registered_device *rdev, struct nlattr *nl_page; unsigned long page; - nl_page = nla_nest_start(msg, NL802154_ATTR_CHANNELS_SUPPORTED); + nl_page = nla_nest_start_noflag(msg, NL802154_ATTR_CHANNELS_SUPPORTED); if (!nl_page) return -ENOBUFS; @@ -360,11 +362,11 @@ nl802154_put_capabilities(struct sk_buff *msg, struct nlattr *nl_caps, *nl_channels; int i; - nl_caps = nla_nest_start(msg, NL802154_ATTR_WPAN_PHY_CAPS); + nl_caps = nla_nest_start_noflag(msg, NL802154_ATTR_WPAN_PHY_CAPS); if (!nl_caps) return -ENOBUFS; - nl_channels = nla_nest_start(msg, NL802154_CAP_ATTR_CHANNELS); + nl_channels = nla_nest_start_noflag(msg, NL802154_CAP_ATTR_CHANNELS); if (!nl_channels) return -ENOBUFS; @@ -380,8 +382,8 @@ nl802154_put_capabilities(struct sk_buff *msg, if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_ED_LEVEL) { struct nlattr *nl_ed_lvls; - nl_ed_lvls = nla_nest_start(msg, - NL802154_CAP_ATTR_CCA_ED_LEVELS); + nl_ed_lvls = nla_nest_start_noflag(msg, + NL802154_CAP_ATTR_CCA_ED_LEVELS); if (!nl_ed_lvls) return -ENOBUFS; @@ -396,7 +398,8 @@ nl802154_put_capabilities(struct sk_buff *msg, if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_TXPOWER) { struct nlattr *nl_tx_pwrs; - nl_tx_pwrs = nla_nest_start(msg, NL802154_CAP_ATTR_TX_POWERS); + nl_tx_pwrs = nla_nest_start_noflag(msg, + NL802154_CAP_ATTR_TX_POWERS); if (!nl_tx_pwrs) return -ENOBUFS; @@ -504,7 +507,7 @@ static int nl802154_send_wpan_phy(struct cfg802154_registered_device *rdev, if (nl802154_put_capabilities(msg, rdev)) goto 
nla_put_failure; - nl_cmds = nla_nest_start(msg, NL802154_ATTR_SUPPORTED_COMMANDS); + nl_cmds = nla_nest_start_noflag(msg, NL802154_ATTR_SUPPORTED_COMMANDS); if (!nl_cmds) goto nla_put_failure; @@ -561,8 +564,10 @@ static int nl802154_dump_wpan_phy_parse(struct sk_buff *skb, struct nl802154_dump_wpan_phy_state *state) { struct nlattr **tb = genl_family_attrbuf(&nl802154_fam); - int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize, tb, - nl802154_fam.maxattr, nl802154_policy, NULL); + int ret = nlmsg_parse_deprecated(cb->nlh, + GENL_HDRLEN + nl802154_fam.hdrsize, + tb, nl802154_fam.maxattr, + nl802154_policy, NULL); /* TODO check if we can handle error here, * we have no backward compatibility @@ -693,7 +698,8 @@ ieee802154_llsec_send_key_id(struct sk_buff *msg, switch (desc->mode) { case NL802154_KEY_ID_MODE_IMPLICIT: - nl_dev_addr = nla_nest_start(msg, NL802154_KEY_ID_ATTR_IMPLICIT); + nl_dev_addr = nla_nest_start_noflag(msg, + NL802154_KEY_ID_ATTR_IMPLICIT); if (!nl_dev_addr) return -ENOBUFS; @@ -768,7 +774,7 @@ static int nl802154_get_llsec_params(struct sk_buff *msg, params.frame_counter)) return -ENOBUFS; - nl_key_id = nla_nest_start(msg, NL802154_ATTR_SEC_OUT_KEY_ID); + nl_key_id = nla_nest_start_noflag(msg, NL802154_ATTR_SEC_OUT_KEY_ID); if (!nl_key_id) return -ENOBUFS; @@ -1306,8 +1312,7 @@ ieee802154_llsec_parse_dev_addr(struct nlattr *nla, { struct nlattr *attrs[NL802154_DEV_ADDR_ATTR_MAX + 1]; - if (!nla || nla_parse_nested(attrs, NL802154_DEV_ADDR_ATTR_MAX, nla, - nl802154_dev_addr_policy, NULL)) + if (!nla || nla_parse_nested_deprecated(attrs, NL802154_DEV_ADDR_ATTR_MAX, nla, nl802154_dev_addr_policy, NULL)) return -EINVAL; if (!attrs[NL802154_DEV_ADDR_ATTR_PAN_ID] || @@ -1346,8 +1351,7 @@ ieee802154_llsec_parse_key_id(struct nlattr *nla, { struct nlattr *attrs[NL802154_KEY_ID_ATTR_MAX + 1]; - if (!nla || nla_parse_nested(attrs, NL802154_KEY_ID_ATTR_MAX, nla, - nl802154_key_id_policy, NULL)) + if (!nla || nla_parse_nested_deprecated(attrs, NL802154_KEY_ID_ATTR_MAX, nla, nl802154_key_id_policy, NULL)) return -EINVAL; if (!attrs[NL802154_KEY_ID_ATTR_MODE]) @@ -1455,11 +1459,11 @@ static int nl802154_send_key(struct sk_buff *msg, u32 cmd, u32 portid, if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; - nl_key = nla_nest_start(msg, NL802154_ATTR_SEC_KEY); + nl_key = nla_nest_start_noflag(msg, NL802154_ATTR_SEC_KEY); if (!nl_key) goto nla_put_failure; - nl_key_id = nla_nest_start(msg, NL802154_KEY_ATTR_ID); + nl_key_id = nla_nest_start_noflag(msg, NL802154_KEY_ATTR_ID); if (!nl_key_id) goto nla_put_failure; @@ -1562,9 +1566,7 @@ static int nl802154_add_llsec_key(struct sk_buff *skb, struct genl_info *info) struct ieee802154_llsec_key_id id = { }; u32 commands[NL802154_CMD_FRAME_NR_IDS / 32] = { }; - if (nla_parse_nested(attrs, NL802154_KEY_ATTR_MAX, - info->attrs[NL802154_ATTR_SEC_KEY], - nl802154_key_policy, info->extack)) + if (nla_parse_nested_deprecated(attrs, NL802154_KEY_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_KEY], nl802154_key_policy, info->extack)) return -EINVAL; if (!attrs[NL802154_KEY_ATTR_USAGE_FRAMES] || @@ -1612,9 +1614,7 @@ static int nl802154_del_llsec_key(struct sk_buff *skb, struct genl_info *info) struct nlattr *attrs[NL802154_KEY_ATTR_MAX + 1]; struct ieee802154_llsec_key_id id; - if (nla_parse_nested(attrs, NL802154_KEY_ATTR_MAX, - info->attrs[NL802154_ATTR_SEC_KEY], - nl802154_key_policy, info->extack)) + if (nla_parse_nested_deprecated(attrs, NL802154_KEY_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_KEY], 
nl802154_key_policy, info->extack)) return -EINVAL; if (ieee802154_llsec_parse_key_id(attrs[NL802154_KEY_ATTR_ID], &id) < 0) @@ -1639,7 +1639,7 @@ static int nl802154_send_device(struct sk_buff *msg, u32 cmd, u32 portid, if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; - nl_device = nla_nest_start(msg, NL802154_ATTR_SEC_DEVICE); + nl_device = nla_nest_start_noflag(msg, NL802154_ATTR_SEC_DEVICE); if (!nl_device) goto nla_put_failure; @@ -1728,8 +1728,7 @@ ieee802154_llsec_parse_device(struct nlattr *nla, { struct nlattr *attrs[NL802154_DEV_ATTR_MAX + 1]; - if (!nla || nla_parse_nested(attrs, NL802154_DEV_ATTR_MAX, - nla, nl802154_dev_policy, NULL)) + if (!nla || nla_parse_nested_deprecated(attrs, NL802154_DEV_ATTR_MAX, nla, nl802154_dev_policy, NULL)) return -EINVAL; memset(dev, 0, sizeof(*dev)); @@ -1780,9 +1779,7 @@ static int nl802154_del_llsec_dev(struct sk_buff *skb, struct genl_info *info) struct nlattr *attrs[NL802154_DEV_ATTR_MAX + 1]; __le64 extended_addr; - if (nla_parse_nested(attrs, NL802154_DEV_ATTR_MAX, - info->attrs[NL802154_ATTR_SEC_DEVICE], - nl802154_dev_policy, info->extack)) + if (nla_parse_nested_deprecated(attrs, NL802154_DEV_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_DEVICE], nl802154_dev_policy, info->extack)) return -EINVAL; if (!attrs[NL802154_DEV_ATTR_EXTENDED_ADDR]) @@ -1808,7 +1805,7 @@ static int nl802154_send_devkey(struct sk_buff *msg, u32 cmd, u32 portid, if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; - nl_devkey = nla_nest_start(msg, NL802154_ATTR_SEC_DEVKEY); + nl_devkey = nla_nest_start_noflag(msg, NL802154_ATTR_SEC_DEVKEY); if (!nl_devkey) goto nla_put_failure; @@ -1818,7 +1815,7 @@ static int nl802154_send_devkey(struct sk_buff *msg, u32 cmd, u32 portid, devkey->frame_counter)) goto nla_put_failure; - nl_key_id = nla_nest_start(msg, NL802154_DEVKEY_ATTR_ID); + nl_key_id = nla_nest_start_noflag(msg, NL802154_DEVKEY_ATTR_ID); if (!nl_key_id) goto nla_put_failure; @@ -1908,9 +1905,7 @@ static int nl802154_add_llsec_devkey(struct sk_buff *skb, struct genl_info *info __le64 extended_addr; if (!info->attrs[NL802154_ATTR_SEC_DEVKEY] || - nla_parse_nested(attrs, NL802154_DEVKEY_ATTR_MAX, - info->attrs[NL802154_ATTR_SEC_DEVKEY], - nl802154_devkey_policy, info->extack) < 0) + nla_parse_nested_deprecated(attrs, NL802154_DEVKEY_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_DEVKEY], nl802154_devkey_policy, info->extack) < 0) return -EINVAL; if (!attrs[NL802154_DEVKEY_ATTR_FRAME_COUNTER] || @@ -1940,9 +1935,7 @@ static int nl802154_del_llsec_devkey(struct sk_buff *skb, struct genl_info *info struct ieee802154_llsec_device_key key; __le64 extended_addr; - if (nla_parse_nested(attrs, NL802154_DEVKEY_ATTR_MAX, - info->attrs[NL802154_ATTR_SEC_DEVKEY], - nl802154_devkey_policy, info->extack)) + if (nla_parse_nested_deprecated(attrs, NL802154_DEVKEY_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_DEVKEY], nl802154_devkey_policy, info->extack)) return -EINVAL; if (!attrs[NL802154_DEVKEY_ATTR_EXTENDED_ADDR]) @@ -1976,7 +1969,7 @@ static int nl802154_send_seclevel(struct sk_buff *msg, u32 cmd, u32 portid, if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; - nl_seclevel = nla_nest_start(msg, NL802154_ATTR_SEC_LEVEL); + nl_seclevel = nla_nest_start_noflag(msg, NL802154_ATTR_SEC_LEVEL); if (!nl_seclevel) goto nla_put_failure; @@ -2062,8 +2055,7 @@ llsec_parse_seclevel(struct nlattr *nla, struct ieee802154_llsec_seclevel *sl) { struct nlattr *attrs[NL802154_SECLEVEL_ATTR_MAX + 1]; - if (!nla || 
nla_parse_nested(attrs, NL802154_SECLEVEL_ATTR_MAX, - nla, nl802154_seclevel_policy, NULL)) + if (!nla || nla_parse_nested_deprecated(attrs, NL802154_SECLEVEL_ATTR_MAX, nla, nl802154_seclevel_policy, NULL)) return -EINVAL; memset(sl, 0, sizeof(*sl)); @@ -2217,131 +2209,131 @@ static void nl802154_post_doit(const struct genl_ops *ops, struct sk_buff *skb, static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_GET_WPAN_PHY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_get_wpan_phy, .dumpit = nl802154_dump_wpan_phy, .done = nl802154_dump_wpan_phy_done, - .policy = nl802154_policy, /* can be retrieved by unprivileged users */ .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_GET_INTERFACE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_get_interface, .dumpit = nl802154_dump_interface, - .policy = nl802154_policy, /* can be retrieved by unprivileged users */ .internal_flags = NL802154_FLAG_NEED_WPAN_DEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_NEW_INTERFACE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_new_interface, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_DEL_INTERFACE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_del_interface, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_DEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_SET_CHANNEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_set_channel, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_SET_CCA_MODE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_set_cca_mode, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_SET_CCA_ED_LEVEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_set_cca_ed_level, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_SET_TX_POWER, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_set_tx_power, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_SET_WPAN_PHY_NETNS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_wpan_phy_netns, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_SET_PAN_ID, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_set_pan_id, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_SET_SHORT_ADDR, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_set_short_addr, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_SET_BACKOFF_EXPONENT, + .validate = 
GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_set_backoff_exponent, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_SET_MAX_CSMA_BACKOFFS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_set_max_csma_backoffs, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_SET_MAX_FRAME_RETRIES, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_set_max_frame_retries, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_SET_LBT_MODE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_set_lbt_mode, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_SET_ACKREQ_DEFAULT, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_set_ackreq_default, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2349,33 +2341,33 @@ static const struct genl_ops nl802154_ops[] = { #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL { .cmd = NL802154_CMD_SET_SEC_PARAMS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_set_llsec_params, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_GET_SEC_KEY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, /* TODO .doit by matching key id? */ .dumpit = nl802154_dump_llsec_key, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_NEW_SEC_KEY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_add_llsec_key, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_DEL_SEC_KEY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_del_llsec_key, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2383,25 +2375,25 @@ static const struct genl_ops nl802154_ops[] = { /* TODO unique identifier must short+pan OR extended_addr */ { .cmd = NL802154_CMD_GET_SEC_DEV, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, /* TODO .doit by matching extended_addr? 
*/ .dumpit = nl802154_dump_llsec_dev, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_NEW_SEC_DEV, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_add_llsec_dev, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_DEL_SEC_DEV, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_del_llsec_dev, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2409,51 +2401,51 @@ static const struct genl_ops nl802154_ops[] = { /* TODO remove complete devkey, put it as nested? */ { .cmd = NL802154_CMD_GET_SEC_DEVKEY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, /* TODO doit by matching ??? */ .dumpit = nl802154_dump_llsec_devkey, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_NEW_SEC_DEVKEY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_add_llsec_devkey, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_DEL_SEC_DEVKEY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_del_llsec_devkey, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_GET_SEC_LEVEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, /* TODO .doit by matching frame_type? */ .dumpit = nl802154_dump_llsec_seclevel, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_NEW_SEC_LEVEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_add_llsec_seclevel, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, { .cmd = NL802154_CMD_DEL_SEC_LEVEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, /* TODO match frame_type only? 
*/ .doit = nl802154_del_llsec_seclevel, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2466,6 +2458,7 @@ static struct genl_family nl802154_fam __ro_after_init = { .hdrsize = 0, /* no private header */ .version = 1, /* no particular meaning now */ .maxattr = NL802154_ATTR_MAX, + .policy = nl802154_policy, .netnsok = true, .pre_doit = nl802154_pre_doit, .post_doit = nl802154_post_doit, diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index bc6b912603f1..ce2dfb997537 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -164,10 +164,6 @@ static int ieee802154_sock_ioctl(struct socket *sock, unsigned int cmd, struct sock *sk = sock->sk; switch (cmd) { - case SIOCGSTAMP: - return sock_get_timestamp(sk, (struct timeval __user *)arg); - case SIOCGSTAMPNS: - return sock_get_timestampns(sk, (struct timespec __user *)arg); case SIOCGIFADDR: case SIOCSIFADDR: return ieee802154_dev_ioctl(sk, (struct ifreq __user *)arg, @@ -426,6 +422,7 @@ static const struct proto_ops ieee802154_raw_ops = { .getname = sock_no_getname, .poll = datagram_poll, .ioctl = ieee802154_sock_ioctl, + .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = sock_common_setsockopt, @@ -988,6 +985,7 @@ static const struct proto_ops ieee802154_dgram_ops = { .getname = sock_no_getname, .poll = datagram_poll, .ioctl = ieee802154_sock_ioctl, + .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = sock_common_setsockopt, diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 32cae39cdff6..8108e97d4285 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -304,7 +304,7 @@ config NET_IPVTI tristate "Virtual (secure) IP: tunneling" select INET_TUNNEL select NET_IP_TUNNEL - depends on INET_XFRM_MODE_TUNNEL + select XFRM ---help--- Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the @@ -396,33 +396,6 @@ config INET_TUNNEL tristate default n -config INET_XFRM_MODE_TRANSPORT - tristate "IP: IPsec transport mode" - default y - select XFRM - ---help--- - Support for IPsec transport mode. - - If unsure, say Y. - -config INET_XFRM_MODE_TUNNEL - tristate "IP: IPsec tunnel mode" - default y - select XFRM - ---help--- - Support for IPsec tunnel mode. - - If unsure, say Y. - -config INET_XFRM_MODE_BEET - tristate "IP: IPsec BEET mode" - default y - select XFRM - ---help--- - Support for IPsec BEET mode. - - If unsure, say Y. 
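# Note: INET_XFRM_MODE_TRANSPORT, INET_XFRM_MODE_TUNNEL and
# INET_XFRM_MODE_BEET go away here because the xfrm mode handlers were
# folded into the xfrm core elsewhere in this merge and can no longer be
# built as modules; code that needed a mode module now simply selects
# XFRM, as the NET_IPVTI change above shows. Stale .config entries for
# the removed symbols are silently dropped on the next oldconfig run.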
- config INET_DIAG tristate "INET: socket monitoring interface" default y diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 58629314eae9..000a61994c8f 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -37,10 +37,7 @@ obj-$(CONFIG_INET_ESP) += esp4.o obj-$(CONFIG_INET_ESP_OFFLOAD) += esp4_offload.o obj-$(CONFIG_INET_IPCOMP) += ipcomp.o obj-$(CONFIG_INET_XFRM_TUNNEL) += xfrm4_tunnel.o -obj-$(CONFIG_INET_XFRM_MODE_BEET) += xfrm4_mode_beet.o obj-$(CONFIG_INET_TUNNEL) += tunnel4.o -obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o -obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o obj-$(CONFIG_IP_PNP) += ipconfig.o obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/ obj-$(CONFIG_INET_DIAG) += inet_diag.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index eab3ebde981e..5183a2daba64 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -136,6 +136,10 @@ void inet_sock_destruct(struct sock *sk) struct inet_sock *inet = inet_sk(sk); __skb_queue_purge(&sk->sk_receive_queue); + if (sk->sk_rx_skb_cache) { + __kfree_skb(sk->sk_rx_skb_cache); + sk->sk_rx_skb_cache = NULL; + } __skb_queue_purge(&sk->sk_error_queue); sk_mem_reclaim(sk); @@ -156,7 +160,7 @@ void inet_sock_destruct(struct sock *sk) WARN_ON(sk->sk_forward_alloc); kfree(rcu_dereference_protected(inet->inet_opt, 1)); - dst_release(rcu_dereference_check(sk->sk_dst_cache, 1)); + dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1)); dst_release(sk->sk_rx_dst); sk_refcnt_debug_dec(sk); } @@ -911,12 +915,6 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct rtentry rt; switch (cmd) { - case SIOCGSTAMP: - err = sock_get_timestamp(sk, (struct timeval __user *)arg); - break; - case SIOCGSTAMPNS: - err = sock_get_timestampns(sk, (struct timespec __user *)arg); - break; case SIOCADDRT: case SIOCDELRT: if (copy_from_user(&rt, p, sizeof(struct rtentry))) @@ -988,6 +986,7 @@ const struct proto_ops inet_stream_ops = { .getname = inet_getname, .poll = tcp_poll, .ioctl = inet_ioctl, + .gettstamp = sock_gettstamp, .listen = inet_listen, .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, @@ -1023,6 +1022,7 @@ const struct proto_ops inet_dgram_ops = { .getname = inet_getname, .poll = udp_poll, .ioctl = inet_ioctl, + .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, @@ -1055,6 +1055,7 @@ static const struct proto_ops inet_sockraw_ops = { .getname = inet_getname, .poll = datagram_poll, .ioctl = inet_ioctl, + .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c index 1e976bb93d99..15427163a041 100644 --- a/net/ipv4/bpfilter/sockopt.c +++ b/net/ipv4/bpfilter/sockopt.c @@ -77,5 +77,4 @@ static int __init bpfilter_sockopt_init(void) return 0; } - -module_init(bpfilter_sockopt_init); +device_initcall(bpfilter_sockopt_init); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index eb514f312e6f..701c5d113a34 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -621,8 +621,8 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, ASSERT_RTNL(); - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy, - extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, + ifa_ipv4_policy, extack); if (err < 0) goto errout; @@ -793,8 +793,8 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, struct 
in_device *in_dev; int err; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy, - extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, + ifa_ipv4_policy, extack); if (err < 0) goto errout; @@ -1689,8 +1689,8 @@ static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh, fillargs->flags |= NLM_F_DUMP_FILTERED; } - err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFA_MAX, - ifa_ipv4_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX, + ifa_ipv4_policy, extack); if (err < 0) return err; @@ -1906,7 +1906,8 @@ static int inet_validate_link_af(const struct net_device *dev, if (dev && !__in_dev_get_rcu(dev)) return -EAFNOSUPPORT; - err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL); + err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, + inet_af_policy, NULL); if (err < 0) return err; @@ -1934,7 +1935,7 @@ static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla) if (!in_dev) return -EAFNOSUPPORT; - if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0) + if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0) BUG(); if (tb[IFLA_INET_CONF]) { @@ -2076,11 +2077,13 @@ static int inet_netconf_valid_get_req(struct sk_buff *skb, } if (!netlink_strict_get_check(skb)) - return nlmsg_parse(nlh, sizeof(struct netconfmsg), tb, - NETCONFA_MAX, devconf_ipv4_policy, extack); + return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg), + tb, NETCONFA_MAX, + devconf_ipv4_policy, extack); - err = nlmsg_parse_strict(nlh, sizeof(struct netconfmsg), tb, - NETCONFA_MAX, devconf_ipv4_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg), + tb, NETCONFA_MAX, + devconf_ipv4_policy, extack); if (err) return err; diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index d3170a8001b2..8edcfa66d1e5 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -109,6 +109,44 @@ static void esp4_gso_encap(struct xfrm_state *x, struct sk_buff *skb) xo->proto = proto; } +static struct sk_buff *xfrm4_tunnel_gso_segment(struct xfrm_state *x, + struct sk_buff *skb, + netdev_features_t features) +{ + __skb_push(skb, skb->mac_len); + return skb_mac_gso_segment(skb, features); +} + +static struct sk_buff *xfrm4_transport_gso_segment(struct xfrm_state *x, + struct sk_buff *skb, + netdev_features_t features) +{ + const struct net_offload *ops; + struct sk_buff *segs = ERR_PTR(-EINVAL); + struct xfrm_offload *xo = xfrm_offload(skb); + + skb->transport_header += x->props.header_len; + ops = rcu_dereference(inet_offloads[xo->proto]); + if (likely(ops && ops->callbacks.gso_segment)) + segs = ops->callbacks.gso_segment(skb, features); + + return segs; +} + +static struct sk_buff *xfrm4_outer_mode_gso_segment(struct xfrm_state *x, + struct sk_buff *skb, + netdev_features_t features) +{ + switch (x->outer_mode.encap) { + case XFRM_MODE_TUNNEL: + return xfrm4_tunnel_gso_segment(x, skb, features); + case XFRM_MODE_TRANSPORT: + return xfrm4_transport_gso_segment(x, skb, features); + } + + return ERR_PTR(-EOPNOTSUPP); +} + static struct sk_buff *esp4_gso_segment(struct sk_buff *skb, netdev_features_t features) { @@ -140,14 +178,16 @@ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb, skb->encap_hdr_csum = 1; - if (!(features & NETIF_F_HW_ESP) || x->xso.dev != skb->dev) + if ((!(skb->dev->gso_partial_features & NETIF_F_HW_ESP) && + !(features & NETIF_F_HW_ESP)) || x->xso.dev != skb->dev) esp_features = features & ~(NETIF_F_SG | 
NETIF_F_CSUM_MASK); - else if (!(features & NETIF_F_HW_ESP_TX_CSUM)) + else if (!(features & NETIF_F_HW_ESP_TX_CSUM) && + !(skb->dev->gso_partial_features & NETIF_F_HW_ESP_TX_CSUM)) esp_features = features & ~NETIF_F_CSUM_MASK; xo->flags |= XFRM_GSO_SEGMENT; - return x->outer_mode->gso_segment(x, skb, esp_features); + return xfrm4_outer_mode_gso_segment(x, skb, esp_features); } static int esp_input_tail(struct xfrm_state *x, struct sk_buff *skb) @@ -183,7 +223,9 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_ if (!xo) return -EINVAL; - if (!(features & NETIF_F_HW_ESP) || x->xso.dev != skb->dev) { + if ((!(features & NETIF_F_HW_ESP) && + !(skb->dev->gso_partial_features & NETIF_F_HW_ESP)) || + x->xso.dev != skb->dev) { xo->flags |= CRYPTO_FALLBACK; hw_offload = false; } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index ed14ec245584..b298255f6fdb 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -307,7 +307,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) .flowi4_mark = vmark ? skb->mark : 0, }; if (!fib_lookup(net, &fl4, &res, 0)) - return FIB_RES_PREFSRC(net, res); + return fib_result_prefsrc(net, &res); } else { scope = RT_SCOPE_LINK; } @@ -324,16 +324,16 @@ bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev) for (ret = 0; ret < fi->fib_nhs; ret++) { struct fib_nh *nh = &fi->fib_nh[ret]; - if (nh->nh_dev == dev) { + if (nh->fib_nh_dev == dev) { dev_match = true; break; - } else if (l3mdev_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) { + } else if (l3mdev_master_ifindex_rcu(nh->fib_nh_dev) == dev->ifindex) { dev_match = true; break; } } #else - if (fi->fib_nh[0].nh_dev == dev) + if (fi->fib_nh[0].fib_nh_dev == dev) dev_match = true; #endif @@ -390,7 +390,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, dev_match = fib_info_nh_uses_dev(res.fi, dev); if (dev_match) { - ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; + ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_HOST; return ret; } if (no_addr) @@ -402,7 +402,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, ret = 0; if (fib_lookup(net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE) == 0) { if (res.type == RTN_UNICAST) - ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; + ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_HOST; } return ret; @@ -558,7 +558,8 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, if (rt->rt_gateway.sa_family == AF_INET && addr) { unsigned int addr_type; - cfg->fc_gw = addr; + cfg->fc_gw4 = addr; + cfg->fc_gw_family = AF_INET; addr_type = inet_addr_type_table(net, addr, cfg->fc_table); if (rt->rt_flags & RTF_GATEWAY && addr_type == RTN_UNICAST) @@ -568,7 +569,7 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, if (cmd == SIOCDELRT) return 0; - if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw) + if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw_family) return -EINVAL; if (cfg->fc_scope == RT_SCOPE_NOWHERE) @@ -664,16 +665,61 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { [RTA_DPORT] = { .type = NLA_U16 }, }; +int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla, + struct netlink_ext_ack *extack) +{ + struct rtvia *via; + int alen; + + if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) { + NL_SET_ERR_MSG(extack, "Invalid attribute length for RTA_VIA"); + return -EINVAL; + } + + via = nla_data(nla); + alen = nla_len(nla) - offsetof(struct rtvia, rtvia_addr); + + switch 
(via->rtvia_family) { + case AF_INET: + if (alen != sizeof(__be32)) { + NL_SET_ERR_MSG(extack, "Invalid IPv4 address in RTA_VIA"); + return -EINVAL; + } + cfg->fc_gw_family = AF_INET; + cfg->fc_gw4 = *((__be32 *)via->rtvia_addr); + break; + case AF_INET6: +#ifdef CONFIG_IPV6 + if (alen != sizeof(struct in6_addr)) { + NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_VIA"); + return -EINVAL; + } + cfg->fc_gw_family = AF_INET6; + cfg->fc_gw6 = *((struct in6_addr *)via->rtvia_addr); +#else + NL_SET_ERR_MSG(extack, "IPv6 support not enabled in kernel"); + return -EINVAL; +#endif + break; + default: + NL_SET_ERR_MSG(extack, "Unsupported address family in RTA_VIA"); + return -EINVAL; + } + + return 0; +} + static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh, struct fib_config *cfg, struct netlink_ext_ack *extack) { + bool has_gw = false, has_via = false; struct nlattr *attr; int err, remaining; struct rtmsg *rtm; - err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy, - extack); + err = nlmsg_validate_deprecated(nlh, sizeof(*rtm), RTA_MAX, + rtm_ipv4_policy, extack); if (err < 0) goto errout; @@ -708,12 +754,17 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, cfg->fc_oif = nla_get_u32(attr); break; case RTA_GATEWAY: - cfg->fc_gw = nla_get_be32(attr); + has_gw = true; + cfg->fc_gw4 = nla_get_be32(attr); + if (cfg->fc_gw4) + cfg->fc_gw_family = AF_INET; break; case RTA_VIA: - NL_SET_ERR_MSG(extack, "IPv4 does not support RTA_VIA attribute"); - err = -EINVAL; - goto errout; + has_via = true; + err = fib_gw_from_via(cfg, attr, extack); + if (err) + goto errout; + break; case RTA_PRIORITY: cfg->fc_priority = nla_get_u32(attr); break; @@ -752,6 +803,12 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, } } + if (has_gw && has_via) { + NL_SET_ERR_MSG(extack, + "Nexthop configuration can not contain both GATEWAY and VIA"); + goto errout; + } + return 0; errout: return err; @@ -839,8 +896,8 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, filter->rt_type = rtm->rtm_type; filter->table_id = rtm->rtm_table; - err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_ipv4_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv4_policy, extack); if (err < 0) return err; diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index e6ff282bb7f4..7945f0534db7 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -45,6 +45,7 @@ static inline void fib_result_assign(struct fib_result *res, { /* we used to play games with refcounts, but we now use RCU */ res->fi = fi; + res->nhc = fib_info_nhc(fi, 0); } struct fib_prop { diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 8e185b5a2bf6..d3da6a10f86f 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -41,10 +41,12 @@ #include <net/tcp.h> #include <net/sock.h> #include <net/ip_fib.h> +#include <net/ip6_fib.h> #include <net/netlink.h> -#include <net/nexthop.h> +#include <net/rtnh.h> #include <net/lwtunnel.h> #include <net/fib_notifier.h> +#include <net/addrconf.h> #include "fib_lookup.h" @@ -157,12 +159,12 @@ static void rt_fibinfo_free(struct rtable __rcu **rtp) dst_release_immediate(&rt->dst); } -static void free_nh_exceptions(struct fib_nh *nh) +static void free_nh_exceptions(struct fib_nh_common *nhc) { struct fnhe_hash_bucket *hash; int i; - hash = rcu_dereference_protected(nh->nh_exceptions, 1); + hash = rcu_dereference_protected(nhc->nhc_exceptions, 
1); if (!hash) return; for (i = 0; i < FNHE_HASH_SIZE; i++) { @@ -204,18 +206,34 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp) free_percpu(rtp); } +void fib_nh_common_release(struct fib_nh_common *nhc) +{ + if (nhc->nhc_dev) + dev_put(nhc->nhc_dev); + + lwtstate_put(nhc->nhc_lwtstate); + rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output); + rt_fibinfo_free(&nhc->nhc_rth_input); + free_nh_exceptions(nhc); +} +EXPORT_SYMBOL_GPL(fib_nh_common_release); + +void fib_nh_release(struct net *net, struct fib_nh *fib_nh) +{ +#ifdef CONFIG_IP_ROUTE_CLASSID + if (fib_nh->nh_tclassid) + net->ipv4.fib_num_tclassid_users--; +#endif + fib_nh_common_release(&fib_nh->nh_common); +} + /* Release a nexthop info record */ static void free_fib_info_rcu(struct rcu_head *head) { struct fib_info *fi = container_of(head, struct fib_info, rcu); change_nexthops(fi) { - if (nexthop_nh->nh_dev) - dev_put(nexthop_nh->nh_dev); - lwtstate_put(nexthop_nh->nh_lwtstate); - free_nh_exceptions(nexthop_nh); - rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output); - rt_fibinfo_free(&nexthop_nh->nh_rth_input); + fib_nh_release(fi->fib_net, nexthop_nh); } endfor_nexthops(fi); ip_fib_metrics_put(fi->fib_metrics); @@ -230,12 +248,7 @@ void free_fib_info(struct fib_info *fi) return; } fib_info_cnt--; -#ifdef CONFIG_IP_ROUTE_CLASSID - change_nexthops(fi) { - if (nexthop_nh->nh_tclassid) - fi->fib_net->ipv4.fib_num_tclassid_users--; - } endfor_nexthops(fi); -#endif + call_rcu(&fi->rcu, free_fib_info_rcu); } EXPORT_SYMBOL_GPL(free_fib_info); @@ -248,7 +261,7 @@ void fib_release_info(struct fib_info *fi) if (fi->fib_prefsrc) hlist_del(&fi->fib_lhash); change_nexthops(fi) { - if (!nexthop_nh->nh_dev) + if (!nexthop_nh->fib_nh_dev) continue; hlist_del(&nexthop_nh->nh_hash); } endfor_nexthops(fi) @@ -263,18 +276,27 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) const struct fib_nh *onh = ofi->fib_nh; for_nexthops(fi) { - if (nh->nh_oif != onh->nh_oif || - nh->nh_gw != onh->nh_gw || - nh->nh_scope != onh->nh_scope || + if (nh->fib_nh_oif != onh->fib_nh_oif || + nh->fib_nh_gw_family != onh->fib_nh_gw_family || + nh->fib_nh_scope != onh->fib_nh_scope || #ifdef CONFIG_IP_ROUTE_MULTIPATH - nh->nh_weight != onh->nh_weight || + nh->fib_nh_weight != onh->fib_nh_weight || #endif #ifdef CONFIG_IP_ROUTE_CLASSID nh->nh_tclassid != onh->nh_tclassid || #endif - lwtunnel_cmp_encap(nh->nh_lwtstate, onh->nh_lwtstate) || - ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_COMPARE_MASK)) + lwtunnel_cmp_encap(nh->fib_nh_lws, onh->fib_nh_lws) || + ((nh->fib_nh_flags ^ onh->fib_nh_flags) & ~RTNH_COMPARE_MASK)) + return -1; + + if (nh->fib_nh_gw_family == AF_INET && + nh->fib_nh_gw4 != onh->fib_nh_gw4) return -1; + + if (nh->fib_nh_gw_family == AF_INET6 && + ipv6_addr_cmp(&nh->fib_nh_gw6, &onh->fib_nh_gw6)) + return -1; + onh++; } endfor_nexthops(fi); return 0; @@ -298,7 +320,7 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi) val ^= (__force u32)fi->fib_prefsrc; val ^= fi->fib_priority; for_nexthops(fi) { - val ^= fib_devindex_hashfn(nh->nh_oif); + val ^= fib_devindex_hashfn(nh->fib_nh_oif); } endfor_nexthops(fi) return (val ^ (val >> 7) ^ (val >> 12)) & mask; @@ -347,9 +369,9 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev) hash = fib_devindex_hashfn(dev->ifindex); head = &fib_info_devhash[hash]; hlist_for_each_entry(nh, head, nh_hash) { - if (nh->nh_dev == dev && - nh->nh_gw == gw && - !(nh->nh_flags & RTNH_F_DEAD)) { + if (nh->fib_nh_dev == dev && + nh->fib_nh_gw4 == gw && + 
!(nh->fib_nh_flags & RTNH_F_DEAD)) { spin_unlock(&fib_info_lock); return 0; } @@ -384,10 +406,10 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi) /* grab encap info */ for_nexthops(fi) { - if (nh->nh_lwtstate) { + if (nh->fib_nh_lws) { /* RTA_ENCAP_TYPE */ nh_encapsize += lwtunnel_get_encap_size( - nh->nh_lwtstate); + nh->fib_nh_lws); /* RTA_ENCAP */ nh_encapsize += nla_total_size(2); } @@ -435,10 +457,18 @@ static int fib_detect_death(struct fib_info *fi, int order, struct fib_info **last_resort, int *last_idx, int dflt) { + const struct fib_nh_common *nhc = fib_info_nhc(fi, 0); struct neighbour *n; int state = NUD_NONE; - n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev); + if (likely(nhc->nhc_gw_family == AF_INET)) + n = neigh_lookup(&arp_tbl, &nhc->nhc_gw.ipv4, nhc->nhc_dev); + else if (nhc->nhc_gw_family == AF_INET6) + n = neigh_lookup(ipv6_stub->nd_tbl, &nhc->nhc_gw.ipv6, + nhc->nhc_dev); + else + n = NULL; + if (n) { state = n->nud_state; neigh_release(n); @@ -457,6 +487,75 @@ static int fib_detect_death(struct fib_info *fi, int order, return 1; } +int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *encap, + u16 encap_type, void *cfg, gfp_t gfp_flags, + struct netlink_ext_ack *extack) +{ + int err; + + nhc->nhc_pcpu_rth_output = alloc_percpu_gfp(struct rtable __rcu *, + gfp_flags); + if (!nhc->nhc_pcpu_rth_output) + return -ENOMEM; + + if (encap) { + struct lwtunnel_state *lwtstate; + + if (encap_type == LWTUNNEL_ENCAP_NONE) { + NL_SET_ERR_MSG(extack, "LWT encap type not specified"); + err = -EINVAL; + goto lwt_failure; + } + err = lwtunnel_build_state(encap_type, encap, nhc->nhc_family, + cfg, &lwtstate, extack); + if (err) + goto lwt_failure; + + nhc->nhc_lwtstate = lwtstate_get(lwtstate); + } + + return 0; + +lwt_failure: + rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output); + nhc->nhc_pcpu_rth_output = NULL; + return err; +} +EXPORT_SYMBOL_GPL(fib_nh_common_init); + +int fib_nh_init(struct net *net, struct fib_nh *nh, + struct fib_config *cfg, int nh_weight, + struct netlink_ext_ack *extack) +{ + int err; + + nh->fib_nh_family = AF_INET; + + err = fib_nh_common_init(&nh->nh_common, cfg->fc_encap, + cfg->fc_encap_type, cfg, GFP_KERNEL, extack); + if (err) + return err; + + nh->fib_nh_oif = cfg->fc_oif; + nh->fib_nh_gw_family = cfg->fc_gw_family; + if (cfg->fc_gw_family == AF_INET) + nh->fib_nh_gw4 = cfg->fc_gw4; + else if (cfg->fc_gw_family == AF_INET6) + nh->fib_nh_gw6 = cfg->fc_gw6; + + nh->fib_nh_flags = cfg->fc_flags; + +#ifdef CONFIG_IP_ROUTE_CLASSID + nh->nh_tclassid = cfg->fc_flow; + if (nh->nh_tclassid) + net->ipv4.fib_num_tclassid_users++; +#endif +#ifdef CONFIG_IP_ROUTE_MULTIPATH + nh->fib_nh_weight = nh_weight; +#endif + return 0; +} + #ifdef CONFIG_IP_ROUTE_MULTIPATH static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining, @@ -483,11 +582,15 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, int remaining, struct fib_config *cfg, struct netlink_ext_ack *extack) { + struct net *net = fi->fib_net; + struct fib_config fib_cfg; int ret; change_nexthops(fi) { int attrlen; + memset(&fib_cfg, 0, sizeof(fib_cfg)); + if (!rtnh_ok(rtnh, remaining)) { NL_SET_ERR_MSG(extack, "Invalid nexthop configuration - extra data after nexthop"); @@ -500,56 +603,73 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, return -EINVAL; } - nexthop_nh->nh_flags = - (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; - nexthop_nh->nh_oif = rtnh->rtnh_ifindex; - nexthop_nh->nh_weight = rtnh->rtnh_hops + 1; + fib_cfg.fc_flags 
= (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; + fib_cfg.fc_oif = rtnh->rtnh_ifindex; attrlen = rtnh_attrlen(rtnh); if (attrlen > 0) { - struct nlattr *nla, *attrs = rtnh_attrs(rtnh); + struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh); nla = nla_find(attrs, attrlen, RTA_GATEWAY); - nexthop_nh->nh_gw = nla ? nla_get_in_addr(nla) : 0; -#ifdef CONFIG_IP_ROUTE_CLASSID - nla = nla_find(attrs, attrlen, RTA_FLOW); - nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; - if (nexthop_nh->nh_tclassid) - fi->fib_net->ipv4.fib_num_tclassid_users++; -#endif - nla = nla_find(attrs, attrlen, RTA_ENCAP); + nlav = nla_find(attrs, attrlen, RTA_VIA); + if (nla && nlav) { + NL_SET_ERR_MSG(extack, + "Nexthop configuration can not contain both GATEWAY and VIA"); + return -EINVAL; + } if (nla) { - struct lwtunnel_state *lwtstate; - struct nlattr *nla_entype; - - nla_entype = nla_find(attrs, attrlen, - RTA_ENCAP_TYPE); - if (!nla_entype) { - NL_SET_BAD_ATTR(extack, nla); - NL_SET_ERR_MSG(extack, - "Encap type is missing"); - goto err_inval; - } - - ret = lwtunnel_build_state(nla_get_u16( - nla_entype), - nla, AF_INET, cfg, - &lwtstate, extack); + fib_cfg.fc_gw4 = nla_get_in_addr(nla); + if (fib_cfg.fc_gw4) + fib_cfg.fc_gw_family = AF_INET; + } else if (nlav) { + ret = fib_gw_from_via(&fib_cfg, nlav, extack); if (ret) goto errout; - nexthop_nh->nh_lwtstate = - lwtstate_get(lwtstate); } + + nla = nla_find(attrs, attrlen, RTA_FLOW); + if (nla) + fib_cfg.fc_flow = nla_get_u32(nla); + + fib_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP); + nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); + if (nla) + fib_cfg.fc_encap_type = nla_get_u16(nla); } + ret = fib_nh_init(net, nexthop_nh, &fib_cfg, + rtnh->rtnh_hops + 1, extack); + if (ret) + goto errout; + rtnh = rtnh_next(rtnh, &remaining); } endfor_nexthops(fi); - return 0; - -err_inval: ret = -EINVAL; - + if (cfg->fc_oif && fi->fib_nh->fib_nh_oif != cfg->fc_oif) { + NL_SET_ERR_MSG(extack, + "Nexthop device index does not match RTA_OIF"); + goto errout; + } + if (cfg->fc_gw_family) { + if (cfg->fc_gw_family != fi->fib_nh->fib_nh_gw_family || + (cfg->fc_gw_family == AF_INET && + fi->fib_nh->fib_nh_gw4 != cfg->fc_gw4) || + (cfg->fc_gw_family == AF_INET6 && + ipv6_addr_cmp(&fi->fib_nh->fib_nh_gw6, &cfg->fc_gw6))) { + NL_SET_ERR_MSG(extack, + "Nexthop gateway does not match RTA_GATEWAY or RTA_VIA"); + goto errout; + } + } +#ifdef CONFIG_IP_ROUTE_CLASSID + if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) { + NL_SET_ERR_MSG(extack, + "Nexthop class id does not match RTA_FLOW"); + goto errout; + } +#endif + ret = 0; errout: return ret; } @@ -558,49 +678,51 @@ static void fib_rebalance(struct fib_info *fi) { int total; int w; - struct in_device *in_dev; if (fi->fib_nhs < 2) return; total = 0; for_nexthops(fi) { - if (nh->nh_flags & RTNH_F_DEAD) + if (nh->fib_nh_flags & RTNH_F_DEAD) continue; - in_dev = __in_dev_get_rtnl(nh->nh_dev); - - if (in_dev && - IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && - nh->nh_flags & RTNH_F_LINKDOWN) + if (ip_ignore_linkdown(nh->fib_nh_dev) && + nh->fib_nh_flags & RTNH_F_LINKDOWN) continue; - total += nh->nh_weight; + total += nh->fib_nh_weight; } endfor_nexthops(fi); w = 0; change_nexthops(fi) { int upper_bound; - in_dev = __in_dev_get_rtnl(nexthop_nh->nh_dev); - - if (nexthop_nh->nh_flags & RTNH_F_DEAD) { + if (nexthop_nh->fib_nh_flags & RTNH_F_DEAD) { upper_bound = -1; - } else if (in_dev && - IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && - nexthop_nh->nh_flags & RTNH_F_LINKDOWN) { + } else if 
(ip_ignore_linkdown(nexthop_nh->fib_nh_dev) && + nexthop_nh->fib_nh_flags & RTNH_F_LINKDOWN) { upper_bound = -1; } else { - w += nexthop_nh->nh_weight; + w += nexthop_nh->fib_nh_weight; upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1; } - atomic_set(&nexthop_nh->nh_upper_bound, upper_bound); + atomic_set(&nexthop_nh->fib_nh_upper_bound, upper_bound); } endfor_nexthops(fi); } #else /* CONFIG_IP_ROUTE_MULTIPATH */ +static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, + int remaining, struct fib_config *cfg, + struct netlink_ext_ack *extack) +{ + NL_SET_ERR_MSG(extack, "Multipath support not enabled in kernel"); + + return -EINVAL; +} + #define fib_rebalance(fi) do { } while (0) #endif /* CONFIG_IP_ROUTE_MULTIPATH */ @@ -620,7 +742,7 @@ static int fib_encap_match(u16 encap_type, ret = lwtunnel_build_state(encap_type, encap, AF_INET, cfg, &lwtstate, extack); if (!ret) { - result = lwtunnel_cmp_encap(lwtstate, nh->nh_lwtstate); + result = lwtunnel_cmp_encap(lwtstate, nh->fib_nh_lws); lwtstate_free(lwtstate); } @@ -638,7 +760,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority) return 1; - if (cfg->fc_oif || cfg->fc_gw) { + if (cfg->fc_oif || cfg->fc_gw_family) { if (cfg->fc_encap) { if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap, fi->fib_nh, cfg, extack)) @@ -649,10 +771,20 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, cfg->fc_flow != fi->fib_nh->nh_tclassid) return 1; #endif - if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) && - (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw)) - return 0; - return 1; + if ((cfg->fc_oif && cfg->fc_oif != fi->fib_nh->fib_nh_oif) || + (cfg->fc_gw_family && + cfg->fc_gw_family != fi->fib_nh->fib_nh_gw_family)) + return 1; + + if (cfg->fc_gw_family == AF_INET && + cfg->fc_gw4 != fi->fib_nh->fib_nh_gw4) + return 1; + + if (cfg->fc_gw_family == AF_INET6 && + ipv6_addr_cmp(&cfg->fc_gw6, &fi->fib_nh->fib_nh_gw6)) + return 1; + + return 0; } #ifdef CONFIG_IP_ROUTE_MULTIPATH @@ -668,16 +800,48 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, if (!rtnh_ok(rtnh, remaining)) return -EINVAL; - if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif) + if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->fib_nh_oif) return 1; attrlen = rtnh_attrlen(rtnh); if (attrlen > 0) { - struct nlattr *nla, *attrs = rtnh_attrs(rtnh); + struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh); nla = nla_find(attrs, attrlen, RTA_GATEWAY); - if (nla && nla_get_in_addr(nla) != nh->nh_gw) - return 1; + nlav = nla_find(attrs, attrlen, RTA_VIA); + if (nla && nlav) { + NL_SET_ERR_MSG(extack, + "Nexthop configuration can not contain both GATEWAY and VIA"); + return -EINVAL; + } + + if (nla) { + if (nh->fib_nh_gw_family != AF_INET || + nla_get_in_addr(nla) != nh->fib_nh_gw4) + return 1; + } else if (nlav) { + struct fib_config cfg2; + int err; + + err = fib_gw_from_via(&cfg2, nlav, extack); + if (err) + return err; + + switch (nh->fib_nh_gw_family) { + case AF_INET: + if (cfg2.fc_gw_family != AF_INET || + cfg2.fc_gw4 != nh->fib_nh_gw4) + return 1; + break; + case AF_INET6: + if (cfg2.fc_gw_family != AF_INET6 || + ipv6_addr_cmp(&cfg2.fc_gw6, + &nh->fib_nh_gw6)) + return 1; + break; + } + } + #ifdef CONFIG_IP_ROUTE_CLASSID nla = nla_find(attrs, attrlen, RTA_FLOW); if (nla && nla_get_u32(nla) != nh->nh_tclassid) @@ -731,6 +895,30 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi) return true; } +static int 
fib_check_nh_v6_gw(struct net *net, struct fib_nh *nh, + u32 table, struct netlink_ext_ack *extack) +{ + struct fib6_config cfg = { + .fc_table = table, + .fc_flags = nh->fib_nh_flags | RTF_GATEWAY, + .fc_ifindex = nh->fib_nh_oif, + .fc_gateway = nh->fib_nh_gw6, + }; + struct fib6_nh fib6_nh = {}; + int err; + + err = ipv6_stub->fib6_nh_init(net, &fib6_nh, &cfg, GFP_KERNEL, extack); + if (!err) { + nh->fib_nh_dev = fib6_nh.fib_nh_dev; + dev_hold(nh->fib_nh_dev); + nh->fib_nh_oif = nh->fib_nh_dev->ifindex; + nh->fib_nh_scope = RT_SCOPE_LINK; + + ipv6_stub->fib6_nh_release(&fib6_nh); + } + + return err; +} /* * Picture @@ -775,133 +963,152 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi) * | * |-> {local prefix} (terminal node) */ -static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh, - struct netlink_ext_ack *extack) +static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table, + u8 scope, struct netlink_ext_ack *extack) { - int err = 0; - struct net *net; struct net_device *dev; + struct fib_result res; + int err; - net = cfg->fc_nlinfo.nl_net; - if (nh->nh_gw) { - struct fib_result res; - - if (nh->nh_flags & RTNH_F_ONLINK) { - unsigned int addr_type; + if (nh->fib_nh_flags & RTNH_F_ONLINK) { + unsigned int addr_type; - if (cfg->fc_scope >= RT_SCOPE_LINK) { - NL_SET_ERR_MSG(extack, - "Nexthop has invalid scope"); - return -EINVAL; - } - dev = __dev_get_by_index(net, nh->nh_oif); - if (!dev) { - NL_SET_ERR_MSG(extack, "Nexthop device required for onlink"); - return -ENODEV; - } - if (!(dev->flags & IFF_UP)) { - NL_SET_ERR_MSG(extack, - "Nexthop device is not up"); - return -ENETDOWN; - } - addr_type = inet_addr_type_dev_table(net, dev, nh->nh_gw); - if (addr_type != RTN_UNICAST) { - NL_SET_ERR_MSG(extack, - "Nexthop has invalid gateway"); - return -EINVAL; - } - if (!netif_carrier_ok(dev)) - nh->nh_flags |= RTNH_F_LINKDOWN; - nh->nh_dev = dev; - dev_hold(dev); - nh->nh_scope = RT_SCOPE_LINK; - return 0; + if (scope >= RT_SCOPE_LINK) { + NL_SET_ERR_MSG(extack, "Nexthop has invalid scope"); + return -EINVAL; } - rcu_read_lock(); - { - struct fib_table *tbl = NULL; - struct flowi4 fl4 = { - .daddr = nh->nh_gw, - .flowi4_scope = cfg->fc_scope + 1, - .flowi4_oif = nh->nh_oif, - .flowi4_iif = LOOPBACK_IFINDEX, - }; - - /* It is not necessary, but requires a bit of thinking */ - if (fl4.flowi4_scope < RT_SCOPE_LINK) - fl4.flowi4_scope = RT_SCOPE_LINK; - - if (cfg->fc_table) - tbl = fib_get_table(net, cfg->fc_table); - - if (tbl) - err = fib_table_lookup(tbl, &fl4, &res, - FIB_LOOKUP_IGNORE_LINKSTATE | - FIB_LOOKUP_NOREF); - - /* on error or if no table given do full lookup. 
This - * is needed for example when nexthops are in the local - * table rather than the given table - */ - if (!tbl || err) { - err = fib_lookup(net, &fl4, &res, - FIB_LOOKUP_IGNORE_LINKSTATE); - } - - if (err) { - NL_SET_ERR_MSG(extack, - "Nexthop has invalid gateway"); - rcu_read_unlock(); - return err; - } + dev = __dev_get_by_index(net, nh->fib_nh_oif); + if (!dev) { + NL_SET_ERR_MSG(extack, "Nexthop device required for onlink"); + return -ENODEV; } - err = -EINVAL; - if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) { - NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway"); - goto out; + if (!(dev->flags & IFF_UP)) { + NL_SET_ERR_MSG(extack, "Nexthop device is not up"); + return -ENETDOWN; } - nh->nh_scope = res.scope; - nh->nh_oif = FIB_RES_OIF(res); - nh->nh_dev = dev = FIB_RES_DEV(res); - if (!dev) { - NL_SET_ERR_MSG(extack, - "No egress device for nexthop gateway"); - goto out; + addr_type = inet_addr_type_dev_table(net, dev, nh->fib_nh_gw4); + if (addr_type != RTN_UNICAST) { + NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway"); + return -EINVAL; } - dev_hold(dev); if (!netif_carrier_ok(dev)) - nh->nh_flags |= RTNH_F_LINKDOWN; - err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN; - } else { - struct in_device *in_dev; - - if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) { - NL_SET_ERR_MSG(extack, - "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set"); - return -EINVAL; + nh->fib_nh_flags |= RTNH_F_LINKDOWN; + nh->fib_nh_dev = dev; + dev_hold(dev); + nh->fib_nh_scope = RT_SCOPE_LINK; + return 0; + } + rcu_read_lock(); + { + struct fib_table *tbl = NULL; + struct flowi4 fl4 = { + .daddr = nh->fib_nh_gw4, + .flowi4_scope = scope + 1, + .flowi4_oif = nh->fib_nh_oif, + .flowi4_iif = LOOPBACK_IFINDEX, + }; + + /* It is not necessary, but requires a bit of thinking */ + if (fl4.flowi4_scope < RT_SCOPE_LINK) + fl4.flowi4_scope = RT_SCOPE_LINK; + + if (table) + tbl = fib_get_table(net, table); + + if (tbl) + err = fib_table_lookup(tbl, &fl4, &res, + FIB_LOOKUP_IGNORE_LINKSTATE | + FIB_LOOKUP_NOREF); + + /* on error or if no table given do full lookup. This + * is needed for example when nexthops are in the local + * table rather than the given table + */ + if (!tbl || err) { + err = fib_lookup(net, &fl4, &res, + FIB_LOOKUP_IGNORE_LINKSTATE); } - rcu_read_lock(); - err = -ENODEV; - in_dev = inetdev_by_index(net, nh->nh_oif); - if (!in_dev) - goto out; - err = -ENETDOWN; - if (!(in_dev->dev->flags & IFF_UP)) { - NL_SET_ERR_MSG(extack, "Device for nexthop is not up"); + + if (err) { + NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway"); goto out; } - nh->nh_dev = in_dev->dev; - dev_hold(nh->nh_dev); - nh->nh_scope = RT_SCOPE_HOST; - if (!netif_carrier_ok(nh->nh_dev)) - nh->nh_flags |= RTNH_F_LINKDOWN; - err = 0; } + + err = -EINVAL; + if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) { + NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway"); + goto out; + } + nh->fib_nh_scope = res.scope; + nh->fib_nh_oif = FIB_RES_OIF(res); + nh->fib_nh_dev = dev = FIB_RES_DEV(res); + if (!dev) { + NL_SET_ERR_MSG(extack, + "No egress device for nexthop gateway"); + goto out; + } + dev_hold(dev); + if (!netif_carrier_ok(dev)) + nh->fib_nh_flags |= RTNH_F_LINKDOWN; + err = (dev->flags & IFF_UP) ? 
0 : -ENETDOWN; out: rcu_read_unlock(); return err; } +static int fib_check_nh_nongw(struct net *net, struct fib_nh *nh, + struct netlink_ext_ack *extack) +{ + struct in_device *in_dev; + int err; + + if (nh->fib_nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) { + NL_SET_ERR_MSG(extack, + "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set"); + return -EINVAL; + } + + rcu_read_lock(); + + err = -ENODEV; + in_dev = inetdev_by_index(net, nh->fib_nh_oif); + if (!in_dev) + goto out; + err = -ENETDOWN; + if (!(in_dev->dev->flags & IFF_UP)) { + NL_SET_ERR_MSG(extack, "Device for nexthop is not up"); + goto out; + } + + nh->fib_nh_dev = in_dev->dev; + dev_hold(nh->fib_nh_dev); + nh->fib_nh_scope = RT_SCOPE_HOST; + if (!netif_carrier_ok(nh->fib_nh_dev)) + nh->fib_nh_flags |= RTNH_F_LINKDOWN; + err = 0; +out: + rcu_read_unlock(); + return err; +} + +static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh, + struct netlink_ext_ack *extack) +{ + struct net *net = cfg->fc_nlinfo.nl_net; + u32 table = cfg->fc_table; + int err; + + if (nh->fib_nh_gw_family == AF_INET) + err = fib_check_nh_v4_gw(net, nh, table, cfg->fc_scope, extack); + else if (nh->fib_nh_gw_family == AF_INET6) + err = fib_check_nh_v6_gw(net, nh, table, extack); + else + err = fib_check_nh_nongw(net, nh, extack); + + return err; +} + static inline unsigned int fib_laddr_hashfn(__be32 val) { unsigned int mask = (fib_info_hash_size - 1); @@ -986,14 +1193,29 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash, __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh) { - nh->nh_saddr = inet_select_addr(nh->nh_dev, - nh->nh_gw, + nh->nh_saddr = inet_select_addr(nh->fib_nh_dev, + nh->fib_nh_gw4, nh->nh_parent->fib_scope); nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid); return nh->nh_saddr; } +__be32 fib_result_prefsrc(struct net *net, struct fib_result *res) +{ + struct fib_nh_common *nhc = res->nhc; + struct fib_nh *nh; + + if (res->fi->fib_prefsrc) + return res->fi->fib_prefsrc; + + nh = container_of(nhc, struct fib_nh, nh_common); + if (nh->nh_saddr_genid == atomic_read(&net->ipv4.dev_addr_genid)) + return nh->nh_saddr; + + return fib_info_update_nh_saddr(net, nh); +} + static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc) { if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || @@ -1096,72 +1318,18 @@ struct fib_info *fib_create_info(struct fib_config *cfg, fi->fib_nhs = nhs; change_nexthops(fi) { nexthop_nh->nh_parent = fi; - nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *); - if (!nexthop_nh->nh_pcpu_rth_output) - goto failure; } endfor_nexthops(fi) - if (cfg->fc_mp) { -#ifdef CONFIG_IP_ROUTE_MULTIPATH + if (cfg->fc_mp) err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack); - if (err != 0) - goto failure; - if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) { - NL_SET_ERR_MSG(extack, - "Nexthop device index does not match RTA_OIF"); - goto err_inval; - } - if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) { - NL_SET_ERR_MSG(extack, - "Nexthop gateway does not match RTA_GATEWAY"); - goto err_inval; - } -#ifdef CONFIG_IP_ROUTE_CLASSID - if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) { - NL_SET_ERR_MSG(extack, - "Nexthop class id does not match RTA_FLOW"); - goto err_inval; - } -#endif -#else - NL_SET_ERR_MSG(extack, - "Multipath support not enabled in kernel"); - goto err_inval; -#endif - } else { - struct fib_nh *nh = fi->fib_nh; - - if (cfg->fc_encap) { - struct lwtunnel_state *lwtstate; - - if (cfg->fc_encap_type == 
LWTUNNEL_ENCAP_NONE) { - NL_SET_ERR_MSG(extack, - "LWT encap type not specified"); - goto err_inval; - } - err = lwtunnel_build_state(cfg->fc_encap_type, - cfg->fc_encap, AF_INET, cfg, - &lwtstate, extack); - if (err) - goto failure; + else + err = fib_nh_init(net, fi->fib_nh, cfg, 1, extack); - nh->nh_lwtstate = lwtstate_get(lwtstate); - } - nh->nh_oif = cfg->fc_oif; - nh->nh_gw = cfg->fc_gw; - nh->nh_flags = cfg->fc_flags; -#ifdef CONFIG_IP_ROUTE_CLASSID - nh->nh_tclassid = cfg->fc_flow; - if (nh->nh_tclassid) - fi->fib_net->ipv4.fib_num_tclassid_users++; -#endif -#ifdef CONFIG_IP_ROUTE_MULTIPATH - nh->nh_weight = 1; -#endif - } + if (err != 0) + goto failure; if (fib_props[cfg->fc_type].error) { - if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) { + if (cfg->fc_gw_family || cfg->fc_oif || cfg->fc_mp) { NL_SET_ERR_MSG(extack, "Gateway, device and multipath can not be specified for this route type"); goto err_inval; @@ -1195,15 +1363,15 @@ struct fib_info *fib_create_info(struct fib_config *cfg, "Route with host scope can not have multiple nexthops"); goto err_inval; } - if (nh->nh_gw) { + if (nh->fib_nh_gw_family) { NL_SET_ERR_MSG(extack, "Route with host scope can not have a gateway"); goto err_inval; } - nh->nh_scope = RT_SCOPE_NOWHERE; - nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif); + nh->fib_nh_scope = RT_SCOPE_NOWHERE; + nh->fib_nh_dev = dev_get_by_index(net, fi->fib_nh->fib_nh_oif); err = -ENODEV; - if (!nh->nh_dev) + if (!nh->fib_nh_dev) goto failure; } else { int linkdown = 0; @@ -1212,7 +1380,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg, err = fib_check_nh(cfg, nexthop_nh, extack); if (err != 0) goto failure; - if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN) + if (nexthop_nh->fib_nh_flags & RTNH_F_LINKDOWN) linkdown++; } endfor_nexthops(fi) if (linkdown == fi->fib_nhs) @@ -1226,6 +1394,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg, change_nexthops(fi) { fib_info_update_nh_saddr(net, nexthop_nh); + if (nexthop_nh->fib_nh_gw_family == AF_INET6) + fi->fib_nh_is_v6 = true; } endfor_nexthops(fi) fib_rebalance(fi); @@ -1254,9 +1424,9 @@ link_it: struct hlist_head *head; unsigned int hash; - if (!nexthop_nh->nh_dev) + if (!nexthop_nh->fib_nh_dev) continue; - hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex); + hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex); head = &fib_info_devhash[hash]; hlist_add_head(&nexthop_nh->nh_hash, head); } endfor_nexthops(fi) @@ -1275,6 +1445,141 @@ failure: return ERR_PTR(err); } +int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nhc, + unsigned char *flags, bool skip_oif) +{ + if (nhc->nhc_flags & RTNH_F_DEAD) + *flags |= RTNH_F_DEAD; + + if (nhc->nhc_flags & RTNH_F_LINKDOWN) { + *flags |= RTNH_F_LINKDOWN; + + rcu_read_lock(); + switch (nhc->nhc_family) { + case AF_INET: + if (ip_ignore_linkdown(nhc->nhc_dev)) + *flags |= RTNH_F_DEAD; + break; + case AF_INET6: + if (ip6_ignore_linkdown(nhc->nhc_dev)) + *flags |= RTNH_F_DEAD; + break; + } + rcu_read_unlock(); + } + + switch (nhc->nhc_gw_family) { + case AF_INET: + if (nla_put_in_addr(skb, RTA_GATEWAY, nhc->nhc_gw.ipv4)) + goto nla_put_failure; + break; + case AF_INET6: + /* if gateway family does not match nexthop family + * gateway is encoded as RTA_VIA + */ + if (nhc->nhc_gw_family != nhc->nhc_family) { + int alen = sizeof(struct in6_addr); + struct nlattr *nla; + struct rtvia *via; + + nla = nla_reserve(skb, RTA_VIA, alen + 2); + if (!nla) + goto nla_put_failure; + + via = nla_data(nla); + via->rtvia_family = AF_INET6; + 
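+ /* struct rtvia ends in a flexible rtvia_addr[] array; the nla_reserve()
+  * above sized the attribute as alen + 2 so the two-byte family tag is
+  * followed by the full 16-byte IPv6 gateway copied in below.
+  */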
memcpy(via->rtvia_addr, &nhc->nhc_gw.ipv6, alen); + } else if (nla_put_in6_addr(skb, RTA_GATEWAY, + &nhc->nhc_gw.ipv6) < 0) { + goto nla_put_failure; + } + break; + } + + *flags |= (nhc->nhc_flags & RTNH_F_ONLINK); + if (nhc->nhc_flags & RTNH_F_OFFLOAD) + *flags |= RTNH_F_OFFLOAD; + + if (!skip_oif && nhc->nhc_dev && + nla_put_u32(skb, RTA_OIF, nhc->nhc_dev->ifindex)) + goto nla_put_failure; + + if (nhc->nhc_lwtstate && + lwtunnel_fill_encap(skb, nhc->nhc_lwtstate, + RTA_ENCAP, RTA_ENCAP_TYPE) < 0) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} +EXPORT_SYMBOL_GPL(fib_nexthop_info); + +#if IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) || IS_ENABLED(CONFIG_IPV6) +int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc, + int nh_weight) +{ + const struct net_device *dev = nhc->nhc_dev; + struct rtnexthop *rtnh; + unsigned char flags = 0; + + rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); + if (!rtnh) + goto nla_put_failure; + + rtnh->rtnh_hops = nh_weight - 1; + rtnh->rtnh_ifindex = dev ? dev->ifindex : 0; + + if (fib_nexthop_info(skb, nhc, &flags, true) < 0) + goto nla_put_failure; + + rtnh->rtnh_flags = flags; + + /* length of rtnetlink header + attributes */ + rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh; + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} +EXPORT_SYMBOL_GPL(fib_add_nexthop); +#endif + +#ifdef CONFIG_IP_ROUTE_MULTIPATH +static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi) +{ + struct nlattr *mp; + + mp = nla_nest_start_noflag(skb, RTA_MULTIPATH); + if (!mp) + goto nla_put_failure; + + for_nexthops(fi) { + if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight) < 0) + goto nla_put_failure; +#ifdef CONFIG_IP_ROUTE_CLASSID + if (nh->nh_tclassid && + nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) + goto nla_put_failure; +#endif + } endfor_nexthops(fi); + + nla_nest_end(skb, mp); + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} +#else +static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi) +{ + return 0; +} +#endif + int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos, struct fib_info *fi, unsigned int flags) @@ -1315,80 +1620,23 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc)) goto nla_put_failure; if (fi->fib_nhs == 1) { - if (fi->fib_nh->nh_gw && - nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw)) - goto nla_put_failure; - if (fi->fib_nh->nh_oif && - nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif)) + struct fib_nh *nh = &fi->fib_nh[0]; + unsigned char flags = 0; + + if (fib_nexthop_info(skb, &nh->nh_common, &flags, false) < 0) goto nla_put_failure; - if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) { - struct in_device *in_dev; - - rcu_read_lock(); - in_dev = __in_dev_get_rcu(fi->fib_nh->nh_dev); - if (in_dev && - IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) - rtm->rtm_flags |= RTNH_F_DEAD; - rcu_read_unlock(); - } - if (fi->fib_nh->nh_flags & RTNH_F_OFFLOAD) - rtm->rtm_flags |= RTNH_F_OFFLOAD; + + rtm->rtm_flags = flags; #ifdef CONFIG_IP_ROUTE_CLASSID - if (fi->fib_nh[0].nh_tclassid && - nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid)) + if (nh->nh_tclassid && + nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) goto nla_put_failure; #endif - if (fi->fib_nh->nh_lwtstate && - lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate) < 0) + } else { + if (fib_add_multipath(skb, fi) < 0) goto nla_put_failure; } -#ifdef CONFIG_IP_ROUTE_MULTIPATH - 
if (fi->fib_nhs > 1) { - struct rtnexthop *rtnh; - struct nlattr *mp; - mp = nla_nest_start(skb, RTA_MULTIPATH); - if (!mp) - goto nla_put_failure; - - for_nexthops(fi) { - rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); - if (!rtnh) - goto nla_put_failure; - - rtnh->rtnh_flags = nh->nh_flags & 0xFF; - if (nh->nh_flags & RTNH_F_LINKDOWN) { - struct in_device *in_dev; - - rcu_read_lock(); - in_dev = __in_dev_get_rcu(nh->nh_dev); - if (in_dev && - IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) - rtnh->rtnh_flags |= RTNH_F_DEAD; - rcu_read_unlock(); - } - rtnh->rtnh_hops = nh->nh_weight - 1; - rtnh->rtnh_ifindex = nh->nh_oif; - - if (nh->nh_gw && - nla_put_in_addr(skb, RTA_GATEWAY, nh->nh_gw)) - goto nla_put_failure; -#ifdef CONFIG_IP_ROUTE_CLASSID - if (nh->nh_tclassid && - nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) - goto nla_put_failure; -#endif - if (nh->nh_lwtstate && - lwtunnel_fill_encap(skb, nh->nh_lwtstate) < 0) - goto nla_put_failure; - - /* length of rtnetlink header + attributes */ - rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh; - } endfor_nexthops(fi); - - nla_nest_end(skb, mp); - } -#endif nlmsg_end(skb, nlh); return 0; @@ -1427,28 +1675,26 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local) return ret; } -static int call_fib_nh_notifiers(struct fib_nh *fib_nh, +static int call_fib_nh_notifiers(struct fib_nh *nh, enum fib_event_type event_type) { - struct in_device *in_dev = __in_dev_get_rtnl(fib_nh->nh_dev); + bool ignore_link_down = ip_ignore_linkdown(nh->fib_nh_dev); struct fib_nh_notifier_info info = { - .fib_nh = fib_nh, + .fib_nh = nh, }; switch (event_type) { case FIB_EVENT_NH_ADD: - if (fib_nh->nh_flags & RTNH_F_DEAD) + if (nh->fib_nh_flags & RTNH_F_DEAD) break; - if (IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && - fib_nh->nh_flags & RTNH_F_LINKDOWN) + if (ignore_link_down && nh->fib_nh_flags & RTNH_F_LINKDOWN) break; - return call_fib4_notifiers(dev_net(fib_nh->nh_dev), event_type, + return call_fib4_notifiers(dev_net(nh->fib_nh_dev), event_type, &info.info); case FIB_EVENT_NH_DEL: - if ((in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && - fib_nh->nh_flags & RTNH_F_LINKDOWN) || - (fib_nh->nh_flags & RTNH_F_DEAD)) - return call_fib4_notifiers(dev_net(fib_nh->nh_dev), + if ((ignore_link_down && nh->fib_nh_flags & RTNH_F_LINKDOWN) || + (nh->fib_nh_flags & RTNH_F_DEAD)) + return call_fib4_notifiers(dev_net(nh->fib_nh_dev), event_type, &info.info); default: break; @@ -1467,12 +1713,12 @@ static int call_fib_nh_notifiers(struct fib_nh *fib_nh, * - if the new MTU is greater than the PMTU, don't make any change * - otherwise, unlock and set PMTU */ -static void nh_update_mtu(struct fib_nh *nh, u32 new, u32 orig) +static void nh_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig) { struct fnhe_hash_bucket *bucket; int i; - bucket = rcu_dereference_protected(nh->nh_exceptions, 1); + bucket = rcu_dereference_protected(nhc->nhc_exceptions, 1); if (!bucket) return; @@ -1502,8 +1748,8 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) struct fib_nh *nh; hlist_for_each_entry(nh, head, nh_hash) { - if (nh->nh_dev == dev) - nh_update_mtu(nh, dev->mtu, orig_mtu); + if (nh->fib_nh_dev == dev) + nh_update_mtu(&nh->nh_common, dev->mtu, orig_mtu); } } @@ -1530,22 +1776,22 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) int dead; BUG_ON(!fi->fib_nhs); - if (nh->nh_dev != dev || fi == prev_fi) + if (nh->fib_nh_dev != dev || fi == prev_fi) continue; prev_fi = fi; dead = 0; change_nexthops(fi) { - if (nexthop_nh->nh_flags & 
RTNH_F_DEAD) + if (nexthop_nh->fib_nh_flags & RTNH_F_DEAD) dead++; - else if (nexthop_nh->nh_dev == dev && - nexthop_nh->nh_scope != scope) { + else if (nexthop_nh->fib_nh_dev == dev && + nexthop_nh->fib_nh_scope != scope) { switch (event) { case NETDEV_DOWN: case NETDEV_UNREGISTER: - nexthop_nh->nh_flags |= RTNH_F_DEAD; + nexthop_nh->fib_nh_flags |= RTNH_F_DEAD; /* fall through */ case NETDEV_CHANGE: - nexthop_nh->nh_flags |= RTNH_F_LINKDOWN; + nexthop_nh->fib_nh_flags |= RTNH_F_LINKDOWN; break; } call_fib_nh_notifiers(nexthop_nh, @@ -1554,7 +1800,7 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) } #ifdef CONFIG_IP_ROUTE_MULTIPATH if (event == NETDEV_UNREGISTER && - nexthop_nh->nh_dev == dev) { + nexthop_nh->fib_nh_dev == dev) { dead = fi->fib_nhs; break; } @@ -1614,8 +1860,8 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res) if (next_fi->fib_scope != res->scope || fa->fa_type != RTN_UNICAST) continue; - if (!next_fi->fib_nh[0].nh_gw || - next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) + if (!next_fi->fib_nh[0].fib_nh_gw4 || + next_fi->fib_nh[0].fib_nh_scope != RT_SCOPE_LINK) continue; fib_alias_accessed(fa); @@ -1658,7 +1904,7 @@ out: * Dead device goes up. We wake up dead nexthops. * It takes sense only on multipath routes. */ -int fib_sync_up(struct net_device *dev, unsigned int nh_flags) +int fib_sync_up(struct net_device *dev, unsigned char nh_flags) { struct fib_info *prev_fi; unsigned int hash; @@ -1686,24 +1932,24 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags) int alive; BUG_ON(!fi->fib_nhs); - if (nh->nh_dev != dev || fi == prev_fi) + if (nh->fib_nh_dev != dev || fi == prev_fi) continue; prev_fi = fi; alive = 0; change_nexthops(fi) { - if (!(nexthop_nh->nh_flags & nh_flags)) { + if (!(nexthop_nh->fib_nh_flags & nh_flags)) { alive++; continue; } - if (!nexthop_nh->nh_dev || - !(nexthop_nh->nh_dev->flags & IFF_UP)) + if (!nexthop_nh->fib_nh_dev || + !(nexthop_nh->fib_nh_dev->flags & IFF_UP)) continue; - if (nexthop_nh->nh_dev != dev || + if (nexthop_nh->fib_nh_dev != dev || !__in_dev_get_rtnl(dev)) continue; alive++; - nexthop_nh->nh_flags &= ~nh_flags; + nexthop_nh->fib_nh_flags &= ~nh_flags; call_fib_nh_notifiers(nexthop_nh, FIB_EVENT_NH_ADD); } endfor_nexthops(fi) @@ -1723,13 +1969,19 @@ static bool fib_good_nh(const struct fib_nh *nh) { int state = NUD_REACHABLE; - if (nh->nh_scope == RT_SCOPE_LINK) { + if (nh->fib_nh_scope == RT_SCOPE_LINK) { struct neighbour *n; rcu_read_lock_bh(); - n = __ipv4_neigh_lookup_noref(nh->nh_dev, - (__force u32)nh->nh_gw); + if (likely(nh->fib_nh_gw_family == AF_INET)) + n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev, + (__force u32)nh->fib_nh_gw4); + else if (nh->fib_nh_gw_family == AF_INET6) + n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, + &nh->fib_nh_gw6); + else + n = NULL; if (n) state = n->nud_state; @@ -1745,20 +1997,22 @@ void fib_select_multipath(struct fib_result *res, int hash) struct net *net = fi->fib_net; bool first = false; - for_nexthops(fi) { + change_nexthops(fi) { if (net->ipv4.sysctl_fib_multipath_use_neigh) { - if (!fib_good_nh(nh)) + if (!fib_good_nh(nexthop_nh)) continue; if (!first) { res->nh_sel = nhsel; + res->nhc = &nexthop_nh->nh_common; first = true; } } - if (hash > atomic_read(&nh->nh_upper_bound)) + if (hash > atomic_read(&nexthop_nh->fib_nh_upper_bound)) continue; res->nh_sel = nhsel; + res->nhc = &nexthop_nh->nh_common; return; } endfor_nexthops(fi); } @@ -1785,5 +2039,5 @@ void fib_select_path(struct net *net, struct 
fib_result *res, check_saddr: if (!fl4->saddr) - fl4->saddr = FIB_RES_PREFSRC(net, *res); + fl4->saddr = fib_result_prefsrc(net, res); } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index a573e37e0615..334f723bdf80 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -183,14 +183,16 @@ struct trie { }; static struct key_vector *resize(struct trie *t, struct key_vector *tn); -static size_t tnode_free_size; +static unsigned int tnode_free_size; /* - * synchronize_rcu after call_rcu for that many pages; it should be especially - * useful before resizing the root node with PREEMPT_NONE configs; the value was - * obtained experimentally, aiming to avoid visible slowdown. + * synchronize_rcu after call_rcu for outstanding dirty memory; it should be + * especially useful before resizing the root node with PREEMPT_NONE configs; + * the value was obtained experimentally, aiming to avoid visible slowdown. */ -static const int sync_pages = 128; +unsigned int sysctl_fib_sync_mem = 512 * 1024; +unsigned int sysctl_fib_sync_mem_min = 64 * 1024; +unsigned int sysctl_fib_sync_mem_max = 64 * 1024 * 1024; static struct kmem_cache *fn_alias_kmem __ro_after_init; static struct kmem_cache *trie_leaf_kmem __ro_after_init; @@ -504,7 +506,7 @@ static void tnode_free(struct key_vector *tn) tn = container_of(head, struct tnode, rcu)->kv; } - if (tnode_free_size >= PAGE_SIZE * sync_pages) { + if (tnode_free_size >= sysctl_fib_sync_mem) { tnode_free_size = 0; synchronize_rcu(); } @@ -1468,19 +1470,17 @@ found: if (fi->fib_flags & RTNH_F_DEAD) continue; for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { - const struct fib_nh *nh = &fi->fib_nh[nhsel]; - struct in_device *in_dev = __in_dev_get_rcu(nh->nh_dev); + struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel); - if (nh->nh_flags & RTNH_F_DEAD) + if (nhc->nhc_flags & RTNH_F_DEAD) continue; - if (in_dev && - IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && - nh->nh_flags & RTNH_F_LINKDOWN && + if (ip_ignore_linkdown(nhc->nhc_dev) && + nhc->nhc_flags & RTNH_F_LINKDOWN && !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE)) continue; if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) { if (flp->flowi4_oif && - flp->flowi4_oif != nh->nh_oif) + flp->flowi4_oif != nhc->nhc_oif) continue; } @@ -1490,6 +1490,7 @@ found: res->prefix = htonl(n->key); res->prefixlen = KEYLENGTH - fa->fa_slen; res->nh_sel = nhsel; + res->nhc = nhc; res->type = fa->fa_type; res->scope = fi->fib_scope; res->fi = fi; @@ -1498,7 +1499,7 @@ found: #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->semantic_match_passed); #endif - trace_fib_table_lookup(tb->tb_id, flp, nh, err); + trace_fib_table_lookup(tb->tb_id, flp, nhc, err); return err; } @@ -2651,7 +2652,7 @@ static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT) flags = RTF_REJECT; - if (fi && fi->fib_nh->nh_gw) + if (fi && fi->fib_nh->fib_nh_gw4) flags |= RTF_GATEWAY; if (mask == htonl(0xFFFFFFFF)) flags |= RTF_HOST; @@ -2702,7 +2703,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) "%d\t%08X\t%d\t%u\t%u", fi->fib_dev ? fi->fib_dev->name : "*", prefix, - fi->fib_nh->nh_gw, flags, 0, 0, + fi->fib_nh->fib_nh_gw4, flags, 0, 0, fi->fib_priority, mask, (fi->fib_advmss ? 
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 12ce6c526d72..ca95051317ed 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -137,7 +137,7 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) break; case 1: { - /* Direct encasulation of IPv4 or IPv6 */ + /* Direct encapsulation of IPv4 or IPv6 */ int prot; @@ -171,9 +171,7 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) /* guehdr may change after pull */ guehdr = (struct guehdr *)&udp_hdr(skb)[1]; - hdrlen = sizeof(struct guehdr) + optlen; - - if (guehdr->version != 0 || validate_gue_flags(guehdr, optlen)) + if (validate_gue_flags(guehdr, optlen)) goto drop; hdrlen = sizeof(struct guehdr) + optlen; @@ -501,15 +499,45 @@ out_unlock: return err; } -static int fou_add_to_port_list(struct net *net, struct fou *fou) +static bool fou_cfg_cmp(struct fou *fou, struct fou_cfg *cfg) +{ + struct sock *sk = fou->sock->sk; + struct udp_port_cfg *udp_cfg = &cfg->udp_config; + + if (fou->family != udp_cfg->family || + fou->port != udp_cfg->local_udp_port || + sk->sk_dport != udp_cfg->peer_udp_port || + sk->sk_bound_dev_if != udp_cfg->bind_ifindex) + return false; + + if (fou->family == AF_INET) { + if (sk->sk_rcv_saddr != udp_cfg->local_ip.s_addr || + sk->sk_daddr != udp_cfg->peer_ip.s_addr) + return false; + else + return true; +#if IS_ENABLED(CONFIG_IPV6) + } else { + if (ipv6_addr_cmp(&sk->sk_v6_rcv_saddr, &udp_cfg->local_ip6) || + ipv6_addr_cmp(&sk->sk_v6_daddr, &udp_cfg->peer_ip6)) + return false; + else + return true; +#endif + } + + return false; +} + +static int fou_add_to_port_list(struct net *net, struct fou *fou, + struct fou_cfg *cfg) { struct fou_net *fn = net_generic(net, fou_net_id); struct fou *fout; mutex_lock(&fn->fou_lock); list_for_each_entry(fout, &fn->fou_list, list) { - if (fou->port == fout->port && - fou->family == fout->family) { + if (fou_cfg_cmp(fout, cfg)) { mutex_unlock(&fn->fou_lock); return -EALREADY; } @@ -587,7 +615,7 @@ static int fou_create(struct net *net, struct fou_cfg *cfg, sk->sk_allocation = GFP_ATOMIC; - err = fou_add_to_port_list(net, fou); + err = fou_add_to_port_list(net, fou, cfg); if (err) goto error; @@ -607,14 +635,12 @@ error: static int fou_destroy(struct net *net, struct fou_cfg *cfg) { struct fou_net *fn = net_generic(net, fou_net_id); - __be16 port = cfg->udp_config.local_udp_port; - u8 family = cfg->udp_config.family; int err = -EINVAL; struct fou *fou; mutex_lock(&fn->fou_lock); list_for_each_entry(fou, &fn->fou_list, list) { - if (fou->port == port && fou->family == family) { + if (fou_cfg_cmp(fou, cfg)) { fou_release(fou); err = 0; break; @@ -628,16 +654,27 @@ static int fou_destroy(struct net *net, struct fou_cfg *cfg) static struct genl_family fou_nl_family; static const struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = { - [FOU_ATTR_PORT] = { .type = NLA_U16, }, - [FOU_ATTR_AF] = { .type = NLA_U8, }, - [FOU_ATTR_IPPROTO] = { .type = NLA_U8, }, - [FOU_ATTR_TYPE] = { .type = NLA_U8, }, - [FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG, }, + [FOU_ATTR_PORT] = { .type = NLA_U16, }, + [FOU_ATTR_AF] = { .type = NLA_U8, }, + [FOU_ATTR_IPPROTO] = { .type = NLA_U8, }, + [FOU_ATTR_TYPE] = { .type = NLA_U8, }, + [FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG, }, + [FOU_ATTR_LOCAL_V4] = { .type = NLA_U32, }, + [FOU_ATTR_PEER_V4] = { .type = NLA_U32, }, + [FOU_ATTR_LOCAL_V6] = { .type = sizeof(struct in6_addr), }, + [FOU_ATTR_PEER_V6] = { .type = sizeof(struct in6_addr), }, + [FOU_ATTR_PEER_PORT] = { .type = NLA_U16, }, + [FOU_ATTR_IFINDEX] = { .type = NLA_S32, }, 
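fou_cfg_cmp() above widens the duplicate check from (family, local port) to the full bind tuple: family, local and peer ports, bound ifindex, and the per-family local and peer addresses. A compact sketch of the same comparison over plain structs (illustrative types, not the kernel's socket fields):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct tuple {
    int family;                 /* AF_INET-style discriminator */
    unsigned short lport, rport;
    int ifindex;
    unsigned char laddr[16], raddr[16]; /* big enough for v4 or v6 */
};

static bool tuple_eq(const struct tuple *a, const struct tuple *b)
{
    size_t alen = a->family == 4 ? 4 : 16;

    if (a->family != b->family || a->lport != b->lport ||
        a->rport != b->rport || a->ifindex != b->ifindex)
        return false;
    return !memcmp(a->laddr, b->laddr, alen) &&
           !memcmp(a->raddr, b->raddr, alen);
}

int main(void)
{
    struct tuple a = { .family = 4, .lport = 5555 };
    struct tuple b = a;

    b.rport = 6666;   /* same local port, different peer: no longer a duplicate */
    printf("%d %d\n", tuple_eq(&a, &a), tuple_eq(&a, &b)); /* 1 0 */
    return 0;
}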
}; static int parse_nl_config(struct genl_info *info, struct fou_cfg *cfg) { + bool has_local = false, has_peer = false; + struct nlattr *attr; + int ifindex; + __be16 port; + memset(cfg, 0, sizeof(*cfg)); cfg->udp_config.family = AF_INET; @@ -659,8 +696,7 @@ static int parse_nl_config(struct genl_info *info, } if (info->attrs[FOU_ATTR_PORT]) { - __be16 port = nla_get_be16(info->attrs[FOU_ATTR_PORT]); - + port = nla_get_be16(info->attrs[FOU_ATTR_PORT]); cfg->udp_config.local_udp_port = port; } @@ -673,6 +709,52 @@ static int parse_nl_config(struct genl_info *info, if (info->attrs[FOU_ATTR_REMCSUM_NOPARTIAL]) cfg->flags |= FOU_F_REMCSUM_NOPARTIAL; + if (cfg->udp_config.family == AF_INET) { + if (info->attrs[FOU_ATTR_LOCAL_V4]) { + attr = info->attrs[FOU_ATTR_LOCAL_V4]; + cfg->udp_config.local_ip.s_addr = nla_get_in_addr(attr); + has_local = true; + } + + if (info->attrs[FOU_ATTR_PEER_V4]) { + attr = info->attrs[FOU_ATTR_PEER_V4]; + cfg->udp_config.peer_ip.s_addr = nla_get_in_addr(attr); + has_peer = true; + } +#if IS_ENABLED(CONFIG_IPV6) + } else { + if (info->attrs[FOU_ATTR_LOCAL_V6]) { + attr = info->attrs[FOU_ATTR_LOCAL_V6]; + cfg->udp_config.local_ip6 = nla_get_in6_addr(attr); + has_local = true; + } + + if (info->attrs[FOU_ATTR_PEER_V6]) { + attr = info->attrs[FOU_ATTR_PEER_V6]; + cfg->udp_config.peer_ip6 = nla_get_in6_addr(attr); + has_peer = true; + } +#endif + } + + if (has_peer) { + if (info->attrs[FOU_ATTR_PEER_PORT]) { + port = nla_get_be16(info->attrs[FOU_ATTR_PEER_PORT]); + cfg->udp_config.peer_udp_port = port; + } else { + return -EINVAL; + } + } + + if (info->attrs[FOU_ATTR_IFINDEX]) { + if (!has_local) + return -EINVAL; + + ifindex = nla_get_s32(info->attrs[FOU_ATTR_IFINDEX]); + + cfg->udp_config.bind_ifindex = ifindex; + } + return 0; } @@ -704,15 +786,37 @@ static int fou_nl_cmd_rm_port(struct sk_buff *skb, struct genl_info *info) static int fou_fill_info(struct fou *fou, struct sk_buff *msg) { + struct sock *sk = fou->sock->sk; + if (nla_put_u8(msg, FOU_ATTR_AF, fou->sock->sk->sk_family) || nla_put_be16(msg, FOU_ATTR_PORT, fou->port) || + nla_put_be16(msg, FOU_ATTR_PEER_PORT, sk->sk_dport) || nla_put_u8(msg, FOU_ATTR_IPPROTO, fou->protocol) || - nla_put_u8(msg, FOU_ATTR_TYPE, fou->type)) + nla_put_u8(msg, FOU_ATTR_TYPE, fou->type) || + nla_put_s32(msg, FOU_ATTR_IFINDEX, sk->sk_bound_dev_if)) return -1; if (fou->flags & FOU_F_REMCSUM_NOPARTIAL) if (nla_put_flag(msg, FOU_ATTR_REMCSUM_NOPARTIAL)) return -1; + + if (fou->sock->sk->sk_family == AF_INET) { + if (nla_put_in_addr(msg, FOU_ATTR_LOCAL_V4, sk->sk_rcv_saddr)) + return -1; + + if (nla_put_in_addr(msg, FOU_ATTR_PEER_V4, sk->sk_daddr)) + return -1; +#if IS_ENABLED(CONFIG_IPV6) + } else { + if (nla_put_in6_addr(msg, FOU_ATTR_LOCAL_V6, + &sk->sk_v6_rcv_saddr)) + return -1; + + if (nla_put_in6_addr(msg, FOU_ATTR_PEER_V6, &sk->sk_v6_daddr)) + return -1; +#endif + } + return 0; } @@ -765,7 +869,7 @@ static int fou_nl_cmd_get_port(struct sk_buff *skb, struct genl_info *info) ret = -ESRCH; mutex_lock(&fn->fou_lock); list_for_each_entry(fout, &fn->fou_list, list) { - if (port == fout->port && family == fout->family) { + if (fou_cfg_cmp(fout, &cfg)) { ret = fou_dump_info(fout, info->snd_portid, info->snd_seq, 0, msg, info->genlhdr->cmd); @@ -809,21 +913,21 @@ static int fou_nl_dump(struct sk_buff *skb, struct netlink_callback *cb) static const struct genl_ops fou_nl_ops[] = { { .cmd = FOU_CMD_ADD, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = fou_nl_cmd_add_port, - .policy = fou_nl_policy, .flags = 
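parse_nl_config() above enforces two cross-attribute rules: a peer address is only accepted together with FOU_ATTR_PEER_PORT, and FOU_ATTR_IFINDEX is rejected unless a local address was supplied. The dependency check in isolation, with booleans standing in for parsed attributes (on Linux the -EINVAL return prints as -22):

#include <stdio.h>
#include <stdbool.h>
#include <errno.h>

/* Returns 0 or -EINVAL, mirroring the rules in parse_nl_config(). */
static int check_deps(bool has_peer, bool has_peer_port,
                      bool has_local, bool has_ifindex)
{
    if (has_peer && !has_peer_port)
        return -EINVAL;   /* peer address without a peer port */
    if (has_ifindex && !has_local)
        return -EINVAL;   /* binding to a device needs a local address */
    return 0;
}

int main(void)
{
    printf("%d\n", check_deps(true, false, false, false)); /* -22 */
    printf("%d\n", check_deps(true, true, true, true));    /* 0 */
    return 0;
}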
GENL_ADMIN_PERM, }, { .cmd = FOU_CMD_DEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = fou_nl_cmd_rm_port, - .policy = fou_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = FOU_CMD_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = fou_nl_cmd_get_port, .dumpit = fou_nl_dump, - .policy = fou_nl_policy, }, }; @@ -832,6 +936,7 @@ static struct genl_family fou_nl_family __ro_after_init = { .name = FOU_GENL_NAME, .version = FOU_GENL_VERSION, .maxattr = FOU_ATTR_MAX, + .policy = fou_nl_policy, .netnsok = true, .module = THIS_MODULE, .ops = fou_nl_ops, @@ -1035,7 +1140,7 @@ static int gue_err(struct sk_buff *skb, u32 info) case 0: /* Full GUE header present */ break; case 1: { - /* Direct encasulation of IPv4 or IPv6 */ + /* Direct encapsulation of IPv4 or IPv6 */ skb_set_transport_header(skb, -(int)sizeof(struct icmphdr)); switch (((struct iphdr *)guehdr)->version) { diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 6ea523d71947..a175e3e7ae97 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -564,7 +564,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) goto no_route; - if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) + if (opt && opt->opt.is_strictroute && rt->rt_gw_family) goto route_err; rcu_read_unlock(); return &rt->dst; @@ -602,7 +602,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) goto no_route; - if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) + if (opt && opt->opt.is_strictroute && rt->rt_gw_family) goto route_err; return &rt->dst; diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 00ec819f949b..06f6f280b9ff 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -123,7 +123,7 @@ int ip_forward(struct sk_buff *skb) rt = skb_rtable(skb); - if (opt->is_strictroute && rt->rt_uses_gateway) + if (opt->is_strictroute && rt->rt_gw_family) goto sr_failed; IPCB(skb)->flags |= IPSKB_FORWARDED; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 1132d6d1796a..ed97724c5e33 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -130,6 +130,7 @@ #include <linux/inetdevice.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> +#include <linux/indirect_call_wrapper.h> #include <net/snmp.h> #include <net/ip.h> @@ -188,6 +189,8 @@ bool ip_call_ra_chain(struct sk_buff *skb) return false; } +INDIRECT_CALLABLE_DECLARE(int udp_rcv(struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(int tcp_v4_rcv(struct sk_buff *)); void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int protocol) { const struct net_protocol *ipprot; @@ -205,7 +208,8 @@ resubmit: } nf_reset(skb); } - ret = ipprot->handler(skb); + ret = INDIRECT_CALL_2(ipprot->handler, tcp_v4_rcv, udp_rcv, + skb); if (ret < 0) { protocol = -ret; goto resubmit; @@ -305,6 +309,8 @@ drop: return true; } +INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *)); static int ip_rcv_finish_core(struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *dev) { @@ -322,7 +328,8 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, ipprot = rcu_dereference(inet_protos[protocol]); if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) { - err = edemux(skb); + err = INDIRECT_CALL_2(edemux, 
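The ip_protocol_deliver_rcu() and early-demux hunks wrap their function pointers with INDIRECT_CALL_2(): the pointer is compared against the two overwhelmingly likely targets first, so the common case becomes a direct call, which retpolines make much cheaper than an indirect one. A simplified stand-alone rendering of the idea (the real macro lives in include/linux/indirect_call_wrapper.h and orders its checks with likely()):

#include <stdio.h>

/* Simplified: try direct calls to the expected targets before
 * falling back to the indirect call. */
#define INDIRECT_CALL_2(f, f1, f2, ...)         \
    ((f) == (f1) ? (f1)(__VA_ARGS__) :          \
     (f) == (f2) ? (f2)(__VA_ARGS__) :          \
                   (f)(__VA_ARGS__))

static int tcp_handler(int len) { return len + 20; }
static int udp_handler(int len) { return len + 8; }

int main(void)
{
    int (*handler)(int) = udp_handler;

    /* Matched target compiles to a compare plus a direct call. */
    printf("%d\n", INDIRECT_CALL_2(handler, tcp_handler, udp_handler, 100));
    return 0;
}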
tcp_v4_early_demux, + udp_v4_early_demux, skb); if (unlikely(err)) goto drop_error; /* must reload iph, skb->head might have changed */ diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e8bb2e85c5a4..ac880beda8a7 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -188,7 +188,7 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s struct net_device *dev = dst->dev; unsigned int hh_len = LL_RESERVED_SPACE(dev); struct neighbour *neigh; - u32 nexthop; + bool is_v6gw = false; if (rt->rt_type == RTN_MULTICAST) { IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTMCAST, skb->len); @@ -218,16 +218,13 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s } rcu_read_lock_bh(); - nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr); - neigh = __ipv4_neigh_lookup_noref(dev, nexthop); - if (unlikely(!neigh)) - neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); + neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); if (!IS_ERR(neigh)) { int res; sock_confirm_neigh(skb, neigh); - res = neigh_output(neigh, skb); - + /* if crossing protocols, can not use the cached header */ + res = neigh_output(neigh, skb, is_v6gw); rcu_read_unlock_bh(); return res; } @@ -472,7 +469,7 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, skb_dst_set_noref(skb, &rt->dst); packet_routed: - if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_uses_gateway) + if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_gw_family) goto no_route; /* OK, we know where to send it, allocate and build IP header. */ @@ -694,11 +691,8 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, return 0; } - while (frag) { - skb = frag->next; - kfree_skb(frag); - frag = skb; - } + kfree_skb_list(frag); + IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); return err; diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index c3f3d28d1087..30c1c264bdfc 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -239,8 +239,8 @@ static int ip_tun_build_state(struct nlattr *attr, struct nlattr *tb[LWTUNNEL_IP_MAX + 1]; int err; - err = nla_parse_nested(tb, LWTUNNEL_IP_MAX, attr, ip_tun_policy, - extack); + err = nla_parse_nested_deprecated(tb, LWTUNNEL_IP_MAX, attr, + ip_tun_policy, extack); if (err < 0) return err; @@ -356,8 +356,8 @@ static int ip6_tun_build_state(struct nlattr *attr, struct nlattr *tb[LWTUNNEL_IP6_MAX + 1]; int err; - err = nla_parse_nested(tb, LWTUNNEL_IP6_MAX, attr, ip6_tun_policy, - extack); + err = nla_parse_nested_deprecated(tb, LWTUNNEL_IP6_MAX, attr, + ip6_tun_policy, extack); if (err < 0) return err; diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 35d8346742e2..254a42e83ff9 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -50,7 +50,7 @@ static unsigned int vti_net_id __read_mostly; static int vti_tunnel_init(struct net_device *dev); static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi, - int encap_type) + int encap_type, bool update_skb_dev) { struct ip_tunnel *tunnel; const struct iphdr *iph = ip_hdr(skb); @@ -65,6 +65,9 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi, XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel; + if (update_skb_dev) + skb->dev = tunnel->dev; + return xfrm_input(skb, nexthdr, spi, encap_type); } @@ -74,47 +77,28 @@ drop: return 0; } -static int vti_input_ipip(struct sk_buff *skb, int nexthdr, __be32 spi, - int encap_type) +static int vti_input_proto(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type) 
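ip_finish_output2() above stops assuming an IPv4 nexthop: ip_neigh_for_gw() selects the ARP or ND table from the route's gateway family, and the is_v6gw flag tells neigh_output() not to reuse a cached layer-2 header across protocols. The dispatch shape as a tagged-union sketch, with all names illustrative:

#include <stdio.h>

enum gw_family { GW_NONE, GW_INET, GW_INET6 };

struct route {
    enum gw_family gw_family;
    unsigned int gw4;          /* valid when GW_INET  */
    unsigned char gw6[16];     /* valid when GW_INET6 */
};

static void lookup_v4(unsigned int gw)        { printf("ARP for %#x\n", gw); }
static void lookup_v6(const unsigned char *g) { printf("ND for %02x..\n", g[0]); }

/* Pick the neighbour table by gateway family; fall back to the
 * packet's destination when the route has no gateway at all. */
static void neigh_for_gw(const struct route *rt, unsigned int daddr, int *is_v6gw)
{
    *is_v6gw = 0;
    switch (rt->gw_family) {
    case GW_INET:  lookup_v4(rt->gw4); break;
    case GW_INET6: lookup_v6(rt->gw6); *is_v6gw = 1; break;
    default:       lookup_v4(daddr);   break;
    }
}

int main(void)
{
    struct route rt = { .gw_family = GW_INET, .gw4 = 0x0a000001 };
    int v6;

    neigh_for_gw(&rt, 0x08080808, &v6);
    return 0;
}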
{ - struct ip_tunnel *tunnel; - const struct iphdr *iph = ip_hdr(skb); - struct net *net = dev_net(skb->dev); - struct ip_tunnel_net *itn = net_generic(net, vti_net_id); - - tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, - iph->saddr, iph->daddr, 0); - if (tunnel) { - if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) - goto drop; - - XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel; - - skb->dev = tunnel->dev; - - return xfrm_input(skb, nexthdr, spi, encap_type); - } - - return -EINVAL; -drop: - kfree_skb(skb); - return 0; + return vti_input(skb, nexthdr, spi, encap_type, false); } -static int vti_rcv(struct sk_buff *skb) +static int vti_rcv(struct sk_buff *skb, __be32 spi, bool update_skb_dev) { XFRM_SPI_SKB_CB(skb)->family = AF_INET; XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); - return vti_input(skb, ip_hdr(skb)->protocol, 0, 0); + return vti_input(skb, ip_hdr(skb)->protocol, spi, 0, update_skb_dev); } -static int vti_rcv_ipip(struct sk_buff *skb) +static int vti_rcv_proto(struct sk_buff *skb) { - XFRM_SPI_SKB_CB(skb)->family = AF_INET; - XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); + return vti_rcv(skb, 0, false); +} - return vti_input_ipip(skb, ip_hdr(skb)->protocol, ip_hdr(skb)->saddr, 0); +static int vti_rcv_tunnel(struct sk_buff *skb) +{ + return vti_rcv(skb, ip_hdr(skb)->saddr, true); } static int vti_rcv_cb(struct sk_buff *skb, int err) @@ -123,7 +107,7 @@ static int vti_rcv_cb(struct sk_buff *skb, int err) struct net_device *dev; struct pcpu_sw_netstats *tstats; struct xfrm_state *x; - struct xfrm_mode *inner_mode; + const struct xfrm_mode *inner_mode; struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4; u32 orig_mark = skb->mark; int ret; @@ -142,7 +126,7 @@ static int vti_rcv_cb(struct sk_buff *skb, int err) x = xfrm_input_state(skb); - inner_mode = x->inner_mode; + inner_mode = &x->inner_mode; if (x->sel.family == AF_UNSPEC) { inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); @@ -153,7 +137,7 @@ static int vti_rcv_cb(struct sk_buff *skb, int err) } } - family = inner_mode->afinfo->family; + family = inner_mode->family; skb->mark = be32_to_cpu(tunnel->parms.i_key); ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family); @@ -447,31 +431,31 @@ static void __net_init vti_fb_tunnel_init(struct net_device *dev) } static struct xfrm4_protocol vti_esp4_protocol __read_mostly = { - .handler = vti_rcv, - .input_handler = vti_input, + .handler = vti_rcv_proto, + .input_handler = vti_input_proto, .cb_handler = vti_rcv_cb, .err_handler = vti4_err, .priority = 100, }; static struct xfrm4_protocol vti_ah4_protocol __read_mostly = { - .handler = vti_rcv, - .input_handler = vti_input, + .handler = vti_rcv_proto, + .input_handler = vti_input_proto, .cb_handler = vti_rcv_cb, .err_handler = vti4_err, .priority = 100, }; static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = { - .handler = vti_rcv, - .input_handler = vti_input, + .handler = vti_rcv_proto, + .input_handler = vti_input_proto, .cb_handler = vti_rcv_cb, .err_handler = vti4_err, .priority = 100, }; static struct xfrm_tunnel ipip_handler __read_mostly = { - .handler = vti_rcv_ipip, + .handler = vti_rcv_tunnel, .err_handler = vti4_err, .priority = 0, }; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 2c931120c494..2c61e10a60e3 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -66,7 +66,7 @@ #include <net/netlink.h> #include <net/fib_rules.h> #include <linux/netconf.h> -#include <net/nexthop.h> +#include <net/rtnh.h> #include 
<linux/nospec.h> @@ -373,7 +373,6 @@ static const struct rhashtable_params ipmr_rht_params = { .key_offset = offsetof(struct mfc_cache, cmparg), .key_len = sizeof(struct mfc_cache_cmp_arg), .nelem_hint = 3, - .locks_mul = 1, .obj_cmpfn = ipmr_hash_cmp, .automatic_shrinking = true, }; @@ -2499,8 +2498,8 @@ static int ipmr_rtm_valid_getroute_req(struct sk_buff *skb, } if (!netlink_strict_get_check(skb)) - return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_ipv4_policy, extack); + return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv4_policy, extack); rtm = nlmsg_data(nlh); if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) || @@ -2511,8 +2510,8 @@ static int ipmr_rtm_valid_getroute_req(struct sk_buff *skb, return -EINVAL; } - err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_ipv4_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv4_policy, extack); if (err) return err; @@ -2675,8 +2674,8 @@ static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh, struct rtmsg *rtm; int ret, rem; - ret = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipmr_policy, - extack); + ret = nlmsg_validate_deprecated(nlh, sizeof(*rtm), RTA_MAX, + rtm_ipmr_policy, extack); if (ret < 0) goto out; rtm = nlmsg_data(nlh); @@ -2784,7 +2783,7 @@ static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb) return true; vif = &mrt->vif_table[vifid]; - vif_nest = nla_nest_start(skb, IPMRA_VIF); + vif_nest = nla_nest_start_noflag(skb, IPMRA_VIF); if (!vif_nest) return false; if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif->dev->ifindex) || @@ -2868,7 +2867,7 @@ static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb) memset(hdr, 0, sizeof(*hdr)); hdr->ifi_family = RTNL_FAMILY_IPMR; - af = nla_nest_start(skb, IFLA_AF_SPEC); + af = nla_nest_start_noflag(skb, IFLA_AF_SPEC); if (!af) { nlmsg_cancel(skb, nlh); goto out; @@ -2879,7 +2878,7 @@ static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb) goto out; } - vifs = nla_nest_start(skb, IPMRA_TABLE_VIFS); + vifs = nla_nest_start_noflag(skb, IPMRA_TABLE_VIFS); if (!vifs) { nla_nest_end(skb, af); nlmsg_end(skb, nlh); diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index 3e614cc824f7..ea48bd15a575 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -228,7 +228,7 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, if (c->mfc_flags & MFC_OFFLOAD) rtm->rtm_flags |= RTNH_F_OFFLOAD; - mp_attr = nla_nest_start(skb, RTA_MULTIPATH); + mp_attr = nla_nest_start_noflag(skb, RTA_MULTIPATH); if (!mp_attr) return -EMSGSIZE; @@ -335,8 +335,6 @@ next_entry2: } spin_unlock_bh(lock); err = 0; - e = 0; - out: cb->args[1] = e; return err; @@ -374,6 +372,7 @@ int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, err = mr_table_dump(mrt, skb, cb, fill, lock, filter); if (err < 0) break; + cb->args[1] = 0; next_table: t++; } diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index c98391d49200..1412b029f37f 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -27,14 +27,6 @@ config NF_TABLES_IPV4 if NF_TABLES_IPV4 -config NFT_CHAIN_ROUTE_IPV4 - tristate "IPv4 nf_tables route chain support" - help - This option enables the "route" chain for IPv4 in nf_tables. This - chain type is used to force packet re-routing after mangling header - fields such as the source, destination, type of service and - the packet mark. 
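The ipmr conversions to nlmsg_parse_deprecated() and nla_nest_start_noflag() above come from the strict-netlink work: nla_nest_start() now sets NLA_F_NESTED on the attribute type, so callers that must keep the bare type spell it _noflag, and lenient parsers are renamed _deprecated. The flag is just the top bit of the 16-bit attribute type, as this sketch of a simplified TLV type field shows (not libnl):

#include <stdio.h>

#define NLA_F_NESTED (1 << 15)

/* Emit just the attribute type field, with or without the nested flag. */
static unsigned short nest_type(unsigned short type, int set_flag)
{
    return set_flag ? (type | NLA_F_NESTED) : type;
}

int main(void)
{
    unsigned short vifs = 5; /* an IPMRA_TABLE_VIFS-style type, illustrative */

    printf("%#x\n", nest_type(vifs, 1)); /* 0x8005: nla_nest_start() */
    printf("%#x\n", nest_type(vifs, 0)); /* 0x5:    nla_nest_start_noflag() */
    return 0;
}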
- config NFT_REJECT_IPV4 select NF_REJECT_IPV4 default NFT_REJECT @@ -232,16 +224,10 @@ if IP_NF_NAT config IP_NF_TARGET_MASQUERADE tristate "MASQUERADE target support" - select NF_NAT_MASQUERADE - default m if NETFILTER_ADVANCED=n + select NETFILTER_XT_TARGET_MASQUERADE help - Masquerading is a special case of NAT: all outgoing connections are - changed to seem to come from a particular interface's address, and - if the interface goes down, those connections are lost. This is - only useful for dialup accounts with dynamic IP address (ie. your IP - address will be different on next dialup). - - To compile it as a module, choose M here. If unsure, say N. + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects NETFILTER_XT_TARGET_MASQUERADE. config IP_NF_TARGET_NETMAP tristate "NETMAP target support" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index e241f5188ebe..c50e0ec095d2 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -24,7 +24,6 @@ nf_nat_snmp_basic-y := nf_nat_snmp_basic.asn1.o nf_nat_snmp_basic_main.o $(obj)/nf_nat_snmp_basic_main.o: $(obj)/nf_nat_snmp_basic.asn1.h obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o -obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o @@ -49,7 +48,6 @@ obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o # targets obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o -obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o obj-$(CONFIG_IP_NF_TARGET_SYNPROXY) += ipt_SYNPROXY.o diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index 4e6b53ab6c33..7875c98072eb 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -631,4 +631,4 @@ module_exit(fini); MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>"); MODULE_DESCRIPTION("H.323 NAT helper"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("ip_nat_h323"); +MODULE_ALIAS_NF_NAT_HELPER("h323"); diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index 68b4d450391b..e17b4ee7604c 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -37,7 +37,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP"); -MODULE_ALIAS("ip_nat_pptp"); +MODULE_ALIAS_NF_NAT_HELPER("pptp"); static void pptp_nat_expected(struct nf_conn *ct, struct nf_conntrack_expect *exp) diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c deleted file mode 100644 index 7d82934c46f4..000000000000 --- a/net/ipv4/netfilter/nft_chain_route_ipv4.c +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> - * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ - -#include <linux/module.h> -#include <linux/init.h> -#include <linux/list.h> -#include <linux/skbuff.h> -#include <linux/netlink.h> -#include <linux/netfilter.h> -#include <linux/netfilter_ipv4.h> -#include <linux/netfilter/nfnetlink.h> -#include <linux/netfilter/nf_tables.h> -#include <net/netfilter/nf_tables.h> -#include <net/netfilter/nf_tables_ipv4.h> -#include <net/route.h> -#include <net/ip.h> - -static unsigned int nf_route_table_hook(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - unsigned int ret; - struct nft_pktinfo pkt; - u32 mark; - __be32 saddr, daddr; - u_int8_t tos; - const struct iphdr *iph; - int err; - - nft_set_pktinfo(&pkt, skb, state); - nft_set_pktinfo_ipv4(&pkt, skb); - - mark = skb->mark; - iph = ip_hdr(skb); - saddr = iph->saddr; - daddr = iph->daddr; - tos = iph->tos; - - ret = nft_do_chain(&pkt, priv); - if (ret != NF_DROP && ret != NF_STOLEN) { - iph = ip_hdr(skb); - - if (iph->saddr != saddr || - iph->daddr != daddr || - skb->mark != mark || - iph->tos != tos) { - err = ip_route_me_harder(state->net, skb, RTN_UNSPEC); - if (err < 0) - ret = NF_DROP_ERR(err); - } - } - return ret; -} - -static const struct nft_chain_type nft_chain_route_ipv4 = { - .name = "route", - .type = NFT_CHAIN_T_ROUTE, - .family = NFPROTO_IPV4, - .owner = THIS_MODULE, - .hook_mask = (1 << NF_INET_LOCAL_OUT), - .hooks = { - [NF_INET_LOCAL_OUT] = nf_route_table_hook, - }, -}; - -static int __init nft_chain_route_init(void) -{ - nft_register_chain_type(&nft_chain_route_ipv4); - - return 0; -} - -static void __exit nft_chain_route_exit(void) -{ - nft_unregister_chain_type(&nft_chain_route_ipv4); -} - -module_init(nft_chain_route_init); -module_exit(nft_chain_route_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); -MODULE_ALIAS_NFT_CHAIN(AF_INET, "route"); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6fdf1c195d8e..11ddc276776e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -434,37 +434,46 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, struct sk_buff *skb, const void *daddr) { + const struct rtable *rt = container_of(dst, struct rtable, dst); struct net_device *dev = dst->dev; - const __be32 *pkey = daddr; - const struct rtable *rt; struct neighbour *n; - rt = (const struct rtable *) dst; - if (rt->rt_gateway) - pkey = (const __be32 *) &rt->rt_gateway; - else if (skb) - pkey = &ip_hdr(skb)->daddr; + rcu_read_lock_bh(); + + if (likely(rt->rt_gw_family == AF_INET)) { + n = ip_neigh_gw4(dev, rt->rt_gw4); + } else if (rt->rt_gw_family == AF_INET6) { + n = ip_neigh_gw6(dev, &rt->rt_gw6); + } else { + __be32 pkey; + + pkey = skb ? 
ip_hdr(skb)->daddr : *((__be32 *) daddr); + n = ip_neigh_gw4(dev, pkey); + } + + if (n && !refcount_inc_not_zero(&n->refcnt)) + n = NULL; + + rcu_read_unlock_bh(); - n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey); - if (n) - return n; - return neigh_create(&arp_tbl, pkey, dev); + return n; } static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr) { + const struct rtable *rt = container_of(dst, struct rtable, dst); struct net_device *dev = dst->dev; const __be32 *pkey = daddr; - const struct rtable *rt; - rt = (const struct rtable *)dst; - if (rt->rt_gateway) - pkey = (const __be32 *)&rt->rt_gateway; - else if (!daddr || + if (rt->rt_gw_family == AF_INET) { + pkey = (const __be32 *)&rt->rt_gw4; + } else if (rt->rt_gw_family == AF_INET6) { + return __ipv6_confirm_neigh_stub(dev, &rt->rt_gw6); + } else if (!daddr || (rt->rt_flags & - (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) + (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) { return; - + } __ipv4_confirm_neigh(dev, *(__force u32 *)pkey); } @@ -500,15 +509,17 @@ EXPORT_SYMBOL(ip_idents_reserve); void __ip_select_ident(struct net *net, struct iphdr *iph, int segs) { - static u32 ip_idents_hashrnd __read_mostly; u32 hash, id; - net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd)); + /* Note the following code is not safe, but this is okay. */ + if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key))) + get_random_bytes(&net->ipv4.ip_id_key, + sizeof(net->ipv4.ip_id_key)); - hash = jhash_3words((__force u32)iph->daddr, + hash = siphash_3u32((__force u32)iph->daddr, (__force u32)iph->saddr, - iph->protocol ^ net_hash_mix(net), - ip_idents_hashrnd); + iph->protocol, + &net->ipv4.ip_id_key); id = ip_idents_reserve(hash, segs); iph->id = htons(id); } @@ -627,13 +638,14 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh if (fnhe->fnhe_gw) { rt->rt_flags |= RTCF_REDIRECTED; - rt->rt_gateway = fnhe->fnhe_gw; - rt->rt_uses_gateway = 1; + rt->rt_gw_family = AF_INET; + rt->rt_gw4 = fnhe->fnhe_gw; } } -static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, - u32 pmtu, bool lock, unsigned long expires) +static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr, + __be32 gw, u32 pmtu, bool lock, + unsigned long expires) { struct fnhe_hash_bucket *hash; struct fib_nh_exception *fnhe; @@ -642,17 +654,17 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, unsigned int i; int depth; - genid = fnhe_genid(dev_net(nh->nh_dev)); + genid = fnhe_genid(dev_net(nhc->nhc_dev)); hval = fnhe_hashfun(daddr); spin_lock_bh(&fnhe_lock); - hash = rcu_dereference(nh->nh_exceptions); + hash = rcu_dereference(nhc->nhc_exceptions); if (!hash) { hash = kcalloc(FNHE_HASH_SIZE, sizeof(*hash), GFP_ATOMIC); if (!hash) goto out_unlock; - rcu_assign_pointer(nh->nh_exceptions, hash); + rcu_assign_pointer(nhc->nhc_exceptions, hash); } hash += hval; @@ -704,13 +716,13 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, * stale, so anyone caching it rechecks if this exception * applies to them. 
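__ip_select_ident() above drops jhash with a global secret for siphash keyed per network namespace, which makes the IP ID sequence much harder to brute-force offline. The shape of the computation, with a toy keyed mixer standing in for siphash_3u32() since a full SipHash would dwarf the point; assume any keyed PRF can be slotted in:

#include <stdio.h>
#include <stdint.h>

struct id_key { uint64_t k0, k1; };        /* per-netns secret */

/* NOT SipHash: a toy keyed mixer standing in for siphash_3u32(). */
static uint32_t keyed_mix(uint32_t a, uint32_t b, uint32_t c,
                          const struct id_key *key)
{
    uint64_t v = key->k0 ^ a;
    v = v * 0x9e3779b97f4a7c15ull ^ b;
    v = v * 0x9e3779b97f4a7c15ull ^ c ^ key->k1;
    return (uint32_t)(v >> 32);
}

/* One bucket per hash; each reservation advances that ID stream
 * (the real ip_idents_reserve() adds a time-based random step). */
static uint16_t select_ident(uint32_t daddr, uint32_t saddr, uint8_t proto,
                             const struct id_key *key, uint16_t *buckets)
{
    uint32_t h = keyed_mix(daddr, saddr, proto, key);
    return ++buckets[h & 2047];
}

int main(void)
{
    static uint16_t buckets[2048];
    struct id_key key = { 0x1234, 0x5678 };

    printf("%u\n", select_ident(0x08080808, 0x0a000001, 6, &key, buckets));
    printf("%u\n", select_ident(0x08080808, 0x0a000001, 6, &key, buckets));
    return 0;
}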
*/ - rt = rcu_dereference(nh->nh_rth_input); + rt = rcu_dereference(nhc->nhc_rth_input); if (rt) rt->dst.obsolete = DST_OBSOLETE_KILL; for_each_possible_cpu(i) { struct rtable __rcu **prt; - prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i); + prt = per_cpu_ptr(nhc->nhc_pcpu_rth_output, i); rt = rcu_dereference(*prt); if (rt) rt->dst.obsolete = DST_OBSOLETE_KILL; @@ -745,7 +757,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow return; } - if (rt->rt_gateway != old_gw) + if (rt->rt_gw_family != AF_INET || rt->rt_gw4 != old_gw) return; in_dev = __in_dev_get_rcu(dev); @@ -776,9 +788,9 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow neigh_event_send(n, NULL); } else { if (fib_lookup(net, fl4, &res, 0) == 0) { - struct fib_nh *nh = &FIB_RES_NH(res); + struct fib_nh_common *nhc = FIB_RES_NHC(res); - update_or_create_fnhe(nh, fl4->daddr, new_gw, + update_or_create_fnhe(nhc, fl4->daddr, new_gw, 0, false, jiffies + ip_rt_gc_timeout); } @@ -1025,9 +1037,9 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) rcu_read_lock(); if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) { - struct fib_nh *nh = &FIB_RES_NH(res); + struct fib_nh_common *nhc = FIB_RES_NHC(res); - update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock, + update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock, jiffies + ip_rt_mtu_expires); } rcu_read_unlock(); @@ -1176,7 +1188,7 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) * * When a PMTU/redirect information update invalidates a route, * this is indicated by setting obsolete to DST_OBSOLETE_KILL or - * DST_OBSOLETE_DEAD by dst_free(). + * DST_OBSOLETE_DEAD. */ if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt)) return NULL; @@ -1261,7 +1273,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) rcu_read_lock(); if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0) - src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res); + src = fib_result_prefsrc(dev_net(rt->dst.dev), &res); else src = inet_select_addr(rt->dst.dev, rt_nexthop(rt, iph->daddr), @@ -1304,7 +1316,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) mtu = READ_ONCE(dst->dev->mtu); if (unlikely(ip_mtu_locked(dst))) { - if (rt->rt_uses_gateway && mtu > 576) + if (rt->rt_gw_family && mtu > 576) mtu = 576; } @@ -1313,7 +1325,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) return mtu - lwtunnel_headroom(dst->lwtstate, mtu); } -static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) +static void ip_del_fnhe(struct fib_nh_common *nhc, __be32 daddr) { struct fnhe_hash_bucket *hash; struct fib_nh_exception *fnhe, __rcu **fnhe_p; @@ -1321,7 +1333,7 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) spin_lock_bh(&fnhe_lock); - hash = rcu_dereference_protected(nh->nh_exceptions, + hash = rcu_dereference_protected(nhc->nhc_exceptions, lockdep_is_held(&fnhe_lock)); hash += hval; @@ -1347,9 +1359,10 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) spin_unlock_bh(&fnhe_lock); } -static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) +static struct fib_nh_exception *find_exception(struct fib_nh_common *nhc, + __be32 daddr) { - struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions); + struct fnhe_hash_bucket *hash = rcu_dereference(nhc->nhc_exceptions); struct fib_nh_exception *fnhe; u32 hval; @@ -1363,7 +1376,7 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) if 
(fnhe->fnhe_daddr == daddr) { if (fnhe->fnhe_expires && time_after(jiffies, fnhe->fnhe_expires)) { - ip_del_fnhe(nh, daddr); + ip_del_fnhe(nhc, daddr); break; } return fnhe; @@ -1380,9 +1393,9 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr) { + struct fib_nh_common *nhc = res->nhc; + struct net_device *dev = nhc->nhc_dev; struct fib_info *fi = res->fi; - struct fib_nh *nh = &fi->fib_nh[res->nh_sel]; - struct net_device *dev = nh->nh_dev; u32 mtu = 0; if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu || @@ -1392,7 +1405,7 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr) if (likely(!mtu)) { struct fib_nh_exception *fnhe; - fnhe = find_exception(nh, daddr); + fnhe = find_exception(nhc, daddr); if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires)) mtu = fnhe->fnhe_pmtu; } @@ -1400,7 +1413,7 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr) if (likely(!mtu)) mtu = min(READ_ONCE(dev->mtu), IP_MAX_MTU); - return mtu - lwtunnel_headroom(nh->nh_lwtstate, mtu); + return mtu - lwtunnel_headroom(nhc->nhc_lwtstate, mtu); } static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, @@ -1431,8 +1444,10 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, orig = NULL; } fill_route_from_fnhe(rt, fnhe); - if (!rt->rt_gateway) - rt->rt_gateway = daddr; + if (!rt->rt_gw4) { + rt->rt_gw4 = daddr; + rt->rt_gw_family = AF_INET; + } if (do_cache) { dst_hold(&rt->dst); @@ -1451,15 +1466,15 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, return ret; } -static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) +static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt) { struct rtable *orig, *prev, **p; bool ret = true; if (rt_is_input_route(rt)) { - p = (struct rtable **)&nh->nh_rth_input; + p = (struct rtable **)&nhc->nhc_rth_input; } else { - p = (struct rtable **)raw_cpu_ptr(nh->nh_pcpu_rth_output); + p = (struct rtable **)raw_cpu_ptr(nhc->nhc_pcpu_rth_output); } orig = *p; @@ -1555,30 +1570,42 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, bool cached = false; if (fi) { - struct fib_nh *nh = &FIB_RES_NH(*res); + struct fib_nh_common *nhc = FIB_RES_NHC(*res); - if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) { - rt->rt_gateway = nh->nh_gw; - rt->rt_uses_gateway = 1; + if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) { + rt->rt_gw_family = nhc->nhc_gw_family; + /* only INET and INET6 are supported */ + if (likely(nhc->nhc_gw_family == AF_INET)) + rt->rt_gw4 = nhc->nhc_gw.ipv4; + else + rt->rt_gw6 = nhc->nhc_gw.ipv6; } + ip_dst_init_metrics(&rt->dst, fi->fib_metrics); #ifdef CONFIG_IP_ROUTE_CLASSID - rt->dst.tclassid = nh->nh_tclassid; + { + struct fib_nh *nh; + + nh = container_of(nhc, struct fib_nh, nh_common); + rt->dst.tclassid = nh->nh_tclassid; + } #endif - rt->dst.lwtstate = lwtstate_get(nh->nh_lwtstate); + rt->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate); if (unlikely(fnhe)) cached = rt_bind_exception(rt, fnhe, daddr, do_cache); else if (do_cache) - cached = rt_cache_route(nh, rt); + cached = rt_cache_route(nhc, rt); if (unlikely(!cached)) { /* Routes we intend to cache in nexthop exception or * FIB nexthop have the DST_NOCACHE bit clear. * However, if we are unsuccessful at storing this * route into the cache we really need to set it. 
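find_exception() above looks up per-nexthop route exceptions (redirects, PMTU) in a small hash and lazily evicts entries whose fnhe_expires has passed. The lookup-with-lazy-expiry pattern in miniature; the kernel version additionally handles RCU and collision chains:

#include <stdio.h>
#include <time.h>

#define NBUCKETS 64

struct exception {
    unsigned int daddr;     /* 0 = empty slot */
    time_t expires;         /* 0 = no expiry */
    unsigned int pmtu;
};

static struct exception table[NBUCKETS];

static struct exception *find_exception(unsigned int daddr, time_t now)
{
    struct exception *e = &table[daddr % NBUCKETS];

    if (e->daddr != daddr)
        return NULL;
    if (e->expires && now > e->expires) {  /* lazily drop the stale entry */
        e->daddr = 0;
        return NULL;
    }
    return e;
}

int main(void)
{
    time_t now = time(NULL);

    table[0x0a000001 % NBUCKETS] =
        (struct exception){ 0x0a000001, now - 1, 1400 };
    printf("%p\n", (void *)find_exception(0x0a000001, now)); /* null: expired */
    return 0;
}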
*/ - if (!rt->rt_gateway) - rt->rt_gateway = daddr; + if (!rt->rt_gw4) { + rt->rt_gw_family = AF_INET; + rt->rt_gw4 = daddr; + } rt_add_uncached_list(rt); } } else @@ -1611,8 +1638,8 @@ struct rtable *rt_dst_alloc(struct net_device *dev, rt->rt_iif = 0; rt->rt_pmtu = 0; rt->rt_mtu_locked = 0; - rt->rt_gateway = 0; - rt->rt_uses_gateway = 0; + rt->rt_gw_family = 0; + rt->rt_gw4 = 0; INIT_LIST_HEAD(&rt->rt_uncached); rt->dst.output = ip_output; @@ -1725,6 +1752,8 @@ static int __mkroute_input(struct sk_buff *skb, struct in_device *in_dev, __be32 daddr, __be32 saddr, u32 tos) { + struct fib_nh_common *nhc = FIB_RES_NHC(*res); + struct net_device *dev = nhc->nhc_dev; struct fib_nh_exception *fnhe; struct rtable *rth; int err; @@ -1733,7 +1762,7 @@ static int __mkroute_input(struct sk_buff *skb, u32 itag = 0; /* get a working reference to the output device */ - out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res)); + out_dev = __in_dev_get_rcu(dev); if (!out_dev) { net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n"); return -EINVAL; @@ -1750,10 +1779,14 @@ static int __mkroute_input(struct sk_buff *skb, do_cache = res->fi && !itag; if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) && - skb->protocol == htons(ETH_P_IP) && - (IN_DEV_SHARED_MEDIA(out_dev) || - inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) - IPCB(skb)->flags |= IPSKB_DOREDIRECT; + skb->protocol == htons(ETH_P_IP)) { + __be32 gw; + + gw = nhc->nhc_gw_family == AF_INET ? nhc->nhc_gw.ipv4 : 0; + if (IN_DEV_SHARED_MEDIA(out_dev) || + inet_addr_onlink(out_dev, saddr, gw)) + IPCB(skb)->flags |= IPSKB_DOREDIRECT; + } if (skb->protocol != htons(ETH_P_IP)) { /* Not IP (i.e. ARP). Do not create route, if it is @@ -1770,12 +1803,12 @@ static int __mkroute_input(struct sk_buff *skb, } } - fnhe = find_exception(&FIB_RES_NH(*res), daddr); + fnhe = find_exception(nhc, daddr); if (do_cache) { if (fnhe) rth = rcu_dereference(fnhe->fnhe_rth_input); else - rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); + rth = rcu_dereference(nhc->nhc_rth_input); if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); goto out; @@ -2069,7 +2102,9 @@ local_input: do_cache = false; if (res->fi) { if (!itag) { - rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); + struct fib_nh_common *nhc = FIB_RES_NHC(*res); + + rth = rcu_dereference(nhc->nhc_rth_input); if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); err = 0; @@ -2099,16 +2134,16 @@ local_input: } if (do_cache) { - struct fib_nh *nh = &FIB_RES_NH(*res); + struct fib_nh_common *nhc = FIB_RES_NHC(*res); - rth->dst.lwtstate = lwtstate_get(nh->nh_lwtstate); + rth->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate); if (lwtunnel_input_redirect(rth->dst.lwtstate)) { WARN_ON(rth->dst.input == lwtunnel_input); rth->dst.lwtstate->orig_input = rth->dst.input; rth->dst.input = lwtunnel_input; } - if (unlikely(!rt_cache_route(nh, rth))) + if (unlikely(!rt_cache_route(nhc, rth))) rt_add_uncached_list(rth); } skb_dst_set(skb, &rth->dst); @@ -2279,10 +2314,10 @@ static struct rtable *__mkroute_output(const struct fib_result *res, fnhe = NULL; do_cache &= fi != NULL; if (fi) { + struct fib_nh_common *nhc = FIB_RES_NHC(*res); struct rtable __rcu **prth; - struct fib_nh *nh = &FIB_RES_NH(*res); - fnhe = find_exception(nh, fl4->daddr); + fnhe = find_exception(nhc, fl4->daddr); if (!do_cache) goto add; if (fnhe) { @@ -2290,12 +2325,12 @@ static struct rtable *__mkroute_output(const struct fib_result *res, } else { if (unlikely(fl4->flowi4_flags & FLOWI_FLAG_KNOWN_NH && - !(nh->nh_gw 
&& - nh->nh_scope == RT_SCOPE_LINK))) { + !(nhc->nhc_gw_family && + nhc->nhc_scope == RT_SCOPE_LINK))) { do_cache = false; goto add; } - prth = raw_cpu_ptr(nh->nh_pcpu_rth_output); + prth = raw_cpu_ptr(nhc->nhc_pcpu_rth_output); } rth = rcu_dereference(*prth); if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst)) @@ -2600,8 +2635,11 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_genid = rt_genid_ipv4(net); rt->rt_flags = ort->rt_flags; rt->rt_type = ort->rt_type; - rt->rt_gateway = ort->rt_gateway; - rt->rt_uses_gateway = ort->rt_uses_gateway; + rt->rt_gw_family = ort->rt_gw_family; + if (rt->rt_gw_family == AF_INET) + rt->rt_gw4 = ort->rt_gw4; + else if (rt->rt_gw_family == AF_INET6) + rt->rt_gw6 = ort->rt_gw6; INIT_LIST_HEAD(&rt->rt_uncached); } @@ -2680,9 +2718,22 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr)) goto nla_put_failure; } - if (rt->rt_uses_gateway && - nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gateway)) + if (rt->rt_gw_family == AF_INET && + nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) { goto nla_put_failure; + } else if (rt->rt_gw_family == AF_INET6) { + int alen = sizeof(struct in6_addr); + struct nlattr *nla; + struct rtvia *via; + + nla = nla_reserve(skb, RTA_VIA, alen + 2); + if (!nla) + goto nla_put_failure; + + via = nla_data(nla); + via->rtvia_family = AF_INET6; + memcpy(via->rtvia_addr, &rt->rt_gw6, alen); + } expires = rt->dst.expires; if (expires) { @@ -2819,8 +2870,8 @@ static int inet_rtm_valid_getroute_req(struct sk_buff *skb, } if (!netlink_strict_get_check(skb)) - return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_ipv4_policy, extack); + return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv4_policy, extack); rtm = nlmsg_data(nlh); if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) || @@ -2838,8 +2889,8 @@ static int inet_rtm_valid_getroute_req(struct sk_buff *skb, return -EINVAL; } - err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_ipv4_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv4_policy, extack); if (err) return err; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index e531344611a0..008545f63667 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -216,16 +216,15 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, refcount_set(&req->rsk_refcnt, 1); tcp_sk(child)->tsoffset = tsoff; sock_rps_save_rxhash(child, skb); - if (!inet_csk_reqsk_queue_add(sk, req, child)) { - bh_unlock_sock(child); - sock_put(child); - child = NULL; - reqsk_put(req); - } - } else { - reqsk_free(req); + if (inet_csk_reqsk_queue_add(sk, req, child)) + return child; + + bh_unlock_sock(child); + sock_put(child); } - return child; + __reqsk_free(req); + + return NULL; } EXPORT_SYMBOL(tcp_get_cookie_sock); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index eeb4041fa5f9..875867b64d6a 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -550,6 +550,15 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, + { + .procname = "fib_sync_mem", + .data = &sysctl_fib_sync_mem, + .maxlen = sizeof(sysctl_fib_sync_mem), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra1 = &sysctl_fib_sync_mem_min, + .extra2 = &sysctl_fib_sync_mem_max, + }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6baa6dc1b13b..1fa15beb8380 100644 --- 
a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -457,18 +457,6 @@ void tcp_init_sock(struct sock *sk) } EXPORT_SYMBOL(tcp_init_sock); -void tcp_init_transfer(struct sock *sk, int bpf_op) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - - tcp_mtup_init(sk); - icsk->icsk_af_ops->rebuild_header(sk); - tcp_init_metrics(sk); - tcp_call_bpf(sk, bpf_op, 0, NULL); - tcp_init_congestion_control(sk); - tcp_init_buffer_space(sk); -} - static void tcp_tx_timestamp(struct sock *sk, u16 tsflags) { struct sk_buff *skb = tcp_write_queue_tail(sk); @@ -865,6 +853,18 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, { struct sk_buff *skb; + if (likely(!size)) { + skb = sk->sk_tx_skb_cache; + if (skb && !skb_cloned(skb)) { + skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); + sk->sk_tx_skb_cache = NULL; + pskb_trim(skb, 0); + INIT_LIST_HEAD(&skb->tcp_tsorted_anchor); + skb_shinfo(skb)->tx_flags = 0; + memset(TCP_SKB_CB(skb), 0, sizeof(struct tcp_skb_cb)); + return skb; + } + } /* The TCP header must be at least 32-bit aligned. */ size = ALIGN(size, 4); @@ -1098,30 +1098,6 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, } EXPORT_SYMBOL(tcp_sendpage); -/* Do not bother using a page frag for very small frames. - * But use this heuristic only for the first skb in write queue. - * - * Having no payload in skb->head allows better SACK shifting - * in tcp_shift_skb_data(), reducing sack/rack overhead, because - * write queue has less skbs. - * Each skb can hold up to MAX_SKB_FRAGS * 32Kbytes, or ~0.5 MB. - * This also speeds up tso_fragment(), since it wont fallback - * to tcp_fragment(). - */ -static int linear_payload_sz(bool first_skb) -{ - if (first_skb) - return SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER); - return 0; -} - -static int select_size(bool first_skb, bool zc) -{ - if (zc) - return 0; - return linear_payload_sz(first_skb); -} - void tcp_free_fastopen_req(struct tcp_sock *tp) { if (tp->fastopen_req) { @@ -1272,7 +1248,6 @@ restart: if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) { bool first_skb; - int linear; new_segment: if (!sk_stream_memory_free(sk)) @@ -1283,8 +1258,7 @@ new_segment: goto restart; } first_skb = tcp_rtx_and_write_queues_empty(sk); - linear = select_size(first_skb, zc); - skb = sk_stream_alloc_skb(sk, linear, sk->sk_allocation, + skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, first_skb); if (!skb) goto wait_for_memory; @@ -2552,6 +2526,11 @@ void tcp_write_queue_purge(struct sock *sk) sk_wmem_free_skb(sk, skb); } tcp_rtx_queue_purge(sk); + skb = sk->sk_tx_skb_cache; + if (skb) { + __kfree_skb(skb); + sk->sk_tx_skb_cache = NULL; + } INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue); sk_mem_reclaim(sk); tcp_clear_all_retrans_hints(tcp_sk(sk)); @@ -2587,6 +2566,10 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_clear_xmit_timers(sk); __skb_queue_purge(&sk->sk_receive_queue); + if (sk->sk_rx_skb_cache) { + __kfree_skb(sk->sk_rx_skb_cache); + sk->sk_rx_skb_cache = NULL; + } tp->copied_seq = tp->rcv_nxt; tp->urg_data = 0; tcp_write_queue_purge(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 731d3045b50a..077d9abdfcf5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2252,7 +2252,7 @@ static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp, */ static inline bool tcp_packet_delayed(const struct tcp_sock *tp) { - return !tp->retrans_stamp || + return tp->retrans_stamp && tcp_tsopt_ecr_before(tp, tp->retrans_stamp); } @@ -3521,7 +3521,7 @@ static void tcp_xmit_recovery(struct sock *sk, int 
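The sk_stream_alloc_skb() hunk above adds a one-entry per-socket cache: a freshly freed skb parks in sk->sk_tx_skb_cache and the next zero-size allocation recycles it instead of going back to the SLAB allocator, which is where the RPC-workload spinlock contention was. The one-slot cache shape, generically and with illustrative names:

#include <stdio.h>
#include <stdlib.h>

struct buf { size_t cap; };

static struct buf *slot;   /* one-entry cache, like sk_tx_skb_cache */

static struct buf *buf_alloc(void)
{
    if (slot) {                  /* fast path: reuse the parked buffer */
        struct buf *b = slot;
        slot = NULL;
        return b;
    }
    return malloc(sizeof(struct buf)); /* slow path: real allocation */
}

static void buf_free(struct buf *b)
{
    if (!slot) {                 /* park instead of freeing */
        slot = b;
        return;
    }
    free(b);
}

int main(void)
{
    struct buf *a = buf_alloc();

    buf_free(a);
    printf("%d\n", a == buf_alloc()); /* 1: the same buffer came back */
    return 0;
}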
rexmit) { struct tcp_sock *tp = tcp_sk(sk); - if (rexmit == REXMIT_NONE) + if (rexmit == REXMIT_NONE || sk->sk_state == TCP_SYN_SENT) return; if (unlikely(rexmit == 2)) { @@ -5647,6 +5647,32 @@ discard: } EXPORT_SYMBOL(tcp_rcv_established); +void tcp_init_transfer(struct sock *sk, int bpf_op) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + + tcp_mtup_init(sk); + icsk->icsk_af_ops->rebuild_header(sk); + tcp_init_metrics(sk); + + /* Initialize the congestion window to start the transfer. + * Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been + * retransmitted. In light of RFC6298 more aggressive 1sec + * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK + * retransmission has occurred. + */ + if (tp->total_retrans > 1 && tp->undo_marker) + tp->snd_cwnd = 1; + else + tp->snd_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk)); + tp->snd_cwnd_stamp = tcp_jiffies32; + + tcp_call_bpf(sk, bpf_op, 0, NULL); + tcp_init_congestion_control(sk); + tcp_init_buffer_space(sk); +} + void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); @@ -5748,6 +5774,21 @@ static void smc_check_reset_syn(struct tcp_sock *tp) #endif } +static void tcp_try_undo_spurious_syn(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + u32 syn_stamp; + + /* undo_marker is set when SYN or SYNACK times out. The timeout is + * spurious if the ACK's timestamp option echo value matches the + * original SYN timestamp. + */ + syn_stamp = tp->retrans_stamp; + if (tp->undo_marker && syn_stamp && tp->rx_opt.saw_tstamp && + syn_stamp == tp->rx_opt.rcv_tsecr) + tp->undo_marker = 0; +} + static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th) { @@ -5815,6 +5856,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tcp_ecn_rcv_synack(tp, th); tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); + tcp_try_undo_spurious_syn(sk); tcp_ack(sk, skb, FLAG_SLOWPATH); /* Ok.. it's good. Set up sequence numbers and @@ -5973,6 +6015,27 @@ reset_and_undo: return 1; } +static void tcp_rcv_synrecv_state_fastopen(struct sock *sk) +{ + tcp_try_undo_loss(sk, false); + inet_csk(sk)->icsk_retransmits = 0; + + /* Once we leave TCP_SYN_RECV or TCP_FIN_WAIT_1, + * we no longer need req so release it. + */ + reqsk_fastopen_remove(sk, tcp_sk(sk)->fastopen_rsk, false); + + /* Re-arm the timer because data may have been sent out. + * This is similar to the regular data transmission case + * when new data has just been ack'ed. + * + * (TFO) - we could try to be more aggressive and + * retransmitting any data sooner based on when they + * are sent out. + */ + tcp_rearm_rto(sk); +} + /* * This function implements the receiving procedure of RFC 793 for * all states except ESTABLISHED and TIME_WAIT. @@ -6069,22 +6132,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (!tp->srtt_us) tcp_synack_rtt_meas(sk, req); - /* Once we leave TCP_SYN_RECV, we no longer need req - * so release it. - */ if (req) { - inet_csk(sk)->icsk_retransmits = 0; - reqsk_fastopen_remove(sk, req, false); - /* Re-arm the timer because data may have been sent out. - * This is similar to the regular data transmission case - * when new data has just been ack'ed. - * - * (TFO) - we could try to be more aggressive and - * retransmitting any data sooner based on when they - * are sent out. 
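tcp_try_undo_spurious_syn() above declares a SYN timeout spurious when the ACK's timestamp echo equals the timestamp of the original SYN: the peer must have answered the first transmission, so the retransmission state can be undone. The comparison in isolation:

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

/* True when the echoed timestamp proves the original SYN arrived. */
static bool syn_rtx_was_spurious(uint32_t undo_marker, uint32_t syn_stamp,
                                 bool saw_tstamp, uint32_t rcv_tsecr)
{
    return undo_marker && syn_stamp && saw_tstamp &&
           syn_stamp == rcv_tsecr;
}

int main(void)
{
    /* SYN sent at ts 100, timed out, rexmitted at 200; ACK echoes 100. */
    printf("%d\n", syn_rtx_was_spurious(1, 100, true, 100)); /* 1 */
    printf("%d\n", syn_rtx_was_spurious(1, 100, true, 200)); /* 0 */
    return 0;
}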
- */ - tcp_rearm_rto(sk); + tcp_rcv_synrecv_state_fastopen(sk); } else { + tcp_try_undo_spurious_syn(sk); + tp->retrans_stamp = 0; tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB); tp->copied_seq = tp->rcv_nxt; } @@ -6119,16 +6171,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) case TCP_FIN_WAIT1: { int tmo; - /* If we enter the TCP_FIN_WAIT1 state and we are a - * Fast Open socket and this is the first acceptable - * ACK we have received, this would have acknowledged - * our SYNACK so stop the SYNACK timer. - */ - if (req) { - /* We no longer need the request sock. */ - reqsk_fastopen_remove(sk, req, false); - tcp_rearm_rto(sk); - } + if (req) + tcp_rcv_synrecv_state_fastopen(sk); + if (tp->snd_una != tp->write_seq) break; @@ -6263,6 +6308,11 @@ static inline void pr_drop_req(struct request_sock *req, __u16 port, int family) * congestion control: Linux DCTCP asserts ECT on all packets, * including SYN, which is most optimal solution; however, * others, such as FreeBSD do not. + * + * Exception: At least one of the reserved bits of the TCP header (th->res1) is + * set, indicating the use of a future TCP extension (such as AccECN). See + * RFC8311 §4.3 which updates RFC3168 to allow the development of such + * extensions. */ static void tcp_ecn_create_request(struct request_sock *req, const struct sk_buff *skb, @@ -6282,7 +6332,7 @@ static void tcp_ecn_create_request(struct request_sock *req, ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK); ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst; - if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk) || + if (((!ect || th->res1) && ecn_ok) || tcp_ca_needs_ecn(listen_sk) || (ecn_ok_dst & DST_FEATURE_ECN_CA) || tcp_bpf_ca_needs_ecn((struct sock *)req)) inet_rsk(req)->ecn_ok = 1; @@ -6298,7 +6348,7 @@ static void tcp_openreq_init(struct request_sock *req, req->cookie_ts = 0; tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq; tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; - tcp_rsk(req)->snt_synack = tcp_clock_us(); + tcp_rsk(req)->snt_synack = 0; tcp_rsk(req)->last_oow_ack_time = 0; req->mss = rx_opt->mss_clamp; req->ts_recent = rx_opt->saw_tstamp ? 
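tcp_ecn_create_request() above additionally accepts a SYN whose reserved TCP header bits (th->res1) are set even when ECT is absent, following the RFC 8311 relaxation that leaves room for experiments such as AccECN. The decision in boolean form, dropping the dst-feature terms for brevity:

#include <stdio.h>
#include <stdbool.h>

static bool ecn_ok_for_request(bool ect, bool res1_set, bool ecn_sysctl_ok,
                               bool ca_needs_ecn)
{
    /* Old rule: !ect && ecn_ok.  New rule also admits reserved bits. */
    return ((!ect || res1_set) && ecn_sysctl_ok) || ca_needs_ecn;
}

int main(void)
{
    printf("%d\n", ecn_ok_for_request(true, true,  true, false)); /* 1: res1 */
    printf("%d\n", ecn_ok_for_request(true, false, true, false)); /* 0 */
    return 0;
}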
rx_opt->rcv_tsval : 0; @@ -6502,8 +6552,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, reqsk_fastopen_remove(fastopen_sk, req, false); bh_unlock_sock(fastopen_sk); sock_put(fastopen_sk); - reqsk_put(req); - goto drop; + goto drop_and_free; } sk->sk_data_ready(sk); bh_unlock_sock(fastopen_sk); @@ -6527,7 +6576,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, drop_and_release: dst_release(dst); drop_and_free: - reqsk_free(req); + __reqsk_free(req); drop: tcp_listendrop(sk); return 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a2896944aa37..af81e4a6a8d8 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1785,6 +1785,7 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph, int tcp_v4_rcv(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); + struct sk_buff *skb_to_free; int sdif = inet_sdif(skb); const struct iphdr *iph; const struct tcphdr *th; @@ -1916,11 +1917,17 @@ process: tcp_segs_in(tcp_sk(sk), skb); ret = 0; if (!sock_owned_by_user(sk)) { + skb_to_free = sk->sk_rx_skb_cache; + sk->sk_rx_skb_cache = NULL; ret = tcp_v4_do_rcv(sk, skb); - } else if (tcp_add_backlog(sk, skb)) { - goto discard_and_relse; + } else { + if (tcp_add_backlog(sk, skb)) + goto discard_and_relse; + skb_to_free = NULL; } bh_unlock_sock(sk); + if (skb_to_free) + __kfree_skb(skb_to_free); put_and_return: if (refcounted) diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index b467a7cabf40..c4848e7a0aad 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -512,16 +512,6 @@ reset: inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK; } - /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been - * retransmitted. In light of RFC6298 more aggressive 1sec - * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK - * retransmission has occurred. 
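The tcp_v4_rcv() hunk above is the receive side of the skb cache, and it carries a detail worth copying: the cached skb is detached while the socket is locked but only __kfree_skb()'d after bh_unlock_sock(), keeping the free out of the critical section. The unlock-then-free shape, generically:

#include <stdio.h>
#include <stdlib.h>

struct obj { int v; };

static struct obj *cache;          /* protected by "the lock" */

static void lock(void)   { /* elided */ }
static void unlock(void) { /* elided */ }

static void consume(void)
{
    struct obj *to_free;

    lock();
    to_free = cache;               /* detach under the lock ... */
    cache = NULL;
    /* ... real work on the locked structure goes here ... */
    unlock();

    free(to_free);                 /* ... but free outside it */
}

int main(void)
{
    cache = malloc(sizeof(*cache));
    consume();
    puts("done");
    return 0;
}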
- */ - if (tp->total_retrans > 1) - tp->snd_cwnd = 1; - else - tp->snd_cwnd = tcp_init_cwnd(tp, dst); - tp->snd_cwnd_stamp = tcp_jiffies32; } bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst) @@ -658,7 +648,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, { int n = 0; - nest = nla_nest_start(msg, TCP_METRICS_ATTR_VALS); + nest = nla_nest_start_noflag(msg, TCP_METRICS_ATTR_VALS); if (!nest) goto nla_put_failure; for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) { @@ -951,14 +941,14 @@ static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info) static const struct genl_ops tcp_metrics_nl_ops[] = { { .cmd = TCP_METRICS_CMD_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tcp_metrics_nl_cmd_get, .dumpit = tcp_metrics_nl_dump, - .policy = tcp_metrics_nl_policy, }, { .cmd = TCP_METRICS_CMD_DEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tcp_metrics_nl_cmd_del, - .policy = tcp_metrics_nl_policy, .flags = GENL_ADMIN_PERM, }, }; @@ -968,6 +958,7 @@ static struct genl_family tcp_metrics_nl_family __ro_after_init = { .name = TCP_METRICS_GENL_NAME, .version = TCP_METRICS_GENL_VERSION, .maxattr = TCP_METRICS_ATTR_MAX, + .policy = tcp_metrics_nl_policy, .netnsok = true, .module = THIS_MODULE, .ops = tcp_metrics_nl_ops, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 79900f783e0d..9c2a0d36fb20 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -522,6 +522,11 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->rx_opt.ts_recent_stamp = 0; newtp->tcp_header_len = sizeof(struct tcphdr); } + if (req->num_timeout) { + newtp->undo_marker = treq->snt_isn; + newtp->retrans_stamp = div_u64(treq->snt_synack, + USEC_PER_SEC / TCP_TS_HZ); + } newtp->tsoffset = treq->ts_off; #ifdef CONFIG_TCP_MD5SIG newtp->md5sig_info = NULL; /*XXX*/ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 4522579aaca2..0c4ed66dc1bf 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -52,12 +52,8 @@ void tcp_mstamp_refresh(struct tcp_sock *tp) { u64 val = tcp_clock_ns(); - if (val > tp->tcp_clock_cache) - tp->tcp_clock_cache = val; - - val = div_u64(val, NSEC_PER_USEC); - if (val > tp->tcp_mstamp) - tp->tcp_mstamp = val; + tp->tcp_clock_cache = val; + tp->tcp_mstamp = div_u64(val, NSEC_PER_USEC); } static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, @@ -3092,7 +3088,6 @@ void tcp_send_fin(struct sock *sk) tskb = skb_rb_last(&sk->tcp_rtx_queue); if (tskb) { -coalesce: TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN; TCP_SKB_CB(tskb)->end_seq++; tp->write_seq++; @@ -3108,11 +3103,9 @@ coalesce: } } else { skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation); - if (unlikely(!skb)) { - if (tskb) - goto coalesce; + if (unlikely(!skb)) return; - } + INIT_LIST_HEAD(&skb->tcp_tsorted_anchor); skb_reserve(skb, MAX_TCP_HEADER); sk_forced_mem_schedule(sk, skb->truesize); @@ -3254,7 +3247,11 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, skb->skb_mstamp_ns = cookie_init_timestamp(req); else #endif + { skb->skb_mstamp_ns = tcp_clock_ns(); + if (!tcp_rsk(req)->snt_synack) /* Timestamp first SYNACK */ + tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb); + } #ifdef CONFIG_TCP_MD5SIG rcu_read_lock(); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index f0c86398e6a7..2ac23da42dd2 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -393,6 +393,9 @@ static void 
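tcp_make_synack() above timestamps only the first SYNACK, leaving snt_synack untouched on retransmissions, and tcp_create_openreq_child() earlier uses that stamp to seed retrans_stamp after a retried handshake. Recording a first-event timestamp exactly once is the whole trick:

#include <stdio.h>
#include <stdint.h>

static uint64_t snt_first;   /* 0 = not sent yet, cf. tcp_rsk(req)->snt_synack */

static void send_synack(uint64_t now_us)
{
    if (!snt_first)          /* timestamp only the first transmission */
        snt_first = now_us;
    printf("synack at %llu (first %llu)\n",
           (unsigned long long)now_us, (unsigned long long)snt_first);
}

int main(void)
{
    send_synack(1000);   /* first  -> records 1000 */
    send_synack(2000);   /* rexmit -> keeps   1000 */
    return 0;
}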
tcp_fastopen_synack_timer(struct sock *sk) tcp_write_err(sk); return; } + /* Lower cwnd after certain SYNACK timeout like tcp_init_transfer() */ + if (icsk->icsk_retransmits == 1) + tcp_enter_loss(sk); /* XXX (TFO) - Unlike regular SYN-ACK retransmit, we ignore error * returned from rtx_syn_ack() to make it more persistent like * regular retransmit because if the child socket has been accepted diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 372fdc5381a9..3c58ba02af7d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1631,7 +1631,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) EXPORT_SYMBOL(udp_ioctl); struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, - int noblock, int *peeked, int *off, int *err) + int noblock, int *off, int *err) { struct sk_buff_head *sk_queue = &sk->sk_receive_queue; struct sk_buff_head *queue; @@ -1650,13 +1650,11 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, break; error = -EAGAIN; - *peeked = 0; do { spin_lock_bh(&queue->lock); skb = __skb_try_recv_from_queue(sk, queue, flags, udp_skb_destructor, - peeked, off, err, - &last); + off, err, &last); if (skb) { spin_unlock_bh(&queue->lock); return skb; @@ -1677,8 +1675,7 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, skb = __skb_try_recv_from_queue(sk, queue, flags, udp_skb_dtor_locked, - peeked, off, err, - &last); + off, err, &last); spin_unlock(&sk_queue->lock); spin_unlock_bh(&queue->lock); if (skb) @@ -1713,8 +1710,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); struct sk_buff *skb; unsigned int ulen, copied; - int peeked, peeking, off; - int err; + int off, err, peeking = flags & MSG_PEEK; int is_udplite = IS_UDPLITE(sk); bool checksum_valid = false; @@ -1722,9 +1718,8 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, return ip_recv_error(sk, msg, len, addr_len); try_again: - peeking = flags & MSG_PEEK; off = sk_peek_offset(sk, flags); - skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err); + skb = __skb_recv_udp(sk, flags, noblock, &off, &err); if (!skb) return err; @@ -1762,7 +1757,7 @@ try_again: } if (unlikely(err)) { - if (!peeked) { + if (!peeking) { atomic_inc(&sk->sk_drops); UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); @@ -1771,7 +1766,7 @@ try_again: return err; } - if (!peeked) + if (!peeking) UDP_INC_STATS(sock_net(sk), UDP_MIB_INDATAGRAMS, is_udplite); diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c deleted file mode 100644 index 856d2dfdb44b..000000000000 --- a/net/ipv4/xfrm4_mode_beet.c +++ /dev/null @@ -1,155 +0,0 @@ -/* - * xfrm4_mode_beet.c - BEET mode encapsulation for IPv4. 
- * - * Copyright (c) 2006 Diego Beltrami <diego.beltrami@gmail.com> - * Miika Komu <miika@iki.fi> - * Herbert Xu <herbert@gondor.apana.org.au> - * Abhinav Pathak <abhinav.pathak@hiit.fi> - * Jeff Ahrenholz <ahrenholz@gmail.com> - */ - -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/stringify.h> -#include <net/dst.h> -#include <net/ip.h> -#include <net/xfrm.h> - -static void xfrm4_beet_make_header(struct sk_buff *skb) -{ - struct iphdr *iph = ip_hdr(skb); - - iph->ihl = 5; - iph->version = 4; - - iph->protocol = XFRM_MODE_SKB_CB(skb)->protocol; - iph->tos = XFRM_MODE_SKB_CB(skb)->tos; - - iph->id = XFRM_MODE_SKB_CB(skb)->id; - iph->frag_off = XFRM_MODE_SKB_CB(skb)->frag_off; - iph->ttl = XFRM_MODE_SKB_CB(skb)->ttl; -} - -/* Add encapsulation header. - * - * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt. - */ -static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb) -{ - struct ip_beet_phdr *ph; - struct iphdr *top_iph; - int hdrlen, optlen; - - hdrlen = 0; - optlen = XFRM_MODE_SKB_CB(skb)->optlen; - if (unlikely(optlen)) - hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4); - - skb_set_network_header(skb, -x->props.header_len - - hdrlen + (XFRM_MODE_SKB_CB(skb)->ihl - sizeof(*top_iph))); - if (x->sel.family != AF_INET6) - skb->network_header += IPV4_BEET_PHMAXLEN; - skb->mac_header = skb->network_header + - offsetof(struct iphdr, protocol); - skb->transport_header = skb->network_header + sizeof(*top_iph); - - xfrm4_beet_make_header(skb); - - ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdrlen); - - top_iph = ip_hdr(skb); - - if (unlikely(optlen)) { - BUG_ON(optlen < 0); - - ph->padlen = 4 - (optlen & 4); - ph->hdrlen = optlen / 8; - ph->nexthdr = top_iph->protocol; - if (ph->padlen) - memset(ph + 1, IPOPT_NOP, ph->padlen); - - top_iph->protocol = IPPROTO_BEETPH; - top_iph->ihl = sizeof(struct iphdr) / 4; - } - - top_iph->saddr = x->props.saddr.a4; - top_iph->daddr = x->id.daddr.a4; - - return 0; -} - -static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb) -{ - struct iphdr *iph; - int optlen = 0; - int err = -EINVAL; - - if (unlikely(XFRM_MODE_SKB_CB(skb)->protocol == IPPROTO_BEETPH)) { - struct ip_beet_phdr *ph; - int phlen; - - if (!pskb_may_pull(skb, sizeof(*ph))) - goto out; - - ph = (struct ip_beet_phdr *)skb->data; - - phlen = sizeof(*ph) + ph->padlen; - optlen = ph->hdrlen * 8 + (IPV4_BEET_PHMAXLEN - phlen); - if (optlen < 0 || optlen & 3 || optlen > 250) - goto out; - - XFRM_MODE_SKB_CB(skb)->protocol = ph->nexthdr; - - if (!pskb_may_pull(skb, phlen)) - goto out; - __skb_pull(skb, phlen); - } - - skb_push(skb, sizeof(*iph)); - skb_reset_network_header(skb); - skb_mac_header_rebuild(skb); - - xfrm4_beet_make_header(skb); - - iph = ip_hdr(skb); - - iph->ihl += optlen / 4; - iph->tot_len = htons(skb->len); - iph->daddr = x->sel.daddr.a4; - iph->saddr = x->sel.saddr.a4; - iph->check = 0; - iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl); - err = 0; -out: - return err; -} - -static struct xfrm_mode xfrm4_beet_mode = { - .input2 = xfrm4_beet_input, - .input = xfrm_prepare_input, - .output2 = xfrm4_beet_output, - .output = xfrm4_prepare_output, - .owner = THIS_MODULE, - .encap = XFRM_MODE_BEET, - .flags = XFRM_MODE_FLAG_TUNNEL, -}; - -static int __init xfrm4_beet_init(void) -{ - return xfrm_register_mode(&xfrm4_beet_mode, AF_INET); -} - -static void __exit xfrm4_beet_exit(void) -{ - int err; - - err = xfrm_unregister_mode(&xfrm4_beet_mode, 
AF_INET); - BUG_ON(err); -} - -module_init(xfrm4_beet_init); -module_exit(xfrm4_beet_exit); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_BEET); diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c deleted file mode 100644 index 1ad2c2c4e250..000000000000 --- a/net/ipv4/xfrm4_mode_transport.c +++ /dev/null @@ -1,114 +0,0 @@ -/* - * xfrm4_mode_transport.c - Transport mode encapsulation for IPv4. - * - * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au> - */ - -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/stringify.h> -#include <net/dst.h> -#include <net/ip.h> -#include <net/xfrm.h> -#include <net/protocol.h> - -/* Add encapsulation header. - * - * The IP header will be moved forward to make space for the encapsulation - * header. - */ -static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) -{ - struct iphdr *iph = ip_hdr(skb); - int ihl = iph->ihl * 4; - - skb_set_inner_transport_header(skb, skb_transport_offset(skb)); - - skb_set_network_header(skb, -x->props.header_len); - skb->mac_header = skb->network_header + - offsetof(struct iphdr, protocol); - skb->transport_header = skb->network_header + ihl; - __skb_pull(skb, ihl); - memmove(skb_network_header(skb), iph, ihl); - return 0; -} - -/* Remove encapsulation header. - * - * The IP header will be moved over the top of the encapsulation header. - * - * On entry, skb->h shall point to where the IP header should be and skb->nh - * shall be set to where the IP header currently is. skb->data shall point - * to the start of the payload. - */ -static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) -{ - int ihl = skb->data - skb_transport_header(skb); - - if (skb->transport_header != skb->network_header) { - memmove(skb_transport_header(skb), - skb_network_header(skb), ihl); - skb->network_header = skb->transport_header; - } - ip_hdr(skb)->tot_len = htons(skb->len + ihl); - skb_reset_transport_header(skb); - return 0; -} - -static struct sk_buff *xfrm4_transport_gso_segment(struct xfrm_state *x, - struct sk_buff *skb, - netdev_features_t features) -{ - const struct net_offload *ops; - struct sk_buff *segs = ERR_PTR(-EINVAL); - struct xfrm_offload *xo = xfrm_offload(skb); - - skb->transport_header += x->props.header_len; - ops = rcu_dereference(inet_offloads[xo->proto]); - if (likely(ops && ops->callbacks.gso_segment)) - segs = ops->callbacks.gso_segment(skb, features); - - return segs; -} - -static void xfrm4_transport_xmit(struct xfrm_state *x, struct sk_buff *skb) -{ - struct xfrm_offload *xo = xfrm_offload(skb); - - skb_reset_mac_len(skb); - pskb_pull(skb, skb->mac_len + sizeof(struct iphdr) + x->props.header_len); - - if (xo->flags & XFRM_GSO_SEGMENT) { - skb_reset_transport_header(skb); - skb->transport_header -= x->props.header_len; - } -} - -static struct xfrm_mode xfrm4_transport_mode = { - .input = xfrm4_transport_input, - .output = xfrm4_transport_output, - .gso_segment = xfrm4_transport_gso_segment, - .xmit = xfrm4_transport_xmit, - .owner = THIS_MODULE, - .encap = XFRM_MODE_TRANSPORT, -}; - -static int __init xfrm4_transport_init(void) -{ - return xfrm_register_mode(&xfrm4_transport_mode, AF_INET); -} - -static void __exit xfrm4_transport_exit(void) -{ - int err; - - err = xfrm_unregister_mode(&xfrm4_transport_mode, AF_INET); - BUG_ON(err); -} - -module_init(xfrm4_transport_init); -module_exit(xfrm4_transport_exit); -MODULE_LICENSE("GPL"); 
-MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_TRANSPORT); diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c deleted file mode 100644 index 2a9764bd1719..000000000000 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ /dev/null @@ -1,152 +0,0 @@ -/* - * xfrm4_mode_tunnel.c - Tunnel mode encapsulation for IPv4. - * - * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au> - */ - -#include <linux/gfp.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/stringify.h> -#include <net/dst.h> -#include <net/inet_ecn.h> -#include <net/ip.h> -#include <net/xfrm.h> - -static inline void ipip_ecn_decapsulate(struct sk_buff *skb) -{ - struct iphdr *inner_iph = ipip_hdr(skb); - - if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos)) - IP_ECN_set_ce(inner_iph); -} - -/* Add encapsulation header. - * - * The top IP header will be constructed per RFC 2401. - */ -static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) -{ - struct dst_entry *dst = skb_dst(skb); - struct iphdr *top_iph; - int flags; - - skb_set_inner_network_header(skb, skb_network_offset(skb)); - skb_set_inner_transport_header(skb, skb_transport_offset(skb)); - - skb_set_network_header(skb, -x->props.header_len); - skb->mac_header = skb->network_header + - offsetof(struct iphdr, protocol); - skb->transport_header = skb->network_header + sizeof(*top_iph); - top_iph = ip_hdr(skb); - - top_iph->ihl = 5; - top_iph->version = 4; - - top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family); - - /* DS disclosing depends on XFRM_SA_XFLAG_DONT_ENCAP_DSCP */ - if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP) - top_iph->tos = 0; - else - top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos; - top_iph->tos = INET_ECN_encapsulate(top_iph->tos, - XFRM_MODE_SKB_CB(skb)->tos); - - flags = x->props.flags; - if (flags & XFRM_STATE_NOECN) - IP_ECN_clear(top_iph); - - top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? 
- 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF)); - - top_iph->ttl = ip4_dst_hoplimit(xfrm_dst_child(dst)); - - top_iph->saddr = x->props.saddr.a4; - top_iph->daddr = x->id.daddr.a4; - ip_select_ident(dev_net(dst->dev), skb, NULL); - - return 0; -} - -static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) -{ - int err = -EINVAL; - - if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP) - goto out; - - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - goto out; - - err = skb_unclone(skb, GFP_ATOMIC); - if (err) - goto out; - - if (x->props.flags & XFRM_STATE_DECAP_DSCP) - ipv4_copy_dscp(XFRM_MODE_SKB_CB(skb)->tos, ipip_hdr(skb)); - if (!(x->props.flags & XFRM_STATE_NOECN)) - ipip_ecn_decapsulate(skb); - - skb_reset_network_header(skb); - skb_mac_header_rebuild(skb); - if (skb->mac_len) - eth_hdr(skb)->h_proto = skb->protocol; - - err = 0; - -out: - return err; -} - -static struct sk_buff *xfrm4_mode_tunnel_gso_segment(struct xfrm_state *x, - struct sk_buff *skb, - netdev_features_t features) -{ - __skb_push(skb, skb->mac_len); - return skb_mac_gso_segment(skb, features); -} - -static void xfrm4_mode_tunnel_xmit(struct xfrm_state *x, struct sk_buff *skb) -{ - struct xfrm_offload *xo = xfrm_offload(skb); - - if (xo->flags & XFRM_GSO_SEGMENT) - skb->transport_header = skb->network_header + - sizeof(struct iphdr); - - skb_reset_mac_len(skb); - pskb_pull(skb, skb->mac_len + x->props.header_len); -} - -static struct xfrm_mode xfrm4_tunnel_mode = { - .input2 = xfrm4_mode_tunnel_input, - .input = xfrm_prepare_input, - .output2 = xfrm4_mode_tunnel_output, - .output = xfrm4_prepare_output, - .gso_segment = xfrm4_mode_tunnel_gso_segment, - .xmit = xfrm4_mode_tunnel_xmit, - .owner = THIS_MODULE, - .encap = XFRM_MODE_TUNNEL, - .flags = XFRM_MODE_FLAG_TUNNEL, -}; - -static int __init xfrm4_mode_tunnel_init(void) -{ - return xfrm_register_mode(&xfrm4_tunnel_mode, AF_INET); -} - -static void __exit xfrm4_mode_tunnel_exit(void) -{ - int err; - - err = xfrm_unregister_mode(&xfrm4_tunnel_mode, AF_INET); - BUG_ON(err); -} - -module_init(xfrm4_mode_tunnel_init); -module_exit(xfrm4_mode_tunnel_exit); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_TUNNEL); diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index be980c195fc5..9bb8905088c7 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -58,21 +58,6 @@ int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb) return xfrm4_extract_header(skb); } -int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb) -{ - int err; - - err = xfrm_inner_extract_output(x, skb); - if (err) - return err; - - IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE; - skb->protocol = htons(ETH_P_IP); - - return x->outer_mode->output2(x, skb); -} -EXPORT_SYMBOL(xfrm4_prepare_output); - int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb) { memset(IPCB(skb), 0, sizeof(*IPCB(skb))); @@ -87,6 +72,8 @@ int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb) static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct xfrm_state *x = skb_dst(skb)->xfrm; + const struct xfrm_state_afinfo *afinfo; + int ret = -EAFNOSUPPORT; #ifdef CONFIG_NETFILTER if (!x) { @@ -95,7 +82,15 @@ static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb) } #endif - return x->outer_mode->afinfo->output_finish(sk, skb); + rcu_read_lock(); + afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family); + if (likely(afinfo)) + ret = afinfo->output_finish(sk, skb); + else 
+ kfree_skb(skb); + rcu_read_unlock(); + + return ret; } int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 2b144b92ae46..cdef8f9a3b01 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -12,7 +12,6 @@ #include <linux/err.h> #include <linux/kernel.h> #include <linux/inetdevice.h> -#include <linux/if_tunnel.h> #include <net/dst.h> #include <net/xfrm.h> #include <net/ip.h> @@ -69,17 +68,6 @@ static int xfrm4_get_saddr(struct net *net, int oif, return 0; } -static int xfrm4_get_tos(const struct flowi *fl) -{ - return IPTOS_RT_MASK & fl->u.ip4.flowi4_tos; /* Strip ECN bits */ -} - -static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst, - int nfheader_len) -{ - return 0; -} - static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, const struct flowi *fl) { @@ -97,8 +85,11 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL); xdst->u.rt.rt_type = rt->rt_type; - xdst->u.rt.rt_gateway = rt->rt_gateway; - xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway; + xdst->u.rt.rt_gw_family = rt->rt_gw_family; + if (rt->rt_gw_family == AF_INET) + xdst->u.rt.rt_gw4 = rt->rt_gw4; + else if (rt->rt_gw_family == AF_INET6) + xdst->u.rt.rt_gw6 = rt->rt_gw6; xdst->u.rt.rt_pmtu = rt->rt_pmtu; xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked; INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); @@ -107,120 +98,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, return 0; } -static void -_decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) -{ - const struct iphdr *iph = ip_hdr(skb); - int ihl = iph->ihl; - u8 *xprth = skb_network_header(skb) + ihl * 4; - struct flowi4 *fl4 = &fl->u.ip4; - int oif = 0; - - if (skb_dst(skb)) - oif = skb_dst(skb)->dev->ifindex; - - memset(fl4, 0, sizeof(struct flowi4)); - fl4->flowi4_mark = skb->mark; - fl4->flowi4_oif = reverse ? skb->skb_iif : oif; - - fl4->flowi4_proto = iph->protocol; - fl4->daddr = reverse ? iph->saddr : iph->daddr; - fl4->saddr = reverse ? 
iph->daddr : iph->saddr; - fl4->flowi4_tos = iph->tos; - - if (!ip_is_fragment(iph)) { - switch (iph->protocol) { - case IPPROTO_UDP: - case IPPROTO_UDPLITE: - case IPPROTO_TCP: - case IPPROTO_SCTP: - case IPPROTO_DCCP: - if (xprth + 4 < skb->data || - pskb_may_pull(skb, xprth + 4 - skb->data)) { - __be16 *ports; - - xprth = skb_network_header(skb) + ihl * 4; - ports = (__be16 *)xprth; - - fl4->fl4_sport = ports[!!reverse]; - fl4->fl4_dport = ports[!reverse]; - } - break; - - case IPPROTO_ICMP: - if (xprth + 2 < skb->data || - pskb_may_pull(skb, xprth + 2 - skb->data)) { - u8 *icmp; - - xprth = skb_network_header(skb) + ihl * 4; - icmp = xprth; - - fl4->fl4_icmp_type = icmp[0]; - fl4->fl4_icmp_code = icmp[1]; - } - break; - - case IPPROTO_ESP: - if (xprth + 4 < skb->data || - pskb_may_pull(skb, xprth + 4 - skb->data)) { - __be32 *ehdr; - - xprth = skb_network_header(skb) + ihl * 4; - ehdr = (__be32 *)xprth; - - fl4->fl4_ipsec_spi = ehdr[0]; - } - break; - - case IPPROTO_AH: - if (xprth + 8 < skb->data || - pskb_may_pull(skb, xprth + 8 - skb->data)) { - __be32 *ah_hdr; - - xprth = skb_network_header(skb) + ihl * 4; - ah_hdr = (__be32 *)xprth; - - fl4->fl4_ipsec_spi = ah_hdr[1]; - } - break; - - case IPPROTO_COMP: - if (xprth + 4 < skb->data || - pskb_may_pull(skb, xprth + 4 - skb->data)) { - __be16 *ipcomp_hdr; - - xprth = skb_network_header(skb) + ihl * 4; - ipcomp_hdr = (__be16 *)xprth; - - fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); - } - break; - - case IPPROTO_GRE: - if (xprth + 12 < skb->data || - pskb_may_pull(skb, xprth + 12 - skb->data)) { - __be16 *greflags; - __be32 *gre_hdr; - - xprth = skb_network_header(skb) + ihl * 4; - greflags = (__be16 *)xprth; - gre_hdr = (__be32 *)xprth; - - if (greflags[0] & GRE_KEY) { - if (greflags[0] & GRE_CSUM) - gre_hdr++; - fl4->fl4_gre_key = gre_hdr[1]; - } - } - break; - - default: - fl4->fl4_ipsec_spi = 0; - break; - } - } -} - static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu) { @@ -273,9 +150,6 @@ static const struct xfrm_policy_afinfo xfrm4_policy_afinfo = { .dst_ops = &xfrm4_dst_ops_template, .dst_lookup = xfrm4_dst_lookup, .get_saddr = xfrm4_get_saddr, - .decode_session = _decode_session4, - .get_tos = xfrm4_get_tos, - .init_path = xfrm4_init_path, .fill_dst = xfrm4_fill_dst, .blackhole_route = ipv4_blackhole_route, }; diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c index 35c54865dc42..bcab48944c15 100644 --- a/net/ipv4/xfrm4_protocol.c +++ b/net/ipv4/xfrm4_protocol.c @@ -46,7 +46,7 @@ static inline struct xfrm4_protocol __rcu **proto_handlers(u8 protocol) handler != NULL; \ handler = rcu_dereference(handler->next)) \ -int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err) +static int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err) { int ret; struct xfrm4_protocol *handler; @@ -61,7 +61,6 @@ int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err) return 0; } -EXPORT_SYMBOL(xfrm4_rcv_cb); int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 613282c65a10..cd915e332c98 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -135,44 +135,11 @@ config INET6_TUNNEL tristate default n -config INET6_XFRM_MODE_TRANSPORT - tristate "IPv6: IPsec transport mode" - default IPV6 - select XFRM - ---help--- - Support for IPsec transport mode. - - If unsure, say Y. 
- -config INET6_XFRM_MODE_TUNNEL - tristate "IPv6: IPsec tunnel mode" - default IPV6 - select XFRM - ---help--- - Support for IPsec tunnel mode. - - If unsure, say Y. - -config INET6_XFRM_MODE_BEET - tristate "IPv6: IPsec BEET mode" - default IPV6 - select XFRM - ---help--- - Support for IPsec BEET mode. - - If unsure, say Y. - -config INET6_XFRM_MODE_ROUTEOPTIMIZATION - tristate "IPv6: MIPv6 route optimization mode" - select XFRM - ---help--- - Support for MIPv6 route optimization mode. - config IPV6_VTI tristate "Virtual (secure) IPv6: tunneling" select IPV6_TUNNEL select NET_IP_TUNNEL - depends on INET6_XFRM_MODE_TUNNEL + select XFRM ---help--- Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index e0026fa1261b..8ccf35514015 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -35,10 +35,6 @@ obj-$(CONFIG_INET6_ESP_OFFLOAD) += esp6_offload.o obj-$(CONFIG_INET6_IPCOMP) += ipcomp6.o obj-$(CONFIG_INET6_XFRM_TUNNEL) += xfrm6_tunnel.o obj-$(CONFIG_INET6_TUNNEL) += tunnel6.o -obj-$(CONFIG_INET6_XFRM_MODE_TRANSPORT) += xfrm6_mode_transport.o -obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o -obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o -obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o obj-$(CONFIG_IPV6_MIP6) += mip6.o obj-$(CONFIG_IPV6_ILA) += ila/ obj-$(CONFIG_NETFILTER) += netfilter/ diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4ae17a966ae3..f96d1de79509 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -173,7 +173,8 @@ static int addrconf_ifdown(struct net_device *dev, int how); static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, int plen, const struct net_device *dev, - u32 flags, u32 noflags); + u32 flags, u32 noflags, + bool no_gw); static void addrconf_dad_start(struct inet6_ifaddr *ifp); static void addrconf_dad_work(struct work_struct *w); @@ -610,11 +611,13 @@ static int inet6_netconf_valid_get_req(struct sk_buff *skb, } if (!netlink_strict_get_check(skb)) - return nlmsg_parse(nlh, sizeof(struct netconfmsg), tb, - NETCONFA_MAX, devconf_ipv6_policy, extack); + return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg), + tb, NETCONFA_MAX, + devconf_ipv6_policy, extack); - err = nlmsg_parse_strict(nlh, sizeof(struct netconfmsg), tb, - NETCONFA_MAX, devconf_ipv6_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg), + tb, NETCONFA_MAX, + devconf_ipv6_policy, extack); if (err) return err; @@ -1230,10 +1233,8 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_r { struct fib6_info *f6i; - f6i = addrconf_get_prefix_route(&ifp->addr, - ifp->prefix_len, - ifp->idev->dev, - 0, RTF_GATEWAY | RTF_DEFAULT); + f6i = addrconf_get_prefix_route(&ifp->addr, ifp->prefix_len, + ifp->idev->dev, 0, RTF_DEFAULT, true); if (f6i) { if (del_rt) ip6_del_rt(dev_net(ifp->idev->dev), f6i); @@ -2402,7 +2403,8 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, u32 metric, static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, int plen, const struct net_device *dev, - u32 flags, u32 noflags) + u32 flags, u32 noflags, + bool no_gw) { struct fib6_node *fn; struct fib6_info *rt = NULL; @@ -2419,7 +2421,9 @@ static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, goto out; for_each_fib6_node_rt_rcu(fn) { - if (rt->fib6_nh.nh_dev->ifindex != dev->ifindex) + if 
(rt->fib6_nh.fib_nh_dev->ifindex != dev->ifindex) + continue; + if (no_gw && rt->fib6_nh.fib_nh_gw_family) continue; if ((rt->fib6_flags & flags) != flags) continue; @@ -2717,7 +2721,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) pinfo->prefix_len, dev, RTF_ADDRCONF | RTF_PREFIX_RT, - RTF_GATEWAY | RTF_DEFAULT); + RTF_DEFAULT, true); if (rt) { /* Autoconf prefix route */ @@ -4563,8 +4567,8 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, u32 ifa_flags; int err; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy, - extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, + ifa_ipv6_policy, extack); if (err < 0) return err; @@ -4588,10 +4592,8 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp, struct fib6_info *f6i; u32 prio; - f6i = addrconf_get_prefix_route(&ifp->addr, - ifp->prefix_len, - ifp->idev->dev, - 0, RTF_GATEWAY | RTF_DEFAULT); + f6i = addrconf_get_prefix_route(&ifp->addr, ifp->prefix_len, + ifp->idev->dev, 0, RTF_DEFAULT, true); if (!f6i) return -ENOENT; @@ -4729,8 +4731,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, struct ifa6_config cfg; int err; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy, - extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, + ifa_ipv6_policy, extack); if (err < 0) return err; @@ -5086,8 +5088,8 @@ static int inet6_valid_dump_ifaddr_req(const struct nlmsghdr *nlh, fillargs->flags |= NLM_F_DUMP_FILTERED; } - err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFA_MAX, - ifa_ipv6_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX, + ifa_ipv6_policy, extack); if (err < 0) return err; @@ -5237,11 +5239,11 @@ static int inet6_rtm_valid_getaddr_req(struct sk_buff *skb, } if (!netlink_strict_get_check(skb)) - return nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, - ifa_ipv6_policy, extack); + return nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, + ifa_ipv6_policy, extack); - err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFA_MAX, - ifa_ipv6_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX, + ifa_ipv6_policy, extack); if (err) return err; @@ -5667,8 +5669,8 @@ static int inet6_validate_link_af(const struct net_device *dev, if (dev && !__in6_dev_get(dev)) return -EAFNOSUPPORT; - return nla_parse_nested(tb, IFLA_INET6_MAX, nla, inet6_af_policy, - NULL); + return nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, + inet6_af_policy, NULL); } static int check_addr_gen_mode(int mode) @@ -5700,7 +5702,7 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) if (!idev) return -EAFNOSUPPORT; - if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0) + if (nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0) BUG(); if (tb[IFLA_INET6_TOKEN]) { @@ -5752,7 +5754,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, nla_put_u8(skb, IFLA_OPERSTATE, netif_running(dev) ? 
dev->operstate : IF_OPER_DOWN)) goto nla_put_failure; - protoinfo = nla_nest_start(skb, IFLA_PROTINFO); + protoinfo = nla_nest_start_noflag(skb, IFLA_PROTINFO); if (!protoinfo) goto nla_put_failure; @@ -5972,7 +5974,8 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) struct fib6_info *rt; rt = addrconf_get_prefix_route(&ifp->peer_addr, 128, - ifp->idev->dev, 0, 0); + ifp->idev->dev, 0, 0, + false); if (rt) ip6_del_rt(net, rt); } diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 6c79af056d9b..763a947e0d14 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -5,7 +5,7 @@ #include <linux/export.h> #include <net/ipv6.h> -#include <net/addrconf.h> +#include <net/ipv6_stubs.h> #include <net/ip.h> /* if ipv6 module registers this function is used by xfrm to force all @@ -144,43 +144,53 @@ static struct fib6_table *eafnosupport_fib6_get_table(struct net *net, u32 id) return NULL; } -static struct fib6_info * +static int eafnosupport_fib6_table_lookup(struct net *net, struct fib6_table *table, - int oif, struct flowi6 *fl6, int flags) + int oif, struct flowi6 *fl6, + struct fib6_result *res, int flags) { - return NULL; + return -EAFNOSUPPORT; } -static struct fib6_info * +static int eafnosupport_fib6_lookup(struct net *net, int oif, struct flowi6 *fl6, - int flags) + struct fib6_result *res, int flags) { - return NULL; + return -EAFNOSUPPORT; } -static struct fib6_info * -eafnosupport_fib6_multipath_select(const struct net *net, struct fib6_info *f6i, - struct flowi6 *fl6, int oif, - const struct sk_buff *skb, int strict) +static void +eafnosupport_fib6_select_path(const struct net *net, struct fib6_result *res, + struct flowi6 *fl6, int oif, bool have_oif_match, + const struct sk_buff *skb, int strict) { - return f6i; } static u32 -eafnosupport_ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr, - struct in6_addr *saddr) +eafnosupport_ip6_mtu_from_fib6(const struct fib6_result *res, + const struct in6_addr *daddr, + const struct in6_addr *saddr) { return 0; } +static int eafnosupport_fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, + struct fib6_config *cfg, gfp_t gfp_flags, + struct netlink_ext_ack *extack) +{ + NL_SET_ERR_MSG(extack, "IPv6 support not enabled in kernel"); + return -EAFNOSUPPORT; +} + const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) { .ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup, .ipv6_route_input = eafnosupport_ipv6_route_input, .fib6_get_table = eafnosupport_fib6_get_table, .fib6_table_lookup = eafnosupport_fib6_table_lookup, .fib6_lookup = eafnosupport_fib6_lookup, - .fib6_multipath_select = eafnosupport_fib6_multipath_select, + .fib6_select_path = eafnosupport_fib6_select_path, .ip6_mtu_from_fib6 = eafnosupport_ip6_mtu_from_fib6, + .fib6_nh_init = eafnosupport_fib6_nh_init, }; EXPORT_SYMBOL_GPL(ipv6_stub); diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 1766325423b5..642fc6ac13d2 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -383,8 +383,8 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, u32 label; int err = 0; - err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy, - extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ifal), tb, IFAL_MAX, + ifal_policy, extack); if (err < 0) return err; @@ -537,8 +537,8 @@ static int ip6addrlbl_valid_get_req(struct sk_buff *skb, } if (!netlink_strict_get_check(skb)) - return nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, - ifal_policy, extack); + return 
nlmsg_parse_deprecated(nlh, sizeof(*ifal), tb, + IFAL_MAX, ifal_policy, extack); ifal = nlmsg_data(nlh); if (ifal->__ifal_reserved || ifal->ifal_flags || ifal->ifal_seq) { @@ -546,8 +546,8 @@ static int ip6addrlbl_valid_get_req(struct sk_buff *skb, return -EINVAL; } - err = nlmsg_parse_strict(nlh, sizeof(*ifal), tb, IFAL_MAX, - ifal_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifal), tb, IFAL_MAX, + ifal_policy, extack); if (err) return err; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 2f45d2a3e3a3..c04ae282f4e4 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -56,6 +56,7 @@ #include <net/transp_v6.h> #include <net/ip6_route.h> #include <net/addrconf.h> +#include <net/ipv6_stubs.h> #include <net/ndisc.h> #ifdef CONFIG_IPV6_TUNNEL #include <net/ip6_tunnel.h> @@ -546,12 +547,6 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct net *net = sock_net(sk); switch (cmd) { - case SIOCGSTAMP: - return sock_get_timestamp(sk, (struct timeval __user *)arg); - - case SIOCGSTAMPNS: - return sock_get_timestampns(sk, (struct timespec __user *)arg); - case SIOCADDRT: case SIOCDELRT: @@ -584,6 +579,7 @@ const struct proto_ops inet6_stream_ops = { .getname = inet6_getname, .poll = tcp_poll, /* ok */ .ioctl = inet6_ioctl, /* must change */ + .gettstamp = sock_gettstamp, .listen = inet_listen, /* ok */ .shutdown = inet_shutdown, /* ok */ .setsockopt = sock_common_setsockopt, /* ok */ @@ -617,6 +613,7 @@ const struct proto_ops inet6_dgram_ops = { .getname = inet6_getname, .poll = udp_poll, /* ok */ .ioctl = inet6_ioctl, /* must change */ + .gettstamp = sock_gettstamp, .listen = sock_no_listen, /* ok */ .shutdown = inet_shutdown, /* ok */ .setsockopt = sock_common_setsockopt, /* ok */ @@ -847,6 +844,17 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.bindv6only = 0; net->ipv6.sysctl.icmpv6_time = 1*HZ; net->ipv6.sysctl.icmpv6_echo_ignore_all = 0; + net->ipv6.sysctl.icmpv6_echo_ignore_multicast = 0; + net->ipv6.sysctl.icmpv6_echo_ignore_anycast = 0; + + /* By default, rate limit error messages. + * Except for pmtu discovery, it would break it. + * proc_do_large_bitmap needs pointer to the bitmap. 
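+ * A set bit in icmpv6_ratemask marks that ICMPv6 type as rate limited; + * ICMPV6_PKT_TOOBIG is cleared below so Packet Too Big replies, and with + * them PMTU discovery, are never throttled. As a sketch (assuming the + * uapi values ICMPV6_PKT_TOOBIG == 2 and ICMPV6_ERRMSG_MAX == 127), the + * resulting default would read back from the proc_do_large_bitmap sysctl + * entry added in icmp.c below as the range string "0-1,3-127".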
+ */ + bitmap_set(net->ipv6.sysctl.icmpv6_ratemask, 0, ICMPV6_ERRMSG_MAX + 1); + bitmap_clear(net->ipv6.sysctl.icmpv6_ratemask, ICMPV6_PKT_TOOBIG, 1); + net->ipv6.sysctl.icmpv6_ratemask_ptr = net->ipv6.sysctl.icmpv6_ratemask; + net->ipv6.sysctl.flowlabel_consistency = 1; net->ipv6.sysctl.auto_flowlabels = IP6_DEFAULT_AUTO_FLOW_LABELS; net->ipv6.sysctl.idgen_retries = 3; @@ -914,8 +922,10 @@ static const struct ipv6_stub ipv6_stub_impl = { .fib6_get_table = fib6_get_table, .fib6_table_lookup = fib6_table_lookup, .fib6_lookup = fib6_lookup, - .fib6_multipath_select = fib6_multipath_select, + .fib6_select_path = fib6_select_path, .ip6_mtu_from_fib6 = ip6_mtu_from_fib6, + .fib6_nh_init = fib6_nh_init, + .fib6_nh_release = fib6_nh_release, .udpv6_encap_enable = udpv6_encap_enable, .ndisc_send_na = ndisc_send_na, .nd_tbl = &nd_tbl, diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index cb99f6fb79b7..d453cf417b03 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -136,6 +136,44 @@ static void esp6_gso_encap(struct xfrm_state *x, struct sk_buff *skb) xo->proto = proto; } +static struct sk_buff *xfrm6_tunnel_gso_segment(struct xfrm_state *x, + struct sk_buff *skb, + netdev_features_t features) +{ + __skb_push(skb, skb->mac_len); + return skb_mac_gso_segment(skb, features); +} + +static struct sk_buff *xfrm6_transport_gso_segment(struct xfrm_state *x, + struct sk_buff *skb, + netdev_features_t features) +{ + const struct net_offload *ops; + struct sk_buff *segs = ERR_PTR(-EINVAL); + struct xfrm_offload *xo = xfrm_offload(skb); + + skb->transport_header += x->props.header_len; + ops = rcu_dereference(inet6_offloads[xo->proto]); + if (likely(ops && ops->callbacks.gso_segment)) + segs = ops->callbacks.gso_segment(skb, features); + + return segs; +} + +static struct sk_buff *xfrm6_outer_mode_gso_segment(struct xfrm_state *x, + struct sk_buff *skb, + netdev_features_t features) +{ + switch (x->outer_mode.encap) { + case XFRM_MODE_TUNNEL: + return xfrm6_tunnel_gso_segment(x, skb, features); + case XFRM_MODE_TRANSPORT: + return xfrm6_transport_gso_segment(x, skb, features); + } + + return ERR_PTR(-EOPNOTSUPP); +} + static struct sk_buff *esp6_gso_segment(struct sk_buff *skb, netdev_features_t features) { @@ -174,7 +212,7 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb, xo->flags |= XFRM_GSO_SEGMENT; - return x->outer_mode->gso_segment(x, skb, esp_features); + return xfrm6_outer_mode_gso_segment(x, skb, esp_features); } static int esp6_input_tail(struct xfrm_state *x, struct sk_buff *skb) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index f590446595d8..06d1b7763600 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -61,16 +61,16 @@ unsigned int fib6_rules_seq_read(struct net *net) } /* called with rcu lock held; no reference taken on fib6_info */ -struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6, - int flags) +int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6, + struct fib6_result *res, int flags) { - struct fib6_info *f6i; int err; if (net->ipv6.fib6_has_custom_rules) { struct fib_lookup_arg arg = { .lookup_ptr = fib6_table_lookup, .lookup_data = &oif, + .result = res, .flags = FIB_LOOKUP_NOREF, }; @@ -78,19 +78,15 @@ struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6, err = fib_rules_lookup(net->ipv6.fib6_rules_ops, flowi6_to_flowi(fl6), flags, &arg); - if (err) - return ERR_PTR(err); - - f6i = arg.result ? 
: net->ipv6.fib6_null_entry; } else { - f6i = fib6_table_lookup(net, net->ipv6.fib6_local_tbl, - oif, fl6, flags); - if (!f6i || f6i == net->ipv6.fib6_null_entry) - f6i = fib6_table_lookup(net, net->ipv6.fib6_main_tbl, - oif, fl6, flags); + err = fib6_table_lookup(net, net->ipv6.fib6_local_tbl, oif, + fl6, res, flags); + if (err || res->f6i == net->ipv6.fib6_null_entry) + err = fib6_table_lookup(net, net->ipv6.fib6_main_tbl, + oif, fl6, res, flags); } - return f6i; + return err; } struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, @@ -98,9 +94,11 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, int flags, pol_lookup_t lookup) { if (net->ipv6.fib6_has_custom_rules) { + struct fib6_result res = {}; struct fib_lookup_arg arg = { .lookup_ptr = lookup, .lookup_data = skb, + .result = &res, .flags = FIB_LOOKUP_NOREF, }; @@ -110,8 +108,8 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, fib_rules_lookup(net->ipv6.fib6_rules_ops, flowi6_to_flowi(fl6), flags, &arg); - if (arg.result) - return arg.result; + if (res.rt6) + return &res.rt6->dst; } else { struct rt6_info *rt; @@ -157,11 +155,11 @@ static int fib6_rule_saddr(struct net *net, struct fib_rule *rule, int flags, static int fib6_rule_action_alt(struct fib_rule *rule, struct flowi *flp, int flags, struct fib_lookup_arg *arg) { + struct fib6_result *res = arg->result; struct flowi6 *flp6 = &flp->u.ip6; struct net *net = rule->fr_net; struct fib6_table *table; - struct fib6_info *f6i; - int err = -EAGAIN, *oif; + int err, *oif; u32 tb_id; switch (rule->action) { @@ -182,14 +180,12 @@ static int fib6_rule_action_alt(struct fib_rule *rule, struct flowi *flp, return -EAGAIN; oif = (int *)arg->lookup_data; - f6i = fib6_table_lookup(net, table, *oif, flp6, flags); - if (f6i != net->ipv6.fib6_null_entry) { + err = fib6_table_lookup(net, table, *oif, flp6, res, flags); + if (!err && res->f6i != net->ipv6.fib6_null_entry) err = fib6_rule_saddr(net, rule, flags, flp6, - fib6_info_nh_dev(f6i)); - - if (likely(!err)) - arg->result = f6i; - } + res->nh->fib_nh_dev); + else + err = -EAGAIN; return err; } @@ -197,6 +193,7 @@ static int fib6_rule_action_alt(struct fib_rule *rule, struct flowi *flp, static int __fib6_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, struct fib_lookup_arg *arg) { + struct fib6_result *res = arg->result; struct flowi6 *flp6 = &flp->u.ip6; struct rt6_info *rt = NULL; struct fib6_table *table; @@ -251,7 +248,7 @@ again: discard_pkt: dst_hold(&rt->dst); out: - arg->result = rt; + res->rt6 = rt; return err; } @@ -266,9 +263,13 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg) { - struct rt6_info *rt = (struct rt6_info *) arg->result; + struct fib6_result *res = arg->result; + struct rt6_info *rt = res->rt6; struct net_device *dev = NULL; + if (!rt) + return false; + if (rt->rt6i_idev) dev = rt->rt6i_idev->dev; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 802faa2fcc0e..afb915807cd0 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -168,22 +168,21 @@ static bool is_ineligible(const struct sk_buff *skb) return false; } -static bool icmpv6_mask_allow(int type) +static bool icmpv6_mask_allow(struct net *net, int type) { - /* Informational messages are not limited. */ - if (type & ICMPV6_INFOMSG_MASK) + if (type > ICMPV6_MSG_MAX) return true; - /* Do not limit pmtu discovery, it would break it. 
*/ - if (type == ICMPV6_PKT_TOOBIG) + /* Limit if icmp type is set in ratemask. */ + if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask)) return true; return false; } -static bool icmpv6_global_allow(int type) +static bool icmpv6_global_allow(struct net *net, int type) { - if (icmpv6_mask_allow(type)) + if (icmpv6_mask_allow(net, type)) return true; if (icmp_global_allow()) @@ -202,7 +201,7 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, struct dst_entry *dst; bool res = false; - if (icmpv6_mask_allow(type)) + if (icmpv6_mask_allow(net, type)) return true; /* @@ -511,7 +510,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, local_bh_disable(); /* Check global sysctl_icmp_msgs_per_sec ratelimit */ - if (!(skb->dev->flags&IFF_LOOPBACK) && !icmpv6_global_allow(type)) + if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type)) goto out_bh_enable; mip6_addr_swap(skb); @@ -683,12 +682,20 @@ static void icmpv6_echo_reply(struct sk_buff *skb) struct dst_entry *dst; struct ipcm6_cookie ipc6; u32 mark = IP6_REPLY_MARK(net, skb->mark); + bool acast; + + if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) && + net->ipv6.sysctl.icmpv6_echo_ignore_multicast) + return; saddr = &ipv6_hdr(skb)->daddr; + acast = ipv6_anycast_destination(skb_dst(skb), saddr); + if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast) + return; + if (!ipv6_unicast_destination(skb) && - !(net->ipv6.sysctl.anycast_src_echo_reply && - ipv6_anycast_destination(skb_dst(skb), saddr))) + !(net->ipv6.sysctl.anycast_src_echo_reply && acast)) saddr = NULL; memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr)); @@ -723,6 +730,11 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (IS_ERR(dst)) goto out; + /* Check the ratelimit */ + if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) || + !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6)) + goto out_dst_release; + idev = __in6_dev_get(skb->dev); msg.skb = skb; @@ -743,6 +755,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, skb->len + sizeof(struct icmp6hdr)); } +out_dst_release: dst_release(dst); out: icmpv6_xmit_unlock(sk); @@ -1115,6 +1128,27 @@ static struct ctl_table ipv6_icmp_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "echo_ignore_multicast", + .data = &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "echo_ignore_anycast", + .data = &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "ratemask", + .data = &init_net.ipv6.sysctl.icmpv6_ratemask_ptr, + .maxlen = ICMPV6_MSG_MAX + 1, + .mode = 0644, + .proc_handler = proc_do_large_bitmap, + }, { }, }; @@ -1129,6 +1163,9 @@ struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net) if (table) { table[0].data = &net->ipv6.sysctl.icmpv6_time; table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all; + table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast; + table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast; + table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr; } return table; } diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index 3d56a2fb6f86..422dcc691f71 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -146,7 +146,8 @@ static int ila_build_state(struct nlattr *nla, if (family != AF_INET6) return 
-EINVAL; - ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, extack); + ret = nla_parse_nested_deprecated(tb, ILA_ATTR_MAX, nla, + ila_nl_policy, extack); if (ret < 0) return ret; diff --git a/net/ipv6/ila/ila_main.c b/net/ipv6/ila/ila_main.c index 18fac76b9520..257d2b681246 100644 --- a/net/ipv6/ila/ila_main.c +++ b/net/ipv6/ila/ila_main.c @@ -16,29 +16,29 @@ static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { static const struct genl_ops ila_nl_ops[] = { { .cmd = ILA_CMD_ADD, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ila_xlat_nl_cmd_add_mapping, - .policy = ila_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = ILA_CMD_DEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ila_xlat_nl_cmd_del_mapping, - .policy = ila_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = ILA_CMD_FLUSH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ila_xlat_nl_cmd_flush, - .policy = ila_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = ILA_CMD_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ila_xlat_nl_cmd_get_mapping, .start = ila_xlat_nl_dump_start, .dumpit = ila_xlat_nl_dump, .done = ila_xlat_nl_dump_done, - .policy = ila_nl_policy, }, }; @@ -49,6 +49,7 @@ struct genl_family ila_nl_family __ro_after_init = { .name = ILA_GENL_NAME, .version = ILA_GENL_VERSION, .maxattr = ILA_ATTR_MAX, + .policy = ila_nl_policy, .netnsok = true, .parallel_ops = true, .module = THIS_MODULE, diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 91247a6fc67f..08e0390e001c 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -162,7 +162,7 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags) } INIT_LIST_HEAD(&f6i->fib6_siblings); - atomic_inc(&f6i->fib6_ref); + refcount_set(&f6i->fib6_ref, 1); return f6i; } @@ -175,10 +175,7 @@ void fib6_info_destroy_rcu(struct rcu_head *head) WARN_ON(f6i->fib6_node); bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket, 1); - if (bucket) { - f6i->rt6i_exception_bucket = NULL; - kfree(bucket); - } + kfree(bucket); if (f6i->rt6i_pcpu) { int cpu; @@ -199,10 +196,7 @@ void fib6_info_destroy_rcu(struct rcu_head *head) free_percpu(f6i->rt6i_pcpu); } - lwtstate_put(f6i->fib6_nh.nh_lwtstate); - - if (f6i->fib6_nh.nh_dev) - dev_put(f6i->fib6_nh.nh_dev); + fib6_nh_release(&f6i->fib6_nh); ip_fib_metrics_put(f6i->fib6_metrics); @@ -357,10 +351,11 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, } /* called with rcu lock held; no reference taken on fib6_info */ -struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6, - int flags) +int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6, + struct fib6_result *res, int flags) { - return fib6_table_lookup(net, net->ipv6.fib6_main_tbl, oif, fl6, flags); + return fib6_table_lookup(net, net->ipv6.fib6_main_tbl, oif, fl6, + res, flags); } static void __net_init fib6_tables_init(struct net *net) @@ -851,8 +846,8 @@ insert_above: RCU_INIT_POINTER(in->parent, pn); in->leaf = fn->leaf; - atomic_inc(&rcu_dereference_protected(in->leaf, - lockdep_is_held(&table->tb6_lock))->fib6_ref); + fib6_info_hold(rcu_dereference_protected(in->leaf, + lockdep_is_held(&table->tb6_lock))); /* update parent pointer */ if (dir) @@ -932,7 +927,7 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn, { struct fib6_table *table = rt->fib6_table; - if (atomic_read(&rt->fib6_ref) != 1) { + if (refcount_read(&rt->fib6_ref) != 1) { /* This route is used as dummy 
address holder in some split * nodes. It is not leaked, but it still holds other resources, * which must be released in time. So, scan ascendant nodes @@ -945,7 +940,7 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn, struct fib6_info *new_leaf; if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) { new_leaf = fib6_find_prefix(net, table, fn); - atomic_inc(&new_leaf->fib6_ref); + fib6_info_hold(new_leaf); rcu_assign_pointer(fn->leaf, new_leaf); fib6_info_release(rt); @@ -1111,7 +1106,7 @@ add: return err; rcu_assign_pointer(rt->fib6_next, iter); - atomic_inc(&rt->fib6_ref); + fib6_info_hold(rt); rcu_assign_pointer(rt->fib6_node, fn); rcu_assign_pointer(*ins, rt); if (!info->skip_notify) @@ -1139,7 +1134,7 @@ add: if (err) return err; - atomic_inc(&rt->fib6_ref); + fib6_info_hold(rt); rcu_assign_pointer(rt->fib6_node, fn); rt->fib6_next = iter->fib6_next; rcu_assign_pointer(*ins, rt); @@ -1281,7 +1276,7 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, if (!sfn) goto failure; - atomic_inc(&info->nl_net->ipv6.fib6_null_entry->fib6_ref); + fib6_info_hold(info->nl_net->ipv6.fib6_null_entry); rcu_assign_pointer(sfn->leaf, info->nl_net->ipv6.fib6_null_entry); sfn->fn_flags = RTN_ROOT; @@ -1324,7 +1319,7 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, rcu_assign_pointer(fn->leaf, info->nl_net->ipv6.fib6_null_entry); } else { - atomic_inc(&rt->fib6_ref); + fib6_info_hold(rt); rcu_assign_pointer(fn->leaf, rt); } } @@ -2295,6 +2290,7 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v) { struct fib6_info *rt = v; struct ipv6_route_iter *iter = seq->private; + unsigned int flags = rt->fib6_flags; const struct net_device *dev; seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen); @@ -2304,15 +2300,17 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v) #else seq_puts(seq, "00000000000000000000000000000000 00 "); #endif - if (rt->fib6_flags & RTF_GATEWAY) - seq_printf(seq, "%pi6", &rt->fib6_nh.nh_gw); - else + if (rt->fib6_nh.fib_nh_gw_family) { + flags |= RTF_GATEWAY; + seq_printf(seq, "%pi6", &rt->fib6_nh.fib_nh_gw6); + } else { seq_puts(seq, "00000000000000000000000000000000"); + } - dev = rt->fib6_nh.nh_dev; + dev = rt->fib6_nh.fib_nh_dev; seq_printf(seq, " %08x %08x %08x %08x %8s\n", - rt->fib6_metric, atomic_read(&rt->fib6_ref), 0, - rt->fib6_flags, dev ? dev->name : ""); + rt->fib6_metric, refcount_read(&rt->fib6_ref), 0, + flags, dev ? 
dev->name : ""); iter->w.leaf = NULL; return 0; } diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index c7ed2b6d5a1d..b50b1af1f530 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -29,6 +29,7 @@ #include <linux/icmpv6.h> #include <linux/mroute6.h> #include <linux/slab.h> +#include <linux/indirect_call_wrapper.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> @@ -47,6 +48,8 @@ #include <net/inet_ecn.h> #include <net/dst_metadata.h> +INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *)); static void ip6_rcv_finish_core(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -57,7 +60,8 @@ static void ip6_rcv_finish_core(struct net *net, struct sock *sk, ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) - edemux(skb); + INDIRECT_CALL_2(edemux, tcp_v6_early_demux, + udp_v6_early_demux, skb); } if (!skb_valid_dst(skb)) ip6_route_input(skb); @@ -316,6 +320,9 @@ void ipv6_list_rcv(struct list_head *head, struct packet_type *pt, ip6_sublist_rcv(&sublist, curr_dev, curr_net); } +INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *)); + /* * Deliver the packet to the host */ @@ -391,7 +398,8 @@ resubmit_final: !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard; - ret = ipprot->handler(skb); + ret = INDIRECT_CALL_2(ipprot->handler, tcp_v6_rcv, udpv6_rcv, + skb); if (ret > 0) { if (ipprot->flags & INET6_PROTO_FINAL) { /* Not an extension header, most likely UDP diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index e51f3c648b09..adef2236abe2 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -117,7 +117,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); if (!IS_ERR(neigh)) { sock_confirm_neigh(skb, neigh); - ret = neigh_output(neigh, skb); + ret = neigh_output(neigh, skb, false); rcu_read_unlock_bh(); return ret; } diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 8b6eefff2f7e..218a0dedc8f4 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -342,7 +342,7 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err) struct net_device *dev; struct pcpu_sw_netstats *tstats; struct xfrm_state *x; - struct xfrm_mode *inner_mode; + const struct xfrm_mode *inner_mode; struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6; u32 orig_mark = skb->mark; int ret; @@ -361,7 +361,7 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err) x = xfrm_input_state(skb); - inner_mode = x->inner_mode; + inner_mode = &x->inner_mode; if (x->sel.family == AF_UNSPEC) { inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); @@ -372,7 +372,7 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err) } } - family = inner_mode->afinfo->family; + family = inner_mode->family; skb->mark = be32_to_cpu(t->parms.i_key); ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index e4dd57976737..4e69847ed5be 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -355,7 +355,6 @@ static const struct rhashtable_params ip6mr_rht_params = { .key_offset = offsetof(struct mfc6_cache, cmparg), .key_len = sizeof(struct mfc6_cache_cmp_arg), .nelem_hint = 3, - .locks_mul = 1, .obj_cmpfn = ip6mr_hash_cmp, .automatic_shrinking = true, }; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c 
index 659ecf4e4b3c..4c8e2ea8bf19 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -77,6 +77,8 @@ static u32 ndisc_hash(const void *pkey, const struct net_device *dev, __u32 *hash_rnd); static bool ndisc_key_eq(const struct neighbour *neigh, const void *pkey); +static bool ndisc_allow_add(const struct net_device *dev, + struct netlink_ext_ack *extack); static int ndisc_constructor(struct neighbour *neigh); static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb); static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb); @@ -117,6 +119,7 @@ struct neigh_table nd_tbl = { .pconstructor = pndisc_constructor, .pdestructor = pndisc_destructor, .proxy_redo = pndisc_redo, + .allow_add = ndisc_allow_add, .id = "ndisc_cache", .parms = { .tbl = &nd_tbl, @@ -392,6 +395,20 @@ static void pndisc_destructor(struct pneigh_entry *n) ipv6_dev_mc_dec(dev, &maddr); } +/* called with rtnl held */ +static bool ndisc_allow_add(const struct net_device *dev, + struct netlink_ext_ack *extack) +{ + struct inet6_dev *idev = __in6_dev_get(dev); + + if (!idev || idev->cnf.disable_ipv6) { + NL_SET_ERR_MSG(extack, "IPv6 is disabled on this device"); + return false; + } + + return true; +} + static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, int len) { @@ -1276,8 +1293,8 @@ static void ndisc_router_discovery(struct sk_buff *skb) rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev); if (rt) { - neigh = ip6_neigh_lookup(&rt->fib6_nh.nh_gw, - rt->fib6_nh.nh_dev, NULL, + neigh = ip6_neigh_lookup(&rt->fib6_nh.fib_nh_gw6, + rt->fib6_nh.fib_nh_dev, NULL, &ipv6_hdr(skb)->saddr); if (!neigh) { ND_PRINTK(0, err, @@ -1306,8 +1323,8 @@ static void ndisc_router_discovery(struct sk_buff *skb) return; } - neigh = ip6_neigh_lookup(&rt->fib6_nh.nh_gw, - rt->fib6_nh.nh_dev, NULL, + neigh = ip6_neigh_lookup(&rt->fib6_nh.fib_nh_gw6, + rt->fib6_nh.fib_nh_dev, NULL, &ipv6_hdr(skb)->saddr); if (!neigh) { ND_PRINTK(0, err, diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index ddc99a1653aa..086fc669279e 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -23,14 +23,6 @@ config NF_TABLES_IPV6 if NF_TABLES_IPV6 -config NFT_CHAIN_ROUTE_IPV6 - tristate "IPv6 nf_tables route chain support" - help - This option enables the "route" chain for IPv6 in nf_tables. This - chain type is used to force packet re-routing after mangling header - fields such as the source, destination, flowlabel, hop-limit and - the packet mark. - config NFT_REJECT_IPV6 select NF_REJECT_IPV6 default NFT_REJECT @@ -278,15 +270,10 @@ if IP6_NF_NAT config IP6_NF_TARGET_MASQUERADE tristate "MASQUERADE target support" - select NF_NAT_MASQUERADE + select NETFILTER_XT_TARGET_MASQUERADE help - Masquerading is a special case of NAT: all outgoing connections are - changed to seem to come from a particular interface's address, and - if the interface goes down, those connections are lost. This is - only useful for dialup accounts with dynamic IP address (ie. your IP - address will be different on next dialup). - - To compile it as a module, choose M here. If unsure, say N. + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects NETFILTER_XT_TARGET_MASQUERADE. 
config IP6_NF_TARGET_NPT tristate "NPT (Network Prefix translation) target support" diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 3853c648ebaa..731a74c60dca 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -27,7 +27,6 @@ obj-$(CONFIG_NF_REJECT_IPV6) += nf_reject_ipv6.o obj-$(CONFIG_NF_DUP_IPV6) += nf_dup_ipv6.o # nf_tables -obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o @@ -47,7 +46,6 @@ obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o obj-$(CONFIG_IP6_NF_MATCH_SRH) += ip6t_srh.o # targets -obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o obj-$(CONFIG_IP6_NF_TARGET_NPT) += ip6t_NPT.o obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o obj-$(CONFIG_IP6_NF_TARGET_SYNPROXY) += ip6t_SYNPROXY.o diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c deleted file mode 100644 index 29c7f1915a96..000000000000 --- a/net/ipv6/netfilter/ip6t_MASQUERADE.c +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2011 Patrick McHardy <kaber@trash.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Based on Rusty Russell's IPv6 MASQUERADE target. Development of IPv6 - * NAT funded by Astaro. - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/netdevice.h> -#include <linux/ipv6.h> -#include <linux/netfilter.h> -#include <linux/netfilter_ipv6.h> -#include <linux/netfilter/x_tables.h> -#include <net/netfilter/nf_nat.h> -#include <net/addrconf.h> -#include <net/ipv6.h> -#include <net/netfilter/ipv6/nf_nat_masquerade.h> - -static unsigned int -masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par) -{ - return nf_nat_masquerade_ipv6(skb, par->targinfo, xt_out(par)); -} - -static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par) -{ - const struct nf_nat_range2 *range = par->targinfo; - - if (range->flags & NF_NAT_RANGE_MAP_IPS) - return -EINVAL; - return nf_ct_netns_get(par->net, par->family); -} - -static void masquerade_tg6_destroy(const struct xt_tgdtor_param *par) -{ - nf_ct_netns_put(par->net, par->family); -} - -static struct xt_target masquerade_tg6_reg __read_mostly = { - .name = "MASQUERADE", - .family = NFPROTO_IPV6, - .checkentry = masquerade_tg6_checkentry, - .destroy = masquerade_tg6_destroy, - .target = masquerade_tg6, - .targetsize = sizeof(struct nf_nat_range), - .table = "nat", - .hooks = 1 << NF_INET_POST_ROUTING, - .me = THIS_MODULE, -}; - -static int __init masquerade_tg6_init(void) -{ - int err; - - err = xt_register_target(&masquerade_tg6_reg); - if (err) - return err; - - err = nf_nat_masquerade_ipv6_register_notifier(); - if (err) - xt_unregister_target(&masquerade_tg6_reg); - - return err; -} -static void __exit masquerade_tg6_exit(void) -{ - nf_nat_masquerade_ipv6_unregister_notifier(); - xt_unregister_target(&masquerade_tg6_reg); -} - -module_init(masquerade_tg6_init); -module_exit(masquerade_tg6_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); -MODULE_DESCRIPTION("Xtables: automatic address SNAT"); diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c deleted file mode 100644 index da3f1f8cb325..000000000000 --- 
a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> - * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Development of this code funded by Astaro AG (http://www.astaro.com/) - */ - -#include <linux/module.h> -#include <linux/init.h> -#include <linux/list.h> -#include <linux/skbuff.h> -#include <linux/netlink.h> -#include <linux/netfilter.h> -#include <linux/netfilter_ipv6.h> -#include <linux/netfilter/nfnetlink.h> -#include <linux/netfilter/nf_tables.h> -#include <net/netfilter/nf_tables.h> -#include <net/netfilter/nf_tables_ipv6.h> -#include <net/route.h> - -static unsigned int nf_route_table_hook(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - unsigned int ret; - struct nft_pktinfo pkt; - struct in6_addr saddr, daddr; - u_int8_t hop_limit; - u32 mark, flowlabel; - int err; - - nft_set_pktinfo(&pkt, skb, state); - nft_set_pktinfo_ipv6(&pkt, skb); - - /* save source/dest address, mark, hoplimit, flowlabel, priority */ - memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); - memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr)); - mark = skb->mark; - hop_limit = ipv6_hdr(skb)->hop_limit; - - /* flowlabel and prio (includes version, which shouldn't change either */ - flowlabel = *((u32 *)ipv6_hdr(skb)); - - ret = nft_do_chain(&pkt, priv); - if (ret != NF_DROP && ret != NF_STOLEN && - (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || - memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || - skb->mark != mark || - ipv6_hdr(skb)->hop_limit != hop_limit || - flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) { - err = ip6_route_me_harder(state->net, skb); - if (err < 0) - ret = NF_DROP_ERR(err); - } - - return ret; -} - -static const struct nft_chain_type nft_chain_route_ipv6 = { - .name = "route", - .type = NFT_CHAIN_T_ROUTE, - .family = NFPROTO_IPV6, - .owner = THIS_MODULE, - .hook_mask = (1 << NF_INET_LOCAL_OUT), - .hooks = { - [NF_INET_LOCAL_OUT] = nf_route_table_hook, - }, -}; - -static int __init nft_chain_route_init(void) -{ - nft_register_chain_type(&nft_chain_route_ipv6); - - return 0; -} - -static void __exit nft_chain_route_exit(void) -{ - nft_unregister_chain_type(&nft_chain_route_ipv6); -} - -module_init(nft_chain_route_init); -module_exit(nft_chain_route_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); -MODULE_ALIAS_NFT_CHAIN(AF_INET6, "route"); diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 4fe7c90962dd..868ae23dbae1 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -10,15 +10,25 @@ #include <net/secure_seq.h> #include <linux/netfilter.h> -static u32 __ipv6_select_ident(struct net *net, u32 hashrnd, +static u32 __ipv6_select_ident(struct net *net, const struct in6_addr *dst, const struct in6_addr *src) { + const struct { + struct in6_addr dst; + struct in6_addr src; + } __aligned(SIPHASH_ALIGNMENT) combined = { + .dst = *dst, + .src = *src, + }; u32 hash, id; - hash = __ipv6_addr_jhash(dst, hashrnd); - hash = __ipv6_addr_jhash(src, hash); - hash ^= net_hash_mix(net); + /* Note the following code is not safe, but this is okay. 
*/ + if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key))) + get_random_bytes(&net->ipv4.ip_id_key, + sizeof(net->ipv4.ip_id_key)); + + hash = siphash(&combined, sizeof(combined), &net->ipv4.ip_id_key); /* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve, * set the hight order instead thus minimizing possible future @@ -41,7 +51,6 @@ static u32 __ipv6_select_ident(struct net *net, u32 hashrnd, */ __be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb) { - static u32 ip6_proxy_idents_hashrnd __read_mostly; struct in6_addr buf[2]; struct in6_addr *addrs; u32 id; @@ -53,11 +62,7 @@ __be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb) if (!addrs) return 0; - net_get_random_once(&ip6_proxy_idents_hashrnd, - sizeof(ip6_proxy_idents_hashrnd)); - - id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd, - &addrs[1], &addrs[0]); + id = __ipv6_select_ident(net, &addrs[1], &addrs[0]); return htonl(id); } EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); @@ -66,12 +71,9 @@ __be32 ipv6_select_ident(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr) { - static u32 ip6_idents_hashrnd __read_mostly; u32 id; - net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); - - id = __ipv6_select_ident(net, ip6_idents_hashrnd, daddr, saddr); + id = __ipv6_select_ident(net, daddr, saddr); return htonl(id); } EXPORT_SYMBOL(ipv6_select_ident); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 5a426226c762..84dbe21b71e5 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1356,6 +1356,7 @@ const struct proto_ops inet6_sockraw_ops = { .getname = inet6_getname, .poll = datagram_poll, /* ok */ .ioctl = inet6_ioctl, /* must change */ + .gettstamp = sock_gettstamp, .listen = sock_no_listen, /* ok */ .shutdown = inet_shutdown, /* ok */ .setsockopt = sock_common_setsockopt, /* ok */ diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0520aca3354b..23a20d62daac 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -59,7 +59,7 @@ #include <net/xfrm.h> #include <net/netevent.h> #include <net/netlink.h> -#include <net/nexthop.h> +#include <net/rtnh.h> #include <net/lwtunnel.h> #include <net/ip_tunnels.h> #include <net/l3mdev.h> @@ -102,14 +102,15 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); -static int rt6_score_route(struct fib6_info *rt, int oif, int strict); +static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif, + int strict); static size_t rt6_nlmsg_size(struct fib6_info *rt); static int rt6_fill_node(struct net *net, struct sk_buff *skb, struct fib6_info *rt, struct dst_entry *dst, struct in6_addr *dest, struct in6_addr *src, int iif, int type, u32 portid, u32 seq, unsigned int flags); -static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt, +static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res, struct in6_addr *daddr, struct in6_addr *saddr); @@ -295,7 +296,7 @@ static const struct fib6_info fib6_null_entry_template = { .fib6_flags = (RTF_REJECT | RTF_NONEXTHOP), .fib6_protocol = RTPROT_KERNEL, .fib6_metric = ~(u32)0, - .fib6_ref = ATOMIC_INIT(1), + .fib6_ref = REFCOUNT_INIT(1), .fib6_type = RTN_UNREACHABLE, .fib6_metrics = (struct dst_metrics *)&dst_default_metrics, }; @@ -424,13 +425,15 @@ static bool rt6_check_expired(const struct rt6_info *rt) return false; } -struct fib6_info *fib6_multipath_select(const struct net *net, - struct 
fib6_info *match, - struct flowi6 *fl6, int oif, - const struct sk_buff *skb, - int strict) +void fib6_select_path(const struct net *net, struct fib6_result *res, + struct flowi6 *fl6, int oif, bool have_oif_match, + const struct sk_buff *skb, int strict) { struct fib6_info *sibling, *next_sibling; + struct fib6_info *match = res->f6i; + + if (!match->fib6_nsiblings || have_oif_match) + goto out; /* We might have already computed the hash for ICMPv6 errors. In such * case it will always be non-zero. Otherwise now is the time to do it. @@ -438,61 +441,89 @@ struct fib6_info *fib6_multipath_select(const struct net *net, if (!fl6->mp_hash) fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL); - if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound)) - return match; + if (fl6->mp_hash <= atomic_read(&match->fib6_nh.fib_nh_upper_bound)) + goto out; list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings, fib6_siblings) { + const struct fib6_nh *nh = &sibling->fib6_nh; int nh_upper_bound; - nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound); + nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound); if (fl6->mp_hash > nh_upper_bound) continue; - if (rt6_score_route(sibling, oif, strict) < 0) + if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0) break; match = sibling; break; } - return match; +out: + res->f6i = match; + res->nh = &match->fib6_nh; } /* * Route lookup. rcu_read_lock() should be held. */ -static inline struct fib6_info *rt6_device_match(struct net *net, - struct fib6_info *rt, - const struct in6_addr *saddr, - int oif, - int flags) +static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh, + const struct in6_addr *saddr, int oif, int flags) { - struct fib6_info *sprt; + const struct net_device *dev; - if (!oif && ipv6_addr_any(saddr) && - !(rt->fib6_nh.nh_flags & RTNH_F_DEAD)) - return rt; + if (nh->fib_nh_flags & RTNH_F_DEAD) + return false; - for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) { - const struct net_device *dev = sprt->fib6_nh.nh_dev; + dev = nh->fib_nh_dev; + if (oif) { + if (dev->ifindex == oif) + return true; + } else { + if (ipv6_chk_addr(net, saddr, dev, + flags & RT6_LOOKUP_F_IFACE)) + return true; + } - if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD) - continue; + return false; +} - if (oif) { - if (dev->ifindex == oif) - return sprt; - } else { - if (ipv6_chk_addr(net, saddr, dev, - flags & RT6_LOOKUP_F_IFACE)) - return sprt; +static void rt6_device_match(struct net *net, struct fib6_result *res, + const struct in6_addr *saddr, int oif, int flags) +{ + struct fib6_info *f6i = res->f6i; + struct fib6_info *spf6i; + struct fib6_nh *nh; + + if (!oif && ipv6_addr_any(saddr)) { + nh = &f6i->fib6_nh; + if (!(nh->fib_nh_flags & RTNH_F_DEAD)) + goto out; + } + + for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) { + nh = &spf6i->fib6_nh; + if (__rt6_device_match(net, nh, saddr, oif, flags)) { + res->f6i = spf6i; + goto out; } } - if (oif && flags & RT6_LOOKUP_F_IFACE) - return net->ipv6.fib6_null_entry; + if (oif && flags & RT6_LOOKUP_F_IFACE) { + res->f6i = net->ipv6.fib6_null_entry; + nh = &res->f6i->fib6_nh; + goto out; + } - return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? 
net->ipv6.fib6_null_entry : rt; + nh = &f6i->fib6_nh; + if (nh->fib_nh_flags & RTNH_F_DEAD) { + res->f6i = net->ipv6.fib6_null_entry; + nh = &res->f6i->fib6_nh; + } +out: + res->nh = nh; + res->fib6_type = res->f6i->fib6_type; + res->fib6_flags = res->f6i->fib6_flags; } #ifdef CONFIG_IPV6_ROUTER_PREF @@ -514,7 +545,7 @@ static void rt6_probe_deferred(struct work_struct *w) kfree(work); } -static void rt6_probe(struct fib6_info *rt) +static void rt6_probe(struct fib6_nh *fib6_nh) { struct __rt6_probe_work *work = NULL; const struct in6_addr *nh_gw; @@ -530,11 +561,11 @@ static void rt6_probe(struct fib6_info *rt) * Router Reachability Probe MUST be rate-limited * to no more than one per minute. */ - if (!rt || !(rt->fib6_flags & RTF_GATEWAY)) + if (!fib6_nh->fib_nh_gw_family) return; - nh_gw = &rt->fib6_nh.nh_gw; - dev = rt->fib6_nh.nh_dev; + nh_gw = &fib6_nh->fib_nh_gw6; + dev = fib6_nh->fib_nh_dev; rcu_read_lock_bh(); idev = __in6_dev_get(dev); neigh = __ipv6_neigh_lookup_noref(dev, nh_gw); @@ -551,13 +582,13 @@ static void rt6_probe(struct fib6_info *rt) __neigh_set_probe_once(neigh); } write_unlock(&neigh->lock); - } else if (time_after(jiffies, rt->last_probe + + } else if (time_after(jiffies, fib6_nh->last_probe + idev->cnf.rtr_probe_interval)) { work = kmalloc(sizeof(*work), GFP_ATOMIC); } if (work) { - rt->last_probe = jiffies; + fib6_nh->last_probe = jiffies; INIT_WORK(&work->work, rt6_probe_deferred); work->target = *nh_gw; dev_hold(dev); @@ -569,7 +600,7 @@ out: rcu_read_unlock_bh(); } #else -static inline void rt6_probe(struct fib6_info *rt) +static inline void rt6_probe(struct fib6_nh *fib6_nh) { } #endif @@ -577,27 +608,14 @@ static inline void rt6_probe(struct fib6_info *rt) /* * Default Router Selection (RFC 2461 6.3.6) */ -static inline int rt6_check_dev(struct fib6_info *rt, int oif) -{ - const struct net_device *dev = rt->fib6_nh.nh_dev; - - if (!oif || dev->ifindex == oif) - return 2; - return 0; -} - -static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt) +static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh) { enum rt6_nud_state ret = RT6_NUD_FAIL_HARD; struct neighbour *neigh; - if (rt->fib6_flags & RTF_NONEXTHOP || - !(rt->fib6_flags & RTF_GATEWAY)) - return RT6_NUD_SUCCEED; - rcu_read_lock_bh(); - neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev, - &rt->fib6_nh.nh_gw); + neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev, + &fib6_nh->fib_nh_gw6); if (neigh) { read_lock(&neigh->lock); if (neigh->nud_state & NUD_VALID) @@ -618,58 +636,44 @@ static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt) return ret; } -static int rt6_score_route(struct fib6_info *rt, int oif, int strict) +static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif, + int strict) { - int m; + int m = 0; + + if (!oif || nh->fib_nh_dev->ifindex == oif) + m = 2; - m = rt6_check_dev(rt, oif); if (!m && (strict & RT6_LOOKUP_F_IFACE)) return RT6_NUD_FAIL_HARD; #ifdef CONFIG_IPV6_ROUTER_PREF - m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2; + m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2; #endif - if (strict & RT6_LOOKUP_F_REACHABLE) { - int n = rt6_check_neigh(rt); + if ((strict & RT6_LOOKUP_F_REACHABLE) && + !(fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) { + int n = rt6_check_neigh(nh); if (n < 0) return n; } return m; } -/* called with rc_read_lock held */ -static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i) +static bool find_match(struct fib6_nh *nh, u32 fib6_flags, + int
oif, int strict, int *mpri, bool *do_rr) { - const struct net_device *dev = fib6_info_nh_dev(f6i); + bool match_do_rr = false; bool rc = false; - - if (dev) { - const struct inet6_dev *idev = __in6_dev_get(dev); - - rc = !!idev->cnf.ignore_routes_with_linkdown; - } - - return rc; -} - -static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict, - int *mpri, struct fib6_info *match, - bool *do_rr) -{ int m; - bool match_do_rr = false; - if (rt->fib6_nh.nh_flags & RTNH_F_DEAD) + if (nh->fib_nh_flags & RTNH_F_DEAD) goto out; - if (fib6_ignore_linkdown(rt) && - rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN && + if (ip6_ignore_linkdown(nh->fib_nh_dev) && + nh->fib_nh_flags & RTNH_F_LINKDOWN && !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE)) goto out; - if (fib6_check_expired(rt)) - goto out; - - m = rt6_score_route(rt, oif, strict); + m = rt6_score_route(nh, fib6_flags, oif, strict); if (m == RT6_NUD_FAIL_DO_RR) { match_do_rr = true; m = 0; /* lowest valid score */ @@ -678,67 +682,82 @@ static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict, } if (strict & RT6_LOOKUP_F_REACHABLE) - rt6_probe(rt); + rt6_probe(nh); /* note that m can be RT6_NUD_FAIL_PROBE at this point */ if (m > *mpri) { *do_rr = match_do_rr; *mpri = m; - match = rt; + rc = true; } out: - return match; + return rc; } -static struct fib6_info *find_rr_leaf(struct fib6_node *fn, - struct fib6_info *leaf, - struct fib6_info *rr_head, - u32 metric, int oif, int strict, - bool *do_rr) +static void __find_rr_leaf(struct fib6_info *f6i_start, + struct fib6_info *nomatch, u32 metric, + struct fib6_result *res, struct fib6_info **cont, + int oif, int strict, bool *do_rr, int *mpri) { - struct fib6_info *rt, *match, *cont; - int mpri = -1; + struct fib6_info *f6i; - match = NULL; - cont = NULL; - for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) { - if (rt->fib6_metric != metric) { - cont = rt; - break; + for (f6i = f6i_start; + f6i && f6i != nomatch; + f6i = rcu_dereference(f6i->fib6_next)) { + struct fib6_nh *nh; + + if (cont && f6i->fib6_metric != metric) { + *cont = f6i; + return; } - match = find_match(rt, oif, strict, &mpri, match, do_rr); - } + if (fib6_check_expired(f6i)) + continue; - for (rt = leaf; rt && rt != rr_head; - rt = rcu_dereference(rt->fib6_next)) { - if (rt->fib6_metric != metric) { - cont = rt; - break; + nh = &f6i->fib6_nh; + if (find_match(nh, f6i->fib6_flags, oif, strict, mpri, do_rr)) { + res->f6i = f6i; + res->nh = nh; + res->fib6_flags = f6i->fib6_flags; + res->fib6_type = f6i->fib6_type; } - - match = find_match(rt, oif, strict, &mpri, match, do_rr); } +} - if (match || !cont) - return match; +static void find_rr_leaf(struct fib6_node *fn, struct fib6_info *leaf, + struct fib6_info *rr_head, int oif, int strict, + bool *do_rr, struct fib6_result *res) +{ + u32 metric = rr_head->fib6_metric; + struct fib6_info *cont = NULL; + int mpri = -1; - for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next)) - match = find_match(rt, oif, strict, &mpri, match, do_rr); + __find_rr_leaf(rr_head, NULL, metric, res, &cont, + oif, strict, do_rr, &mpri); - return match; + __find_rr_leaf(leaf, rr_head, metric, res, &cont, + oif, strict, do_rr, &mpri); + + if (res->f6i || !cont) + return; + + __find_rr_leaf(cont, NULL, metric, res, NULL, + oif, strict, do_rr, &mpri); } -static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn, - int oif, int strict) +static void rt6_select(struct net *net, struct fib6_node *fn, int oif, + struct fib6_result *res, int strict) { struct 
fib6_info *leaf = rcu_dereference(fn->leaf); - struct fib6_info *match, *rt0; + struct fib6_info *rt0; bool do_rr = false; int key_plen; + /* make sure this function or its helpers sets f6i */ + res->f6i = NULL; + if (!leaf || leaf == net->ipv6.fib6_null_entry) - return net->ipv6.fib6_null_entry; + goto out; rt0 = rcu_dereference(fn->rr_ptr); if (!rt0) @@ -755,11 +774,9 @@ static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn, key_plen = rt0->fib6_src.plen; #endif if (fn->fn_bit != key_plen) - return net->ipv6.fib6_null_entry; - - match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict, - &do_rr); + goto out; + find_rr_leaf(fn, leaf, rt0, oif, strict, &do_rr, res); if (do_rr) { struct fib6_info *next = rcu_dereference(rt0->fib6_next); @@ -776,12 +793,19 @@ static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn, } } - return match ? match : net->ipv6.fib6_null_entry; +out: + if (!res->f6i) { + res->f6i = net->ipv6.fib6_null_entry; + res->nh = &res->f6i->fib6_nh; + res->fib6_flags = res->f6i->fib6_flags; + res->fib6_type = res->f6i->fib6_type; + } } -static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt) +static bool rt6_is_gw_or_nonexthop(const struct fib6_result *res) { - return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY)); + return (res->f6i->fib6_flags & RTF_NONEXTHOP) || + res->nh->fib_nh_gw_family; } #ifdef CONFIG_IPV6_ROUTE_INFO @@ -865,17 +889,17 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, */ /* called with rcu_lock held */ -static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt) +static struct net_device *ip6_rt_get_dev_rcu(const struct fib6_result *res) { - struct net_device *dev = rt->fib6_nh.nh_dev; + struct net_device *dev = res->nh->fib_nh_dev; - if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) { + if (res->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) { /* for copies of local routes, dst->dev needs to be the * device if it is a master device, the master device if * device is enslaved, and the loopback as the default */ if (netif_is_l3_slave(dev) && - !rt6_need_strict(&rt->fib6_dst.addr)) + !rt6_need_strict(&res->f6i->fib6_dst.addr)) dev = l3mdev_master_dev_rcu(dev); else if (!netif_is_l3_master(dev)) dev = dev_net(dev)->loopback_dev; @@ -921,11 +945,11 @@ static unsigned short fib6_info_dst_flags(struct fib6_info *rt) return flags; } -static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort) +static void ip6_rt_init_dst_reject(struct rt6_info *rt, u8 fib6_type) { - rt->dst.error = ip6_rt_type_to_error(ort->fib6_type); + rt->dst.error = ip6_rt_type_to_error(fib6_type); - switch (ort->fib6_type) { + switch (fib6_type) { case RTN_BLACKHOLE: rt->dst.output = dst_discard_out; rt->dst.input = dst_discard; @@ -943,26 +967,28 @@ static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort) } } -static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort) +static void ip6_rt_init_dst(struct rt6_info *rt, const struct fib6_result *res) { - if (ort->fib6_flags & RTF_REJECT) { - ip6_rt_init_dst_reject(rt, ort); + struct fib6_info *f6i = res->f6i; + + if (res->fib6_flags & RTF_REJECT) { + ip6_rt_init_dst_reject(rt, res->fib6_type); return; } rt->dst.error = 0; rt->dst.output = ip6_output; - if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) { + if (res->fib6_type == RTN_LOCAL || res->fib6_type == RTN_ANYCAST) { rt->dst.input = ip6_input; - } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) { + } else if 
(ipv6_addr_type(&f6i->fib6_dst.addr) & IPV6_ADDR_MULTICAST) { rt->dst.input = ip6_mc_input; } else { rt->dst.input = ip6_forward; } - if (ort->fib6_nh.nh_lwtstate) { - rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate); + if (res->nh->fib_nh_lws) { + rt->dst.lwtstate = lwtstate_get(res->nh->fib_nh_lws); lwtunnel_set_redirect(&rt->dst); } @@ -977,20 +1003,25 @@ static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from) ip_dst_init_metrics(&rt->dst, from->fib6_metrics); } -/* Caller must already hold reference to @ort */ -static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort) +/* Caller must already hold reference to f6i in result */ +static void ip6_rt_copy_init(struct rt6_info *rt, const struct fib6_result *res) { - struct net_device *dev = fib6_info_nh_dev(ort); + const struct fib6_nh *nh = res->nh; + const struct net_device *dev = nh->fib_nh_dev; + struct fib6_info *f6i = res->f6i; - ip6_rt_init_dst(rt, ort); + ip6_rt_init_dst(rt, res); - rt->rt6i_dst = ort->fib6_dst; + rt->rt6i_dst = f6i->fib6_dst; rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL; - rt->rt6i_gateway = ort->fib6_nh.nh_gw; - rt->rt6i_flags = ort->fib6_flags; - rt6_set_from(rt, ort); + rt->rt6i_flags = res->fib6_flags; + if (nh->fib_nh_gw_family) { + rt->rt6i_gateway = nh->fib_nh_gw6; + rt->rt6i_flags |= RTF_GATEWAY; + } + rt6_set_from(rt, f6i); #ifdef CONFIG_IPV6_SUBTREES - rt->rt6i_src = ort->fib6_src; + rt->rt6i_src = f6i->fib6_src; #endif } @@ -1012,14 +1043,13 @@ static struct fib6_node* fib6_backtrack(struct fib6_node *fn, } } -static bool ip6_hold_safe(struct net *net, struct rt6_info **prt, - bool null_fallback) +static bool ip6_hold_safe(struct net *net, struct rt6_info **prt) { struct rt6_info *rt = *prt; if (dst_hold_safe(&rt->dst)) return true; - if (null_fallback) { + if (net) { rt = net->ipv6.ip6_null_entry; dst_hold(&rt->dst); } else { @@ -1030,22 +1060,24 @@ static bool ip6_hold_safe(struct net *net, struct rt6_info **prt, } /* called with rcu_lock held */ -static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt) +static struct rt6_info *ip6_create_rt_rcu(const struct fib6_result *res) { - unsigned short flags = fib6_info_dst_flags(rt); - struct net_device *dev = rt->fib6_nh.nh_dev; + struct net_device *dev = res->nh->fib_nh_dev; + struct fib6_info *f6i = res->f6i; + unsigned short flags; struct rt6_info *nrt; - if (!fib6_info_hold_safe(rt)) + if (!fib6_info_hold_safe(f6i)) goto fallback; + flags = fib6_info_dst_flags(f6i); nrt = ip6_dst_alloc(dev_net(dev), dev, flags); if (!nrt) { - fib6_info_release(rt); + fib6_info_release(f6i); goto fallback; } - ip6_rt_copy_init(nrt, rt); + ip6_rt_copy_init(nrt, res); return nrt; fallback: @@ -1060,7 +1092,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net, const struct sk_buff *skb, int flags) { - struct fib6_info *f6i; + struct fib6_result res = {}; struct fib6_node *fn; struct rt6_info *rt; @@ -1070,37 +1102,38 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net, rcu_read_lock(); fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: - f6i = rcu_dereference(fn->leaf); - if (!f6i) { - f6i = net->ipv6.fib6_null_entry; - } else { - f6i = rt6_device_match(net, f6i, &fl6->saddr, - fl6->flowi6_oif, flags); - if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0) - f6i = fib6_multipath_select(net, f6i, fl6, - fl6->flowi6_oif, skb, - flags); - } - if (f6i == net->ipv6.fib6_null_entry) { + res.f6i = rcu_dereference(fn->leaf); + if (!res.f6i) + res.f6i = net->ipv6.fib6_null_entry; + else + 
rt6_device_match(net, &res, &fl6->saddr, fl6->flowi6_oif, + flags); + + if (res.f6i == net->ipv6.fib6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) goto restart; + + rt = net->ipv6.ip6_null_entry; + dst_hold(&rt->dst); + goto out; } - trace_fib6_table_lookup(net, f6i, table, fl6); + fib6_select_path(net, &res, fl6, fl6->flowi6_oif, + fl6->flowi6_oif != 0, skb, flags); /* Search through exception table */ - rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr); + rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr); if (rt) { - if (ip6_hold_safe(net, &rt, true)) + if (ip6_hold_safe(net, &rt)) dst_use_noref(&rt->dst, jiffies); - } else if (f6i == net->ipv6.fib6_null_entry) { - rt = net->ipv6.ip6_null_entry; - dst_hold(&rt->dst); } else { - rt = ip6_create_rt_rcu(f6i); + rt = ip6_create_rt_rcu(&res); } +out: + trace_fib6_table_lookup(net, &res, table, fl6); + rcu_read_unlock(); return rt; @@ -1166,10 +1199,11 @@ int ip6_ins_rt(struct net *net, struct fib6_info *rt) return __ip6_ins_rt(rt, &info, NULL); } -static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort, +static struct rt6_info *ip6_rt_cache_alloc(const struct fib6_result *res, const struct in6_addr *daddr, const struct in6_addr *saddr) { + struct fib6_info *f6i = res->f6i; struct net_device *dev; struct rt6_info *rt; @@ -1177,25 +1211,25 @@ static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort, * Clone the route. */ - if (!fib6_info_hold_safe(ort)) + if (!fib6_info_hold_safe(f6i)) return NULL; - dev = ip6_rt_get_dev_rcu(ort); + dev = ip6_rt_get_dev_rcu(res); rt = ip6_dst_alloc(dev_net(dev), dev, 0); if (!rt) { - fib6_info_release(ort); + fib6_info_release(f6i); return NULL; } - ip6_rt_copy_init(rt, ort); + ip6_rt_copy_init(rt, res); rt->rt6i_flags |= RTF_CACHE; rt->dst.flags |= DST_HOST; rt->rt6i_dst.addr = *daddr; rt->rt6i_dst.plen = 128; - if (!rt6_is_gw_or_nonexthop(ort)) { - if (ort->fib6_dst.plen != 128 && - ipv6_addr_equal(&ort->fib6_dst.addr, daddr)) + if (!rt6_is_gw_or_nonexthop(res)) { + if (f6i->fib6_dst.plen != 128 && + ipv6_addr_equal(&f6i->fib6_dst.addr, daddr)) rt->rt6i_flags |= RTF_ANYCAST; #ifdef CONFIG_IPV6_SUBTREES if (rt->rt6i_src.plen && saddr) { @@ -1208,55 +1242,56 @@ static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort, return rt; } -static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt) +static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res) { - unsigned short flags = fib6_info_dst_flags(rt); + struct fib6_info *f6i = res->f6i; + unsigned short flags = fib6_info_dst_flags(f6i); struct net_device *dev; struct rt6_info *pcpu_rt; - if (!fib6_info_hold_safe(rt)) + if (!fib6_info_hold_safe(f6i)) return NULL; rcu_read_lock(); - dev = ip6_rt_get_dev_rcu(rt); + dev = ip6_rt_get_dev_rcu(res); pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags); rcu_read_unlock(); if (!pcpu_rt) { - fib6_info_release(rt); + fib6_info_release(f6i); return NULL; } - ip6_rt_copy_init(pcpu_rt, rt); + ip6_rt_copy_init(pcpu_rt, res); pcpu_rt->rt6i_flags |= RTF_PCPU; return pcpu_rt; } /* It should be called with rcu_read_lock() acquired */ -static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt) +static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res) { struct rt6_info *pcpu_rt, **p; - p = this_cpu_ptr(rt->rt6i_pcpu); + p = this_cpu_ptr(res->f6i->rt6i_pcpu); pcpu_rt = *p; if (pcpu_rt) - ip6_hold_safe(NULL, &pcpu_rt, false); + ip6_hold_safe(NULL, &pcpu_rt); return pcpu_rt; } static struct rt6_info *rt6_make_pcpu_route(struct net *net, - struct 
fib6_info *rt) + const struct fib6_result *res) { struct rt6_info *pcpu_rt, *prev, **p; - pcpu_rt = ip6_rt_pcpu_alloc(rt); + pcpu_rt = ip6_rt_pcpu_alloc(res); if (!pcpu_rt) { dst_hold(&net->ipv6.ip6_null_entry->dst); return net->ipv6.ip6_null_entry; } dst_hold(&pcpu_rt->dst); - p = this_cpu_ptr(rt->rt6i_pcpu); + p = this_cpu_ptr(res->f6i->rt6i_pcpu); prev = cmpxchg(p, NULL, pcpu_rt); BUG_ON(prev); @@ -1397,14 +1432,15 @@ __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket, return NULL; } -static unsigned int fib6_mtu(const struct fib6_info *rt) +static unsigned int fib6_mtu(const struct fib6_result *res) { + const struct fib6_nh *nh = res->nh; unsigned int mtu; - if (rt->fib6_pmtu) { - mtu = rt->fib6_pmtu; + if (res->f6i->fib6_pmtu) { + mtu = res->f6i->fib6_pmtu; } else { - struct net_device *dev = fib6_info_nh_dev(rt); + struct net_device *dev = nh->fib_nh_dev; struct inet6_dev *idev; rcu_read_lock(); @@ -1415,26 +1451,27 @@ static unsigned int fib6_mtu(const struct fib6_info *rt) mtu = min_t(unsigned int, mtu, IP6_MAX_MTU); - return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu); + return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu); } static int rt6_insert_exception(struct rt6_info *nrt, - struct fib6_info *ort) + const struct fib6_result *res) { struct net *net = dev_net(nrt->dst.dev); struct rt6_exception_bucket *bucket; struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; + struct fib6_info *f6i = res->f6i; int err = 0; spin_lock_bh(&rt6_exception_lock); - if (ort->exception_bucket_flushed) { + if (f6i->exception_bucket_flushed) { err = -EINVAL; goto out; } - bucket = rcu_dereference_protected(ort->rt6i_exception_bucket, + bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket, lockdep_is_held(&rt6_exception_lock)); if (!bucket) { bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket), @@ -1443,24 +1480,24 @@ static int rt6_insert_exception(struct rt6_info *nrt, err = -ENOMEM; goto out; } - rcu_assign_pointer(ort->rt6i_exception_bucket, bucket); + rcu_assign_pointer(f6i->rt6i_exception_bucket, bucket); } #ifdef CONFIG_IPV6_SUBTREES - /* rt6i_src.plen != 0 indicates ort is in subtree + /* fib6_src.plen != 0 indicates f6i is in subtree * and exception table is indexed by a hash of - * both rt6i_dst and rt6i_src. + * both fib6_dst and fib6_src. * Otherwise, the exception table is indexed by - * a hash of only rt6i_dst. + * a hash of only fib6_dst. */ - if (ort->fib6_src.plen) + if (f6i->fib6_src.plen) src_key = &nrt->rt6i_src.addr; #endif - /* rt6_mtu_change() might lower mtu on ort. + /* rt6_mtu_change() might lower mtu on f6i. * Only insert this exception route if its mtu - * is less than ort's mtu value. + * is less than f6i's mtu value. 
*/ - if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) { + if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(res)) { err = -EINVAL; goto out; } @@ -1489,9 +1526,9 @@ out: /* Update fn->fn_sernum to invalidate all cached dst */ if (!err) { - spin_lock_bh(&ort->fib6_table->tb6_lock); - fib6_update_sernum(net, ort); - spin_unlock_bh(&ort->fib6_table->tb6_lock); + spin_lock_bh(&f6i->fib6_table->tb6_lock); + fib6_update_sernum(net, f6i); + spin_unlock_bh(&f6i->fib6_table->tb6_lock); fib6_force_start_gc(net); } @@ -1528,33 +1565,33 @@ out: /* Find cached rt in the hash table inside passed in rt * Caller has to hold rcu_read_lock() */ -static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt, +static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res, struct in6_addr *daddr, struct in6_addr *saddr) { struct rt6_exception_bucket *bucket; struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; - struct rt6_info *res = NULL; + struct rt6_info *ret = NULL; - bucket = rcu_dereference(rt->rt6i_exception_bucket); + bucket = rcu_dereference(res->f6i->rt6i_exception_bucket); #ifdef CONFIG_IPV6_SUBTREES - /* rt6i_src.plen != 0 indicates rt is in subtree + /* fib6i_src.plen != 0 indicates f6i is in subtree * and exception table is indexed by a hash of - * both rt6i_dst and rt6i_src. + * both fib6_dst and fib6_src. * Otherwise, the exception table is indexed by - * a hash of only rt6i_dst. + * a hash of only fib6_dst. */ - if (rt->fib6_src.plen) + if (res->f6i->fib6_src.plen) src_key = saddr; #endif rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key); if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i)) - res = rt6_ex->rt6i; + ret = rt6_ex->rt6i; - return res; + return ret; } /* Remove the passed in cached rt from the hash table that contains it */ @@ -1802,11 +1839,10 @@ void rt6_age_exceptions(struct fib6_info *rt, } /* must be called with rcu lock held */ -struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table, - int oif, struct flowi6 *fl6, int strict) +int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif, + struct flowi6 *fl6, struct fib6_result *res, int strict) { struct fib6_node *fn, *saved_fn; - struct fib6_info *f6i; fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); saved_fn = fn; @@ -1815,8 +1851,8 @@ struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table, oif = 0; redo_rt6_select: - f6i = rt6_select(net, fn, oif, strict); - if (f6i == net->ipv6.fib6_null_entry) { + rt6_select(net, fn, oif, res, strict); + if (res->f6i == net->ipv6.fib6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) goto redo_rt6_select; @@ -1828,16 +1864,16 @@ redo_rt6_select: } } - trace_fib6_table_lookup(net, f6i, table, fl6); + trace_fib6_table_lookup(net, res, table, fl6); - return f6i; + return 0; } struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, struct flowi6 *fl6, const struct sk_buff *skb, int flags) { - struct fib6_info *f6i; + struct fib6_result res = {}; struct rt6_info *rt; int strict = 0; @@ -1848,27 +1884,26 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, rcu_read_lock(); - f6i = fib6_table_lookup(net, table, oif, fl6, strict); - if (f6i->fib6_nsiblings) - f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict); - - if (f6i == net->ipv6.fib6_null_entry) { + fib6_table_lookup(net, table, oif, fl6, &res, strict); + if (res.f6i == net->ipv6.fib6_null_entry) { rt = net->ipv6.ip6_null_entry; rcu_read_unlock(); 
dst_hold(&rt->dst); return rt; } + fib6_select_path(net, &res, fl6, oif, false, skb, strict); + /*Search through exception table */ - rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr); + rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr); if (rt) { - if (ip6_hold_safe(net, &rt, true)) + if (ip6_hold_safe(net, &rt)) dst_use_noref(&rt->dst, jiffies); rcu_read_unlock(); return rt; } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && - !(f6i->fib6_flags & RTF_GATEWAY))) { + !res.nh->fib_nh_gw_family)) { /* Create a RTF_CACHE clone which will not be * owned by the fib6 tree. It is for the special case where * the daddr in the skb during the neighbor look-up is different @@ -1876,7 +1911,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, */ struct rt6_info *uncached_rt; - uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL); + uncached_rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL); rcu_read_unlock(); @@ -1898,10 +1933,10 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, struct rt6_info *pcpu_rt; local_bh_disable(); - pcpu_rt = rt6_get_pcpu_route(f6i); + pcpu_rt = rt6_get_pcpu_route(&res); if (!pcpu_rt) - pcpu_rt = rt6_make_pcpu_route(net, f6i); + pcpu_rt = rt6_make_pcpu_route(net, &res); local_bh_enable(); rcu_read_unlock(); @@ -2320,19 +2355,23 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, if (rt6->rt6i_flags & RTF_CACHE) rt6_update_exception_stamp_rt(rt6); } else if (daddr) { - struct fib6_info *from; + struct fib6_result res = {}; struct rt6_info *nrt6; rcu_read_lock(); - from = rcu_dereference(rt6->from); - if (!from) { + res.f6i = rcu_dereference(rt6->from); + if (!res.f6i) { rcu_read_unlock(); return; } - nrt6 = ip6_rt_cache_alloc(from, daddr, saddr); + res.nh = &res.f6i->fib6_nh; + res.fib6_flags = res.f6i->fib6_flags; + res.fib6_type = res.f6i->fib6_type; + + nrt6 = ip6_rt_cache_alloc(&res, daddr, saddr); if (nrt6) { rt6_do_update_pmtu(nrt6, mtu); - if (rt6_insert_exception(nrt6, from)) + if (rt6_insert_exception(nrt6, &res)) dst_release_immediate(&nrt6->dst); } rcu_read_unlock(); @@ -2405,6 +2444,36 @@ void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst, NULL); } +static bool ip6_redirect_nh_match(const struct fib6_result *res, + struct flowi6 *fl6, + const struct in6_addr *gw, + struct rt6_info **ret) +{ + const struct fib6_nh *nh = res->nh; + + if (nh->fib_nh_flags & RTNH_F_DEAD || !nh->fib_nh_gw_family || + fl6->flowi6_oif != nh->fib_nh_dev->ifindex) + return false; + + /* rt_cache's gateway might be different from its 'parent' + * in the case of an ip redirect. + * So we keep searching in the exception table if the gateway + * is different. 
+ */ + if (!ipv6_addr_equal(gw, &nh->fib_nh_gw6)) { + struct rt6_info *rt_cache; + + rt_cache = rt6_find_cached_rt(res, &fl6->daddr, &fl6->saddr); + if (rt_cache && + ipv6_addr_equal(gw, &rt_cache->rt6i_gateway)) { + *ret = rt_cache; + return true; + } + return false; + } + return true; +} + /* Handle redirects */ struct ip6rd_flowi { struct flowi6 fl6; @@ -2418,7 +2487,8 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, int flags) { struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; - struct rt6_info *ret = NULL, *rt_cache; + struct rt6_info *ret = NULL; + struct fib6_result res = {}; struct fib6_info *rt; struct fib6_node *fn; @@ -2436,34 +2506,15 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: for_each_fib6_node_rt_rcu(fn) { - if (rt->fib6_nh.nh_flags & RTNH_F_DEAD) - continue; + res.f6i = rt; + res.nh = &rt->fib6_nh; + if (fib6_check_expired(rt)) continue; if (rt->fib6_flags & RTF_REJECT) break; - if (!(rt->fib6_flags & RTF_GATEWAY)) - continue; - if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex) - continue; - /* rt_cache's gateway might be different from its 'parent' - * in the case of an ip redirect. - * So we keep searching in the exception table if the gateway - * is different. - */ - if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) { - rt_cache = rt6_find_cached_rt(rt, - &fl6->daddr, - &fl6->saddr); - if (rt_cache && - ipv6_addr_equal(&rdfl->gateway, - &rt_cache->rt6i_gateway)) { - ret = rt_cache; - break; - } - continue; - } - break; + if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway, &ret)) + goto out; } if (!rt) @@ -2479,15 +2530,20 @@ restart: goto restart; } + res.f6i = rt; + res.nh = &rt->fib6_nh; out: - if (ret) - ip6_hold_safe(net, &ret, true); - else - ret = ip6_create_rt_rcu(rt); + if (ret) { + ip6_hold_safe(net, &ret); + } else { + res.fib6_flags = res.f6i->fib6_flags; + res.fib6_type = res.f6i->fib6_type; + ret = ip6_create_rt_rcu(&res); + } rcu_read_unlock(); - trace_fib6_table_lookup(net, rt, table, fl6); + trace_fib6_table_lookup(net, &res, table, fl6); return ret; }; @@ -2605,12 +2661,15 @@ out: * based on ip6_dst_mtu_forward and exception logic of * rt6_find_cached_rt; called with rcu_read_lock */ -u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr, - struct in6_addr *saddr) +u32 ip6_mtu_from_fib6(const struct fib6_result *res, + const struct in6_addr *daddr, + const struct in6_addr *saddr) { struct rt6_exception_bucket *bucket; + const struct fib6_nh *nh = res->nh; + struct fib6_info *f6i = res->f6i; + const struct in6_addr *src_key; struct rt6_exception *rt6_ex; - struct in6_addr *src_key; struct inet6_dev *idev; u32 mtu = 0; @@ -2632,7 +2691,7 @@ u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr, mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU); if (likely(!mtu)) { - struct net_device *dev = fib6_info_nh_dev(f6i); + struct net_device *dev = nh->fib_nh_dev; mtu = IPV6_MIN_MTU; idev = __in6_dev_get(dev); @@ -2642,7 +2701,7 @@ u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr, mtu = min_t(unsigned int, mtu, IP6_MAX_MTU); out: - return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu); + return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu); } struct dst_entry *icmp6_dst_alloc(struct net_device *dev, @@ -2898,17 +2957,143 @@ out: return err; } +static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type) +{ + if ((flags & RTF_REJECT) || + (dev && (dev->flags & 
IFF_LOOPBACK) && + !(addr_type & IPV6_ADDR_LOOPBACK) && + !(flags & RTF_LOCAL))) + return true; + + return false; +} + +int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, + struct fib6_config *cfg, gfp_t gfp_flags, + struct netlink_ext_ack *extack) +{ + struct net_device *dev = NULL; + struct inet6_dev *idev = NULL; + int addr_type; + int err; + + fib6_nh->fib_nh_family = AF_INET6; + + err = -ENODEV; + if (cfg->fc_ifindex) { + dev = dev_get_by_index(net, cfg->fc_ifindex); + if (!dev) + goto out; + idev = in6_dev_get(dev); + if (!idev) + goto out; + } + + if (cfg->fc_flags & RTNH_F_ONLINK) { + if (!dev) { + NL_SET_ERR_MSG(extack, + "Nexthop device required for onlink"); + goto out; + } + + if (!(dev->flags & IFF_UP)) { + NL_SET_ERR_MSG(extack, "Nexthop device is not up"); + err = -ENETDOWN; + goto out; + } + + fib6_nh->fib_nh_flags |= RTNH_F_ONLINK; + } + + fib6_nh->fib_nh_weight = 1; + + /* We cannot add true routes via loopback here, + * they would result in kernel looping; promote them to reject routes + */ + addr_type = ipv6_addr_type(&cfg->fc_dst); + if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) { + /* hold loopback dev/idev if we haven't done so. */ + if (dev != net->loopback_dev) { + if (dev) { + dev_put(dev); + in6_dev_put(idev); + } + dev = net->loopback_dev; + dev_hold(dev); + idev = in6_dev_get(dev); + if (!idev) { + err = -ENODEV; + goto out; + } + } + goto set_dev; + } + + if (cfg->fc_flags & RTF_GATEWAY) { + err = ip6_validate_gw(net, cfg, &dev, &idev, extack); + if (err) + goto out; + + fib6_nh->fib_nh_gw6 = cfg->fc_gateway; + fib6_nh->fib_nh_gw_family = AF_INET6; + } + + err = -ENODEV; + if (!dev) + goto out; + + if (idev->cnf.disable_ipv6) { + NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device"); + err = -EACCES; + goto out; + } + + if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) { + NL_SET_ERR_MSG(extack, "Nexthop device is not up"); + err = -ENETDOWN; + goto out; + } + + if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) && + !netif_carrier_ok(dev)) + fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN; + + err = fib_nh_common_init(&fib6_nh->nh_common, cfg->fc_encap, + cfg->fc_encap_type, cfg, gfp_flags, extack); + if (err) + goto out; +set_dev: + fib6_nh->fib_nh_dev = dev; + fib6_nh->fib_nh_oif = dev->ifindex; + err = 0; +out: + if (idev) + in6_dev_put(idev); + + if (err) { + lwtstate_put(fib6_nh->fib_nh_lws); + fib6_nh->fib_nh_lws = NULL; + if (dev) + dev_put(dev); + } + + return err; +} + +void fib6_nh_release(struct fib6_nh *fib6_nh) +{ + fib_nh_common_release(&fib6_nh->nh_common); +} + static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack) { struct net *net = cfg->fc_nlinfo.nl_net; struct fib6_info *rt = NULL; - struct net_device *dev = NULL; - struct inet6_dev *idev = NULL; struct fib6_table *table; - int addr_type; int err = -EINVAL; + int addr_type; /* RTF_PCPU is an internal flag; can not be set by userspace */ if (cfg->fc_flags & RTF_PCPU) { @@ -2942,33 +3127,6 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, goto out; } #endif - if (cfg->fc_ifindex) { - err = -ENODEV; - dev = dev_get_by_index(net, cfg->fc_ifindex); - if (!dev) - goto out; - idev = in6_dev_get(dev); - if (!idev) - goto out; - } - - if (cfg->fc_metric == 0) - cfg->fc_metric = IP6_RT_PRIO_USER; - - if (cfg->fc_flags & RTNH_F_ONLINK) { - if (!dev) { - NL_SET_ERR_MSG(extack, - "Nexthop device required for onlink"); - err = -ENODEV; - goto out; - } - - if (!(dev->flags & IFF_UP)) { - 
NL_SET_ERR_MSG(extack, "Nexthop device is not up"); - err = -ENETDOWN; - goto out; - } - } err = -ENOBUFS; if (cfg->fc_nlinfo.nlh && @@ -3012,18 +3170,10 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, cfg->fc_protocol = RTPROT_BOOT; rt->fib6_protocol = cfg->fc_protocol; - addr_type = ipv6_addr_type(&cfg->fc_dst); - - if (cfg->fc_encap) { - struct lwtunnel_state *lwtstate; - - err = lwtunnel_build_state(cfg->fc_encap_type, - cfg->fc_encap, AF_INET6, cfg, - &lwtstate, extack); - if (err) - goto out; - rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate); - } + rt->fib6_table = table; + rt->fib6_metric = cfg->fc_metric; + rt->fib6_type = cfg->fc_type; + rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY; ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); rt->fib6_dst.plen = cfg->fc_dst_len; @@ -3034,62 +3184,20 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len); rt->fib6_src.plen = cfg->fc_src_len; #endif - - rt->fib6_metric = cfg->fc_metric; - rt->fib6_nh.nh_weight = 1; - - rt->fib6_type = cfg->fc_type; + err = fib6_nh_init(net, &rt->fib6_nh, cfg, gfp_flags, extack); + if (err) + goto out; /* We cannot add true routes via loopback here, - they would result in kernel looping; promote them to reject routes + * they would result in kernel looping; promote them to reject routes */ - if ((cfg->fc_flags & RTF_REJECT) || - (dev && (dev->flags & IFF_LOOPBACK) && - !(addr_type & IPV6_ADDR_LOOPBACK) && - !(cfg->fc_flags & RTF_LOCAL))) { - /* hold loopback dev/idev if we haven't done so. */ - if (dev != net->loopback_dev) { - if (dev) { - dev_put(dev); - in6_dev_put(idev); - } - dev = net->loopback_dev; - dev_hold(dev); - idev = in6_dev_get(dev); - if (!idev) { - err = -ENODEV; - goto out; - } - } - rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP; - goto install_route; - } - - if (cfg->fc_flags & RTF_GATEWAY) { - err = ip6_validate_gw(net, cfg, &dev, &idev, extack); - if (err) - goto out; - - rt->fib6_nh.nh_gw = cfg->fc_gateway; - } - - err = -ENODEV; - if (!dev) - goto out; - - if (idev->cnf.disable_ipv6) { - NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device"); - err = -EACCES; - goto out; - } - - if (!(dev->flags & IFF_UP)) { - NL_SET_ERR_MSG(extack, "Nexthop device is not up"); - err = -ENETDOWN; - goto out; - } + addr_type = ipv6_addr_type(&cfg->fc_dst); + if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh.fib_nh_dev, addr_type)) + rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP; if (!ipv6_addr_any(&cfg->fc_prefsrc)) { + struct net_device *dev = fib6_info_nh_dev(rt); + if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) { NL_SET_ERR_MSG(extack, "Invalid source address"); err = -EINVAL; @@ -3100,26 +3208,8 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, } else rt->fib6_prefsrc.plen = 0; - rt->fib6_flags = cfg->fc_flags; - -install_route: - if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) && - !netif_carrier_ok(dev)) - rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN; - rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK); - rt->fib6_nh.nh_dev = dev; - rt->fib6_table = table; - - if (idev) - in6_dev_put(idev); - return rt; out: - if (dev) - dev_put(dev); - if (idev) - in6_dev_put(idev); - fib6_info_release(rt); return ERR_PTR(err); } @@ -3260,10 +3350,16 @@ static int ip6_route_del(struct fib6_config *cfg, if (fn) { for_each_fib6_node_rt_rcu(fn) { + struct fib6_nh *nh; + if (cfg->fc_flags & RTF_CACHE) { + struct fib6_result res = { + .f6i = rt, + }; int rc; - 
rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst, + rt_cache = rt6_find_cached_rt(&res, + &cfg->fc_dst, &cfg->fc_src); if (rt_cache) { rc = ip6_del_cached_rt(rt_cache, cfg); @@ -3274,12 +3370,14 @@ static int ip6_route_del(struct fib6_config *cfg, } continue; } + + nh = &rt->fib6_nh; if (cfg->fc_ifindex && - (!rt->fib6_nh.nh_dev || - rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex)) + (!nh->fib_nh_dev || + nh->fib_nh_dev->ifindex != cfg->fc_ifindex)) continue; if (cfg->fc_flags & RTF_GATEWAY && - !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw)) + !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6)) continue; if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric) continue; @@ -3305,10 +3403,10 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu { struct netevent_redirect netevent; struct rt6_info *rt, *nrt = NULL; + struct fib6_result res = {}; struct ndisc_options ndopts; struct inet6_dev *in6_dev; struct neighbour *neigh; - struct fib6_info *from; struct rd_msg *msg; int optlen, on_link; u8 *lladdr; @@ -3391,11 +3489,14 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu NDISC_REDIRECT, &ndopts); rcu_read_lock(); - from = rcu_dereference(rt->from); - if (!from) + res.f6i = rcu_dereference(rt->from); + if (!res.f6i) goto out; - nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL); + res.nh = &res.f6i->fib6_nh; + res.fib6_flags = res.f6i->fib6_flags; + res.fib6_type = res.f6i->fib6_type; + nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL); if (!nrt) goto out; @@ -3406,7 +3507,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; /* rt6_insert_exception() will take care of duplicated exceptions */ - if (rt6_insert_exception(nrt, from)) { + if (rt6_insert_exception(nrt, &res)) { dst_release_immediate(&nrt->dst); goto out; } @@ -3444,11 +3545,12 @@ static struct fib6_info *rt6_get_route_info(struct net *net, goto out; for_each_fib6_node_rt_rcu(fn) { - if (rt->fib6_nh.nh_dev->ifindex != ifindex) + if (rt->fib6_nh.fib_nh_dev->ifindex != ifindex) continue; - if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) + if (!(rt->fib6_flags & RTF_ROUTEINFO) || + !rt->fib6_nh.fib_nh_gw_family) continue; - if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr)) + if (!ipv6_addr_equal(&rt->fib6_nh.fib_nh_gw6, gwaddr)) continue; if (!fib6_info_hold_safe(rt)) continue; @@ -3506,9 +3608,11 @@ struct fib6_info *rt6_get_dflt_router(struct net *net, rcu_read_lock(); for_each_fib6_node_rt_rcu(&table->tb6_root) { - if (dev == rt->fib6_nh.nh_dev && + struct fib6_nh *nh = &rt->fib6_nh; + + if (dev == nh->fib_nh_dev && ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && - ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr)) + ipv6_addr_equal(&nh->fib_nh_gw6, addr)) break; } if (rt && !fib6_info_hold_safe(rt)) @@ -3599,7 +3703,7 @@ static void rtmsg_to_fib6_config(struct net *net, .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ? : RT6_TABLE_MAIN, .fc_ifindex = rtmsg->rtmsg_ifindex, - .fc_metric = rtmsg->rtmsg_metric, + .fc_metric = rtmsg->rtmsg_metric ? 
: IP6_RT_PRIO_USER, .fc_expires = rtmsg->rtmsg_info, .fc_dst_len = rtmsg->rtmsg_dst_len, .fc_src_len = rtmsg->rtmsg_src_len, @@ -3721,36 +3825,26 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net, const struct in6_addr *addr, bool anycast, gfp_t gfp_flags) { - u32 tb_id; - struct net_device *dev = idev->dev; - struct fib6_info *f6i; - - f6i = fib6_info_alloc(gfp_flags); - if (!f6i) - return ERR_PTR(-ENOMEM); + struct fib6_config cfg = { + .fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL, + .fc_ifindex = idev->dev->ifindex, + .fc_flags = RTF_UP | RTF_ADDRCONF | RTF_NONEXTHOP, + .fc_dst = *addr, + .fc_dst_len = 128, + .fc_protocol = RTPROT_KERNEL, + .fc_nlinfo.nl_net = net, + .fc_ignore_dev_down = true, + }; - f6i->fib6_metrics = ip_fib_metrics_init(net, NULL, 0, NULL); - f6i->dst_nocount = true; - f6i->dst_host = true; - f6i->fib6_protocol = RTPROT_KERNEL; - f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP; if (anycast) { - f6i->fib6_type = RTN_ANYCAST; - f6i->fib6_flags |= RTF_ANYCAST; + cfg.fc_type = RTN_ANYCAST; + cfg.fc_flags |= RTF_ANYCAST; } else { - f6i->fib6_type = RTN_LOCAL; - f6i->fib6_flags |= RTF_LOCAL; + cfg.fc_type = RTN_LOCAL; + cfg.fc_flags |= RTF_LOCAL; } - f6i->fib6_nh.nh_gw = *addr; - dev_hold(dev); - f6i->fib6_nh.nh_dev = dev; - f6i->fib6_dst.addr = *addr; - f6i->fib6_dst.plen = 128; - tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL; - f6i->fib6_table = fib6_get_table(net, tb_id); - - return f6i; + return ip6_route_info_create(&cfg, gfp_flags, NULL); } /* remove deleted ip from prefsrc entries */ @@ -3766,7 +3860,7 @@ static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg) struct net *net = ((struct arg_dev_net_ip *)arg)->net; struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr; - if (((void *)rt->fib6_nh.nh_dev == dev || !dev) && + if (((void *)rt->fib6_nh.fib_nh_dev == dev || !dev) && rt != net->ipv6.fib6_null_entry && ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) { spin_lock_bh(&rt6_exception_lock); @@ -3788,7 +3882,7 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp) fib6_clean_all(net, fib6_remove_prefsrc, &adni); } -#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY) +#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT) /* Remove routers and update dst entries when gateway turn into host. 
*/ static int fib6_clean_tohost(struct fib6_info *rt, void *arg) @@ -3796,7 +3890,8 @@ static int fib6_clean_tohost(struct fib6_info *rt, void *arg) struct in6_addr *gateway = (struct in6_addr *)arg; if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) && - ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) { + rt->fib6_nh.fib_nh_gw_family && + ipv6_addr_equal(gateway, &rt->fib6_nh.fib_nh_gw6)) { return -1; } @@ -3817,7 +3912,7 @@ void rt6_clean_tohost(struct net *net, struct in6_addr *gateway) struct arg_netdev_event { const struct net_device *dev; union { - unsigned int nh_flags; + unsigned char nh_flags; unsigned long event; }; }; @@ -3844,9 +3939,9 @@ static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt) static bool rt6_is_dead(const struct fib6_info *rt) { - if (rt->fib6_nh.nh_flags & RTNH_F_DEAD || - (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN && - fib6_ignore_linkdown(rt))) + if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD || + (rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN && + ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev))) return true; return false; @@ -3858,11 +3953,11 @@ static int rt6_multipath_total_weight(const struct fib6_info *rt) int total = 0; if (!rt6_is_dead(rt)) - total += rt->fib6_nh.nh_weight; + total += rt->fib6_nh.fib_nh_weight; list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) { if (!rt6_is_dead(iter)) - total += iter->fib6_nh.nh_weight; + total += iter->fib6_nh.fib_nh_weight; } return total; @@ -3873,11 +3968,11 @@ static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total) int upper_bound = -1; if (!rt6_is_dead(rt)) { - *weight += rt->fib6_nh.nh_weight; + *weight += rt->fib6_nh.fib_nh_weight; upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31, total) - 1; } - atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound); + atomic_set(&rt->fib6_nh.fib_nh_upper_bound, upper_bound); } static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total) @@ -3920,8 +4015,9 @@ static int fib6_ifup(struct fib6_info *rt, void *p_arg) const struct arg_netdev_event *arg = p_arg; struct net *net = dev_net(arg->dev); - if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) { - rt->fib6_nh.nh_flags &= ~arg->nh_flags; + if (rt != net->ipv6.fib6_null_entry && + rt->fib6_nh.fib_nh_dev == arg->dev) { + rt->fib6_nh.fib_nh_flags &= ~arg->nh_flags; fib6_update_sernum_upto_root(net, rt); rt6_multipath_rebalance(rt); } @@ -3929,7 +4025,7 @@ static int fib6_ifup(struct fib6_info *rt, void *p_arg) return 0; } -void rt6_sync_up(struct net_device *dev, unsigned int nh_flags) +void rt6_sync_up(struct net_device *dev, unsigned char nh_flags) { struct arg_netdev_event arg = { .dev = dev, @@ -3949,10 +4045,10 @@ static bool rt6_multipath_uses_dev(const struct fib6_info *rt, { struct fib6_info *iter; - if (rt->fib6_nh.nh_dev == dev) + if (rt->fib6_nh.fib_nh_dev == dev) return true; list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) - if (iter->fib6_nh.nh_dev == dev) + if (iter->fib6_nh.fib_nh_dev == dev) return true; return false; @@ -3973,12 +4069,12 @@ static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt, struct fib6_info *iter; unsigned int dead = 0; - if (rt->fib6_nh.nh_dev == down_dev || - rt->fib6_nh.nh_flags & RTNH_F_DEAD) + if (rt->fib6_nh.fib_nh_dev == down_dev || + rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD) dead++; list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) - if (iter->fib6_nh.nh_dev == down_dev || - iter->fib6_nh.nh_flags & RTNH_F_DEAD) + if (iter->fib6_nh.fib_nh_dev == down_dev 
|| + iter->fib6_nh.fib_nh_flags & RTNH_F_DEAD) dead++; return dead; @@ -3986,15 +4082,15 @@ static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt, static void rt6_multipath_nh_flags_set(struct fib6_info *rt, const struct net_device *dev, - unsigned int nh_flags) + unsigned char nh_flags) { struct fib6_info *iter; - if (rt->fib6_nh.nh_dev == dev) - rt->fib6_nh.nh_flags |= nh_flags; + if (rt->fib6_nh.fib_nh_dev == dev) + rt->fib6_nh.fib_nh_flags |= nh_flags; list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) - if (iter->fib6_nh.nh_dev == dev) - iter->fib6_nh.nh_flags |= nh_flags; + if (iter->fib6_nh.fib_nh_dev == dev) + iter->fib6_nh.fib_nh_flags |= nh_flags; } /* called with write lock held for table with rt */ @@ -4009,12 +4105,12 @@ static int fib6_ifdown(struct fib6_info *rt, void *p_arg) switch (arg->event) { case NETDEV_UNREGISTER: - return rt->fib6_nh.nh_dev == dev ? -1 : 0; + return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0; case NETDEV_DOWN: if (rt->should_flush) return -1; if (!rt->fib6_nsiblings) - return rt->fib6_nh.nh_dev == dev ? -1 : 0; + return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0; if (rt6_multipath_uses_dev(rt, dev)) { unsigned int count; @@ -4030,10 +4126,10 @@ static int fib6_ifdown(struct fib6_info *rt, void *p_arg) } return -2; case NETDEV_CHANGE: - if (rt->fib6_nh.nh_dev != dev || + if (rt->fib6_nh.fib_nh_dev != dev || rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) break; - rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN; + rt->fib6_nh.fib_nh_flags |= RTNH_F_LINKDOWN; rt6_multipath_rebalance(rt); break; } @@ -4089,7 +4185,7 @@ static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg) Since RFC 1981 doesn't include administrative MTU increase update PMTU increase is a MUST. (i.e. jumbo frame) */ - if (rt->fib6_nh.nh_dev == arg->dev && + if (rt->fib6_nh.fib_nh_dev == arg->dev && !fib6_metric_locked(rt, RTAX_MTU)) { u32 mtu = rt->fib6_pmtu; @@ -4143,8 +4239,8 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, unsigned int pref; int err; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy, - extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv6_policy, extack); if (err < 0) goto errout; @@ -4380,7 +4476,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, goto cleanup; } - rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1; + rt->fib6_nh.fib_nh_weight = rtnh->rtnh_hops + 1; err = ip6_route_info_append(info->nl_net, &rt6_nh_list, rt, &r_cfg); @@ -4530,6 +4626,9 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) return err; + if (cfg.fc_metric == 0) + cfg.fc_metric = IP6_RT_PRIO_USER; + if (cfg.fc_mp) return ip6_route_multipath_add(&cfg, extack); else @@ -4544,7 +4643,7 @@ static size_t rt6_nlmsg_size(struct fib6_info *rt) nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */ + NLA_ALIGN(sizeof(struct rtnexthop)) + nla_total_size(16) /* RTA_GATEWAY */ - + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate); + + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws); nexthop_len *= rt->fib6_nsiblings; } @@ -4562,77 +4661,10 @@ static size_t rt6_nlmsg_size(struct fib6_info *rt) + nla_total_size(sizeof(struct rta_cacheinfo)) + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */ + nla_total_size(1) /* RTA_PREF */ - + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate) + + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws) + nexthop_len; } -static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt, - unsigned int *flags, bool skip_oif) -{ - if 
(rt->fib6_nh.nh_flags & RTNH_F_DEAD) - *flags |= RTNH_F_DEAD; - - if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) { - *flags |= RTNH_F_LINKDOWN; - - rcu_read_lock(); - if (fib6_ignore_linkdown(rt)) - *flags |= RTNH_F_DEAD; - rcu_read_unlock(); - } - - if (rt->fib6_flags & RTF_GATEWAY) { - if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0) - goto nla_put_failure; - } - - *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK); - if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD) - *flags |= RTNH_F_OFFLOAD; - - /* not needed for multipath encoding b/c it has a rtnexthop struct */ - if (!skip_oif && rt->fib6_nh.nh_dev && - nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex)) - goto nla_put_failure; - - if (rt->fib6_nh.nh_lwtstate && - lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0) - goto nla_put_failure; - - return 0; - -nla_put_failure: - return -EMSGSIZE; -} - -/* add multipath next hop */ -static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt) -{ - const struct net_device *dev = rt->fib6_nh.nh_dev; - struct rtnexthop *rtnh; - unsigned int flags = 0; - - rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); - if (!rtnh) - goto nla_put_failure; - - rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1; - rtnh->rtnh_ifindex = dev ? dev->ifindex : 0; - - if (rt6_nexthop_info(skb, rt, &flags, true) < 0) - goto nla_put_failure; - - rtnh->rtnh_flags = flags; - - /* length of rtnetlink header + attributes */ - rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh; - - return 0; - -nla_put_failure: - return -EMSGSIZE; -} - static int rt6_fill_node(struct net *net, struct sk_buff *skb, struct fib6_info *rt, struct dst_entry *dst, struct in6_addr *dest, struct in6_addr *src, @@ -4745,23 +4777,30 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, struct fib6_info *sibling, *next_sibling; struct nlattr *mp; - mp = nla_nest_start(skb, RTA_MULTIPATH); + mp = nla_nest_start_noflag(skb, RTA_MULTIPATH); if (!mp) goto nla_put_failure; - if (rt6_add_nexthop(skb, rt) < 0) + if (fib_add_nexthop(skb, &rt->fib6_nh.nh_common, + rt->fib6_nh.fib_nh_weight) < 0) goto nla_put_failure; list_for_each_entry_safe(sibling, next_sibling, &rt->fib6_siblings, fib6_siblings) { - if (rt6_add_nexthop(skb, sibling) < 0) + if (fib_add_nexthop(skb, &sibling->fib6_nh.nh_common, + sibling->fib6_nh.fib_nh_weight) < 0) goto nla_put_failure; } nla_nest_end(skb, mp); } else { - if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0) + unsigned char nh_flags = 0; + + if (fib_nexthop_info(skb, &rt->fib6_nh.nh_common, + &nh_flags, false) < 0) goto nla_put_failure; + + rtm->rtm_flags |= nh_flags; } if (rt6_flags & RTF_EXPIRES) { @@ -4787,7 +4826,7 @@ nla_put_failure: static bool fib6_info_uses_dev(const struct fib6_info *f6i, const struct net_device *dev) { - if (f6i->fib6_nh.nh_dev == dev) + if (f6i->fib6_nh.fib_nh_dev == dev) return true; if (f6i->fib6_nsiblings) { @@ -4795,7 +4834,7 @@ static bool fib6_info_uses_dev(const struct fib6_info *f6i, list_for_each_entry_safe(sibling, next_sibling, &f6i->fib6_siblings, fib6_siblings) { - if (sibling->fib6_nh.nh_dev == dev) + if (sibling->fib6_nh.fib_nh_dev == dev) return true; } } @@ -4847,8 +4886,8 @@ static int inet6_rtm_valid_getroute_req(struct sk_buff *skb, } if (!netlink_strict_get_check(skb)) - return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_ipv6_policy, extack); + return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv6_policy, extack); rtm = nlmsg_data(nlh); if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) || @@ -4864,8 +4903,8 @@ static int 
inet6_rtm_valid_getroute_req(struct sk_buff *skb, return -EINVAL; } - err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_ipv6_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv6_policy, extack); if (err) return err; @@ -5084,7 +5123,7 @@ static int ip6_route_dev_notify(struct notifier_block *this, return NOTIFY_OK; if (event == NETDEV_REGISTER) { - net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev; + net->ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = dev; net->ipv6.ip6_null_entry->dst.dev = dev; net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES @@ -5419,7 +5458,7 @@ void __init ip6_route_init_special_entries(void) /* Registering of the loopback is done before this portion of code, * the loopback reference in rt6_info will not be taken, do it * manually for init_net */ - init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev; + init_net.ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = init_net.loopback_dev; init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 9b2f272ca164..0c5479ef9b38 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -398,28 +398,28 @@ static struct pernet_operations ip6_segments_ops = { static const struct genl_ops seg6_genl_ops[] = { { .cmd = SEG6_CMD_SETHMAC, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = seg6_genl_sethmac, - .policy = seg6_genl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = SEG6_CMD_DUMPHMAC, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .start = seg6_genl_dumphmac_start, .dumpit = seg6_genl_dumphmac, .done = seg6_genl_dumphmac_done, - .policy = seg6_genl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = SEG6_CMD_SET_TUNSRC, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = seg6_genl_set_tunsrc, - .policy = seg6_genl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = SEG6_CMD_GET_TUNSRC, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = seg6_genl_get_tunsrc, - .policy = seg6_genl_policy, .flags = GENL_ADMIN_PERM, }, }; @@ -429,6 +429,7 @@ static struct genl_family seg6_genl_family __ro_after_init = { .name = SEG6_GENL_NAME, .version = SEG6_GENL_VERSION, .maxattr = SEG6_ATTR_MAX, + .policy = seg6_genl_policy, .netnsok = true, .parallel_ops = true, .ops = seg6_genl_ops, diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index ee5403cbe655..7a525fda8978 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -396,8 +396,8 @@ static int seg6_build_state(struct nlattr *nla, if (family != AF_INET && family != AF_INET6) return -EINVAL; - err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla, - seg6_iptunnel_policy, extack); + err = nla_parse_nested_deprecated(tb, SEG6_IPTUNNEL_MAX, nla, + seg6_iptunnel_policy, extack); if (err < 0) return err; diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 60325dbfe88b..78155fdb8c36 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -823,8 +823,9 @@ static int parse_nla_bpf(struct nlattr **attrs, struct seg6_local_lwt *slwt) int ret; u32 fd; - ret = nla_parse_nested(tb, SEG6_LOCAL_BPF_PROG_MAX, - attrs[SEG6_LOCAL_BPF], bpf_prog_policy, NULL); + ret = nla_parse_nested_deprecated(tb, SEG6_LOCAL_BPF_PROG_MAX, + attrs[SEG6_LOCAL_BPF], + bpf_prog_policy, NULL); if (ret < 0) return ret; @@ -853,7 +854,7 @@ 
static int put_nla_bpf(struct sk_buff *skb, struct seg6_local_lwt *slwt) if (!slwt->bpf.prog) return 0; - nest = nla_nest_start(skb, SEG6_LOCAL_BPF); + nest = nla_nest_start_noflag(skb, SEG6_LOCAL_BPF); if (!nest) return -EMSGSIZE; @@ -959,8 +960,8 @@ static int seg6_local_build_state(struct nlattr *nla, unsigned int family, if (family != AF_INET6) return -EINVAL; - err = nla_parse_nested(tb, SEG6_LOCAL_MAX, nla, seg6_local_policy, - extack); + err = nla_parse_nested_deprecated(tb, SEG6_LOCAL_MAX, nla, + seg6_local_policy, extack); if (err < 0) return err; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index b2109b74857d..971d60bf9640 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1084,7 +1084,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) if (!tdev && tunnel->parms.link) tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link); - if (tdev) { + if (tdev && !netif_is_l3_master(tdev)) { int t_hlen = tunnel->hlen + sizeof(struct iphdr); dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 44d431849d39..beaf28456301 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -43,6 +43,7 @@ #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/random.h> +#include <linux/indirect_call_wrapper.h> #include <net/tcp.h> #include <net/ndisc.h> @@ -90,6 +91,18 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, } #endif +/* Helper returning the inet6 address from a given tcp socket. + * It can be used in TCP stack instead of inet6_sk(sk). + * This avoids a dereference and allow compiler optimizations. + * It is a specialized version of inet6_sk_generic(). + */ +static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk) +{ + unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo); + + return (struct ipv6_pinfo *)(((u8 *)sk) + offset); +} + static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -99,7 +112,7 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) sk->sk_rx_dst = dst; inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; - inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt); + tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt); } } @@ -138,7 +151,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; struct inet_sock *inet = inet_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); - struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; struct ipv6_txoptions *opt; @@ -390,7 +403,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk->sk_state == TCP_CLOSE) goto out; - if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) { + if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) { __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); goto out; } @@ -405,7 +418,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - np = inet6_sk(sk); + np = tcp_inet6_sk(sk); if (type == NDISC_REDIRECT) { if (!sock_owned_by_user(sk)) { @@ -478,7 +491,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, enum tcp_synack_type synack_type) { struct inet_request_sock *ireq = inet_rsk(req); - struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct ipv6_txoptions 
*opt; struct flowi6 *fl6 = &fl->u.ip6; struct sk_buff *skb; @@ -737,7 +750,7 @@ static void tcp_v6_init_req(struct request_sock *req, { bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); struct inet_request_sock *ireq = inet_rsk(req); - const struct ipv6_pinfo *np = inet6_sk(sk_listener); + const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener); ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; @@ -1066,9 +1079,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * { struct inet_request_sock *ireq; struct ipv6_pinfo *newnp; - const struct ipv6_pinfo *np = inet6_sk(sk); + const struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct ipv6_txoptions *opt; - struct tcp6_sock *newtcp6sk; struct inet_sock *newinet; struct tcp_sock *newtp; struct sock *newsk; @@ -1088,11 +1100,10 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * if (!newsk) return NULL; - newtcp6sk = (struct tcp6_sock *)newsk; - inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; + inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); newinet = inet_sk(newsk); - newnp = inet6_sk(newsk); + newnp = tcp_inet6_sk(newsk); newtp = tcp_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); @@ -1156,12 +1167,11 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * ip6_dst_store(newsk, dst, NULL, NULL); inet6_sk_rx_dst_set(newsk, skb); - newtcp6sk = (struct tcp6_sock *)newsk; - inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; + inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); newtp = tcp_sk(newsk); newinet = inet_sk(newsk); - newnp = inet6_sk(newsk); + newnp = tcp_inet6_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); @@ -1276,9 +1286,9 @@ out: */ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) { - struct ipv6_pinfo *np = inet6_sk(sk); - struct tcp_sock *tp; + struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct sk_buff *opt_skb = NULL; + struct tcp_sock *tp; /* Imagine: socket is IPv6. IPv4 packet arrives, goes to IPv4 receive handler and backlogged. 
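The tcp_inet6_sk() helper added in the hunk above works because ipv6_pinfo sits at a fixed, compile-time-known offset from the start of a TCP6 socket, so the usual pointer chase through inet_sk(sk)->pinet6 collapses into a constant address computation. Below is a minimal user-space sketch of the same layout trick; the struct definitions are toy stand-ins for the kernel ones, with made-up padding fields:

#include <stdio.h>
#include <stddef.h>

/* Toy stand-ins for the kernel structures; layouts are illustrative only. */
struct ipv6_pinfo {
	int min_hopcount;
	unsigned int rx_dst_cookie;
};

struct sock {
	long pad[4];
	struct ipv6_pinfo *pinet6;	/* the pointer inet6_sk() would load */
};

struct tcp6_sock {
	struct sock sk;			/* must be the first member */
	long tcp_state[8];
	struct ipv6_pinfo inet6;	/* must be the last member */
};

/* Fixed-offset variant: base pointer plus a constant, no dependent load. */
static struct ipv6_pinfo *tcp_inet6_sk(struct sock *sk)
{
	size_t offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

	return (struct ipv6_pinfo *)((char *)sk + offset);
}

int main(void)
{
	struct tcp6_sock ts = { .inet6 = { .min_hopcount = 3 } };

	ts.sk.pinet6 = &ts.inet6;	/* the indirect path the patch avoids */
	printf("%d\n", tcp_inet6_sk(&ts.sk)->min_hopcount);	/* prints 3 */
	return 0;
}

Because the offset is a compile-time constant, the compiler can emit base-plus-immediate addressing instead of first loading pinet6 from memory, which is the "avoids a dereference" optimization the patch comment describes.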
@@ -1426,8 +1436,9 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, skb->tstamp || skb_hwtstamps(skb)->hwtstamp; } -static int tcp_v6_rcv(struct sk_buff *skb) +INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) { + struct sk_buff *skb_to_free; int sdif = inet6_sdif(skb); const struct tcphdr *th; const struct ipv6hdr *hdr; @@ -1524,7 +1535,7 @@ process: return 0; } } - if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) { + if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) { __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); goto discard_and_relse; } @@ -1554,12 +1565,17 @@ process: tcp_segs_in(tcp_sk(sk), skb); ret = 0; if (!sock_owned_by_user(sk)) { + skb_to_free = sk->sk_rx_skb_cache; + sk->sk_rx_skb_cache = NULL; ret = tcp_v6_do_rcv(sk, skb); - } else if (tcp_add_backlog(sk, skb)) { - goto discard_and_relse; + } else { + if (tcp_add_backlog(sk, skb)) + goto discard_and_relse; + skb_to_free = NULL; } bh_unlock_sock(sk); - + if (skb_to_free) + __kfree_skb(skb_to_free); put_and_return: if (refcounted) sock_put(sk); @@ -1639,7 +1655,7 @@ do_time_wait: goto discard_it; } -static void tcp_v6_early_demux(struct sk_buff *skb) +INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb) { const struct ipv6hdr *hdr; const struct tcphdr *th; @@ -1669,7 +1685,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb) struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst); if (dst) - dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie); + dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie); if (dst && inet_sk(sk)->rx_dst_ifindex == skb->skb_iif) skb_dst_set_noref(skb, dst); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 622eeaf5732b..07fa579dfb96 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -36,6 +36,7 @@ #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/uaccess.h> +#include <linux/indirect_call_wrapper.h> #include <net/addrconf.h> #include <net/ndisc.h> @@ -285,8 +286,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; unsigned int ulen, copied; - int peeked, peeking, off; - int err; + int off, err, peeking = flags & MSG_PEEK; int is_udplite = IS_UDPLITE(sk); struct udp_mib __percpu *mib; bool checksum_valid = false; @@ -299,9 +299,8 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, return ipv6_recv_rxpmtu(sk, msg, len, addr_len); try_again: - peeking = flags & MSG_PEEK; off = sk_peek_offset(sk, flags); - skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err); + skb = __skb_recv_udp(sk, flags, noblock, &off, &err); if (!skb) return err; @@ -340,14 +339,14 @@ try_again: goto csum_copy_err; } if (unlikely(err)) { - if (!peeked) { + if (!peeking) { atomic_inc(&sk->sk_drops); SNMP_INC_STATS(mib, UDP_MIB_INERRORS); } kfree_skb(skb); return err; } - if (!peeked) + if (!peeking) SNMP_INC_STATS(mib, UDP_MIB_INDATAGRAMS); sock_recv_ts_and_drops(msg, sk, skb); @@ -982,7 +981,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net, return NULL; } -static void udp_v6_early_demux(struct sk_buff *skb) +INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); const struct udphdr *uh; @@ -1023,7 +1022,7 @@ static void udp_v6_early_demux(struct sk_buff *skb) } } -static __inline__ int udpv6_rcv(struct sk_buff *skb) +INDIRECT_CALLABLE_SCOPE int udpv6_rcv(struct sk_buff *skb) { return __udp6_lib_rcv(skb, &udp_table, IPPROTO_UDP); } diff --git a/net/ipv6/xfrm6_mode_beet.c 
b/net/ipv6/xfrm6_mode_beet.c deleted file mode 100644 index 57fd314ec2b8..000000000000 --- a/net/ipv6/xfrm6_mode_beet.c +++ /dev/null @@ -1,131 +0,0 @@ -/* - * xfrm6_mode_beet.c - BEET mode encapsulation for IPv6. - * - * Copyright (c) 2006 Diego Beltrami <diego.beltrami@gmail.com> - * Miika Komu <miika@iki.fi> - * Herbert Xu <herbert@gondor.apana.org.au> - * Abhinav Pathak <abhinav.pathak@hiit.fi> - * Jeff Ahrenholz <ahrenholz@gmail.com> - */ - -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/stringify.h> -#include <net/dsfield.h> -#include <net/dst.h> -#include <net/inet_ecn.h> -#include <net/ipv6.h> -#include <net/xfrm.h> - -static void xfrm6_beet_make_header(struct sk_buff *skb) -{ - struct ipv6hdr *iph = ipv6_hdr(skb); - - iph->version = 6; - - memcpy(iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl, - sizeof(iph->flow_lbl)); - iph->nexthdr = XFRM_MODE_SKB_CB(skb)->protocol; - - ipv6_change_dsfield(iph, 0, XFRM_MODE_SKB_CB(skb)->tos); - iph->hop_limit = XFRM_MODE_SKB_CB(skb)->ttl; -} - -/* Add encapsulation header. - * - * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt. - */ -static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb) -{ - struct ipv6hdr *top_iph; - struct ip_beet_phdr *ph; - int optlen, hdr_len; - - hdr_len = 0; - optlen = XFRM_MODE_SKB_CB(skb)->optlen; - if (unlikely(optlen)) - hdr_len += IPV4_BEET_PHMAXLEN - (optlen & 4); - - skb_set_network_header(skb, -x->props.header_len - hdr_len); - if (x->sel.family != AF_INET6) - skb->network_header += IPV4_BEET_PHMAXLEN; - skb->mac_header = skb->network_header + - offsetof(struct ipv6hdr, nexthdr); - skb->transport_header = skb->network_header + sizeof(*top_iph); - ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdr_len); - - xfrm6_beet_make_header(skb); - - top_iph = ipv6_hdr(skb); - if (unlikely(optlen)) { - - BUG_ON(optlen < 0); - - ph->padlen = 4 - (optlen & 4); - ph->hdrlen = optlen / 8; - ph->nexthdr = top_iph->nexthdr; - if (ph->padlen) - memset(ph + 1, IPOPT_NOP, ph->padlen); - - top_iph->nexthdr = IPPROTO_BEETPH; - } - - top_iph->saddr = *(struct in6_addr *)&x->props.saddr; - top_iph->daddr = *(struct in6_addr *)&x->id.daddr; - return 0; -} - -static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb) -{ - struct ipv6hdr *ip6h; - int size = sizeof(struct ipv6hdr); - int err; - - err = skb_cow_head(skb, size + skb->mac_len); - if (err) - goto out; - - __skb_push(skb, size); - skb_reset_network_header(skb); - skb_mac_header_rebuild(skb); - - xfrm6_beet_make_header(skb); - - ip6h = ipv6_hdr(skb); - ip6h->payload_len = htons(skb->len - size); - ip6h->daddr = x->sel.daddr.in6; - ip6h->saddr = x->sel.saddr.in6; - err = 0; -out: - return err; -} - -static struct xfrm_mode xfrm6_beet_mode = { - .input2 = xfrm6_beet_input, - .input = xfrm_prepare_input, - .output2 = xfrm6_beet_output, - .output = xfrm6_prepare_output, - .owner = THIS_MODULE, - .encap = XFRM_MODE_BEET, - .flags = XFRM_MODE_FLAG_TUNNEL, -}; - -static int __init xfrm6_beet_init(void) -{ - return xfrm_register_mode(&xfrm6_beet_mode, AF_INET6); -} - -static void __exit xfrm6_beet_exit(void) -{ - int err; - - err = xfrm_unregister_mode(&xfrm6_beet_mode, AF_INET6); - BUG_ON(err); -} - -module_init(xfrm6_beet_init); -module_exit(xfrm6_beet_exit); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_BEET); diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c deleted file mode 100644 index 
da28e4407b8f..000000000000 --- a/net/ipv6/xfrm6_mode_ro.c +++ /dev/null @@ -1,85 +0,0 @@ -/* - * xfrm6_mode_ro.c - Route optimization mode for IPv6. - * - * Copyright (C)2003-2006 Helsinki University of Technology - * Copyright (C)2003-2006 USAGI/WIDE Project - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. - */ -/* - * Authors: - * Noriaki TAKAMIYA @USAGI - * Masahide NAKAMURA @USAGI - */ - -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/spinlock.h> -#include <linux/stringify.h> -#include <linux/time.h> -#include <net/ipv6.h> -#include <net/xfrm.h> - -/* Add route optimization header space. - * - * The IP header and mutable extension headers will be moved forward to make - * space for the route optimization header. - */ -static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb) -{ - struct ipv6hdr *iph; - u8 *prevhdr; - int hdr_len; - - iph = ipv6_hdr(skb); - - hdr_len = x->type->hdr_offset(x, skb, &prevhdr); - if (hdr_len < 0) - return hdr_len; - skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data); - skb_set_network_header(skb, -x->props.header_len); - skb->transport_header = skb->network_header + hdr_len; - __skb_pull(skb, hdr_len); - memmove(ipv6_hdr(skb), iph, hdr_len); - - x->lastused = ktime_get_real_seconds(); - - return 0; -} - -static struct xfrm_mode xfrm6_ro_mode = { - .output = xfrm6_ro_output, - .owner = THIS_MODULE, - .encap = XFRM_MODE_ROUTEOPTIMIZATION, -}; - -static int __init xfrm6_ro_init(void) -{ - return xfrm_register_mode(&xfrm6_ro_mode, AF_INET6); -} - -static void __exit xfrm6_ro_exit(void) -{ - int err; - - err = xfrm_unregister_mode(&xfrm6_ro_mode, AF_INET6); - BUG_ON(err); -} - -module_init(xfrm6_ro_init); -module_exit(xfrm6_ro_exit); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_ROUTEOPTIMIZATION); diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c deleted file mode 100644 index 3c29da5defe6..000000000000 --- a/net/ipv6/xfrm6_mode_transport.c +++ /dev/null @@ -1,121 +0,0 @@ -/* - * xfrm6_mode_transport.c - Transport mode encapsulation for IPv6. - * - * Copyright (C) 2002 USAGI/WIDE Project - * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au> - */ - -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/stringify.h> -#include <net/dst.h> -#include <net/ipv6.h> -#include <net/xfrm.h> -#include <net/protocol.h> - -/* Add encapsulation header. - * - * The IP header and mutable extension headers will be moved forward to make - * space for the encapsulation header. 
- */ -static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) -{ - struct ipv6hdr *iph; - u8 *prevhdr; - int hdr_len; - - iph = ipv6_hdr(skb); - skb_set_inner_transport_header(skb, skb_transport_offset(skb)); - - hdr_len = x->type->hdr_offset(x, skb, &prevhdr); - if (hdr_len < 0) - return hdr_len; - skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data); - skb_set_network_header(skb, -x->props.header_len); - skb->transport_header = skb->network_header + hdr_len; - __skb_pull(skb, hdr_len); - memmove(ipv6_hdr(skb), iph, hdr_len); - return 0; -} - -/* Remove encapsulation header. - * - * The IP header will be moved over the top of the encapsulation header. - * - * On entry, skb->h shall point to where the IP header should be and skb->nh - * shall be set to where the IP header currently is. skb->data shall point - * to the start of the payload. - */ -static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) -{ - int ihl = skb->data - skb_transport_header(skb); - - if (skb->transport_header != skb->network_header) { - memmove(skb_transport_header(skb), - skb_network_header(skb), ihl); - skb->network_header = skb->transport_header; - } - ipv6_hdr(skb)->payload_len = htons(skb->len + ihl - - sizeof(struct ipv6hdr)); - skb_reset_transport_header(skb); - return 0; -} - -static struct sk_buff *xfrm4_transport_gso_segment(struct xfrm_state *x, - struct sk_buff *skb, - netdev_features_t features) -{ - const struct net_offload *ops; - struct sk_buff *segs = ERR_PTR(-EINVAL); - struct xfrm_offload *xo = xfrm_offload(skb); - - skb->transport_header += x->props.header_len; - ops = rcu_dereference(inet6_offloads[xo->proto]); - if (likely(ops && ops->callbacks.gso_segment)) - segs = ops->callbacks.gso_segment(skb, features); - - return segs; -} - -static void xfrm6_transport_xmit(struct xfrm_state *x, struct sk_buff *skb) -{ - struct xfrm_offload *xo = xfrm_offload(skb); - - skb_reset_mac_len(skb); - pskb_pull(skb, skb->mac_len + sizeof(struct ipv6hdr) + x->props.header_len); - - if (xo->flags & XFRM_GSO_SEGMENT) { - skb_reset_transport_header(skb); - skb->transport_header -= x->props.header_len; - } -} - - -static struct xfrm_mode xfrm6_transport_mode = { - .input = xfrm6_transport_input, - .output = xfrm6_transport_output, - .gso_segment = xfrm4_transport_gso_segment, - .xmit = xfrm6_transport_xmit, - .owner = THIS_MODULE, - .encap = XFRM_MODE_TRANSPORT, -}; - -static int __init xfrm6_transport_init(void) -{ - return xfrm_register_mode(&xfrm6_transport_mode, AF_INET6); -} - -static void __exit xfrm6_transport_exit(void) -{ - int err; - - err = xfrm_unregister_mode(&xfrm6_transport_mode, AF_INET6); - BUG_ON(err); -} - -module_init(xfrm6_transport_init); -module_exit(xfrm6_transport_exit); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_TRANSPORT); diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c deleted file mode 100644 index de1b0b8c53b0..000000000000 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ /dev/null @@ -1,151 +0,0 @@ -/* - * xfrm6_mode_tunnel.c - Tunnel mode encapsulation for IPv6. 
- * - * Copyright (C) 2002 USAGI/WIDE Project - * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au> - */ - -#include <linux/gfp.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/stringify.h> -#include <net/dsfield.h> -#include <net/dst.h> -#include <net/inet_ecn.h> -#include <net/ip6_route.h> -#include <net/ipv6.h> -#include <net/xfrm.h> - -static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) -{ - struct ipv6hdr *inner_iph = ipipv6_hdr(skb); - - if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos)) - IP6_ECN_set_ce(skb, inner_iph); -} - -/* Add encapsulation header. - * - * The top IP header will be constructed per RFC 2401. - */ -static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) -{ - struct dst_entry *dst = skb_dst(skb); - struct ipv6hdr *top_iph; - int dsfield; - - skb_set_inner_network_header(skb, skb_network_offset(skb)); - skb_set_inner_transport_header(skb, skb_transport_offset(skb)); - - skb_set_network_header(skb, -x->props.header_len); - skb->mac_header = skb->network_header + - offsetof(struct ipv6hdr, nexthdr); - skb->transport_header = skb->network_header + sizeof(*top_iph); - top_iph = ipv6_hdr(skb); - - top_iph->version = 6; - - memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl, - sizeof(top_iph->flow_lbl)); - top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family); - - if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP) - dsfield = 0; - else - dsfield = XFRM_MODE_SKB_CB(skb)->tos; - dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos); - if (x->props.flags & XFRM_STATE_NOECN) - dsfield &= ~INET_ECN_MASK; - ipv6_change_dsfield(top_iph, 0, dsfield); - top_iph->hop_limit = ip6_dst_hoplimit(xfrm_dst_child(dst)); - top_iph->saddr = *(struct in6_addr *)&x->props.saddr; - top_iph->daddr = *(struct in6_addr *)&x->id.daddr; - return 0; -} - -#define for_each_input_rcu(head, handler) \ - for (handler = rcu_dereference(head); \ - handler != NULL; \ - handler = rcu_dereference(handler->next)) - - -static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) -{ - int err = -EINVAL; - - if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6) - goto out; - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) - goto out; - - err = skb_unclone(skb, GFP_ATOMIC); - if (err) - goto out; - - if (x->props.flags & XFRM_STATE_DECAP_DSCP) - ipv6_copy_dscp(ipv6_get_dsfield(ipv6_hdr(skb)), - ipipv6_hdr(skb)); - if (!(x->props.flags & XFRM_STATE_NOECN)) - ipip6_ecn_decapsulate(skb); - - skb_reset_network_header(skb); - skb_mac_header_rebuild(skb); - if (skb->mac_len) - eth_hdr(skb)->h_proto = skb->protocol; - - err = 0; - -out: - return err; -} - -static struct sk_buff *xfrm6_mode_tunnel_gso_segment(struct xfrm_state *x, - struct sk_buff *skb, - netdev_features_t features) -{ - __skb_push(skb, skb->mac_len); - return skb_mac_gso_segment(skb, features); -} - -static void xfrm6_mode_tunnel_xmit(struct xfrm_state *x, struct sk_buff *skb) -{ - struct xfrm_offload *xo = xfrm_offload(skb); - - if (xo->flags & XFRM_GSO_SEGMENT) - skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); - - skb_reset_mac_len(skb); - pskb_pull(skb, skb->mac_len + x->props.header_len); -} - -static struct xfrm_mode xfrm6_tunnel_mode = { - .input2 = xfrm6_mode_tunnel_input, - .input = xfrm_prepare_input, - .output2 = xfrm6_mode_tunnel_output, - .output = xfrm6_prepare_output, - .gso_segment = xfrm6_mode_tunnel_gso_segment, - .xmit = 
xfrm6_mode_tunnel_xmit, - .owner = THIS_MODULE, - .encap = XFRM_MODE_TUNNEL, - .flags = XFRM_MODE_FLAG_TUNNEL, -}; - -static int __init xfrm6_mode_tunnel_init(void) -{ - return xfrm_register_mode(&xfrm6_tunnel_mode, AF_INET6); -} - -static void __exit xfrm6_mode_tunnel_exit(void) -{ - int err; - - err = xfrm_unregister_mode(&xfrm6_tunnel_mode, AF_INET6); - BUG_ON(err); -} - -module_init(xfrm6_mode_tunnel_init); -module_exit(xfrm6_mode_tunnel_exit); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_TUNNEL); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 6a74080005cf..8ad5e54eb8ca 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -111,21 +111,6 @@ int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb) return xfrm6_extract_header(skb); } -int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) -{ - int err; - - err = xfrm_inner_extract_output(x, skb); - if (err) - return err; - - skb->ignore_df = 1; - skb->protocol = htons(ETH_P_IPV6); - - return x->outer_mode->output2(x, skb); -} -EXPORT_SYMBOL(xfrm6_prepare_output); - int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb) { memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); @@ -137,11 +122,28 @@ int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb) return xfrm_output(sk, skb); } +static int __xfrm6_output_state_finish(struct xfrm_state *x, struct sock *sk, + struct sk_buff *skb) +{ + const struct xfrm_state_afinfo *afinfo; + int ret = -EAFNOSUPPORT; + + rcu_read_lock(); + afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family); + if (likely(afinfo)) + ret = afinfo->output_finish(sk, skb); + else + kfree_skb(skb); + rcu_read_unlock(); + + return ret; +} + static int __xfrm6_output_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { struct xfrm_state *x = skb_dst(skb)->xfrm; - return x->outer_mode->afinfo->output_finish(sk, skb); + return __xfrm6_output_state_finish(x, sk, skb); } static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) @@ -183,7 +185,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) __xfrm6_output_finish); skip_frag: - return x->outer_mode->afinfo->output_finish(sk, skb); + return __xfrm6_output_state_finish(x, sk, skb); } int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 769f8f78d3b8..699e0730ce8e 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -22,9 +22,6 @@ #include <net/ipv6.h> #include <net/ip6_route.h> #include <net/l3mdev.h> -#if IS_ENABLED(CONFIG_IPV6_MIP6) -#include <net/mip6.h> -#endif static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, const xfrm_address_t *saddr, @@ -71,24 +68,6 @@ static int xfrm6_get_saddr(struct net *net, int oif, return 0; } -static int xfrm6_get_tos(const struct flowi *fl) -{ - return 0; -} - -static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, - int nfheader_len) -{ - if (dst->ops->family == AF_INET6) { - struct rt6_info *rt = (struct rt6_info *)dst; - path->path_cookie = rt6_get_cookie(rt); - } - - path->u.rt6.rt6i_nfheader_len = nfheader_len; - - return 0; -} - static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, const struct flowi *fl) { @@ -118,108 +97,6 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, return 0; } -static inline void -_decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) -{ - struct flowi6 
*fl6 = &fl->u.ip6; - int onlyproto = 0; - const struct ipv6hdr *hdr = ipv6_hdr(skb); - u32 offset = sizeof(*hdr); - struct ipv6_opt_hdr *exthdr; - const unsigned char *nh = skb_network_header(skb); - u16 nhoff = IP6CB(skb)->nhoff; - int oif = 0; - u8 nexthdr; - - if (!nhoff) - nhoff = offsetof(struct ipv6hdr, nexthdr); - - nexthdr = nh[nhoff]; - - if (skb_dst(skb)) - oif = skb_dst(skb)->dev->ifindex; - - memset(fl6, 0, sizeof(struct flowi6)); - fl6->flowi6_mark = skb->mark; - fl6->flowi6_oif = reverse ? skb->skb_iif : oif; - - fl6->daddr = reverse ? hdr->saddr : hdr->daddr; - fl6->saddr = reverse ? hdr->daddr : hdr->saddr; - - while (nh + offset + sizeof(*exthdr) < skb->data || - pskb_may_pull(skb, nh + offset + sizeof(*exthdr) - skb->data)) { - nh = skb_network_header(skb); - exthdr = (struct ipv6_opt_hdr *)(nh + offset); - - switch (nexthdr) { - case NEXTHDR_FRAGMENT: - onlyproto = 1; - /* fall through */ - case NEXTHDR_ROUTING: - case NEXTHDR_HOP: - case NEXTHDR_DEST: - offset += ipv6_optlen(exthdr); - nexthdr = exthdr->nexthdr; - exthdr = (struct ipv6_opt_hdr *)(nh + offset); - break; - - case IPPROTO_UDP: - case IPPROTO_UDPLITE: - case IPPROTO_TCP: - case IPPROTO_SCTP: - case IPPROTO_DCCP: - if (!onlyproto && (nh + offset + 4 < skb->data || - pskb_may_pull(skb, nh + offset + 4 - skb->data))) { - __be16 *ports; - - nh = skb_network_header(skb); - ports = (__be16 *)(nh + offset); - fl6->fl6_sport = ports[!!reverse]; - fl6->fl6_dport = ports[!reverse]; - } - fl6->flowi6_proto = nexthdr; - return; - - case IPPROTO_ICMPV6: - if (!onlyproto && (nh + offset + 2 < skb->data || - pskb_may_pull(skb, nh + offset + 2 - skb->data))) { - u8 *icmp; - - nh = skb_network_header(skb); - icmp = (u8 *)(nh + offset); - fl6->fl6_icmp_type = icmp[0]; - fl6->fl6_icmp_code = icmp[1]; - } - fl6->flowi6_proto = nexthdr; - return; - -#if IS_ENABLED(CONFIG_IPV6_MIP6) - case IPPROTO_MH: - offset += ipv6_optlen(exthdr); - if (!onlyproto && (nh + offset + 3 < skb->data || - pskb_may_pull(skb, nh + offset + 3 - skb->data))) { - struct ip6_mh *mh; - - nh = skb_network_header(skb); - mh = (struct ip6_mh *)(nh + offset); - fl6->fl6_mh_type = mh->ip6mh_type; - } - fl6->flowi6_proto = nexthdr; - return; -#endif - - /* XXX Why are there these headers? 
*/ - case IPPROTO_AH: - case IPPROTO_ESP: - case IPPROTO_COMP: - default: - fl6->fl6_ipsec_spi = 0; - fl6->flowi6_proto = nexthdr; - return; - } - } -} - static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu) { @@ -291,9 +168,6 @@ static const struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .dst_ops = &xfrm6_dst_ops_template, .dst_lookup = xfrm6_dst_lookup, .get_saddr = xfrm6_get_saddr, - .decode_session = _decode_session6, - .get_tos = xfrm6_get_tos, - .init_path = xfrm6_init_path, .fill_dst = xfrm6_fill_dst, .blackhole_route = ip6_blackhole_route, }; diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c index cc979b702c89..aaacac7fdbce 100644 --- a/net/ipv6/xfrm6_protocol.c +++ b/net/ipv6/xfrm6_protocol.c @@ -46,7 +46,7 @@ static inline struct xfrm6_protocol __rcu **proto_handlers(u8 protocol) handler != NULL; \ handler = rcu_dereference(handler->next)) \ -int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err) +static int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err) { int ret; struct xfrm6_protocol *handler; @@ -61,7 +61,6 @@ int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err) return 0; } -EXPORT_SYMBOL(xfrm6_rcv_cb); static int xfrm6_esp_rcv(struct sk_buff *skb) { diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 52b5a2797c0c..e4dec03a19fe 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1735,7 +1735,8 @@ static __net_exit void l2tp_exit_net(struct net *net) } rcu_read_unlock_bh(); - flush_workqueue(l2tp_wq); + if (l2tp_wq) + flush_workqueue(l2tp_wq); rcu_barrier(); for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index d4c60523c549..2cac910c1cd4 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -618,6 +618,7 @@ static const struct proto_ops l2tp_ip_ops = { .getname = l2tp_ip_getname, .poll = datagram_poll, .ioctl = inet_ioctl, + .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 37a69df17cab..4ec546cc1dd6 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -752,6 +752,7 @@ static const struct proto_ops l2tp_ip6_ops = { .getname = l2tp_ip6_getname, .poll = datagram_poll, .ioctl = inet6_ioctl, + .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index edbd5d1fbcde..6acc7f869b0c 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -345,7 +345,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla nla_put_u16(skb, L2TP_ATTR_ENCAP_TYPE, tunnel->encap)) goto nla_put_failure; - nest = nla_nest_start(skb, L2TP_ATTR_STATS); + nest = nla_nest_start_noflag(skb, L2TP_ATTR_STATS); if (nest == NULL) goto nla_put_failure; @@ -742,7 +742,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl session->reorder_timeout, L2TP_ATTR_PAD))) goto nla_put_failure; - nest = nla_nest_start(skb, L2TP_ATTR_STATS); + nest = nla_nest_start_noflag(skb, L2TP_ATTR_STATS); if (nest == NULL) goto nla_put_failure; @@ -915,58 +915,58 @@ static const struct nla_policy l2tp_nl_policy[L2TP_ATTR_MAX + 1] = { static const struct genl_ops l2tp_nl_ops[] = { { .cmd = L2TP_CMD_NOOP, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_noop, - .policy = l2tp_nl_policy, /* can be 
retrieved by unprivileged users */ }, { .cmd = L2TP_CMD_TUNNEL_CREATE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_tunnel_create, - .policy = l2tp_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = L2TP_CMD_TUNNEL_DELETE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_tunnel_delete, - .policy = l2tp_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = L2TP_CMD_TUNNEL_MODIFY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_tunnel_modify, - .policy = l2tp_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = L2TP_CMD_TUNNEL_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_tunnel_get, .dumpit = l2tp_nl_cmd_tunnel_dump, - .policy = l2tp_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = L2TP_CMD_SESSION_CREATE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_session_create, - .policy = l2tp_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = L2TP_CMD_SESSION_DELETE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_session_delete, - .policy = l2tp_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = L2TP_CMD_SESSION_MODIFY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_session_modify, - .policy = l2tp_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = L2TP_CMD_SESSION_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_session_get, .dumpit = l2tp_nl_cmd_session_dump, - .policy = l2tp_nl_policy, .flags = GENL_ADMIN_PERM, }, }; @@ -976,6 +976,7 @@ static struct genl_family l2tp_nl_family __ro_after_init = { .version = L2TP_GENL_VERSION, .hdrsize = 0, .maxattr = L2TP_ATTR_MAX, + .policy = l2tp_nl_policy, .netnsok = true, .module = THIS_MODULE, .ops = l2tp_nl_ops, diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 04d9946dcdba..f36cae785e82 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1070,7 +1070,6 @@ static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd, { struct pppol2tp_ioc_stats stats; struct l2tp_session *session; - int val; switch (cmd) { case PPPIOCGMRU: @@ -1097,7 +1096,7 @@ static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd, if (!session->session_id && !session->peer_session_id) return -ENOSYS; - if (get_user(val, (int __user *)arg)) + if (!access_ok((int __user *)arg, sizeof(int))) return -EFAULT; break; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 09dd1c2860fc..52e6a091b7e4 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -351,6 +351,36 @@ static int ieee80211_set_noack_map(struct wiphy *wiphy, return 0; } +static int ieee80211_set_tx(struct ieee80211_sub_if_data *sdata, + const u8 *mac_addr, u8 key_idx) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_key *key; + struct sta_info *sta; + int ret = -EINVAL; + + if (!wiphy_ext_feature_isset(local->hw.wiphy, + NL80211_EXT_FEATURE_EXT_KEY_ID)) + return -EINVAL; + + sta = sta_info_get_bss(sdata, mac_addr); + + if (!sta) + return -EINVAL; + + if (sta->ptk_idx == key_idx) + return 0; + + mutex_lock(&local->key_mtx); + key = key_mtx_dereference(local, sta->ptk[key_idx]); + + if (key && key->conf.flags & IEEE80211_KEY_FLAG_NO_AUTO_TX) + ret = ieee80211_set_tx_key(key); + + mutex_unlock(&local->key_mtx); + return ret; +} + static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, u8 key_idx, bool pairwise, const u8 *mac_addr, struct 
key_params *params) @@ -365,6 +395,9 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, if (!ieee80211_sdata_running(sdata)) return -ENETDOWN; + if (pairwise && params->mode == NL80211_KEY_SET_TX) + return ieee80211_set_tx(sdata, mac_addr, key_idx); + /* reject WEP and TKIP keys if WEP failed to initialize */ switch (params->cipher) { case WLAN_CIPHER_SUITE_WEP40: @@ -396,6 +429,9 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, if (pairwise) key->conf.flags |= IEEE80211_KEY_FLAG_PAIRWISE; + if (params->mode == NL80211_KEY_NO_TX) + key->conf.flags |= IEEE80211_KEY_FLAG_NO_AUTO_TX; + mutex_lock(&local->sta_mtx); if (mac_addr) { @@ -1421,6 +1457,15 @@ static int sta_apply_parameters(struct ieee80211_local *local, if (params->listen_interval >= 0) sta->listen_interval = params->listen_interval; + if (params->sta_modify_mask & STATION_PARAM_APPLY_STA_TXPOWER) { + sta->sta.txpwr.type = params->txpwr.type; + if (params->txpwr.type == NL80211_TX_POWER_LIMITED) + sta->sta.txpwr.power = params->txpwr.power; + ret = drv_sta_set_txpwr(local, sdata, sta); + if (ret) + return ret; + } + if (params->supported_rates) { ieee80211_parse_bitrates(&sdata->vif.bss_conf.chandef, sband, params->supported_rates, @@ -3990,4 +4035,5 @@ const struct cfg80211_ops mac80211_config_ops = { .get_ftm_responder_stats = ieee80211_get_ftm_responder_stats, .start_pmsr = ieee80211_start_pmsr, .abort_pmsr = ieee80211_abort_pmsr, + .probe_mesh_link = ieee80211_probe_mesh_link, }; diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 2d43bc127043..0d462206eef6 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -150,6 +150,58 @@ static const struct file_operations aqm_ops = { .llseek = default_llseek, }; +static ssize_t force_tx_status_read(struct file *file, + char __user *user_buf, + size_t count, + loff_t *ppos) +{ + struct ieee80211_local *local = file->private_data; + char buf[3]; + int len = 0; + + len = scnprintf(buf, sizeof(buf), "%d\n", (int)local->force_tx_status); + + return simple_read_from_buffer(user_buf, count, ppos, + buf, len); +} + +static ssize_t force_tx_status_write(struct file *file, + const char __user *user_buf, + size_t count, + loff_t *ppos) +{ + struct ieee80211_local *local = file->private_data; + char buf[3]; + size_t len; + + if (count > sizeof(buf)) + return -EINVAL; + + if (copy_from_user(buf, user_buf, count)) + return -EFAULT; + + buf[sizeof(buf) - 1] = '\0'; + len = strlen(buf); + if (len > 0 && buf[len - 1] == '\n') + buf[len - 1] = 0; + + if (buf[0] == '0' && buf[1] == '\0') + local->force_tx_status = 0; + else if (buf[0] == '1' && buf[1] == '\0') + local->force_tx_status = 1; + else + return -EINVAL; + + return count; +} + +static const struct file_operations force_tx_status_ops = { + .write = force_tx_status_write, + .read = force_tx_status_read, + .open = simple_open, + .llseek = default_llseek, +}; + #ifdef CONFIG_PM static ssize_t reset_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) @@ -221,6 +273,7 @@ static const char *hw_flag_names[] = { FLAG(TX_STATUS_NO_AMPDU_LEN), FLAG(SUPPORTS_MULTI_BSSID), FLAG(SUPPORTS_ONLY_HE_MULTI_BSSID), + FLAG(EXT_KEY_ID_NATIVE), #undef FLAG }; @@ -382,6 +435,7 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_ADD(hwflags); DEBUGFS_ADD(user_power); DEBUGFS_ADD(power); + DEBUGFS_ADD_MODE(force_tx_status, 0600); if (local->ops->wake_tx_queue) DEBUGFS_ADD_MODE(aqm, 0600); diff --git a/net/mac80211/driver-ops.c 
b/net/mac80211/driver-ops.c index bb886e7db47f..839c0022a29c 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -138,6 +138,27 @@ int drv_sta_state(struct ieee80211_local *local, return ret; } +__must_check +int drv_sta_set_txpwr(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct sta_info *sta) +{ + int ret = -EOPNOTSUPP; + + might_sleep(); + + sdata = get_bss_sdata(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; + + trace_drv_sta_set_txpwr(local, sdata, &sta->sta); + if (local->ops->sta_set_txpwr) + ret = local->ops->sta_set_txpwr(&local->hw, &sdata->vif, + &sta->sta); + trace_drv_return_int(local, ret); + return ret; +} + void drv_sta_rc_update(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta, u32 changed) diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index ae4f0be3b393..c2d8b5451a5e 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -529,6 +529,11 @@ int drv_sta_state(struct ieee80211_local *local, enum ieee80211_sta_state old_state, enum ieee80211_sta_state new_state); +__must_check +int drv_sta_set_txpwr(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct sta_info *sta); + void drv_sta_rc_update(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta, u32 changed); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e170f986d226..073a8235ae1b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1269,7 +1269,7 @@ struct ieee80211_local { /* * Key mutex, protects sdata's key_list and sta_info's - * key pointers (write access, they're RCU.) + * key pointers and ptk_idx (write access, they're RCU.) 
*/ struct mutex key_mtx; @@ -1384,6 +1384,7 @@ struct ieee80211_local { struct dentry *rcdir; struct dentry *keys; } debugfs; + bool force_tx_status; #endif /* @@ -1505,7 +1506,6 @@ struct ieee802_11_elems { const struct ieee80211_bss_max_idle_period_ie *max_idle_period_ie; const struct ieee80211_multiple_bssid_configuration *mbssid_config_ie; const struct ieee80211_bssid_index *bssid_index; - const u8 *nontransmitted_bssid_profile; u8 max_bssid_indicator; u8 dtim_count; u8 dtim_period; @@ -1761,7 +1761,8 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev); void __ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev, - u32 info_flags); + u32 info_flags, + u32 ctrl_flags); void ieee80211_purge_tx_queue(struct ieee80211_hw *hw, struct sk_buff_head *skbs); struct sk_buff * @@ -1778,6 +1779,8 @@ void ieee80211_clear_fast_xmit(struct sta_info *sta); int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, const u8 *buf, size_t len, const u8 *dest, __be16 proto, bool unencrypted); +int ieee80211_probe_mesh_link(struct wiphy *wiphy, struct net_device *dev, + const u8 *buf, size_t len); /* HT */ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 02d2e6f11e93..410685d38c46 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1133,8 +1133,7 @@ static void ieee80211_uninit(struct net_device *dev) static u16 ieee80211_netdev_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { return ieee80211_select_queue(IEEE80211_DEV_TO_SUB_IF(dev), skb); } @@ -1179,8 +1178,7 @@ static const struct net_device_ops ieee80211_dataif_ops = { static u16 ieee80211_monitor_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; @@ -1227,6 +1225,7 @@ static void ieee80211_if_setup(struct net_device *dev) static void ieee80211_if_setup_no_queue(struct net_device *dev) { ieee80211_if_setup(dev); + dev->features |= NETIF_F_LLTX; dev->priv_flags |= IFF_NO_QUEUE; } @@ -1764,13 +1763,13 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, txq_size += sizeof(struct txq_info) + local->hw.txq_data_size; - if (local->ops->wake_tx_queue) + if (local->ops->wake_tx_queue) { if_setup = ieee80211_if_setup_no_queue; - else + } else { if_setup = ieee80211_if_setup; - - if (local->hw.queues >= IEEE80211_NUM_ACS) - txqs = IEEE80211_NUM_ACS; + if (local->hw.queues >= IEEE80211_NUM_ACS) + txqs = IEEE80211_NUM_ACS; + } ndev = alloc_netdev_mqs(size + txq_size, name, name_assign_type, diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 37e372896230..20bf9db7a388 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -140,6 +140,12 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) * so clear that flag now to avoid trying to remove * it again later. 
*/ + if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && + !(key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC | + IEEE80211_KEY_FLAG_PUT_MIC_SPACE | + IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) + increment_tailroom_need_count(sdata); + key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; return -EINVAL; } @@ -179,9 +185,9 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) if (!ret) { key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; - if (!((key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC | - IEEE80211_KEY_FLAG_PUT_MIC_SPACE)) || - (key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) + if (!(key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC | + IEEE80211_KEY_FLAG_PUT_MIC_SPACE | + IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) decrease_tailroom_need_count(sdata, 1); WARN_ON((key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE) && @@ -242,9 +248,9 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) sta = key->sta; sdata = key->sdata; - if (!((key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC | - IEEE80211_KEY_FLAG_PUT_MIC_SPACE)) || - (key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) + if (!(key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC | + IEEE80211_KEY_FLAG_PUT_MIC_SPACE | + IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) increment_tailroom_need_count(sdata); key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; @@ -258,9 +264,24 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) sta ? sta->sta.addr : bcast_addr, ret); } +int ieee80211_set_tx_key(struct ieee80211_key *key) +{ + struct sta_info *sta = key->sta; + struct ieee80211_local *local = key->local; + struct ieee80211_key *old; + + assert_key_lock(local); + + old = key_mtx_dereference(local, sta->ptk[sta->ptk_idx]); + sta->ptk_idx = key->conf.keyidx; + ieee80211_check_fast_xmit(sta); + + return 0; +} + static int ieee80211_hw_key_replace(struct ieee80211_key *old_key, struct ieee80211_key *new_key, - bool ptk0rekey) + bool pairwise) { struct ieee80211_sub_if_data *sdata; struct ieee80211_local *local; @@ -277,8 +298,9 @@ static int ieee80211_hw_key_replace(struct ieee80211_key *old_key, assert_key_lock(old_key->local); sta = old_key->sta; - /* PTK only using key ID 0 needs special handling on rekey */ - if (new_key && sta && ptk0rekey) { + /* Unicast rekey without Extended Key ID needs special handling */ + if (new_key && sta && pairwise && + rcu_access_pointer(sta->ptk[sta->ptk_idx]) == old_key) { local = old_key->local; sdata = old_key->sdata; @@ -394,10 +416,6 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, if (old) { idx = old->conf.keyidx; - /* TODO: proper implement and test "Extended Key ID for - * Individually Addressed Frames" from IEEE 802.11-2016. - * Till then always assume only key ID 0 is used for - * pairwise keys.*/ ret = ieee80211_hw_key_replace(old, new, pairwise); } else { /* new must be provided in case old is not */ @@ -414,15 +432,20 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, if (sta) { if (pairwise) { rcu_assign_pointer(sta->ptk[idx], new); - sta->ptk_idx = idx; - if (new) { + if (new && + !(new->conf.flags & IEEE80211_KEY_FLAG_NO_AUTO_TX)) { + sta->ptk_idx = idx; clear_sta_flag(sta, WLAN_STA_BLOCK_BA); ieee80211_check_fast_xmit(sta); } } else { rcu_assign_pointer(sta->gtk[idx], new); } - if (new) + /* Only needed for transition from no key -> key. + * Still triggers unnecessary when using Extended Key ID + * and installing the second key ID the first time. 
+ */ + if (new && !old) ieee80211_check_fast_rx(sta); } else { defunikey = old && @@ -738,16 +761,34 @@ int ieee80211_key_link(struct ieee80211_key *key, * can cause warnings to appear. */ bool delay_tailroom = sdata->vif.type == NL80211_IFTYPE_STATION; - int ret; + int ret = -EOPNOTSUPP; mutex_lock(&sdata->local->key_mtx); - if (sta && pairwise) + if (sta && pairwise) { + struct ieee80211_key *alt_key; + old_key = key_mtx_dereference(sdata->local, sta->ptk[idx]); - else if (sta) + alt_key = key_mtx_dereference(sdata->local, sta->ptk[idx ^ 1]); + + /* The rekey code assumes that the old and new key are using + * the same cipher. Enforce the assumption for pairwise keys. + */ + if (key && + ((alt_key && alt_key->conf.cipher != key->conf.cipher) || + (old_key && old_key->conf.cipher != key->conf.cipher))) + goto out; + } else if (sta) { old_key = key_mtx_dereference(sdata->local, sta->gtk[idx]); - else + } else { old_key = key_mtx_dereference(sdata->local, sdata->keys[idx]); + } + + /* Non-pairwise keys must also not switch the cipher on rekey */ + if (!pairwise) { + if (key && old_key && old_key->conf.cipher != key->conf.cipher) + goto out; + } /* * Silently accept key re-installation without really installing the @@ -1187,9 +1228,9 @@ void ieee80211_remove_key(struct ieee80211_key_conf *keyconf) if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) { key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; - if (!((key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC | - IEEE80211_KEY_FLAG_PUT_MIC_SPACE)) || - (key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) + if (!(key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC | + IEEE80211_KEY_FLAG_PUT_MIC_SPACE | + IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) increment_tailroom_need_count(key->sdata); } diff --git a/net/mac80211/key.h b/net/mac80211/key.h index ebdb80b85dc3..f06fbd03d235 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -18,6 +18,7 @@ #define NUM_DEFAULT_KEYS 4 #define NUM_DEFAULT_MGMT_KEYS 2 +#define INVALID_PTK_KEYIDX 2 /* Keyidx always pointing to a NULL key for PTK */ struct ieee80211_local; struct ieee80211_sub_if_data; @@ -146,6 +147,7 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, int ieee80211_key_link(struct ieee80211_key *key, struct ieee80211_sub_if_data *sdata, struct sta_info *sta); +int ieee80211_set_tx_key(struct ieee80211_key *key); void ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom); void ieee80211_key_free_unused(struct ieee80211_key *key); void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx, diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 800e67615e2a..2b608044ae23 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1051,6 +1051,22 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) } } + /* Enable Extended Key IDs when driver allowed it, or when it + * supports neither HW crypto nor A-MPDUs + */ + if ((!local->ops->set_key && + !ieee80211_hw_check(hw, AMPDU_AGGREGATION)) || + ieee80211_hw_check(&local->hw, EXT_KEY_ID_NATIVE)) + wiphy_ext_feature_set(local->hw.wiphy, + NL80211_EXT_FEATURE_EXT_KEY_ID); + + /* Mac80211 and therefore all cards only using SW crypto are able to + * handle PTK rekeys correctly + */ + if (!local->ops->set_key) + wiphy_ext_feature_set(local->hw.wiphy, + NL80211_EXT_FEATURE_CAN_REPLACE_PTK0); + /* * Calculate scan IE length -- we need this to alloc * memory and to subtract from the driver limit. 
It diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 574c3891c4b2..88535a2e62bc 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -278,6 +278,8 @@ mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst); int mesh_path_add_gate(struct mesh_path *mpath); int mesh_path_send_to_gates(struct mesh_path *mpath); int mesh_gate_num(struct ieee80211_sub_if_data *sdata); +u32 airtime_link_metric_get(struct ieee80211_local *local, + struct sta_info *sta); /* Mesh plinks */ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index f7517668e77a..bf8e13cd5fd1 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -318,8 +318,8 @@ void ieee80211s_update_metric(struct ieee80211_local *local, cfg80211_calculate_bitrate(&rinfo)); } -static u32 airtime_link_metric_get(struct ieee80211_local *local, - struct sta_info *sta) +u32 airtime_link_metric_get(struct ieee80211_local *local, + struct sta_info *sta) { /* This should be adjusted for each device */ int device_constant = 1 << ARITH_SHIFT; @@ -1130,16 +1130,17 @@ int mesh_nexthop_resolve(struct ieee80211_sub_if_data *sdata, struct mesh_path *mpath; struct sk_buff *skb_to_free = NULL; u8 *target_addr = hdr->addr3; - int err = 0; /* Nulls are only sent to peers for PS and should be pre-addressed */ if (ieee80211_is_qos_nullfunc(hdr->frame_control)) return 0; - rcu_read_lock(); - err = mesh_nexthop_lookup(sdata, skb); - if (!err) - goto endlookup; + /* Allow injected packets to bypass mesh routing */ + if (info->control.flags & IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP) + return 0; + + if (!mesh_nexthop_lookup(sdata, skb)) + return 0; /* no nexthop found, start resolving */ mpath = mesh_path_lookup(sdata, target_addr); @@ -1147,8 +1148,7 @@ int mesh_nexthop_resolve(struct ieee80211_sub_if_data *sdata, mpath = mesh_path_add(sdata, target_addr); if (IS_ERR(mpath)) { mesh_path_discard_frame(sdata, skb); - err = PTR_ERR(mpath); - goto endlookup; + return PTR_ERR(mpath); } } @@ -1161,13 +1161,10 @@ int mesh_nexthop_resolve(struct ieee80211_sub_if_data *sdata, info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; ieee80211_set_qos_hdr(sdata, skb); skb_queue_tail(&mpath->frame_queue, skb); - err = -ENOENT; if (skb_to_free) mesh_path_discard_frame(sdata, skb_to_free); -endlookup: - rcu_read_unlock(); - return err; + return -ENOENT; } /** @@ -1187,13 +1184,10 @@ int mesh_nexthop_lookup(struct ieee80211_sub_if_data *sdata, struct sta_info *next_hop; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; u8 *target_addr = hdr->addr3; - int err = -ENOENT; - rcu_read_lock(); mpath = mesh_path_lookup(sdata, target_addr); - if (!mpath || !(mpath->flags & MESH_PATH_ACTIVE)) - goto endlookup; + return -ENOENT; if (time_after(jiffies, mpath->exp_time - @@ -1208,12 +1202,10 @@ int mesh_nexthop_lookup(struct ieee80211_sub_if_data *sdata, memcpy(hdr->addr1, next_hop->sta.addr, ETH_ALEN); memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); ieee80211_mps_set_frame_flags(sdata, next_hop, hdr); - err = 0; + return 0; } -endlookup: - rcu_read_unlock(); - return err; + return -ENOENT; } void mesh_path_timer(struct timer_list *t) diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index b76a2aefa9ec..796b069ad251 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -217,7 +217,7 @@ static struct mesh_path *mpath_lookup(struct mesh_table *tbl, const u8 *dst, { struct mesh_path *mpath; - mpath = 
rhashtable_lookup_fast(&tbl->rhead, dst, mesh_rht_params); + mpath = rhashtable_lookup(&tbl->rhead, dst, mesh_rht_params); if (mpath && mpath_expired(mpath)) { spin_lock_bh(&mpath->state_lock); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 2dbcf5d5512e..b7a9fe3d5fcb 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1188,9 +1188,6 @@ static void ieee80211_chswitch_work(struct work_struct *work) goto out; } - /* XXX: shouldn't really modify cfg80211-owned data! */ - ifmgd->associated->channel = sdata->csa_chandef.chan; - ifmgd->csa_waiting_bcn = true; ieee80211_sta_reset_beacon_monitor(sdata); diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index ccaf951e4e31..8b168724c5e7 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -51,8 +51,13 @@ MINSTREL_MAX_STREAMS * _sgi + \ _streams - 1 +#define _MAX(a, b) (((a)>(b))?(a):(b)) + +#define GROUP_SHIFT(duration) \ + _MAX(0, 16 - __builtin_clz(duration)) + /* MCS rate information for an MCS group */ -#define MCS_GROUP(_streams, _sgi, _ht40, _s) \ +#define __MCS_GROUP(_streams, _sgi, _ht40, _s) \ [GROUP_IDX(_streams, _sgi, _ht40)] = { \ .streams = _streams, \ .shift = _s, \ @@ -72,6 +77,13 @@ } \ } +#define MCS_GROUP_SHIFT(_streams, _sgi, _ht40) \ + GROUP_SHIFT(MCS_DURATION(_streams, _sgi, _ht40 ? 54 : 26)) + +#define MCS_GROUP(_streams, _sgi, _ht40) \ + __MCS_GROUP(_streams, _sgi, _ht40, \ + MCS_GROUP_SHIFT(_streams, _sgi, _ht40)) + #define VHT_GROUP_IDX(_streams, _sgi, _bw) \ (MINSTREL_VHT_GROUP_0 + \ MINSTREL_MAX_STREAMS * 2 * (_bw) + \ @@ -81,7 +93,7 @@ #define BW2VBPS(_bw, r3, r2, r1) \ (_bw == BW_80 ? r3 : _bw == BW_40 ? r2 : r1) -#define VHT_GROUP(_streams, _sgi, _bw, _s) \ +#define __VHT_GROUP(_streams, _sgi, _bw, _s) \ [VHT_GROUP_IDX(_streams, _sgi, _bw)] = { \ .streams = _streams, \ .shift = _s, \ @@ -114,6 +126,14 @@ } \ } +#define VHT_GROUP_SHIFT(_streams, _sgi, _bw) \ + GROUP_SHIFT(MCS_DURATION(_streams, _sgi, \ + BW2VBPS(_bw, 117, 54, 26))) + +#define VHT_GROUP(_streams, _sgi, _bw) \ + __VHT_GROUP(_streams, _sgi, _bw, \ + VHT_GROUP_SHIFT(_streams, _sgi, _bw)) + #define CCK_DURATION(_bitrate, _short, _len) \ (1000 * (10 /* SIFS */ + \ (_short ? 
72 + 24 : 144 + 48) + \ @@ -129,7 +149,7 @@ CCK_ACK_DURATION(55, _short) >> _s, \ CCK_ACK_DURATION(110, _short) >> _s -#define CCK_GROUP(_s) \ +#define __CCK_GROUP(_s) \ [MINSTREL_CCK_GROUP] = { \ .streams = 1, \ .flags = 0, \ @@ -140,6 +160,12 @@ } \ } +#define CCK_GROUP_SHIFT \ + GROUP_SHIFT(CCK_ACK_DURATION(10, false)) + +#define CCK_GROUP __CCK_GROUP(CCK_GROUP_SHIFT) + + static bool minstrel_vht_only = true; module_param(minstrel_vht_only, bool, 0644); MODULE_PARM_DESC(minstrel_vht_only, @@ -154,47 +180,57 @@ MODULE_PARM_DESC(minstrel_vht_only, * BW -> SGI -> #streams */ const struct mcs_group minstrel_mcs_groups[] = { - MCS_GROUP(1, 0, BW_20, 5), - MCS_GROUP(2, 0, BW_20, 4), - MCS_GROUP(3, 0, BW_20, 4), - - MCS_GROUP(1, 1, BW_20, 5), - MCS_GROUP(2, 1, BW_20, 4), - MCS_GROUP(3, 1, BW_20, 4), - - MCS_GROUP(1, 0, BW_40, 4), - MCS_GROUP(2, 0, BW_40, 4), - MCS_GROUP(3, 0, BW_40, 4), - - MCS_GROUP(1, 1, BW_40, 4), - MCS_GROUP(2, 1, BW_40, 4), - MCS_GROUP(3, 1, BW_40, 4), - - CCK_GROUP(8), - - VHT_GROUP(1, 0, BW_20, 5), - VHT_GROUP(2, 0, BW_20, 4), - VHT_GROUP(3, 0, BW_20, 4), - - VHT_GROUP(1, 1, BW_20, 5), - VHT_GROUP(2, 1, BW_20, 4), - VHT_GROUP(3, 1, BW_20, 4), - - VHT_GROUP(1, 0, BW_40, 4), - VHT_GROUP(2, 0, BW_40, 4), - VHT_GROUP(3, 0, BW_40, 4), - - VHT_GROUP(1, 1, BW_40, 4), - VHT_GROUP(2, 1, BW_40, 4), - VHT_GROUP(3, 1, BW_40, 4), - - VHT_GROUP(1, 0, BW_80, 4), - VHT_GROUP(2, 0, BW_80, 4), - VHT_GROUP(3, 0, BW_80, 4), - - VHT_GROUP(1, 1, BW_80, 4), - VHT_GROUP(2, 1, BW_80, 4), - VHT_GROUP(3, 1, BW_80, 4), + MCS_GROUP(1, 0, BW_20), + MCS_GROUP(2, 0, BW_20), + MCS_GROUP(3, 0, BW_20), + MCS_GROUP(4, 0, BW_20), + + MCS_GROUP(1, 1, BW_20), + MCS_GROUP(2, 1, BW_20), + MCS_GROUP(3, 1, BW_20), + MCS_GROUP(4, 1, BW_20), + + MCS_GROUP(1, 0, BW_40), + MCS_GROUP(2, 0, BW_40), + MCS_GROUP(3, 0, BW_40), + MCS_GROUP(4, 0, BW_40), + + MCS_GROUP(1, 1, BW_40), + MCS_GROUP(2, 1, BW_40), + MCS_GROUP(3, 1, BW_40), + MCS_GROUP(4, 1, BW_40), + + CCK_GROUP, + + VHT_GROUP(1, 0, BW_20), + VHT_GROUP(2, 0, BW_20), + VHT_GROUP(3, 0, BW_20), + VHT_GROUP(4, 0, BW_20), + + VHT_GROUP(1, 1, BW_20), + VHT_GROUP(2, 1, BW_20), + VHT_GROUP(3, 1, BW_20), + VHT_GROUP(4, 1, BW_20), + + VHT_GROUP(1, 0, BW_40), + VHT_GROUP(2, 0, BW_40), + VHT_GROUP(3, 0, BW_40), + VHT_GROUP(4, 0, BW_40), + + VHT_GROUP(1, 1, BW_40), + VHT_GROUP(2, 1, BW_40), + VHT_GROUP(3, 1, BW_40), + VHT_GROUP(4, 1, BW_40), + + VHT_GROUP(1, 0, BW_80), + VHT_GROUP(2, 0, BW_80), + VHT_GROUP(3, 0, BW_80), + VHT_GROUP(4, 0, BW_80), + + VHT_GROUP(1, 1, BW_80), + VHT_GROUP(2, 1, BW_80), + VHT_GROUP(3, 1, BW_80), + VHT_GROUP(4, 1, BW_80), }; static u8 sample_table[SAMPLE_COLUMNS][MCS_GROUP_RATES] __read_mostly; diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h index 26b7a3244b47..f762e5ba7c2e 100644 --- a/net/mac80211/rc80211_minstrel_ht.h +++ b/net/mac80211/rc80211_minstrel_ht.h @@ -13,7 +13,7 @@ * The number of streams can be changed to 2 to reduce code * size and memory footprint. 
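The new GROUP_SHIFT(duration) macro computes the per-group scaling that the old hand-maintained _s arguments encoded by eye: since 32 - __builtin_clz(d) is the bit length of a nonzero 32-bit d, a shift of max(0, 16 - clz(d)) brings the group's longest duration down to at most 16 bits. A standalone check with an illustrative duration value:

#include <assert.h>
#include <stdio.h>

#define _MAX(a, b) (((a) > (b)) ? (a) : (b))
#define GROUP_SHIFT(duration) _MAX(0, 16 - __builtin_clz(duration))

int main(void)
{
	unsigned int duration = 180500; /* illustrative MCS_DURATION() value */
	int shift = GROUP_SHIFT(duration);

	assert(shift == 2); /* 180500 needs 18 bits; 18 - 2 == 16 */
	assert((duration >> shift) < (1u << 16));
	printf("shift=%d scaled=%u\n", shift, duration >> shift);
	return 0;
}

This is also why the MCS_GROUP()/VHT_GROUP()/CCK_GROUP users in the table above can drop their explicit shift arguments.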
*/ -#define MINSTREL_MAX_STREAMS 3 +#define MINSTREL_MAX_STREAMS 4 #define MINSTREL_HT_STREAM_GROUPS 4 /* BW(=2) * SGI(=2) */ #define MINSTREL_VHT_STREAM_GROUPS 6 /* BW(=3) * SGI(=2) */ diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index bf0b187f994e..25577ede2986 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1005,23 +1005,43 @@ static int ieee80211_get_mmie_keyidx(struct sk_buff *skb) return -1; } -static int ieee80211_get_cs_keyid(const struct ieee80211_cipher_scheme *cs, - struct sk_buff *skb) +static int ieee80211_get_keyid(struct sk_buff *skb, + const struct ieee80211_cipher_scheme *cs) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; __le16 fc; int hdrlen; + int minlen; + u8 key_idx_off; + u8 key_idx_shift; u8 keyid; fc = hdr->frame_control; hdrlen = ieee80211_hdrlen(fc); - if (skb->len < hdrlen + cs->hdr_len) + if (cs) { + minlen = hdrlen + cs->hdr_len; + key_idx_off = hdrlen + cs->key_idx_off; + key_idx_shift = cs->key_idx_shift; + } else { + /* WEP, TKIP, CCMP and GCMP */ + minlen = hdrlen + IEEE80211_WEP_IV_LEN; + key_idx_off = hdrlen + 3; + key_idx_shift = 6; + } + + if (unlikely(skb->len < minlen)) return -EINVAL; - skb_copy_bits(skb, hdrlen + cs->key_idx_off, &keyid, 1); - keyid &= cs->key_idx_mask; - keyid >>= cs->key_idx_shift; + skb_copy_bits(skb, key_idx_off, &keyid, 1); + + if (cs) + keyid &= cs->key_idx_mask; + keyid >>= key_idx_shift; + + /* cs could use more than the usual two bits for the keyid */ + if (unlikely(keyid >= NUM_DEFAULT_KEYS)) + return -EINVAL; return keyid; } @@ -1860,9 +1880,9 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; int keyidx; - int hdrlen; ieee80211_rx_result result = RX_DROP_UNUSABLE; struct ieee80211_key *sta_ptk = NULL; + struct ieee80211_key *ptk_idx = NULL; int mmie_keyidx = -1; __le16 fc; const struct ieee80211_cipher_scheme *cs = NULL; @@ -1900,21 +1920,24 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) if (rx->sta) { int keyid = rx->sta->ptk_idx; + sta_ptk = rcu_dereference(rx->sta->ptk[keyid]); - if (ieee80211_has_protected(fc) && rx->sta->cipher_scheme) { + if (ieee80211_has_protected(fc)) { cs = rx->sta->cipher_scheme; - keyid = ieee80211_get_cs_keyid(cs, rx->skb); + keyid = ieee80211_get_keyid(rx->skb, cs); + if (unlikely(keyid < 0)) return RX_DROP_UNUSABLE; + + ptk_idx = rcu_dereference(rx->sta->ptk[keyid]); } - sta_ptk = rcu_dereference(rx->sta->ptk[keyid]); } if (!ieee80211_has_protected(fc)) mmie_keyidx = ieee80211_get_mmie_keyidx(rx->skb); if (!is_multicast_ether_addr(hdr->addr1) && sta_ptk) { - rx->key = sta_ptk; + rx->key = ptk_idx ? ptk_idx : sta_ptk; if ((status->flag & RX_FLAG_DECRYPTED) && (status->flag & RX_FLAG_IV_STRIPPED)) return RX_CONTINUE; @@ -1974,8 +1997,6 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) } return RX_CONTINUE; } else { - u8 keyid; - /* * The device doesn't give us the IV so we won't be * able to look up the key. That's ok though, we @@ -1989,23 +2010,10 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) (status->flag & RX_FLAG_IV_STRIPPED)) return RX_CONTINUE; - hdrlen = ieee80211_hdrlen(fc); - - if (cs) { - keyidx = ieee80211_get_cs_keyid(cs, rx->skb); + keyidx = ieee80211_get_keyid(rx->skb, cs); - if (unlikely(keyidx < 0)) - return RX_DROP_UNUSABLE; - } else { - if (rx->skb->len < 8 + hdrlen) - return RX_DROP_UNUSABLE; /* TODO: count this? 
*/ - /* - * no need to call ieee80211_wep_get_keyidx, - * it verifies a bunch of things we've done already - */ - skb_copy_bits(rx->skb, hdrlen + 3, &keyid, 1); - keyidx = keyid >> 6; - } + if (unlikely(keyidx < 0)) + return RX_DROP_UNUSABLE; /* check per-station GTK first, if multicast packet */ if (is_multicast_ether_addr(hdr->addr1) && rx->sta) @@ -4050,12 +4058,8 @@ void ieee80211_check_fast_rx(struct sta_info *sta) case WLAN_CIPHER_SUITE_GCMP_256: break; default: - /* we also don't want to deal with WEP or cipher scheme - * since those require looking up the key idx in the - * frame, rather than assuming the PTK is used - * (we need to revisit this once we implement the real - * PTK index, which is now valid in the spec, but we - * haven't implemented that part yet) + /* We also don't want to deal with + * WEP or cipher scheme. */ goto clear_rcu; } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 11f058987a54..a4932ee3595c 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -347,6 +347,15 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta->sta.max_rx_aggregation_subframes = local->hw.max_rx_aggregation_subframes; + /* Extended Key ID needs to install keys for keyid 0 and 1 Rx-only. + * The Tx path starts to use a key as soon as the key slot ptk_idx + * refers to is not NULL. To not use the initial Rx-only key + * prematurely for Tx, initialize ptk_idx to an impossible PTK keyid + * which always will refer to a NULL key. + */ + BUILD_BUG_ON(ARRAY_SIZE(sta->ptk) <= INVALID_PTK_KEYIDX); + sta->ptk_idx = INVALID_PTK_KEYIDX; + sta->local = local; sta->sdata = sdata; sta->rx_stats.last_rx = jiffies; @@ -2373,6 +2382,12 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, sinfo->filled |= BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL_AVG); } + + if (ieee80211_vif_is_mesh(&sdata->vif)) { + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_AIRTIME_LINK_METRIC); + sinfo->airtime_link_metric = + airtime_link_metric_get(local, sta); + } } u32 sta_get_expected_throughput(struct sta_info *sta) diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index d30690d79a58..24c37f91ca46 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -1056,7 +1056,7 @@ ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev, /* disable bottom halves when entering the Tx path */ local_bh_disable(); - __ieee80211_subif_start_xmit(skb, dev, flags); + __ieee80211_subif_start_xmit(skb, dev, flags, 0); local_bh_enable(); return ret; diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 8ba70d26b82e..3bb4459b52c7 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -828,6 +828,36 @@ TRACE_EVENT(drv_sta_state, ) ); +TRACE_EVENT(drv_sta_set_txpwr, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta), + + TP_ARGS(local, sdata, sta), + + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + STA_ENTRY + __field(s16, txpwr) + __field(u8, type) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + STA_ASSIGN; + __entry->txpwr = sta->txpwr.power; + __entry->type = sta->txpwr.type; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " txpwr: %d type %d", + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, + __entry->txpwr, __entry->type + ) +); + TRACE_EVENT(drv_sta_rc_update, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 2e816dd67be7..dd220b977025 100644 --- a/net/mac80211/tx.c +++ 
b/net/mac80211/tx.c @@ -1399,11 +1399,15 @@ static void ieee80211_txq_enqueue(struct ieee80211_local *local, { struct fq *fq = &local->fq; struct fq_tin *tin = &txqi->tin; + u32 flow_idx = fq_flow_idx(fq, skb); ieee80211_set_skb_enqueue_time(skb); - fq_tin_enqueue(fq, tin, skb, + + spin_lock_bh(&fq->lock); + fq_tin_enqueue(fq, tin, flow_idx, skb, fq_skb_free_func, fq_flow_get_default_func); + spin_unlock_bh(&fq->lock); } static bool fq_vlan_filter_func(struct fq *fq, struct fq_tin *tin, @@ -1590,7 +1594,6 @@ static bool ieee80211_queue_skb(struct ieee80211_local *local, struct sta_info *sta, struct sk_buff *skb) { - struct fq *fq = &local->fq; struct ieee80211_vif *vif; struct txq_info *txqi; @@ -1608,9 +1611,7 @@ static bool ieee80211_queue_skb(struct ieee80211_local *local, if (!txqi) return false; - spin_lock_bh(&fq->lock); ieee80211_txq_enqueue(local, txqi, skb); - spin_unlock_bh(&fq->lock); schedule_and_wake_txq(local, txqi); @@ -2431,6 +2432,7 @@ static int ieee80211_lookup_ra_sta(struct ieee80211_sub_if_data *sdata, * @sdata: virtual interface to build the header for * @skb: the skb to build the header in * @info_flags: skb flags to set + * @ctrl_flags: info control flags to set * * This function takes the skb with 802.3 header and reformats the header to * the appropriate IEEE 802.11 header based on which interface the packet is @@ -2446,7 +2448,7 @@ static int ieee80211_lookup_ra_sta(struct ieee80211_sub_if_data *sdata, */ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, u32 info_flags, - struct sta_info *sta) + struct sta_info *sta, u32 ctrl_flags) { struct ieee80211_local *local = sdata->local; struct ieee80211_tx_info *info; @@ -2470,6 +2472,11 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, if (IS_ERR(sta)) sta = NULL; +#ifdef CONFIG_MAC80211_DEBUGFS + if (local->force_tx_status) + info_flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; +#endif + /* convert Ethernet header to proper 802.11 header (based on * operation mode) */ ethertype = (skb->data[12] << 8) | skb->data[13]; @@ -2600,6 +2607,13 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, goto free; } band = chanctx_conf->def.chan->band; + + /* For injected frames, fill RA right away as nexthop lookup + * will be skipped. 
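Note the enqueue refactor at the top of this tx.c diff: fq_flow_idx(fq, skb) is now computed before fq->lock is taken, because the flow index is a pure hash over the skb and only the queue update itself needs the critical section. The same compute-then-lock pattern in a self-contained userspace analogy (hash function and names are illustrative, not the kernel fq API):

#include <assert.h>
#include <pthread.h>

static pthread_mutex_t q_lock = PTHREAD_MUTEX_INITIALIZER;
static int queue_depth[64]; /* shared state, guarded by q_lock */

/* stand-in for fq_flow_idx(): a pure function of the packet alone */
static unsigned int flow_hash(unsigned int src, unsigned int dst)
{
	return (src * 2654435761u ^ dst) % 64;
}

static void enqueue(unsigned int src, unsigned int dst)
{
	unsigned int idx = flow_hash(src, dst); /* hashed outside the lock */

	pthread_mutex_lock(&q_lock);
	queue_depth[idx]++; /* only the shared update is serialized */
	pthread_mutex_unlock(&q_lock);
}

int main(void)
{
	enqueue(1, 2);
	assert(queue_depth[flow_hash(1, 2)] == 1);
	return 0;
}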
+ */ + if ((ctrl_flags & IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP) && + is_zero_ether_addr(hdr.addr1)) + memcpy(hdr.addr1, skb->data, ETH_ALEN); break; #endif case NL80211_IFTYPE_STATION: @@ -2818,6 +2832,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, info->flags = info_flags; info->ack_frame_id = info_id; info->band = band; + info->control.flags = ctrl_flags; return skb; free: @@ -3000,23 +3015,15 @@ void ieee80211_check_fast_xmit(struct sta_info *sta) switch (build.key->conf.cipher) { case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: - /* add fixed key ID */ - if (gen_iv) { - (build.hdr + build.hdr_len)[3] = - 0x20 | (build.key->conf.keyidx << 6); + if (gen_iv) build.pn_offs = build.hdr_len; - } if (gen_iv || iv_spc) build.hdr_len += IEEE80211_CCMP_HDR_LEN; break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: - /* add fixed key ID */ - if (gen_iv) { - (build.hdr + build.hdr_len)[3] = - 0x20 | (build.key->conf.keyidx << 6); + if (gen_iv) build.pn_offs = build.hdr_len; - } if (gen_iv || iv_spc) build.hdr_len += IEEE80211_GCMP_HDR_LEN; break; @@ -3222,6 +3229,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, int max_frags = local->hw.max_tx_fragments; int max_amsdu_len = sta->sta.max_amsdu_len; int orig_truesize; + u32 flow_idx; __be16 len; void *data; bool ret = false; @@ -3250,6 +3258,8 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, max_amsdu_len = min_t(int, max_amsdu_len, sta->sta.max_tid_amsdu_len[tid]); + flow_idx = fq_flow_idx(fq, skb); + spin_lock_bh(&fq->lock); /* TODO: Ideally aggregation should be done on dequeue to remain @@ -3257,7 +3267,8 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, */ tin = &txqi->tin; - flow = fq_flow_classify(fq, tin, skb, fq_flow_get_default_func); + flow = fq_flow_classify(fq, tin, flow_idx, skb, + fq_flow_get_default_func); head = skb_peek_tail(&flow->queue); if (!head || skb_is_gso(head)) goto out; @@ -3386,6 +3397,7 @@ static void ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata, pn = atomic64_inc_return(&key->conf.tx_pn); crypto_hdr[0] = pn; crypto_hdr[1] = pn >> 8; + crypto_hdr[3] = 0x20 | (key->conf.keyidx << 6); crypto_hdr[4] = pn >> 16; crypto_hdr[5] = pn >> 24; crypto_hdr[6] = pn >> 32; @@ -3478,6 +3490,11 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, (tid_tx ? 
IEEE80211_TX_CTL_AMPDU : 0); info->control.flags = IEEE80211_TX_CTRL_FAST_XMIT; +#ifdef CONFIG_MAC80211_DEBUGFS + if (local->force_tx_status) + info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; +#endif + if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) { tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; *ieee80211_get_qos_ctl(hdr) = tid; @@ -3533,6 +3550,7 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, ieee80211_tx_result r; struct ieee80211_vif *vif = txq->vif; +begin: spin_lock_bh(&fq->lock); if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags) || @@ -3549,11 +3567,12 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, if (skb) goto out; -begin: skb = fq_tin_dequeue(fq, tin, fq_tin_dequeue_func); if (!skb) goto out; + spin_unlock_bh(&fq->lock); + hdr = (struct ieee80211_hdr *)skb->data; info = IEEE80211_SKB_CB(skb); @@ -3598,8 +3617,11 @@ begin: skb = __skb_dequeue(&tx.skbs); - if (!skb_queue_empty(&tx.skbs)) + if (!skb_queue_empty(&tx.skbs)) { + spin_lock_bh(&fq->lock); skb_queue_splice_tail(&tx.skbs, &txqi->frags); + spin_unlock_bh(&fq->lock); + } } if (skb_has_frag_list(skb) && @@ -3638,6 +3660,7 @@ begin: } IEEE80211_SKB_CB(skb)->control.vif = vif; + return skb; out: spin_unlock_bh(&fq->lock); @@ -3783,9 +3806,11 @@ EXPORT_SYMBOL(ieee80211_txq_schedule_start); void __ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev, - u32 info_flags) + u32 info_flags, + u32 ctrl_flags) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; struct sta_info *sta; struct sk_buff *next; @@ -3799,7 +3824,15 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, if (ieee80211_lookup_ra_sta(sdata, skb, &sta)) goto out_free; - if (!IS_ERR_OR_NULL(sta)) { + if (IS_ERR(sta)) + sta = NULL; + + if (local->ops->wake_tx_queue) { + u16 queue = __ieee80211_select_queue(sdata, sta, skb); + skb_set_queue_mapping(skb, queue); + } + + if (sta) { struct ieee80211_fast_tx *fast_tx; sk_pacing_shift_update(skb->sk, sdata->local->hw.tx_sk_pacing_shift); @@ -3848,7 +3881,8 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, skb->prev = NULL; skb->next = NULL; - skb = ieee80211_build_hdr(sdata, skb, info_flags, sta); + skb = ieee80211_build_hdr(sdata, skb, info_flags, + sta, ctrl_flags); if (IS_ERR(skb)) goto out; @@ -3988,9 +4022,9 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, __skb_queue_head_init(&queue); ieee80211_convert_to_unicast(skb, dev, &queue); while ((skb = __skb_dequeue(&queue))) - __ieee80211_subif_start_xmit(skb, dev, 0); + __ieee80211_subif_start_xmit(skb, dev, 0, 0); } else { - __ieee80211_subif_start_xmit(skb, dev, 0); + __ieee80211_subif_start_xmit(skb, dev, 0, 0); } return NETDEV_TX_OK; @@ -4015,7 +4049,7 @@ ieee80211_build_data_template(struct ieee80211_sub_if_data *sdata, goto out; } - skb = ieee80211_build_hdr(sdata, skb, info_flags, sta); + skb = ieee80211_build_hdr(sdata, skb, info_flags, sta, 0); if (IS_ERR(skb)) goto out; @@ -5052,7 +5086,36 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, skb_reset_mac_header(skb); local_bh_disable(); - __ieee80211_subif_start_xmit(skb, skb->dev, flags); + __ieee80211_subif_start_xmit(skb, skb->dev, flags, 0); + local_bh_enable(); + + return 0; +} + +int ieee80211_probe_mesh_link(struct wiphy *wiphy, struct net_device *dev, + const u8 *buf, size_t len) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + struct sk_buff 
*skb; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + len + + 30 + /* header size */ + 18); /* 11s header size */ + if (!skb) + return -ENOMEM; + + skb_reserve(skb, local->hw.extra_tx_headroom); + skb_put_data(skb, buf, len); + + skb->dev = dev; + skb->protocol = htons(ETH_P_802_3); + skb_reset_network_header(skb); + skb_reset_mac_header(skb); + + local_bh_disable(); + __ieee80211_subif_start_xmit(skb, skb->dev, 0, + IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP); local_bh_enable(); return 0; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 4c1655972565..cba4633cd6cf 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -894,10 +894,10 @@ EXPORT_SYMBOL(ieee80211_queue_delayed_work); static u32 _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, struct ieee802_11_elems *elems, - u64 filter, u32 crc, u8 *transmitter_bssid, - u8 *bss_bssid) + u64 filter, u32 crc, + const struct element *check_inherit) { - const struct element *elem, *sub; + const struct element *elem; bool calc_crc = filter != 0; DECLARE_BITMAP(seen_elems, 256); const u8 *ie; @@ -910,6 +910,11 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, u8 elen = elem->datalen; const u8 *pos = elem->data; + if (check_inherit && + !cfg80211_is_element_inherited(elem, + check_inherit)) + continue; + switch (id) { case WLAN_EID_SSID: case WLAN_EID_SUPP_RATES: @@ -1208,57 +1213,6 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, if (elen >= sizeof(*elems->max_idle_period_ie)) elems->max_idle_period_ie = (void *)pos; break; - case WLAN_EID_MULTIPLE_BSSID: - if (!bss_bssid || !transmitter_bssid || elen < 4) - break; - - elems->max_bssid_indicator = pos[0]; - - for_each_element(sub, pos + 1, elen - 1) { - u8 sub_len = sub->datalen; - u8 new_bssid[ETH_ALEN]; - const u8 *index; - - /* - * we only expect the "non-transmitted BSSID - * profile" subelement (subelement id 0) - */ - if (sub->id != 0 || sub->datalen < 4) { - /* not a valid BSS profile */ - continue; - } - - if (sub->data[0] != WLAN_EID_NON_TX_BSSID_CAP || - sub->data[1] != 2) { - /* The first element of the - * Nontransmitted BSSID Profile is not - * the Nontransmitted BSSID Capability - * element. 
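For each nontransmitted profile, cfg80211_gen_new_bssid() derives the candidate BSSID from the transmitted one following IEEE 802.11 multiple-BSSID addressing: with a max BSSID indicator of n, only the low n bits of the 48-bit address change, incremented by the BSSID index modulo 2^n. A self-contained sketch of that derivation (helper name and test values are illustrative, not the cfg80211 API):

#include <assert.h>
#include <stdint.h>

/* 48-bit MAC carried in a uint64_t, most significant octet first */
static uint64_t gen_new_bssid(uint64_t bssid, unsigned int max_bssid_ind,
			      unsigned int mbssid_index)
{
	uint64_t mask = (1ULL << max_bssid_ind) - 1;
	uint64_t new_bssid = bssid & ~mask;

	/* increment only the low max_bssid_ind bits, wrapping modulo 2^n */
	new_bssid |= ((bssid & mask) + mbssid_index) & mask;
	return new_bssid;
}

int main(void)
{
	uint64_t tx_bssid = 0x0011223344feULL; /* 00:11:22:33:44:fe, n = 2 */

	/* index 1: low two bits 0b10 -> 0b11, giving ...:44:ff */
	assert(gen_new_bssid(tx_bssid, 2, 1) == 0x0011223344ffULL);
	/* index 2 wraps: 0b10 + 2 = 0b100, masked back to 0b00 -> ...:44:fc */
	assert(gen_new_bssid(tx_bssid, 2, 2) == 0x0011223344fcULL);
	return 0;
}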
- */ - continue; - } - - /* found a Nontransmitted BSSID Profile */ - index = cfg80211_find_ie(WLAN_EID_MULTI_BSSID_IDX, - sub->data, sub_len); - if (!index || index[1] < 1 || index[2] == 0) { - /* Invalid MBSSID Index element */ - continue; - } - - cfg80211_gen_new_bssid(transmitter_bssid, - pos[0], - index[2], - new_bssid); - if (ether_addr_equal(new_bssid, bss_bssid)) { - elems->nontransmitted_bssid_profile = - (void *)sub; - elems->bssid_index_len = index[1]; - elems->bssid_index = (void *)&index[2]; - break; - } - } - break; case WLAN_EID_EXTENSION: if (pos[0] == WLAN_EID_EXT_HE_MU_EDCA && elen >= (sizeof(*elems->mu_edca_param_set) + 1)) { @@ -1300,26 +1254,108 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, return crc; } +static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len, + struct ieee802_11_elems *elems, + u8 *transmitter_bssid, + u8 *bss_bssid, + u8 *nontransmitted_profile) +{ + const struct element *elem, *sub; + size_t profile_len = 0; + bool found = false; + + if (!bss_bssid || !transmitter_bssid) + return profile_len; + + for_each_element_id(elem, WLAN_EID_MULTIPLE_BSSID, start, len) { + if (elem->datalen < 2) + continue; + + for_each_element(sub, elem->data + 1, elem->datalen - 1) { + u8 new_bssid[ETH_ALEN]; + const u8 *index; + + if (sub->id != 0 || sub->datalen < 4) { + /* not a valid BSS profile */ + continue; + } + + if (sub->data[0] != WLAN_EID_NON_TX_BSSID_CAP || + sub->data[1] != 2) { + /* The first element of the + * Nontransmitted BSSID Profile is not + * the Nontransmitted BSSID Capability + * element. + */ + continue; + } + + memset(nontransmitted_profile, 0, len); + profile_len = cfg80211_merge_profile(start, len, + elem, + sub, + nontransmitted_profile, + len); + + /* found a Nontransmitted BSSID Profile */ + index = cfg80211_find_ie(WLAN_EID_MULTI_BSSID_IDX, + nontransmitted_profile, + profile_len); + if (!index || index[1] < 1 || index[2] == 0) { + /* Invalid MBSSID Index element */ + continue; + } + + cfg80211_gen_new_bssid(transmitter_bssid, + elem->data[0], + index[2], + new_bssid); + if (ether_addr_equal(new_bssid, bss_bssid)) { + found = true; + elems->bssid_index_len = index[1]; + elems->bssid_index = (void *)&index[2]; + break; + } + } + } + + return found ? 
profile_len : 0; +} + u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, struct ieee802_11_elems *elems, u64 filter, u32 crc, u8 *transmitter_bssid, u8 *bss_bssid) { + const struct element *non_inherit = NULL; + u8 *nontransmitted_profile; + int nontransmitted_profile_len = 0; + memset(elems, 0, sizeof(*elems)); elems->ie_start = start; elems->total_len = len; + nontransmitted_profile = kmalloc(len, GFP_ATOMIC); + if (nontransmitted_profile) { + nontransmitted_profile_len = + ieee802_11_find_bssid_profile(start, len, elems, + transmitter_bssid, + bss_bssid, + nontransmitted_profile); + non_inherit = + cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, + nontransmitted_profile, + nontransmitted_profile_len); + } + crc = _ieee802_11_parse_elems_crc(start, len, action, elems, filter, - crc, transmitter_bssid, bss_bssid); + crc, non_inherit); /* Override with nontransmitted profile, if found */ - if (transmitter_bssid && elems->nontransmitted_bssid_profile) { - const u8 *profile = elems->nontransmitted_bssid_profile; - - _ieee802_11_parse_elems_crc(&profile[2], profile[1], - action, elems, 0, 0, - transmitter_bssid, bss_bssid); - } + if (nontransmitted_profile_len) + _ieee802_11_parse_elems_crc(nontransmitted_profile, + nontransmitted_profile_len, + action, elems, 0, 0, NULL); if (elems->tim && !elems->parse_error) { const struct ieee80211_tim_ie *tim_ie = elems->tim; @@ -1339,6 +1375,8 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, offsetofend(struct ieee80211_bssid_index, dtim_count)) elems->dtim_count = elems->bssid_index->dtim_count; + kfree(nontransmitted_profile); + return crc; } diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 5f7c96368b11..6a3187883c4b 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -141,6 +141,42 @@ u16 ieee80211_select_queue_80211(struct ieee80211_sub_if_data *sdata, return ieee80211_downgrade_queue(sdata, NULL, skb); } +u16 __ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, struct sk_buff *skb) +{ + struct mac80211_qos_map *qos_map; + bool qos; + + /* all mesh/ocb stations are required to support WME */ + if (sdata->vif.type == NL80211_IFTYPE_MESH_POINT || + sdata->vif.type == NL80211_IFTYPE_OCB) + qos = true; + else if (sta) + qos = sta->sta.wme; + else + qos = false; + + if (!qos) { + skb->priority = 0; /* required for correct WPA/11i MIC */ + return IEEE80211_AC_BE; + } + + if (skb->protocol == sdata->control_port_protocol) { + skb->priority = 7; + goto downgrade; + } + + /* use the data classifier to determine what 802.1d tag the + * data frame has */ + qos_map = rcu_dereference(sdata->qos_map); + skb->priority = cfg80211_classify8021d(skb, qos_map ? + &qos_map->qos_map : NULL); + + downgrade: + return ieee80211_downgrade_queue(sdata, sta, skb); +} + + /* Indicate which queue to use. 
*/ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) @@ -148,10 +184,12 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sta_info *sta = NULL; const u8 *ra = NULL; - bool qos = false; - struct mac80211_qos_map *qos_map; u16 ret; + /* when using iTXQ, we can do this later */ + if (local->ops->wake_tx_queue) + return 0; + if (local->hw.queues < IEEE80211_NUM_ACS || skb->len < 6) { skb->priority = 0; /* required for correct WPA/11i MIC */ return 0; @@ -161,10 +199,8 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, switch (sdata->vif.type) { case NL80211_IFTYPE_AP_VLAN: sta = rcu_dereference(sdata->u.vlan.sta); - if (sta) { - qos = sta->sta.wme; + if (sta) break; - } /* fall through */ case NL80211_IFTYPE_AP: ra = skb->data; @@ -172,56 +208,26 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_WDS: ra = sdata->u.wds.remote_addr; break; -#ifdef CONFIG_MAC80211_MESH - case NL80211_IFTYPE_MESH_POINT: - qos = true; - break; -#endif case NL80211_IFTYPE_STATION: /* might be a TDLS station */ sta = sta_info_get(sdata, skb->data); if (sta) - qos = sta->sta.wme; + break; ra = sdata->u.mgd.bssid; break; case NL80211_IFTYPE_ADHOC: ra = skb->data; break; - case NL80211_IFTYPE_OCB: - /* all stations are required to support WME */ - qos = true; - break; default: break; } - if (!sta && ra && !is_multicast_ether_addr(ra)) { + if (!sta && ra && !is_multicast_ether_addr(ra)) sta = sta_info_get(sdata, ra); - if (sta) - qos = sta->sta.wme; - } - if (!qos) { - skb->priority = 0; /* required for correct WPA/11i MIC */ - ret = IEEE80211_AC_BE; - goto out; - } + ret = __ieee80211_select_queue(sdata, sta, skb); - if (skb->protocol == sdata->control_port_protocol) { - skb->priority = 7; - goto downgrade; - } - - /* use the data classifier to determine what 802.1d tag the - * data frame has */ - qos_map = rcu_dereference(sdata->qos_map); - skb->priority = cfg80211_classify8021d(skb, qos_map ? 
- &qos_map->qos_map : NULL); - - downgrade: - ret = ieee80211_downgrade_queue(sdata, sta, skb); - out: rcu_read_unlock(); return ret; } diff --git a/net/mac80211/wme.h b/net/mac80211/wme.h index 80151edc5195..b1b1439cb91b 100644 --- a/net/mac80211/wme.h +++ b/net/mac80211/wme.h @@ -16,6 +16,8 @@ u16 ieee80211_select_queue_80211(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, struct ieee80211_hdr *hdr); +u16 __ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, struct sk_buff *skb); u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); void ieee80211_set_qos_hdr(struct ieee80211_sub_if_data *sdata, diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index f7c544592ec8..baa098291fb0 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -22,8 +22,8 @@ #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> #endif -#include <net/addrconf.h> -#include <net/nexthop.h> +#include <net/ipv6_stubs.h> +#include <net/rtnh.h> #include "internal.h" /* max memory we will use for mpls_route */ @@ -1223,11 +1223,13 @@ static int mpls_netconf_valid_get_req(struct sk_buff *skb, } if (!netlink_strict_get_check(skb)) - return nlmsg_parse(nlh, sizeof(struct netconfmsg), tb, - NETCONFA_MAX, devconf_mpls_policy, extack); + return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg), + tb, NETCONFA_MAX, + devconf_mpls_policy, extack); - err = nlmsg_parse_strict(nlh, sizeof(struct netconfmsg), tb, - NETCONFA_MAX, devconf_mpls_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg), + tb, NETCONFA_MAX, + devconf_mpls_policy, extack); if (err) return err; @@ -1788,8 +1790,8 @@ static int rtm_to_route_config(struct sk_buff *skb, int index; int err; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_mpls_policy, - extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_mpls_policy, extack); if (err < 0) goto errout; @@ -2017,7 +2019,7 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, u8 linkdown = 0; u8 dead = 0; - mp = nla_nest_start(skb, RTA_MULTIPATH); + mp = nla_nest_start_noflag(skb, RTA_MULTIPATH); if (!mp) goto nla_put_failure; @@ -2106,8 +2108,8 @@ static int mpls_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, cb->answer_flags = NLM_F_DUMP_FILTERED; } - err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_mpls_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_mpls_policy, extack); if (err < 0) return err; @@ -2290,8 +2292,8 @@ static int mpls_valid_getroute_req(struct sk_buff *skb, } if (!netlink_strict_get_check(skb)) - return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_mpls_policy, extack); + return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_mpls_policy, extack); rtm = nlmsg_data(nlh); if ((rtm->rtm_dst_len && rtm->rtm_dst_len != 20) || @@ -2306,8 +2308,8 @@ static int mpls_valid_getroute_req(struct sk_buff *skb, return -EINVAL; } - err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_mpls_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_mpls_policy, extack); if (err) return err; diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index f3a8557494d6..951b52d5835b 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -137,10 +137,14 @@ static int mpls_xmit(struct sk_buff *skb) mpls_stats_inc_outucastpkts(out_dev, skb); - if (rt) - err = neigh_xmit(NEIGH_ARP_TABLE, 
out_dev, &rt->rt_gateway, - skb); - else if (rt6) { + if (rt) { + if (rt->rt_gw_family == AF_INET) + err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt->rt_gw4, + skb); + else if (rt->rt_gw_family == AF_INET6) + err = neigh_xmit(NEIGH_ND_TABLE, out_dev, &rt->rt_gw6, + skb); + } else if (rt6) { if (ipv6_addr_v4mapped(&rt6->rt6i_gateway)) { /* 6PE (RFC 4798) */ err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt6->rt6i_gateway.s6_addr32[3], @@ -174,8 +178,8 @@ static int mpls_build_state(struct nlattr *nla, u8 n_labels; int ret; - ret = nla_parse_nested(tb, MPLS_IPTUNNEL_MAX, nla, - mpls_iptunnel_policy, extack); + ret = nla_parse_nested_deprecated(tb, MPLS_IPTUNNEL_MAX, nla, + mpls_iptunnel_policy, extack); if (ret < 0) return ret; diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index bad17bba8ba7..7fc4feddafa3 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -79,7 +79,7 @@ static int ncsi_write_channel_info(struct sk_buff *skb, nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MINOR, nc->version.alpha2); nla_put_string(skb, NCSI_CHANNEL_ATTR_VERSION_STR, nc->version.fw_name); - vid_nest = nla_nest_start(skb, NCSI_CHANNEL_ATTR_VLAN_LIST); + vid_nest = nla_nest_start_noflag(skb, NCSI_CHANNEL_ATTR_VLAN_LIST); if (!vid_nest) return -ENOMEM; ncf = &nc->vlan_filter; @@ -113,19 +113,19 @@ static int ncsi_write_package_info(struct sk_buff *skb, NCSI_FOR_EACH_PACKAGE(ndp, np) { if (np->id != id) continue; - pnest = nla_nest_start(skb, NCSI_PKG_ATTR); + pnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR); if (!pnest) return -ENOMEM; nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id); if ((0x1 << np->id) == ndp->package_whitelist) nla_put_flag(skb, NCSI_PKG_ATTR_FORCED); - cnest = nla_nest_start(skb, NCSI_PKG_ATTR_CHANNEL_LIST); + cnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR_CHANNEL_LIST); if (!cnest) { nla_nest_cancel(skb, pnest); return -ENOMEM; } NCSI_FOR_EACH_CHANNEL(np, nc) { - nest = nla_nest_start(skb, NCSI_CHANNEL_ATTR); + nest = nla_nest_start_noflag(skb, NCSI_CHANNEL_ATTR); if (!nest) { nla_nest_cancel(skb, cnest); nla_nest_cancel(skb, pnest); @@ -187,7 +187,7 @@ static int ncsi_pkg_info_nl(struct sk_buff *msg, struct genl_info *info) package_id = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_ID]); - attr = nla_nest_start(skb, NCSI_ATTR_PACKAGE_LIST); + attr = nla_nest_start_noflag(skb, NCSI_ATTR_PACKAGE_LIST); if (!attr) { kfree_skb(skb); return -EMSGSIZE; @@ -220,8 +220,8 @@ static int ncsi_pkg_info_all_nl(struct sk_buff *skb, void *hdr; int rc; - rc = genlmsg_parse(cb->nlh, &ncsi_genl_family, attrs, NCSI_ATTR_MAX, - ncsi_genl_policy, NULL); + rc = genlmsg_parse_deprecated(cb->nlh, &ncsi_genl_family, attrs, NCSI_ATTR_MAX, + ncsi_genl_policy, NULL); if (rc) return rc; @@ -250,7 +250,7 @@ static int ncsi_pkg_info_all_nl(struct sk_buff *skb, goto err; } - attr = nla_nest_start(skb, NCSI_ATTR_PACKAGE_LIST); + attr = nla_nest_start_noflag(skb, NCSI_ATTR_PACKAGE_LIST); if (!attr) { rc = -EMSGSIZE; goto err; @@ -723,38 +723,38 @@ static int ncsi_set_channel_mask_nl(struct sk_buff *msg, static const struct genl_ops ncsi_ops[] = { { .cmd = NCSI_CMD_PKG_INFO, - .policy = ncsi_genl_policy, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ncsi_pkg_info_nl, .dumpit = ncsi_pkg_info_all_nl, .flags = 0, }, { .cmd = NCSI_CMD_SET_INTERFACE, - .policy = ncsi_genl_policy, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ncsi_set_interface_nl, .flags = GENL_ADMIN_PERM, }, { .cmd = NCSI_CMD_CLEAR_INTERFACE, - .policy = ncsi_genl_policy, + .validate 
= GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ncsi_clear_interface_nl, .flags = GENL_ADMIN_PERM, }, { .cmd = NCSI_CMD_SEND_CMD, - .policy = ncsi_genl_policy, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ncsi_send_cmd_nl, .flags = GENL_ADMIN_PERM, }, { .cmd = NCSI_CMD_SET_PACKAGE_MASK, - .policy = ncsi_genl_policy, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ncsi_set_package_mask_nl, .flags = GENL_ADMIN_PERM, }, { .cmd = NCSI_CMD_SET_CHANNEL_MASK, - .policy = ncsi_genl_policy, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ncsi_set_channel_mask_nl, .flags = GENL_ADMIN_PERM, }, @@ -764,6 +764,7 @@ static struct genl_family ncsi_genl_family __ro_after_init = { .name = "NCSI", .version = 0, .maxattr = NCSI_ATTR_MAX, + .policy = ncsi_genl_policy, .module = THIS_MODULE, .ops = ncsi_ops, .n_ops = ARRAY_SIZE(ncsi_ops), diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 6548271209a0..02b281d3c167 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -404,11 +404,6 @@ config NF_NAT forms of full Network Address Port Translation. This can be controlled by iptables, ip6tables or nft. -config NF_NAT_NEEDED - bool - depends on NF_NAT - default y - config NF_NAT_AMANDA tristate depends on NF_CONNTRACK && NF_NAT @@ -1002,6 +997,20 @@ config NETFILTER_XT_TARGET_REDIRECT To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_TARGET_MASQUERADE + tristate "MASQUERADE target support" + depends on NF_NAT + default m if NETFILTER_ADVANCED=n + select NF_NAT_MASQUERADE + help + Masquerading is a special case of NAT: all outgoing connections are + changed to seem to come from a particular interface's address, and + if the interface goes down, those connections are lost. This is + only useful for dialup accounts with dynamic IP address (ie. your IP + address will be different on next dialup). + + To compile it as a module, choose M here. If unsure, say N. 
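The .validate and .policy churn through the rest of this pull is one mechanical conversion from the netlink-strictness series called out in the merge message: per-op .policy pointers move up into the genl_family so validation happens uniformly, and commands that still depend on the old lenient parsing opt out explicitly. Condensed from the ncsi_ops and ncsi_genl_family hunks nearby, as a sketch of the resulting shape rather than a buildable module:

static const struct genl_ops example_ops[] = {
	{
		.cmd = NCSI_CMD_PKG_INFO,
		/* keep pre-strict behaviour for this command */
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = ncsi_pkg_info_nl,
	},
};

static struct genl_family example_family __ro_after_init = {
	.name = "NCSI",
	.maxattr = NCSI_ATTR_MAX,
	.policy = ncsi_genl_policy, /* policy now lives on the family */
	.module = THIS_MODULE,
	.ops = example_ops,
	.n_ops = ARRAY_SIZE(example_ops),
};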
+ config NETFILTER_XT_TARGET_TEE tristate '"TEE" - packet cloning to alternate destination' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 4894a85cdd0b..72cca6b48960 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -77,7 +77,8 @@ obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \ nf_tables_trace.o nft_immediate.o nft_cmp.o nft_range.o \ nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \ - nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o + nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o \ + nft_chain_route.o nf_tables_set-objs := nf_tables_set_core.o \ nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o @@ -147,6 +148,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o obj-$(CONFIG_NETFILTER_XT_TARGET_RATEEST) += xt_RATEEST.o obj-$(CONFIG_NETFILTER_XT_TARGET_REDIRECT) += xt_REDIRECT.o +obj-$(CONFIG_NETFILTER_XT_TARGET_MASQUERADE) += xt_MASQUERADE.o obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_TPROXY) += xt_TPROXY.o obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 93aaec3a54ec..71f06900473e 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -23,6 +23,7 @@ #include <linux/mm.h> #include <linux/rcupdate.h> #include <net/net_namespace.h> +#include <net/netfilter/nf_queue.h> #include <net/sock.h> #include "nf_internals.h" diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 257ca393e6f2..38ef2ea838cb 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -99,7 +99,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) struct nlattr *nested; size_t memsize = mtype_memsize(map, set->dsize) + set->ext_size; - nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + nested = nla_nest_start(skb, IPSET_ATTR_DATA); if (!nested) goto nla_put_failure; if (mtype_do_head(skb, map) || @@ -109,7 +109,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) goto nla_put_failure; if (unlikely(ip_set_put_flags(skb, set))) goto nla_put_failure; - ipset_nest_end(skb, nested); + nla_nest_end(skb, nested); return 0; nla_put_failure: @@ -213,7 +213,7 @@ mtype_list(const struct ip_set *set, u32 id, first = cb->args[IPSET_CB_ARG0]; int ret = 0; - adt = ipset_nest_start(skb, IPSET_ATTR_ADT); + adt = nla_nest_start(skb, IPSET_ATTR_ADT); if (!adt) return -EMSGSIZE; /* Extensions may be replaced */ @@ -230,7 +230,7 @@ mtype_list(const struct ip_set *set, #endif ip_set_timeout_expired(ext_timeout(x, set)))) continue; - nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + nested = nla_nest_start(skb, IPSET_ATTR_DATA); if (!nested) { if (id == first) { nla_nest_cancel(skb, adt); @@ -244,9 +244,9 @@ mtype_list(const struct ip_set *set, goto nla_put_failure; if (ip_set_put_extensions(skb, set, x, mtype_is_filled(x))) goto nla_put_failure; - ipset_nest_end(skb, nested); + nla_nest_end(skb, nested); } - ipset_nest_end(skb, adt); + nla_nest_end(skb, adt); /* Set listing finished */ cb->args[IPSET_CB_ARG0] = 0; @@ -259,7 +259,7 @@ nla_put_failure: cb->args[IPSET_CB_ARG0] = 0; ret = -EMSGSIZE; } - ipset_nest_end(skb, adt); + nla_nest_end(skb, adt); out: rcu_read_unlock(); return ret; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 45a257695bef..3f4a4936f63c 100644 --- 
a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -299,8 +299,7 @@ ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr) if (unlikely(!flag_nested(nla))) return -IPSET_ERR_PROTOCOL; - if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, - ipaddr_policy, NULL)) + if (nla_parse_nested_deprecated(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy, NULL)) return -IPSET_ERR_PROTOCOL; if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4))) return -IPSET_ERR_PROTOCOL; @@ -318,8 +317,7 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) if (unlikely(!flag_nested(nla))) return -IPSET_ERR_PROTOCOL; - if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, - ipaddr_policy, NULL)) + if (nla_parse_nested_deprecated(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy, NULL)) return -IPSET_ERR_PROTOCOL; if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6))) return -IPSET_ERR_PROTOCOL; @@ -939,8 +937,7 @@ static int ip_set_create(struct net *net, struct sock *ctnl, /* Without holding any locks, create private part. */ if (attr[IPSET_ATTR_DATA] && - nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA], - set->type->create_policy, NULL)) { + nla_parse_nested_deprecated(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA], set->type->create_policy, NULL)) { ret = -IPSET_ERR_PROTOCOL; goto put_out; } @@ -1298,8 +1295,9 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst) ip_set_id_t index; /* Second pass, so parser can't fail */ - nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, nlh->nlmsg_len - min_len, - ip_set_setname_policy, NULL); + nla_parse_deprecated(cda, IPSET_ATTR_CMD_MAX, attr, + nlh->nlmsg_len - min_len, ip_set_setname_policy, + NULL); cb->args[IPSET_CB_PROTO] = nla_get_u8(cda[IPSET_ATTR_PROTOCOL]); if (cda[IPSET_ATTR_SETNAME]) { @@ -1546,8 +1544,9 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, memcpy(&errmsg->msg, nlh, nlh->nlmsg_len); cmdattr = (void *)&errmsg->msg + min_len; - nla_parse(cda, IPSET_ATTR_CMD_MAX, cmdattr, - nlh->nlmsg_len - min_len, ip_set_adt_policy, NULL); + nla_parse_deprecated(cda, IPSET_ATTR_CMD_MAX, cmdattr, + nlh->nlmsg_len - min_len, + ip_set_adt_policy, NULL); errline = nla_data(cda[IPSET_ATTR_LINENO]); @@ -1592,9 +1591,7 @@ static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb, use_lineno = !!attr[IPSET_ATTR_LINENO]; if (attr[IPSET_ATTR_DATA]) { - if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, - attr[IPSET_ATTR_DATA], - set->type->adt_policy, NULL)) + if (nla_parse_nested_deprecated(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags, use_lineno); @@ -1605,8 +1602,7 @@ static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb, memset(tb, 0, sizeof(tb)); if (nla_type(nla) != IPSET_ATTR_DATA || !flag_nested(nla) || - nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla, - set->type->adt_policy, NULL)) + nla_parse_nested_deprecated(tb, IPSET_ATTR_ADT_MAX, nla, set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags, use_lineno); @@ -1647,9 +1643,7 @@ static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb, use_lineno = !!attr[IPSET_ATTR_LINENO]; if (attr[IPSET_ATTR_DATA]) { - if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, - attr[IPSET_ATTR_DATA], - set->type->adt_policy, NULL)) + if (nla_parse_nested_deprecated(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], 
set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags, use_lineno); @@ -1660,8 +1654,7 @@ static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb, memset(tb, 0, sizeof(*tb)); if (nla_type(nla) != IPSET_ATTR_DATA || !flag_nested(nla) || - nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla, - set->type->adt_policy, NULL)) + nla_parse_nested_deprecated(tb, IPSET_ATTR_ADT_MAX, nla, set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags, use_lineno); @@ -1692,8 +1685,7 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb, if (!set) return -ENOENT; - if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], - set->type->adt_policy, NULL)) + if (nla_parse_nested_deprecated(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; rcu_read_lock_bh(); diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 2c9609929c71..01d51f775f12 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -1057,7 +1057,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) htable_bits = t->htable_bits; rcu_read_unlock_bh(); - nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + nested = nla_nest_start(skb, IPSET_ATTR_DATA); if (!nested) goto nla_put_failure; if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE, @@ -1079,7 +1079,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) goto nla_put_failure; if (unlikely(ip_set_put_flags(skb, set))) goto nla_put_failure; - ipset_nest_end(skb, nested); + nla_nest_end(skb, nested); return 0; nla_put_failure: @@ -1124,7 +1124,7 @@ mtype_list(const struct ip_set *set, void *incomplete; int i, ret = 0; - atd = ipset_nest_start(skb, IPSET_ATTR_ADT); + atd = nla_nest_start(skb, IPSET_ATTR_ADT); if (!atd) return -EMSGSIZE; @@ -1150,7 +1150,7 @@ mtype_list(const struct ip_set *set, continue; pr_debug("list hash %lu hbucket %p i %u, data %p\n", cb->args[IPSET_CB_ARG0], n, i, e); - nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + nested = nla_nest_start(skb, IPSET_ATTR_DATA); if (!nested) { if (cb->args[IPSET_CB_ARG0] == first) { nla_nest_cancel(skb, atd); @@ -1163,10 +1163,10 @@ mtype_list(const struct ip_set *set, goto nla_put_failure; if (ip_set_put_extensions(skb, set, e, true)) goto nla_put_failure; - ipset_nest_end(skb, nested); + nla_nest_end(skb, nested); } } - ipset_nest_end(skb, atd); + nla_nest_end(skb, atd); /* Set listing finished */ cb->args[IPSET_CB_ARG0] = 0; @@ -1180,7 +1180,7 @@ nla_put_failure: cb->args[IPSET_CB_ARG0] = 0; ret = -EMSGSIZE; } else { - ipset_nest_end(skb, atd); + nla_nest_end(skb, atd); } out: rcu_read_unlock(); diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 8da228da53ae..4f894165cdcd 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -466,7 +466,7 @@ list_set_head(struct ip_set *set, struct sk_buff *skb) struct nlattr *nested; size_t memsize = list_set_memsize(map, set->dsize) + set->ext_size; - nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + nested = nla_nest_start(skb, IPSET_ATTR_DATA); if (!nested) goto nla_put_failure; if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) || @@ -476,7 +476,7 @@ list_set_head(struct ip_set *set, struct sk_buff *skb) goto nla_put_failure; if (unlikely(ip_set_put_flags(skb, set))) goto nla_put_failure; - ipset_nest_end(skb, 
nested); + nla_nest_end(skb, nested); return 0; nla_put_failure: @@ -494,7 +494,7 @@ list_set_list(const struct ip_set *set, struct set_elem *e; int ret = 0; - atd = ipset_nest_start(skb, IPSET_ATTR_ADT); + atd = nla_nest_start(skb, IPSET_ATTR_ADT); if (!atd) return -EMSGSIZE; @@ -506,7 +506,7 @@ list_set_list(const struct ip_set *set, i++; continue; } - nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + nested = nla_nest_start(skb, IPSET_ATTR_DATA); if (!nested) goto nla_put_failure; ip_set_name_byindex(map->net, e->id, name); @@ -514,11 +514,11 @@ list_set_list(const struct ip_set *set, goto nla_put_failure; if (ip_set_put_extensions(skb, set, e, true)) goto nla_put_failure; - ipset_nest_end(skb, nested); + nla_nest_end(skb, nested); i++; } - ipset_nest_end(skb, atd); + nla_nest_end(skb, atd); /* Set listing finished */ cb->args[IPSET_CB_ARG0] = 0; goto out; @@ -531,7 +531,7 @@ nla_put_failure: ret = -EMSGSIZE; } else { cb->args[IPSET_CB_ARG0] = i; - ipset_nest_end(skb, atd); + nla_nest_end(skb, atd); } out: rcu_read_unlock(); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 053cd96b9c76..0e887159425c 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -831,6 +831,10 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK; conn_flags |= IP_VS_CONN_F_INACTIVE; + /* set the tunnel info */ + dest->tun_type = udest->tun_type; + dest->tun_port = udest->tun_port; + /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */ if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) { conn_flags |= IP_VS_CONN_F_NOOUTPUT; @@ -987,6 +991,13 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) return -ERANGE; } + if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { + if (udest->tun_port == 0) { + pr_err("%s(): tunnel port is zero\n", __func__); + return -EINVAL; + } + } + ip_vs_addr_copy(udest->af, &daddr, &udest->addr); /* We use function that requires RCU lock */ @@ -1051,6 +1062,13 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) return -ERANGE; } + if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { + if (udest->tun_port == 0) { + pr_err("%s(): tunnel port is zero\n", __func__); + return -EINVAL; + } + } + ip_vs_addr_copy(udest->af, &daddr, &udest->addr); /* We use function that requires RCU lock */ @@ -2333,6 +2351,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest, udest->u_threshold = udest_compat->u_threshold; udest->l_threshold = udest_compat->l_threshold; udest->af = AF_INET; + udest->tun_type = IP_VS_CONN_F_TUNNEL_TYPE_IPIP; } static int @@ -2890,12 +2909,14 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = { [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 }, [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED }, [IPVS_DEST_ATTR_ADDR_FAMILY] = { .type = NLA_U16 }, + [IPVS_DEST_ATTR_TUN_TYPE] = { .type = NLA_U8 }, + [IPVS_DEST_ATTR_TUN_PORT] = { .type = NLA_U16 }, }; static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, struct ip_vs_kstats *kstats) { - struct nlattr *nl_stats = nla_nest_start(skb, container_type); + struct nlattr *nl_stats = nla_nest_start_noflag(skb, container_type); if (!nl_stats) return -EMSGSIZE; @@ -2925,7 +2946,7 @@ nla_put_failure: static int ip_vs_genl_fill_stats64(struct sk_buff *skb, int container_type, struct ip_vs_kstats *kstats) { - struct nlattr *nl_stats = nla_nest_start(skb, 
container_type); + struct nlattr *nl_stats = nla_nest_start_noflag(skb, container_type); if (!nl_stats) return -EMSGSIZE; @@ -2971,7 +2992,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, struct ip_vs_kstats kstats; char *sched_name; - nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE); + nl_service = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_SERVICE); if (!nl_service) return -EMSGSIZE; @@ -3095,8 +3116,7 @@ static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs, /* Parse mandatory identifying service fields first */ if (nla == NULL || - nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, - ip_vs_svc_policy, NULL)) + nla_parse_nested_deprecated(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy, NULL)) return -EINVAL; nla_af = attrs[IPVS_SVC_ATTR_AF]; @@ -3182,7 +3202,7 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) struct nlattr *nl_dest; struct ip_vs_kstats kstats; - nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST); + nl_dest = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_DEST); if (!nl_dest) return -EMSGSIZE; @@ -3193,6 +3213,10 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) IP_VS_CONN_F_FWD_MASK)) || nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight)) || + nla_put_u8(skb, IPVS_DEST_ATTR_TUN_TYPE, + dest->tun_type) || + nla_put_be16(skb, IPVS_DEST_ATTR_TUN_PORT, + dest->tun_port) || nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) || nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) || nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS, @@ -3254,8 +3278,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, mutex_lock(&__ip_vs_mutex); /* Try to find the service for which to dump destinations */ - if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs, IPVS_CMD_ATTR_MAX, - ip_vs_cmd_policy, cb->extack)) + if (nlmsg_parse_deprecated(cb->nlh, GENL_HDRLEN, attrs, IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy, cb->extack)) goto out_err; @@ -3291,8 +3314,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, /* Parse mandatory identifying destination fields first */ if (nla == NULL || - nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, - ip_vs_dest_policy, NULL)) + nla_parse_nested_deprecated(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy, NULL)) return -EINVAL; nla_addr = attrs[IPVS_DEST_ATTR_ADDR]; @@ -3315,12 +3337,14 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, /* If a full entry was requested, check for the additional fields */ if (full_entry) { struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh, - *nla_l_thresh; + *nla_l_thresh, *nla_tun_type, *nla_tun_port; nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD]; nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT]; nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH]; nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH]; + nla_tun_type = attrs[IPVS_DEST_ATTR_TUN_TYPE]; + nla_tun_port = attrs[IPVS_DEST_ATTR_TUN_PORT]; if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh)) return -EINVAL; @@ -3330,6 +3354,12 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, udest->weight = nla_get_u32(nla_weight); udest->u_threshold = nla_get_u32(nla_u_thresh); udest->l_threshold = nla_get_u32(nla_l_thresh); + + if (nla_tun_type) + udest->tun_type = nla_get_u8(nla_tun_type); + + if (nla_tun_port) + udest->tun_port = nla_get_be16(nla_tun_port); } return 0; @@ -3340,7 +3370,7 @@ static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state, { struct nlattr *nl_daemon; - nl_daemon = nla_nest_start(skb, 
IPVS_CMD_ATTR_DAEMON); + nl_daemon = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_DAEMON); if (!nl_daemon) return -EMSGSIZE; @@ -3528,9 +3558,7 @@ static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info) struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1]; if (!info->attrs[IPVS_CMD_ATTR_DAEMON] || - nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX, - info->attrs[IPVS_CMD_ATTR_DAEMON], - ip_vs_daemon_policy, info->extack)) + nla_parse_nested_deprecated(daemon_attrs, IPVS_DAEMON_ATTR_MAX, info->attrs[IPVS_CMD_ATTR_DAEMON], ip_vs_daemon_policy, info->extack)) goto out; if (cmd == IPVS_CMD_NEW_DAEMON) @@ -3774,94 +3802,98 @@ out: static const struct genl_ops ip_vs_genl_ops[] = { { .cmd = IPVS_CMD_NEW_SERVICE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .doit = ip_vs_genl_set_cmd, }, { .cmd = IPVS_CMD_SET_SERVICE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .doit = ip_vs_genl_set_cmd, }, { .cmd = IPVS_CMD_DEL_SERVICE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .doit = ip_vs_genl_set_cmd, }, { .cmd = IPVS_CMD_GET_SERVICE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = ip_vs_genl_get_cmd, .dumpit = ip_vs_genl_dump_services, - .policy = ip_vs_cmd_policy, }, { .cmd = IPVS_CMD_NEW_DEST, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .doit = ip_vs_genl_set_cmd, }, { .cmd = IPVS_CMD_SET_DEST, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .doit = ip_vs_genl_set_cmd, }, { .cmd = IPVS_CMD_DEL_DEST, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .doit = ip_vs_genl_set_cmd, }, { .cmd = IPVS_CMD_GET_DEST, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .dumpit = ip_vs_genl_dump_dests, }, { .cmd = IPVS_CMD_NEW_DAEMON, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .doit = ip_vs_genl_set_daemon, }, { .cmd = IPVS_CMD_DEL_DAEMON, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .doit = ip_vs_genl_set_daemon, }, { .cmd = IPVS_CMD_GET_DAEMON, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .dumpit = ip_vs_genl_dump_daemons, }, { .cmd = IPVS_CMD_SET_CONFIG, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .doit = ip_vs_genl_set_cmd, }, { .cmd = IPVS_CMD_GET_CONFIG, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = ip_vs_genl_get_cmd, }, { .cmd = IPVS_CMD_GET_INFO, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = ip_vs_genl_get_cmd, }, { .cmd = IPVS_CMD_ZERO, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .doit = ip_vs_genl_set_cmd, }, { .cmd = IPVS_CMD_FLUSH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = 
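
The genl_ops table above is the other half of the strict-validation work: the per-op .policy pointers disappear (the policy moves to the family, see the next hunk) and every existing op explicitly opts out of strict and dump validation so behaviour stays unchanged. The resulting idiom, sketched with hypothetical demo_* names:

    static const struct genl_ops demo_ops[] = {
        {
            .cmd      = DEMO_CMD_SET,
            .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
            .flags    = GENL_ADMIN_PERM,
            .doit     = demo_set_cmd,
        },
    };

    static struct genl_family demo_family __ro_after_init = {
        .name    = "demo",
        .maxattr = DEMO_ATTR_MAX,
        .policy  = demo_policy,   /* family-wide, was per-op before */
        .ops     = demo_ops,
        .n_ops   = ARRAY_SIZE(demo_ops),
        .module  = THIS_MODULE,
    };

New ops would leave .validate unset and get strict checking by default, plus a policy that can be exported to userspace.
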
ip_vs_genl_set_cmd, }, @@ -3872,6 +3904,7 @@ static struct genl_family ip_vs_genl_family __ro_after_init = { .name = IPVS_GENL_NAME, .version = IPVS_GENL_VERSION, .maxattr = IPVS_CMD_ATTR_MAX, + .policy = ip_vs_cmd_policy, .netnsok = true, /* Make ipvsadm to work on netns */ .module = THIS_MODULE, .ops = ip_vs_genl_ops, diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 175349fcf91f..8d6f94b67772 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -32,6 +32,7 @@ #include <linux/slab.h> #include <linux/tcp.h> /* for tcphdr */ #include <net/ip.h> +#include <net/gue.h> #include <net/tcp.h> /* for csum_tcpudp_magic */ #include <net/udp.h> #include <net/icmp.h> /* for icmp_send */ @@ -382,6 +383,10 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, mtu = dst_mtu(&rt->dst); } else { mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); + if (!dest) + goto err_put; + if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) + mtu -= sizeof(struct udphdr) + sizeof(struct guehdr); if (mtu < 68) { IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); goto err_put; @@ -533,6 +538,10 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, mtu = dst_mtu(&rt->dst); else { mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); + if (!dest) + goto err_put; + if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) + mtu -= sizeof(struct udphdr) + sizeof(struct guehdr); if (mtu < IPV6_MIN_MTU) { IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, IPV6_MIN_MTU); @@ -989,6 +998,41 @@ static inline int __tun_gso_type_mask(int encaps_af, int orig_af) } } +static int +ipvs_gue_encap(struct net *net, struct sk_buff *skb, + struct ip_vs_conn *cp, __u8 *next_protocol) +{ + __be16 dport; + __be16 sport = udp_flow_src_port(net, skb, 0, 0, false); + struct udphdr *udph; /* Our new UDP header */ + struct guehdr *gueh; /* Our new GUE header */ + + skb_push(skb, sizeof(struct guehdr)); + + gueh = (struct guehdr *)skb->data; + + gueh->control = 0; + gueh->version = 0; + gueh->hlen = 0; + gueh->flags = 0; + gueh->proto_ctype = *next_protocol; + + skb_push(skb, sizeof(struct udphdr)); + skb_reset_transport_header(skb); + + udph = udp_hdr(skb); + + dport = cp->dest->tun_port; + udph->dest = dport; + udph->source = sport; + udph->len = htons(skb->len); + udph->check = 0; + + *next_protocol = IPPROTO_UDP; + + return 0; +} + /* * IP Tunneling transmitter * @@ -1025,6 +1069,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct iphdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ int ret, local; + int tun_type, gso_type; EnterFunction(10); @@ -1046,6 +1091,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, */ max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr); + tun_type = cp->dest->tun_type; + + if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) + max_headroom += sizeof(struct udphdr) + sizeof(struct guehdr); + /* We only care about the df field if sysctl_pmtu_disc(ipvs) is set */ dfp = sysctl_pmtu_disc(ipvs) ? 
&df : NULL; skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom, @@ -1054,11 +1104,20 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (IS_ERR(skb)) goto tx_error; - if (iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET, cp->af))) + gso_type = __tun_gso_type_mask(AF_INET, cp->af); + if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) + gso_type |= SKB_GSO_UDP_TUNNEL; + + if (iptunnel_handle_offloads(skb, gso_type)) goto tx_error; skb->transport_header = skb->network_header; + skb_set_inner_ipproto(skb, next_protocol); + + if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) + ipvs_gue_encap(net, skb, cp, &next_protocol); + skb_push(skb, sizeof(struct iphdr)); skb_reset_network_header(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); @@ -1102,6 +1161,8 @@ int ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { + struct netns_ipvs *ipvs = cp->ipvs; + struct net *net = ipvs->net; struct rt6_info *rt; /* Route to the other host */ struct in6_addr saddr; /* Source for tunnel */ struct net_device *tdev; /* Device to other host */ @@ -1112,10 +1173,11 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct ipv6hdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ int ret, local; + int tun_type, gso_type; EnterFunction(10); - local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest, + local = __ip_vs_get_out_rt_v6(ipvs, cp->af, skb, cp->dest, &cp->daddr.in6, &saddr, ipvsh, 1, IP_VS_RT_MODE_LOCAL | @@ -1134,17 +1196,31 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, */ max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr); + tun_type = cp->dest->tun_type; + + if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) + max_headroom += sizeof(struct udphdr) + sizeof(struct guehdr); + skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom, &next_protocol, &payload_len, &dsfield, &ttl, NULL); if (IS_ERR(skb)) goto tx_error; - if (iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET6, cp->af))) + gso_type = __tun_gso_type_mask(AF_INET6, cp->af); + if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) + gso_type |= SKB_GSO_UDP_TUNNEL; + + if (iptunnel_handle_offloads(skb, gso_type)) goto tx_error; skb->transport_header = skb->network_header; + skb_set_inner_ipproto(skb, next_protocol); + + if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) + ipvs_gue_encap(net, skb, cp, &next_protocol); + skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); @@ -1167,7 +1243,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, ret = ip_vs_tunnel_xmit_prepare(skb, cp); if (ret == NF_ACCEPT) - ip6_local_out(cp->ipvs->net, skb->sk, skb); + ip6_local_out(net, skb->sk, skb); else if (ret == NF_DROP) kfree_skb(skb); diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index f2681ec5b5f6..dbec6fca0d9e 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -28,11 +28,13 @@ static unsigned int master_timeout __read_mostly = 300; static char *ts_algo = "kmp"; +#define HELPER_NAME "amanda" + MODULE_AUTHOR("Brian J. 
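
Taken together, the MTU hunks and ipvs_gue_encap() above define the GUE datapath. The fixed overhead subtracted from the route MTU, and the resulting wire format, are worth spelling out (struct guehdr's base header is 4 bytes and no GUE extension fields are used here; the macro name is illustrative):

    /* IPv4 case: 20 + 8 + 4 = 32 bytes, so a 1500-byte egress MTU
     * leaves 1468 bytes for the inner packet, versus 1480 for IPIP. */
    #define DEMO_GUE_V4_OVERHEAD (sizeof(struct iphdr) +  \
                                  sizeof(struct udphdr) + \
                                  sizeof(struct guehdr))

    /* Layout built by ip_vs_tunnel_xmit() in GUE mode:
     *   outer IP header   (protocol = IPPROTO_UDP)
     *   UDP header        (dest = dest->tun_port,
     *                      source = udp_flow_src_port(),
     *                      check = 0: no outer UDP checksum here)
     *   GUE header        (version 0, hlen 0,
     *                      proto_ctype = inner protocol)
     *   inner packet      (the original, scheduled skb)
     */
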
Murrell <netfilter@interlinx.bc.ca>"); MODULE_DESCRIPTION("Amanda connection tracking module"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_conntrack_amanda"); -MODULE_ALIAS_NFCT_HELPER("amanda"); +MODULE_ALIAS_NFCT_HELPER(HELPER_NAME); module_param(master_timeout, uint, 0600); MODULE_PARM_DESC(master_timeout, "timeout for the master connection"); @@ -179,13 +181,14 @@ static const struct nf_conntrack_expect_policy amanda_exp_policy = { static struct nf_conntrack_helper amanda_helper[2] __read_mostly = { { - .name = "amanda", + .name = HELPER_NAME, .me = THIS_MODULE, .help = amanda_help, .tuple.src.l3num = AF_INET, .tuple.src.u.udp.port = cpu_to_be16(10080), .tuple.dst.protonum = IPPROTO_UDP, .expect_policy = &amanda_exp_policy, + .nat_mod_name = NF_NAT_HELPER_NAME(HELPER_NAME), }, { .name = "amanda", @@ -195,6 +198,7 @@ static struct nf_conntrack_helper amanda_helper[2] __read_mostly = { .tuple.src.u.udp.port = cpu_to_be16(10080), .tuple.dst.protonum = IPPROTO_UDP, .expect_policy = &amanda_exp_policy, + .nat_mod_name = NF_NAT_HELPER_NAME(HELPER_NAME), }, }; diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 334d6e5b7762..59c18804a10a 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -336,7 +336,7 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class, exp->tuple.dst.u.all = *dst; -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) memset(&exp->saved_addr, 0, sizeof(exp->saved_addr)); memset(&exp->saved_proto, 0, sizeof(exp->saved_proto)); #endif diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index a11c304fb771..32aeac1c4760 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -29,11 +29,13 @@ #include <net/netfilter/nf_conntrack_helper.h> #include <linux/netfilter/nf_conntrack_ftp.h> +#define HELPER_NAME "ftp" + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>"); MODULE_DESCRIPTION("ftp connection tracking helper"); MODULE_ALIAS("ip_conntrack_ftp"); -MODULE_ALIAS_NFCT_HELPER("ftp"); +MODULE_ALIAS_NFCT_HELPER(HELPER_NAME); /* This is slow, but it's simple. 
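
The CONFIG_NF_NAT_NEEDED checks rewritten here (and in the ctnetlink hunks below) reflect that the intermediate symbol goes away after the NAT core consolidation; the replacement tests CONFIG_NF_NAT directly with the generic kconfig macro, which is true for both built-in and modular NAT:

    /* from include/linux/kconfig.h */
    #define IS_ENABLED(option) (IS_BUILTIN(option) || IS_MODULE(option))
    /* IS_BUILTIN(CONFIG_NF_NAT) -> 1 iff CONFIG_NF_NAT=y
     * IS_MODULE(CONFIG_NF_NAT)  -> 1 iff CONFIG_NF_NAT=m  */
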
--RR */ static char *ftp_buffer; @@ -588,12 +590,14 @@ static int __init nf_conntrack_ftp_init(void) /* FIXME should be configurable whether IPv4 and IPv6 FTP connections are tracked or not - YK */ for (i = 0; i < ports_c; i++) { - nf_ct_helper_init(&ftp[2 * i], AF_INET, IPPROTO_TCP, "ftp", - FTP_PORT, ports[i], ports[i], &ftp_exp_policy, - 0, help, nf_ct_ftp_from_nlattr, THIS_MODULE); - nf_ct_helper_init(&ftp[2 * i + 1], AF_INET6, IPPROTO_TCP, "ftp", - FTP_PORT, ports[i], ports[i], &ftp_exp_policy, - 0, help, nf_ct_ftp_from_nlattr, THIS_MODULE); + nf_ct_helper_init(&ftp[2 * i], AF_INET, IPPROTO_TCP, + HELPER_NAME, FTP_PORT, ports[i], ports[i], + &ftp_exp_policy, 0, help, + nf_ct_ftp_from_nlattr, THIS_MODULE); + nf_ct_helper_init(&ftp[2 * i + 1], AF_INET6, IPPROTO_TCP, + HELPER_NAME, FTP_PORT, ports[i], ports[i], + &ftp_exp_policy, 0, help, + nf_ct_ftp_from_nlattr, THIS_MODULE); } ret = nf_conntrack_helpers_register(ftp, ports_c * 2); diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 274baf1dab87..918df7f71c8f 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -42,6 +42,9 @@ module_param_named(nf_conntrack_helper, nf_ct_auto_assign_helper, bool, 0644); MODULE_PARM_DESC(nf_conntrack_helper, "Enable automatic conntrack helper assignment (default 0)"); +static DEFINE_MUTEX(nf_ct_nat_helpers_mutex); +static struct list_head nf_ct_nat_helpers __read_mostly; + /* Stupid hash, but collision free for the default registrations of the * helpers currently in the kernel. */ static unsigned int helper_hash(const struct nf_conntrack_tuple *tuple) @@ -130,6 +133,70 @@ void nf_conntrack_helper_put(struct nf_conntrack_helper *helper) } EXPORT_SYMBOL_GPL(nf_conntrack_helper_put); +static struct nf_conntrack_nat_helper * +nf_conntrack_nat_helper_find(const char *mod_name) +{ + struct nf_conntrack_nat_helper *cur; + bool found = false; + + list_for_each_entry_rcu(cur, &nf_ct_nat_helpers, list) { + if (!strcmp(cur->mod_name, mod_name)) { + found = true; + break; + } + } + return found ? 
cur : NULL; +} + +int +nf_nat_helper_try_module_get(const char *name, u16 l3num, u8 protonum) +{ + struct nf_conntrack_helper *h; + struct nf_conntrack_nat_helper *nat; + char mod_name[NF_CT_HELPER_NAME_LEN]; + int ret = 0; + + rcu_read_lock(); + h = __nf_conntrack_helper_find(name, l3num, protonum); + if (!h) { + rcu_read_unlock(); + return -ENOENT; + } + + nat = nf_conntrack_nat_helper_find(h->nat_mod_name); + if (!nat) { + snprintf(mod_name, sizeof(mod_name), "%s", h->nat_mod_name); + rcu_read_unlock(); + request_module(mod_name); + + rcu_read_lock(); + nat = nf_conntrack_nat_helper_find(mod_name); + if (!nat) { + rcu_read_unlock(); + return -ENOENT; + } + } + + if (!try_module_get(nat->module)) + ret = -ENOENT; + + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(nf_nat_helper_try_module_get); + +void nf_nat_helper_put(struct nf_conntrack_helper *helper) +{ + struct nf_conntrack_nat_helper *nat; + + nat = nf_conntrack_nat_helper_find(helper->nat_mod_name); + if (WARN_ON_ONCE(!nat)) + return; + + module_put(nat->module); +} +EXPORT_SYMBOL_GPL(nf_nat_helper_put); + struct nf_conn_help * nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp) { @@ -430,6 +497,8 @@ void nf_ct_helper_init(struct nf_conntrack_helper *helper, helper->help = help; helper->from_nlattr = from_nlattr; helper->me = module; + snprintf(helper->nat_mod_name, sizeof(helper->nat_mod_name), + NF_NAT_HELPER_PREFIX "%s", name); if (spec_port == default_port) snprintf(helper->name, sizeof(helper->name), "%s", name); @@ -466,6 +535,22 @@ void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *helper, } EXPORT_SYMBOL_GPL(nf_conntrack_helpers_unregister); +void nf_nat_helper_register(struct nf_conntrack_nat_helper *nat) +{ + mutex_lock(&nf_ct_nat_helpers_mutex); + list_add_rcu(&nat->list, &nf_ct_nat_helpers); + mutex_unlock(&nf_ct_nat_helpers_mutex); +} +EXPORT_SYMBOL_GPL(nf_nat_helper_register); + +void nf_nat_helper_unregister(struct nf_conntrack_nat_helper *nat) +{ + mutex_lock(&nf_ct_nat_helpers_mutex); + list_del_rcu(&nat->list); + mutex_unlock(&nf_ct_nat_helpers_mutex); +} +EXPORT_SYMBOL_GPL(nf_nat_helper_unregister); + static const struct nf_ct_ext_type helper_extend = { .len = sizeof(struct nf_conn_help), .align = __alignof__(struct nf_conn_help), @@ -493,6 +578,7 @@ int nf_conntrack_helper_init(void) goto out_extend; } + INIT_LIST_HEAD(&nf_ct_nat_helpers); return 0; out_extend: kvfree(nf_ct_helper_hash); diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 4099f4d79bae..79e5014b3b0d 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -42,11 +42,13 @@ unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb, struct nf_conntrack_expect *exp) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_irc_hook); +#define HELPER_NAME "irc" + MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("IRC (DCC) connection tracking helper"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_conntrack_irc"); -MODULE_ALIAS_NFCT_HELPER("irc"); +MODULE_ALIAS_NFCT_HELPER(HELPER_NAME); module_param_array(ports, ushort, &ports_c, 0400); MODULE_PARM_DESC(ports, "port numbers of IRC servers"); @@ -259,7 +261,7 @@ static int __init nf_conntrack_irc_init(void) ports[ports_c++] = IRC_PORT; for (i = 0; i < ports_c; i++) { - nf_ct_helper_init(&irc[i], AF_INET, IPPROTO_TCP, "irc", + nf_ct_helper_init(&irc[i], AF_INET, IPPROTO_TCP, HELPER_NAME, IRC_PORT, ports[i], i, &irc_exp_policy, 0, help, NULL, THIS_MODULE); } diff --git a/net/netfilter/nf_conntrack_netlink.c 
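
nf_nat_helper_try_module_get() above is the glue that lets the NAT portion of a conntrack helper live in its own module and still be pulled in on demand. The prefix baked into nat_mod_name preserves the historical alias (the nf_nat_amanda.c hunk further down, for instance, trades MODULE_ALIAS("ip_nat_amanda") for MODULE_ALIAS_NF_NAT_HELPER(NAT_HELPER_NAME), which expands to the same string), so the control flow for a ruleset requesting the "ftp" helper is roughly:

    /*
     * nf_nat_helper_try_module_get("ftp", l3num, protonum)
     *   -> __nf_conntrack_helper_find("ftp", ...)      conntrack helper
     *   -> nf_conntrack_nat_helper_find("ip_nat_ftp")  registered NAT list
     *   -> request_module("ip_nat_ftp")                alias loads nf_nat_ftp
     *   -> try_module_get(nat->module)                 pin while referenced
     *
     * and nf_nat_helper_put() drops the module reference again.
     */
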
b/net/netfilter/nf_conntrack_netlink.c index d7f61b0547c6..8dcc064d518d 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -46,7 +46,7 @@ #include <net/netfilter/nf_conntrack_timestamp.h> #include <net/netfilter/nf_conntrack_labels.h> #include <net/netfilter/nf_conntrack_synproxy.h> -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_helper.h> #endif @@ -63,7 +63,7 @@ static int ctnetlink_dump_tuples_proto(struct sk_buff *skb, int ret = 0; struct nlattr *nest_parms; - nest_parms = nla_nest_start(skb, CTA_TUPLE_PROTO | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_TUPLE_PROTO); if (!nest_parms) goto nla_put_failure; if (nla_put_u8(skb, CTA_PROTO_NUM, tuple->dst.protonum)) @@ -104,7 +104,7 @@ static int ctnetlink_dump_tuples_ip(struct sk_buff *skb, int ret = 0; struct nlattr *nest_parms; - nest_parms = nla_nest_start(skb, CTA_TUPLE_IP | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_TUPLE_IP); if (!nest_parms) goto nla_put_failure; @@ -187,7 +187,7 @@ static int ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct) if (!l4proto->to_nlattr) return 0; - nest_proto = nla_nest_start(skb, CTA_PROTOINFO | NLA_F_NESTED); + nest_proto = nla_nest_start(skb, CTA_PROTOINFO); if (!nest_proto) goto nla_put_failure; @@ -215,7 +215,7 @@ static int ctnetlink_dump_helpinfo(struct sk_buff *skb, if (!helper) goto out; - nest_helper = nla_nest_start(skb, CTA_HELP | NLA_F_NESTED); + nest_helper = nla_nest_start(skb, CTA_HELP); if (!nest_helper) goto nla_put_failure; if (nla_put_string(skb, CTA_HELP_NAME, helper->name)) @@ -249,7 +249,7 @@ dump_counters(struct sk_buff *skb, struct nf_conn_acct *acct, bytes = atomic64_read(&counter[dir].bytes); } - nest_count = nla_nest_start(skb, attr | NLA_F_NESTED); + nest_count = nla_nest_start(skb, attr); if (!nest_count) goto nla_put_failure; @@ -293,7 +293,7 @@ ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct) if (!tstamp) return 0; - nest_count = nla_nest_start(skb, CTA_TIMESTAMP | NLA_F_NESTED); + nest_count = nla_nest_start(skb, CTA_TIMESTAMP); if (!nest_count) goto nla_put_failure; @@ -337,7 +337,7 @@ static int ctnetlink_dump_secctx(struct sk_buff *skb, const struct nf_conn *ct) return 0; ret = -1; - nest_secctx = nla_nest_start(skb, CTA_SECCTX | NLA_F_NESTED); + nest_secctx = nla_nest_start(skb, CTA_SECCTX); if (!nest_secctx) goto nla_put_failure; @@ -397,7 +397,7 @@ static int ctnetlink_dump_master(struct sk_buff *skb, const struct nf_conn *ct) if (!(ct->status & IPS_EXPECTED)) return 0; - nest_parms = nla_nest_start(skb, CTA_TUPLE_MASTER | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_TUPLE_MASTER); if (!nest_parms) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, master_tuple(ct)) < 0) @@ -415,7 +415,7 @@ dump_ct_seq_adj(struct sk_buff *skb, const struct nf_ct_seqadj *seq, int type) { struct nlattr *nest_parms; - nest_parms = nla_nest_start(skb, type | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, type); if (!nest_parms) goto nla_put_failure; @@ -467,7 +467,7 @@ static int ctnetlink_dump_ct_synproxy(struct sk_buff *skb, struct nf_conn *ct) if (!synproxy) return 0; - nest_parms = nla_nest_start(skb, CTA_SYNPROXY | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_SYNPROXY); if (!nest_parms) goto nla_put_failure; @@ -528,7 +528,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, zone = nf_ct_zone(ct); - nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | 
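
The ctnetlink conversions throughout this file look like they drop NLA_F_NESTED, but the flag did not go away: nla_nest_start() itself was redefined in this series to set it, with the old behaviour renamed. In include/net/netlink.h terms:

    static inline struct nlattr *nla_nest_start(struct sk_buff *skb,
                                                int attrtype)
    {
        return nla_nest_start_noflag(skb, attrtype | NLA_F_NESTED);
    }

which is also why the ipvs hunks earlier in this diff, which must not start emitting the flag, switched to nla_nest_start_noflag() instead.
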
NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG); if (!nest_parms) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) @@ -538,7 +538,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, goto nla_put_failure; nla_nest_end(skb, nest_parms); - nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY); if (!nest_parms) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0) @@ -658,7 +658,7 @@ static size_t ctnetlink_nlmsg_size(const struct nf_conn *ct) + nla_total_size(0) /* CTA_HELP */ + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */ + ctnetlink_secctx_size(ct) -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */ + 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */ #endif @@ -720,7 +720,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) zone = nf_ct_zone(ct); - nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG); if (!nest_parms) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) @@ -730,7 +730,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) goto nla_put_failure; nla_nest_end(skb, nest_parms); - nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY); if (!nest_parms) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0) @@ -1020,12 +1020,12 @@ static int ctnetlink_parse_tuple_ip(struct nlattr *attr, struct nlattr *tb[CTA_IP_MAX+1]; int ret = 0; - ret = nla_parse_nested(tb, CTA_IP_MAX, attr, NULL, NULL); + ret = nla_parse_nested_deprecated(tb, CTA_IP_MAX, attr, NULL, NULL); if (ret < 0) return ret; - ret = nla_validate_nested(attr, CTA_IP_MAX, - cta_ip_nla_policy, NULL); + ret = nla_validate_nested_deprecated(attr, CTA_IP_MAX, + cta_ip_nla_policy, NULL); if (ret) return ret; @@ -1052,8 +1052,8 @@ static int ctnetlink_parse_tuple_proto(struct nlattr *attr, struct nlattr *tb[CTA_PROTO_MAX+1]; int ret = 0; - ret = nla_parse_nested(tb, CTA_PROTO_MAX, attr, proto_nla_policy, - NULL); + ret = nla_parse_nested_deprecated(tb, CTA_PROTO_MAX, attr, + proto_nla_policy, NULL); if (ret < 0) return ret; @@ -1065,8 +1065,9 @@ static int ctnetlink_parse_tuple_proto(struct nlattr *attr, l4proto = nf_ct_l4proto_find(tuple->dst.protonum); if (likely(l4proto->nlattr_to_tuple)) { - ret = nla_validate_nested(attr, CTA_PROTO_MAX, - l4proto->nla_policy, NULL); + ret = nla_validate_nested_deprecated(attr, CTA_PROTO_MAX, + l4proto->nla_policy, + NULL); if (ret == 0) ret = l4proto->nlattr_to_tuple(tb, tuple); } @@ -1129,8 +1130,8 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[], memset(tuple, 0, sizeof(*tuple)); - err = nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], tuple_nla_policy, - NULL); + err = nla_parse_nested_deprecated(tb, CTA_TUPLE_MAX, cda[type], + tuple_nla_policy, NULL); if (err < 0) return err; @@ -1180,7 +1181,8 @@ static int ctnetlink_parse_help(const struct nlattr *attr, char **helper_name, int err; struct nlattr *tb[CTA_HELP_MAX+1]; - err = nla_parse_nested(tb, CTA_HELP_MAX, attr, help_nla_policy, NULL); + err = nla_parse_nested_deprecated(tb, CTA_HELP_MAX, attr, + help_nla_policy, NULL); if (err < 0) return err; @@ -1498,7 +1500,7 @@ static int 
ctnetlink_get_ct_unconfirmed(struct net *net, struct sock *ctnl, return -EOPNOTSUPP; } -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) static int ctnetlink_parse_nat_setup(struct nf_conn *ct, enum nf_nat_manip_type manip, @@ -1590,7 +1592,7 @@ ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[]) static int ctnetlink_setup_nat(struct nf_conn *ct, const struct nlattr * const cda[]) { -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) int ret; if (!cda[CTA_NAT_DST] && !cda[CTA_NAT_SRC]) @@ -1721,8 +1723,8 @@ static int ctnetlink_change_protoinfo(struct nf_conn *ct, struct nlattr *tb[CTA_PROTOINFO_MAX+1]; int err = 0; - err = nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy, - NULL); + err = nla_parse_nested_deprecated(tb, CTA_PROTOINFO_MAX, attr, + protoinfo_policy, NULL); if (err < 0) return err; @@ -1745,7 +1747,8 @@ static int change_seq_adj(struct nf_ct_seqadj *seq, int err; struct nlattr *cda[CTA_SEQADJ_MAX+1]; - err = nla_parse_nested(cda, CTA_SEQADJ_MAX, attr, seqadj_policy, NULL); + err = nla_parse_nested_deprecated(cda, CTA_SEQADJ_MAX, attr, + seqadj_policy, NULL); if (err < 0) return err; @@ -1822,8 +1825,9 @@ static int ctnetlink_change_synproxy(struct nf_conn *ct, if (!synproxy) return 0; - err = nla_parse_nested(tb, CTA_SYNPROXY_MAX, cda[CTA_SYNPROXY], - synproxy_policy, NULL); + err = nla_parse_nested_deprecated(tb, CTA_SYNPROXY_MAX, + cda[CTA_SYNPROXY], synproxy_policy, + NULL); if (err < 0) return err; @@ -2373,7 +2377,7 @@ ctnetlink_glue_build_size(const struct nf_conn *ct) + nla_total_size(0) /* CTA_HELP */ + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */ + ctnetlink_secctx_size(ct) -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */ + 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */ #endif @@ -2400,7 +2404,7 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) zone = nf_ct_zone(ct); - nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG); if (!nest_parms) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) @@ -2410,7 +2414,7 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) goto nla_put_failure; nla_nest_end(skb, nest_parms); - nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY); if (!nest_parms) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0) @@ -2472,7 +2476,7 @@ ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct, { struct nlattr *nest_parms; - nest_parms = nla_nest_start(skb, ct_attr | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, ct_attr); if (!nest_parms) goto nla_put_failure; @@ -2553,7 +2557,8 @@ ctnetlink_glue_parse(const struct nlattr *attr, struct nf_conn *ct) struct nlattr *cda[CTA_MAX+1]; int ret; - ret = nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy, NULL); + ret = nla_parse_nested_deprecated(cda, CTA_MAX, attr, ct_nla_policy, + NULL); if (ret < 0) return ret; @@ -2586,8 +2591,8 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, struct nf_conntrack_expect *exp; int err; - err = nla_parse_nested(cda, CTA_EXPECT_MAX, attr, exp_nla_policy, - NULL); + err = nla_parse_nested_deprecated(cda, CTA_EXPECT_MAX, attr, + exp_nla_policy, NULL); if (err < 0) return err; @@ -2644,7 +2649,7 @@ static int 
ctnetlink_exp_dump_tuple(struct sk_buff *skb, { struct nlattr *nest_parms; - nest_parms = nla_nest_start(skb, type | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, type); if (!nest_parms) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, tuple) < 0) @@ -2671,7 +2676,7 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb, m.src.u.all = mask->src.u.all; m.dst.protonum = tuple->dst.protonum; - nest_parms = nla_nest_start(skb, CTA_EXPECT_MASK | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_EXPECT_MASK); if (!nest_parms) goto nla_put_failure; @@ -2722,7 +2727,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, struct nf_conn *master = exp->master; long timeout = ((long)exp->timeout.expires - (long)jiffies) / HZ; struct nf_conn_help *help; -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) struct nlattr *nest_parms; struct nf_conntrack_tuple nat_tuple = {}; #endif @@ -2740,10 +2745,10 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, CTA_EXPECT_MASTER) < 0) goto nla_put_failure; -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) if (!nf_inet_addr_cmp(&exp->saved_addr, &any_addr) || exp->saved_proto.all) { - nest_parms = nla_nest_start(skb, CTA_EXPECT_NAT | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_EXPECT_NAT); if (!nest_parms) goto nla_put_failure; @@ -3204,13 +3209,13 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr, struct nf_conntrack_expect *exp, u_int8_t u3) { -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) struct nlattr *tb[CTA_EXPECT_NAT_MAX+1]; struct nf_conntrack_tuple nat_tuple = {}; int err; - err = nla_parse_nested(tb, CTA_EXPECT_NAT_MAX, attr, - exp_nat_nla_policy, NULL); + err = nla_parse_nested_deprecated(tb, CTA_EXPECT_NAT_MAX, attr, + exp_nat_nla_policy, NULL); if (err < 0) return err; diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 6fca80587505..7491aa4c3566 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -598,7 +598,7 @@ static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, struct nlattr *nest_parms; spin_lock_bh(&ct->lock); - nest_parms = nla_nest_start(skb, CTA_PROTOINFO_DCCP | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_PROTOINFO_DCCP); if (!nest_parms) goto nla_put_failure; if (nla_put_u8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state) || @@ -639,8 +639,8 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) if (!attr) return 0; - err = nla_parse_nested(tb, CTA_PROTOINFO_DCCP_MAX, attr, - dccp_nla_policy, NULL); + err = nla_parse_nested_deprecated(tb, CTA_PROTOINFO_DCCP_MAX, attr, + dccp_nla_policy, NULL); if (err < 0) return err; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index a7818101ad80..5b8dde266412 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -520,7 +520,7 @@ static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, struct nlattr *nest_parms; spin_lock_bh(&ct->lock); - nest_parms = nla_nest_start(skb, CTA_PROTOINFO_SCTP | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_PROTOINFO_SCTP); if (!nest_parms) goto nla_put_failure; @@ -563,8 +563,8 @@ static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct) if (!attr) return 0; - err = nla_parse_nested(tb, CTA_PROTOINFO_SCTP_MAX, attr, - sctp_nla_policy, NULL); + err = nla_parse_nested_deprecated(tb, CTA_PROTOINFO_SCTP_MAX, attr, + sctp_nla_policy, NULL); if (err < 0) return 
err; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index a06875a466a4..7ba01d8ee165 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1192,7 +1192,7 @@ static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, struct nf_ct_tcp_flags tmp = {}; spin_lock_bh(&ct->lock); - nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP); if (!nest_parms) goto nla_put_failure; @@ -1248,8 +1248,8 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct) if (!pattr) return 0; - err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, - tcp_nla_policy, NULL); + err = nla_parse_nested_deprecated(tb, CTA_PROTOINFO_TCP_MAX, pattr, + tcp_nla_policy, NULL); if (err < 0) return err; diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index 5072ff96ab33..83306648dd0f 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -30,10 +30,12 @@ #include <net/netfilter/nf_conntrack_expect.h> #include <linux/netfilter/nf_conntrack_sane.h> +#define HELPER_NAME "sane" + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Michal Schmidt <mschmidt@redhat.com>"); MODULE_DESCRIPTION("SANE connection tracking helper"); -MODULE_ALIAS_NFCT_HELPER("sane"); +MODULE_ALIAS_NFCT_HELPER(HELPER_NAME); static char *sane_buffer; @@ -195,12 +197,12 @@ static int __init nf_conntrack_sane_init(void) /* FIXME should be configurable whether IPv4 and IPv6 connections are tracked or not - YK */ for (i = 0; i < ports_c; i++) { - nf_ct_helper_init(&sane[2 * i], AF_INET, IPPROTO_TCP, "sane", - SANE_PORT, ports[i], ports[i], + nf_ct_helper_init(&sane[2 * i], AF_INET, IPPROTO_TCP, + HELPER_NAME, SANE_PORT, ports[i], ports[i], &sane_exp_policy, 0, help, NULL, THIS_MODULE); - nf_ct_helper_init(&sane[2 * i + 1], AF_INET6, IPPROTO_TCP, "sane", - SANE_PORT, ports[i], ports[i], + nf_ct_helper_init(&sane[2 * i + 1], AF_INET6, IPPROTO_TCP, + HELPER_NAME, SANE_PORT, ports[i], ports[i], &sane_exp_policy, 0, help, NULL, THIS_MODULE); } diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 39fcc1ed18f3..c30c883c370b 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -30,11 +30,13 @@ #include <net/netfilter/nf_conntrack_zones.h> #include <linux/netfilter/nf_conntrack_sip.h> +#define HELPER_NAME "sip" + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>"); MODULE_DESCRIPTION("SIP connection tracking helper"); MODULE_ALIAS("ip_conntrack_sip"); -MODULE_ALIAS_NFCT_HELPER("sip"); +MODULE_ALIAS_NFCT_HELPER(HELPER_NAME); #define MAX_PORTS 8 static unsigned short ports[MAX_PORTS]; @@ -928,7 +930,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, nfct_help(exp->master)->helper != nfct_help(ct)->helper || exp->class != class) break; -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) if (!direct_rtp && (!nf_inet_addr_cmp(&exp->saved_addr, &exp->tuple.dst.u3) || exp->saved_proto.udp.port != exp->tuple.dst.u.udp.port) && @@ -1669,21 +1671,21 @@ static int __init nf_conntrack_sip_init(void) ports[ports_c++] = SIP_PORT; for (i = 0; i < ports_c; i++) { - nf_ct_helper_init(&sip[4 * i], AF_INET, IPPROTO_UDP, "sip", - SIP_PORT, ports[i], i, sip_exp_policy, - SIP_EXPECT_MAX, sip_help_udp, + nf_ct_helper_init(&sip[4 * i], AF_INET, IPPROTO_UDP, + HELPER_NAME, SIP_PORT, ports[i], i, + sip_exp_policy, SIP_EXPECT_MAX, 
sip_help_udp, NULL, THIS_MODULE); - nf_ct_helper_init(&sip[4 * i + 1], AF_INET, IPPROTO_TCP, "sip", - SIP_PORT, ports[i], i, sip_exp_policy, - SIP_EXPECT_MAX, sip_help_tcp, + nf_ct_helper_init(&sip[4 * i + 1], AF_INET, IPPROTO_TCP, + HELPER_NAME, SIP_PORT, ports[i], i, + sip_exp_policy, SIP_EXPECT_MAX, sip_help_tcp, NULL, THIS_MODULE); - nf_ct_helper_init(&sip[4 * i + 2], AF_INET6, IPPROTO_UDP, "sip", - SIP_PORT, ports[i], i, sip_exp_policy, - SIP_EXPECT_MAX, sip_help_udp, + nf_ct_helper_init(&sip[4 * i + 2], AF_INET6, IPPROTO_UDP, + HELPER_NAME, SIP_PORT, ports[i], i, + sip_exp_policy, SIP_EXPECT_MAX, sip_help_udp, NULL, THIS_MODULE); - nf_ct_helper_init(&sip[4 * i + 3], AF_INET6, IPPROTO_TCP, "sip", - SIP_PORT, ports[i], i, sip_exp_policy, - SIP_EXPECT_MAX, sip_help_tcp, + nf_ct_helper_init(&sip[4 * i + 3], AF_INET6, IPPROTO_TCP, + HELPER_NAME, SIP_PORT, ports[i], i, + sip_exp_policy, SIP_EXPECT_MAX, sip_help_tcp, NULL, THIS_MODULE); } diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index c2ae14c720b4..e0d392cb3075 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -511,6 +511,8 @@ static void nf_conntrack_standalone_fini_proc(struct net *net) /* Log invalid packets of a given protocol */ static int log_invalid_proto_min __read_mostly; static int log_invalid_proto_max __read_mostly = 255; +static int zero; +static int one = 1; /* size the user *wants to set */ static unsigned int nf_conntrack_htable_size_user __read_mostly; @@ -624,9 +626,11 @@ static struct ctl_table nf_ct_sysctl_table[] = { [NF_SYSCTL_CT_CHECKSUM] = { .procname = "nf_conntrack_checksum", .data = &init_net.ct.sysctl_checksum, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, }, [NF_SYSCTL_CT_LOG_INVALID] = { .procname = "nf_conntrack_log_invalid", @@ -647,33 +651,41 @@ static struct ctl_table nf_ct_sysctl_table[] = { [NF_SYSCTL_CT_ACCT] = { .procname = "nf_conntrack_acct", .data = &init_net.ct.sysctl_acct, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, }, [NF_SYSCTL_CT_HELPER] = { .procname = "nf_conntrack_helper", .data = &init_net.ct.sysctl_auto_assign_helper, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, }, #ifdef CONFIG_NF_CONNTRACK_EVENTS [NF_SYSCTL_CT_EVENTS] = { .procname = "nf_conntrack_events", .data = &init_net.ct.sysctl_events, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, }, #endif #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP [NF_SYSCTL_CT_TIMESTAMP] = { .procname = "nf_conntrack_timestamp", .data = &init_net.ct.sysctl_tstamp, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, }, #endif [NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC] = { @@ -744,15 +756,19 @@ static struct ctl_table nf_ct_sysctl_table[] = { }, [NF_SYSCTL_CT_PROTO_TCP_LOOSE] = { .procname = "nf_conntrack_tcp_loose", - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(int), .mode = 0644, - .proc_handler 
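
The nf_conntrack_standalone.c hunks around this point tighten the conntrack boolean knobs: proc_dointvec stored any integer, while proc_dointvec_minmax with [0, 1] bounds makes a write of, say, 2 fail with -EINVAL instead. The pattern, shown with a hypothetical flag:

    static int zero;
    static int one = 1;
    static int demo_flag;

    static struct ctl_table demo_table[] = {
        {
            .procname     = "demo_flag",
            .data         = &demo_flag,
            .maxlen       = sizeof(int),
            .mode         = 0644,
            .proc_handler = proc_dointvec_minmax,
            .extra1       = &zero,  /* reject writes below 0 */
            .extra2       = &one,   /* reject writes above 1 */
        },
        { }
    };
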
= proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, }, [NF_SYSCTL_CT_PROTO_TCP_LIBERAL] = { .procname = "nf_conntrack_tcp_be_liberal", - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, }, [NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS] = { .procname = "nf_conntrack_tcp_max_retrans", @@ -887,7 +903,9 @@ static struct ctl_table nf_ct_sysctl_table[] = { .procname = "nf_conntrack_dccp_loose", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, }, #endif #ifdef CONFIG_NF_CT_PROTO_GRE diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index 548b673b3625..6977cb91ae9a 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -20,11 +20,13 @@ #include <net/netfilter/nf_conntrack_helper.h> #include <linux/netfilter/nf_conntrack_tftp.h> +#define HELPER_NAME "tftp" + MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>"); MODULE_DESCRIPTION("TFTP connection tracking helper"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_conntrack_tftp"); -MODULE_ALIAS_NFCT_HELPER("tftp"); +MODULE_ALIAS_NFCT_HELPER(HELPER_NAME); #define MAX_PORTS 8 static unsigned short ports[MAX_PORTS]; @@ -119,12 +121,14 @@ static int __init nf_conntrack_tftp_init(void) ports[ports_c++] = TFTP_PORT; for (i = 0; i < ports_c; i++) { - nf_ct_helper_init(&tftp[2 * i], AF_INET, IPPROTO_UDP, "tftp", - TFTP_PORT, ports[i], i, &tftp_exp_policy, - 0, tftp_help, NULL, THIS_MODULE); - nf_ct_helper_init(&tftp[2 * i + 1], AF_INET6, IPPROTO_UDP, "tftp", - TFTP_PORT, ports[i], i, &tftp_exp_policy, - 0, tftp_help, NULL, THIS_MODULE); + nf_ct_helper_init(&tftp[2 * i], AF_INET, IPPROTO_UDP, + HELPER_NAME, TFTP_PORT, ports[i], i, + &tftp_exp_policy, 0, tftp_help, NULL, + THIS_MODULE); + nf_ct_helper_init(&tftp[2 * i + 1], AF_INET6, IPPROTO_UDP, + HELPER_NAME, TFTP_PORT, ports[i], i, + &tftp_exp_policy, 0, tftp_help, NULL, + THIS_MODULE); } ret = nf_conntrack_helpers_register(tftp, ports_c * 2); diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c index 91fbd183da2d..edac8ea4436d 100644 --- a/net/netfilter/nf_conntrack_timeout.c +++ b/net/netfilter/nf_conntrack_timeout.c @@ -48,6 +48,95 @@ void nf_ct_untimeout(struct net *net, struct nf_ct_timeout *timeout) } EXPORT_SYMBOL_GPL(nf_ct_untimeout); +static void __nf_ct_timeout_put(struct nf_ct_timeout *timeout) +{ + typeof(nf_ct_timeout_put_hook) timeout_put; + + timeout_put = rcu_dereference(nf_ct_timeout_put_hook); + if (timeout_put) + timeout_put(timeout); +} + +int nf_ct_set_timeout(struct net *net, struct nf_conn *ct, + u8 l3num, u8 l4num, const char *timeout_name) +{ + typeof(nf_ct_timeout_find_get_hook) timeout_find_get; + struct nf_ct_timeout *timeout; + struct nf_conn_timeout *timeout_ext; + const char *errmsg = NULL; + int ret = 0; + + rcu_read_lock(); + timeout_find_get = rcu_dereference(nf_ct_timeout_find_get_hook); + if (!timeout_find_get) { + ret = -ENOENT; + errmsg = "Timeout policy base is empty"; + goto out; + } + + timeout = timeout_find_get(net, timeout_name); + if (!timeout) { + ret = -ENOENT; + pr_info_ratelimited("No such timeout policy \"%s\"\n", + timeout_name); + goto out; + } + + if (timeout->l3num != l3num) { + ret = -EINVAL; + pr_info_ratelimited("Timeout policy `%s' can only be used by " + "L%d protocol number %d\n", + timeout_name, 3, 
timeout->l3num); + goto err_put_timeout; + } + /* Make sure the timeout policy matches any existing protocol tracker, + * otherwise default to generic. + */ + if (timeout->l4proto->l4proto != l4num) { + ret = -EINVAL; + pr_info_ratelimited("Timeout policy `%s' can only be used by " + "L%d protocol number %d\n", + timeout_name, 4, timeout->l4proto->l4proto); + goto err_put_timeout; + } + timeout_ext = nf_ct_timeout_ext_add(ct, timeout, GFP_ATOMIC); + if (!timeout_ext) { + ret = -ENOMEM; + goto err_put_timeout; + } + + rcu_read_unlock(); + return ret; + +err_put_timeout: + __nf_ct_timeout_put(timeout); +out: + rcu_read_unlock(); + if (errmsg) + pr_info_ratelimited("%s\n", errmsg); + return ret; +} +EXPORT_SYMBOL_GPL(nf_ct_set_timeout); + +void nf_ct_destroy_timeout(struct nf_conn *ct) +{ + struct nf_conn_timeout *timeout_ext; + typeof(nf_ct_timeout_put_hook) timeout_put; + + rcu_read_lock(); + timeout_put = rcu_dereference(nf_ct_timeout_put_hook); + + if (timeout_put) { + timeout_ext = nf_ct_timeout_find(ct); + if (timeout_ext) { + timeout_put(timeout_ext->timeout); + RCU_INIT_POINTER(timeout_ext->timeout, NULL); + } + } + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(nf_ct_destroy_timeout); + static const struct nf_ct_ext_type timeout_extend = { .len = sizeof(struct nf_conn_timeout), .align = __alignof__(struct nf_conn_timeout), diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 1d291a51cd45..6452550d187f 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -235,13 +235,10 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, if (tuplehash == NULL) return NF_ACCEPT; - outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx); - if (!outdev) - return NF_ACCEPT; - dir = tuplehash->tuple.dir; flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache; + outdev = rt->dst.dev; if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)) && (ip_hdr(skb)->frag_off & htons(IP_DF)) != 0) @@ -452,13 +449,10 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, if (tuplehash == NULL) return NF_ACCEPT; - outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx); - if (!outdev) - return NF_ACCEPT; - dir = tuplehash->tuple.dir; flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache; + outdev = rt->dst.dev; if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu))) return NF_ACCEPT; diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index e15779fd58e3..d6c43902ebd7 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -7,9 +7,6 @@ #include <linux/netdevice.h> /* nf_queue.c */ -int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, - const struct nf_hook_entries *entries, unsigned int index, - unsigned int verdict); void nf_queue_nf_hook_drop(struct net *net); /* nf_log.c */ diff --git a/net/netfilter/nf_nat_amanda.c b/net/netfilter/nf_nat_amanda.c index e4d61a7a5258..4e59416ea709 100644 --- a/net/netfilter/nf_nat_amanda.c +++ b/net/netfilter/nf_nat_amanda.c @@ -19,10 +19,15 @@ #include <net/netfilter/nf_nat_helper.h> #include <linux/netfilter/nf_conntrack_amanda.h> +#define NAT_HELPER_NAME "amanda" + MODULE_AUTHOR("Brian J. 
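
nf_ct_set_timeout() and nf_ct_destroy_timeout() above are hoisted into the timeout core so xt_CT and nft_ct can share one implementation; the nf_flow_table_ip.c hunks in the same range are a separate micro-optimisation, taking the output device from the cached route (rt->dst.dev) instead of a per-packet dev_get_by_index_rcu() lookup. A caller-side sketch of the timeout API, with illustrative variable names:

    /* attach a named timeout policy to a freshly allocated conntrack */
    err = nf_ct_set_timeout(net, ct, NFPROTO_IPV4, IPPROTO_TCP,
                            "demo-tcp-policy");
    if (err < 0)
        goto err_free;   /* -ENOENT, -EINVAL or -ENOMEM, see above */

    /* and on the destroy path: */
    nf_ct_destroy_timeout(ct);
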
Murrell <netfilter@interlinx.bc.ca>"); MODULE_DESCRIPTION("Amanda NAT helper"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("ip_nat_amanda"); +MODULE_ALIAS_NF_NAT_HELPER(NAT_HELPER_NAME); + +static struct nf_conntrack_nat_helper nat_helper_amanda = + NF_CT_NAT_HELPER_INIT(NAT_HELPER_NAME); static unsigned int help(struct sk_buff *skb, enum ip_conntrack_info ctinfo, @@ -74,6 +79,7 @@ static unsigned int help(struct sk_buff *skb, static void __exit nf_nat_amanda_fini(void) { + nf_nat_helper_unregister(&nat_helper_amanda); RCU_INIT_POINTER(nf_nat_amanda_hook, NULL); synchronize_rcu(); } @@ -81,6 +87,7 @@ static void __exit nf_nat_amanda_fini(void) static int __init nf_nat_amanda_init(void) { BUG_ON(nf_nat_amanda_hook != NULL); + nf_nat_helper_register(&nat_helper_amanda); RCU_INIT_POINTER(nf_nat_amanda_hook, help); return 0; } diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 000952719adf..cd94481e6c07 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -890,8 +890,8 @@ static int nfnetlink_parse_nat_proto(struct nlattr *attr, struct nlattr *tb[CTA_PROTONAT_MAX+1]; int err; - err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, - protonat_nla_policy, NULL); + err = nla_parse_nested_deprecated(tb, CTA_PROTONAT_MAX, attr, + protonat_nla_policy, NULL); if (err < 0) return err; @@ -949,7 +949,8 @@ nfnetlink_parse_nat(const struct nlattr *nat, memset(range, 0, sizeof(*range)); - err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy, NULL); + err = nla_parse_nested_deprecated(tb, CTA_NAT_MAX, nat, + nat_nla_policy, NULL); if (err < 0) return err; @@ -1014,7 +1015,7 @@ static struct nf_ct_helper_expectfn follow_master_nat = { .expectfn = nf_nat_follow_master, }; -int nf_nat_register_fn(struct net *net, const struct nf_hook_ops *ops, +int nf_nat_register_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops, const struct nf_hook_ops *orig_nat_ops, unsigned int ops_count) { struct nat_net *nat_net = net_generic(net, nat_net_id); @@ -1024,14 +1025,12 @@ int nf_nat_register_fn(struct net *net, const struct nf_hook_ops *ops, struct nf_hook_ops *nat_ops; int i, ret; - if (WARN_ON_ONCE(ops->pf >= ARRAY_SIZE(nat_net->nat_proto_net))) + if (WARN_ON_ONCE(pf >= ARRAY_SIZE(nat_net->nat_proto_net))) return -EINVAL; - nat_proto_net = &nat_net->nat_proto_net[ops->pf]; + nat_proto_net = &nat_net->nat_proto_net[pf]; for (i = 0; i < ops_count; i++) { - if (WARN_ON(orig_nat_ops[i].pf != ops->pf)) - return -EINVAL; if (orig_nat_ops[i].hooknum == hooknum) { hooknum = i; break; @@ -1091,8 +1090,8 @@ int nf_nat_register_fn(struct net *net, const struct nf_hook_ops *ops, return ret; } -void nf_nat_unregister_fn(struct net *net, const struct nf_hook_ops *ops, - unsigned int ops_count) +void nf_nat_unregister_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops, + unsigned int ops_count) { struct nat_net *nat_net = net_generic(net, nat_net_id); struct nf_nat_hooks_net *nat_proto_net; @@ -1101,10 +1100,10 @@ void nf_nat_unregister_fn(struct net *net, const struct nf_hook_ops *ops, int hooknum = ops->hooknum; int i; - if (ops->pf >= ARRAY_SIZE(nat_net->nat_proto_net)) + if (pf >= ARRAY_SIZE(nat_net->nat_proto_net)) return; - nat_proto_net = &nat_net->nat_proto_net[ops->pf]; + nat_proto_net = &nat_net->nat_proto_net[pf]; mutex_lock(&nf_nat_proto_mutex); if (WARN_ON(nat_proto_net->users == 0)) diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c index 5063cbf1689c..0ea6b1bc52de 100644 --- a/net/netfilter/nf_nat_ftp.c +++ b/net/netfilter/nf_nat_ftp.c @@ 
-21,13 +21,18 @@ #include <net/netfilter/nf_conntrack_expect.h> #include <linux/netfilter/nf_conntrack_ftp.h> +#define NAT_HELPER_NAME "ftp" + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>"); MODULE_DESCRIPTION("ftp NAT helper"); -MODULE_ALIAS("ip_nat_ftp"); +MODULE_ALIAS_NF_NAT_HELPER(NAT_HELPER_NAME); /* FIXME: Time out? --RR */ +static struct nf_conntrack_nat_helper nat_helper_ftp = + NF_CT_NAT_HELPER_INIT(NAT_HELPER_NAME); + static int nf_nat_ftp_fmt_cmd(struct nf_conn *ct, enum nf_ct_ftp_type type, char *buffer, size_t buflen, union nf_inet_addr *addr, u16 port) @@ -124,6 +129,7 @@ out: static void __exit nf_nat_ftp_fini(void) { + nf_nat_helper_unregister(&nat_helper_ftp); RCU_INIT_POINTER(nf_nat_ftp_hook, NULL); synchronize_rcu(); } @@ -131,6 +137,7 @@ static void __exit nf_nat_ftp_fini(void) static int __init nf_nat_ftp_init(void) { BUG_ON(nf_nat_ftp_hook != NULL); + nf_nat_helper_register(&nat_helper_ftp); RCU_INIT_POINTER(nf_nat_ftp_hook, nf_nat_ftp); return 0; } diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c index 3aa35a43100d..d87cbe5e03ec 100644 --- a/net/netfilter/nf_nat_irc.c +++ b/net/netfilter/nf_nat_irc.c @@ -23,10 +23,15 @@ #include <net/netfilter/nf_conntrack_expect.h> #include <linux/netfilter/nf_conntrack_irc.h> +#define NAT_HELPER_NAME "irc" + MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); MODULE_DESCRIPTION("IRC (DCC) NAT helper"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("ip_nat_irc"); +MODULE_ALIAS_NF_NAT_HELPER(NAT_HELPER_NAME); + +static struct nf_conntrack_nat_helper nat_helper_irc = + NF_CT_NAT_HELPER_INIT(NAT_HELPER_NAME); static unsigned int help(struct sk_buff *skb, enum ip_conntrack_info ctinfo, @@ -96,6 +101,7 @@ static unsigned int help(struct sk_buff *skb, static void __exit nf_nat_irc_fini(void) { + nf_nat_helper_unregister(&nat_helper_irc); RCU_INIT_POINTER(nf_nat_irc_hook, NULL); synchronize_rcu(); } @@ -103,6 +109,7 @@ static void __exit nf_nat_irc_fini(void) static int __init nf_nat_irc_init(void) { BUG_ON(nf_nat_irc_hook != NULL); + nf_nat_helper_register(&nat_helper_irc); RCU_INIT_POINTER(nf_nat_irc_hook, help); return 0; } diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c index d85c4d902e7b..8e8a65d46345 100644 --- a/net/netfilter/nf_nat_masquerade.c +++ b/net/netfilter/nf_nat_masquerade.c @@ -7,12 +7,10 @@ #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv6.h> -#include <net/netfilter/ipv4/nf_nat_masquerade.h> -#include <net/netfilter/ipv6/nf_nat_masquerade.h> +#include <net/netfilter/nf_nat_masquerade.h> static DEFINE_MUTEX(masq_mutex); -static unsigned int masq_refcnt4 __read_mostly; -static unsigned int masq_refcnt6 __read_mostly; +static unsigned int masq_refcnt __read_mostly; unsigned int nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, @@ -137,56 +135,6 @@ static struct notifier_block masq_inet_notifier = { .notifier_call = masq_inet_event, }; -int nf_nat_masquerade_ipv4_register_notifier(void) -{ - int ret = 0; - - mutex_lock(&masq_mutex); - if (WARN_ON_ONCE(masq_refcnt4 == UINT_MAX)) { - ret = -EOVERFLOW; - goto out_unlock; - } - - /* check if the notifier was already set */ - if (++masq_refcnt4 > 1) - goto out_unlock; - - /* Register for device down reports */ - ret = register_netdevice_notifier(&masq_dev_notifier); - if (ret) - goto err_dec; - /* Register IP address change reports */ - ret = register_inetaddr_notifier(&masq_inet_notifier); - if (ret) - goto err_unregister; - - mutex_unlock(&masq_mutex); - return ret; - 
-err_unregister: - unregister_netdevice_notifier(&masq_dev_notifier); -err_dec: - masq_refcnt4--; -out_unlock: - mutex_unlock(&masq_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_register_notifier); - -void nf_nat_masquerade_ipv4_unregister_notifier(void) -{ - mutex_lock(&masq_mutex); - /* check if the notifier still has clients */ - if (--masq_refcnt4 > 0) - goto out_unlock; - - unregister_netdevice_notifier(&masq_dev_notifier); - unregister_inetaddr_notifier(&masq_inet_notifier); -out_unlock: - mutex_unlock(&masq_mutex); -} -EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_unregister_notifier); - #if IS_ENABLED(CONFIG_IPV6) static atomic_t v6_worker_count __read_mostly; @@ -322,44 +270,68 @@ static struct notifier_block masq_inet6_notifier = { .notifier_call = masq_inet6_event, }; -int nf_nat_masquerade_ipv6_register_notifier(void) +static int nf_nat_masquerade_ipv6_register_notifier(void) +{ + return register_inet6addr_notifier(&masq_inet6_notifier); +} +#else +static inline int nf_nat_masquerade_ipv6_register_notifier(void) { return 0; } +#endif + +int nf_nat_masquerade_inet_register_notifiers(void) { int ret = 0; mutex_lock(&masq_mutex); - if (WARN_ON_ONCE(masq_refcnt6 == UINT_MAX)) { + if (WARN_ON_ONCE(masq_refcnt == UINT_MAX)) { ret = -EOVERFLOW; goto out_unlock; } - /* check if the notifier is already set */ - if (++masq_refcnt6 > 1) + /* check if the notifier was already set */ + if (++masq_refcnt > 1) goto out_unlock; - ret = register_inet6addr_notifier(&masq_inet6_notifier); + /* Register for device down reports */ + ret = register_netdevice_notifier(&masq_dev_notifier); if (ret) goto err_dec; + /* Register IP address change reports */ + ret = register_inetaddr_notifier(&masq_inet_notifier); + if (ret) + goto err_unregister; + + ret = nf_nat_masquerade_ipv6_register_notifier(); + if (ret) + goto err_unreg_inet; mutex_unlock(&masq_mutex); return ret; +err_unreg_inet: + unregister_inetaddr_notifier(&masq_inet_notifier); +err_unregister: + unregister_netdevice_notifier(&masq_dev_notifier); err_dec: - masq_refcnt6--; + masq_refcnt--; out_unlock: mutex_unlock(&masq_mutex); return ret; } -EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_register_notifier); +EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_register_notifiers); -void nf_nat_masquerade_ipv6_unregister_notifier(void) +void nf_nat_masquerade_inet_unregister_notifiers(void) { mutex_lock(&masq_mutex); - /* check if the notifier still has clients */ - if (--masq_refcnt6 > 0) + /* check if the notifiers still have clients */ + if (--masq_refcnt > 0) goto out_unlock; + unregister_netdevice_notifier(&masq_dev_notifier); + unregister_inetaddr_notifier(&masq_inet_notifier); +#if IS_ENABLED(CONFIG_IPV6) unregister_inet6addr_notifier(&masq_inet6_notifier); +#endif out_unlock: mutex_unlock(&masq_mutex); } -EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier); -#endif +EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_unregister_notifiers); diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c index 62743da3004f..84f5c90a7f21 100644 --- a/net/netfilter/nf_nat_proto.c +++ b/net/netfilter/nf_nat_proto.c @@ -725,7 +725,7 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, return ret; } -static const struct nf_hook_ops nf_nat_ipv4_ops[] = { +const struct nf_hook_ops nf_nat_ipv4_ops[] = { /* Before packet filtering, change destination */ { .hook = nf_nat_ipv4_in, @@ -758,13 +758,14 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = { int nf_nat_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops) { - 
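
With the hunk above, the separate IPv4/IPv6 masquerade notifier refcounts collapse into one, and the IPv6 registration becomes an internal detail with a no-op stub when IPV6 is off. Users now pair a single register/unregister call, roughly:

    /* hypothetical module needing interface/address events for masq */
    static int __init demo_masq_init(void)
    {
        return nf_nat_masquerade_inet_register_notifiers();
    }

    static void __exit demo_masq_exit(void)
    {
        nf_nat_masquerade_inet_unregister_notifiers();
    }
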
return nf_nat_register_fn(net, ops, nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); + return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv4_ops, + ARRAY_SIZE(nf_nat_ipv4_ops)); } EXPORT_SYMBOL_GPL(nf_nat_ipv4_register_fn); void nf_nat_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops) { - nf_nat_unregister_fn(net, ops, ARRAY_SIZE(nf_nat_ipv4_ops)); + nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv4_ops)); } EXPORT_SYMBOL_GPL(nf_nat_ipv4_unregister_fn); @@ -925,20 +926,6 @@ nf_nat_ipv6_out(void *priv, struct sk_buff *skb, return ret; } -static int nat_route_me_harder(struct net *net, struct sk_buff *skb) -{ -#ifdef CONFIG_IPV6_MODULE - const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); - - if (!v6_ops) - return -EHOSTUNREACH; - - return v6_ops->route_me_harder(net, skb); -#else - return ip6_route_me_harder(net, skb); -#endif -} - static unsigned int nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) @@ -958,7 +945,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, &ct->tuplehash[!dir].tuple.src.u3)) { - err = nat_route_me_harder(state->net, skb); + err = nf_ip6_route_me_harder(state->net, skb); if (err < 0) ret = NF_DROP_ERR(err); } @@ -977,7 +964,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, return ret; } -static const struct nf_hook_ops nf_nat_ipv6_ops[] = { +const struct nf_hook_ops nf_nat_ipv6_ops[] = { /* Before packet filtering, change destination */ { .hook = nf_nat_ipv6_in, @@ -1010,14 +997,44 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = { int nf_nat_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops) { - return nf_nat_register_fn(net, ops, nf_nat_ipv6_ops, + return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); } EXPORT_SYMBOL_GPL(nf_nat_ipv6_register_fn); void nf_nat_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops) { - nf_nat_unregister_fn(net, ops, ARRAY_SIZE(nf_nat_ipv6_ops)); + nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv6_ops)); } EXPORT_SYMBOL_GPL(nf_nat_ipv6_unregister_fn); #endif /* CONFIG_IPV6 */ + +#if defined(CONFIG_NF_TABLES_INET) && IS_ENABLED(CONFIG_NFT_NAT) +int nf_nat_inet_register_fn(struct net *net, const struct nf_hook_ops *ops) +{ + int ret; + + if (WARN_ON_ONCE(ops->pf != NFPROTO_INET)) + return -EINVAL; + + ret = nf_nat_register_fn(net, NFPROTO_IPV6, ops, nf_nat_ipv6_ops, + ARRAY_SIZE(nf_nat_ipv6_ops)); + if (ret) + return ret; + + ret = nf_nat_register_fn(net, NFPROTO_IPV4, ops, nf_nat_ipv4_ops, + ARRAY_SIZE(nf_nat_ipv4_ops)); + if (ret) + nf_nat_ipv6_unregister_fn(net, ops); + + return ret; +} +EXPORT_SYMBOL_GPL(nf_nat_inet_register_fn); + +void nf_nat_inet_unregister_fn(struct net *net, const struct nf_hook_ops *ops) +{ + nf_nat_unregister_fn(net, NFPROTO_IPV4, ops, ARRAY_SIZE(nf_nat_ipv4_ops)); + nf_nat_unregister_fn(net, NFPROTO_IPV6, ops, ARRAY_SIZE(nf_nat_ipv6_ops)); +} +EXPORT_SYMBOL_GPL(nf_nat_inet_unregister_fn); +#endif /* NFT INET NAT */ diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c index aa1be643d7a0..464387b3600f 100644 --- a/net/netfilter/nf_nat_sip.c +++ b/net/netfilter/nf_nat_sip.c @@ -24,11 +24,15 @@ #include <net/netfilter/nf_conntrack_seqadj.h> #include <linux/netfilter/nf_conntrack_sip.h> +#define NAT_HELPER_NAME "sip" + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>"); MODULE_DESCRIPTION("SIP NAT helper"); -MODULE_ALIAS("ip_nat_sip"); 
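
nf_nat_ipv4_ops/nf_nat_ipv6_ops losing their static above is what makes nf_nat_inet_register_fn() possible: a single NFPROTO_INET hook registration fans out to both address families, with the IPv6 half unwound if the IPv4 half fails. A hypothetical user (this is the shape an nft "inet" nat chain relies on):

    static const struct nf_hook_ops demo_nat_ops = {
        .hook     = demo_nat_hook,
        .pf       = NFPROTO_INET,         /* mandatory, see the WARN_ON */
        .hooknum  = NF_INET_POST_ROUTING,
        .priority = NF_IP_PRI_NAT_SRC,
    };

    err = nf_nat_inet_register_fn(net, &demo_nat_ops);
    /* on success the hook is live for IPv4 and IPv6; on failure
     * nothing is left registered */
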
+MODULE_ALIAS_NF_NAT_HELPER(NAT_HELPER_NAME); +static struct nf_conntrack_nat_helper nat_helper_sip = + NF_CT_NAT_HELPER_INIT(NAT_HELPER_NAME); static unsigned int mangle_packet(struct sk_buff *skb, unsigned int protoff, unsigned int dataoff, @@ -656,8 +660,8 @@ static struct nf_ct_helper_expectfn sip_nat = { static void __exit nf_nat_sip_fini(void) { + nf_nat_helper_unregister(&nat_helper_sip); RCU_INIT_POINTER(nf_nat_sip_hooks, NULL); - nf_ct_helper_expectfn_unregister(&sip_nat); synchronize_rcu(); } @@ -675,6 +679,7 @@ static const struct nf_nat_sip_hooks sip_hooks = { static int __init nf_nat_sip_init(void) { BUG_ON(nf_nat_sip_hooks != NULL); + nf_nat_helper_register(&nat_helper_sip); RCU_INIT_POINTER(nf_nat_sip_hooks, &sip_hooks); nf_ct_helper_expectfn_register(&sip_nat); return 0; diff --git a/net/netfilter/nf_nat_tftp.c b/net/netfilter/nf_nat_tftp.c index 7f67e1d5310d..e633b3863e33 100644 --- a/net/netfilter/nf_nat_tftp.c +++ b/net/netfilter/nf_nat_tftp.c @@ -13,10 +13,15 @@ #include <net/netfilter/nf_nat_helper.h> #include <linux/netfilter/nf_conntrack_tftp.h> +#define NAT_HELPER_NAME "tftp" + MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>"); MODULE_DESCRIPTION("TFTP NAT helper"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("ip_nat_tftp"); +MODULE_ALIAS_NF_NAT_HELPER(NAT_HELPER_NAME); + +static struct nf_conntrack_nat_helper nat_helper_tftp = + NF_CT_NAT_HELPER_INIT(NAT_HELPER_NAME); static unsigned int help(struct sk_buff *skb, enum ip_conntrack_info ctinfo, @@ -37,6 +42,7 @@ static unsigned int help(struct sk_buff *skb, static void __exit nf_nat_tftp_fini(void) { + nf_nat_helper_unregister(&nat_helper_tftp); RCU_INIT_POINTER(nf_nat_tftp_hook, NULL); synchronize_rcu(); } @@ -44,6 +50,7 @@ static void __exit nf_nat_tftp_fini(void) static int __init nf_nat_tftp_init(void) { BUG_ON(nf_nat_tftp_hook != NULL); + nf_nat_helper_register(&nat_helper_tftp); RCU_INIT_POINTER(nf_nat_tftp_hook, help); return 0; } diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index a36a77bae1d6..9dc1d6e04946 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -240,6 +240,7 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, return 0; } +EXPORT_SYMBOL_GPL(nf_queue); static unsigned int nf_iterate(struct sk_buff *skb, struct nf_hook_state *state, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 1606eaa5ae0d..d98416e83d4e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -53,7 +53,6 @@ static const struct rhashtable_params nft_chain_ht_params = { .hashfn = nft_chain_hash, .obj_hashfn = nft_chain_hash_obj, .obj_cmpfn = nft_chain_hash_cmp, - .locks_mul = 1, .automatic_shrinking = true, }; @@ -1201,7 +1200,7 @@ static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats) total.pkts += pkts; total.bytes += bytes; } - nest = nla_nest_start(skb, NFTA_CHAIN_COUNTERS); + nest = nla_nest_start_noflag(skb, NFTA_CHAIN_COUNTERS); if (nest == NULL) goto nla_put_failure; @@ -1249,7 +1248,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, const struct nf_hook_ops *ops = &basechain->ops; struct nlattr *nest; - nest = nla_nest_start(skb, NFTA_CHAIN_HOOK); + nest = nla_nest_start_noflag(skb, NFTA_CHAIN_HOOK); if (nest == NULL) goto nla_put_failure; if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum))) @@ -1421,8 +1420,8 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr) struct nft_stats *stats; int err; - err = nla_parse_nested(tb, 
NFTA_COUNTER_MAX, attr, nft_counter_policy, - NULL); + err = nla_parse_nested_deprecated(tb, NFTA_COUNTER_MAX, attr, + nft_counter_policy, NULL); if (err < 0) return ERR_PTR(err); @@ -1526,8 +1525,9 @@ static int nft_chain_parse_hook(struct net *net, lockdep_assert_held(&net->nft.commit_mutex); lockdep_nfnl_nft_mutex_not_held(); - err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK], - nft_hook_policy, NULL); + err = nla_parse_nested_deprecated(ha, NFTA_HOOK_MAX, + nla[NFTA_CHAIN_HOOK], + nft_hook_policy, NULL); if (err < 0) return err; @@ -2060,7 +2060,8 @@ static int nf_tables_fill_expr_info(struct sk_buff *skb, goto nla_put_failure; if (expr->ops->dump) { - struct nlattr *data = nla_nest_start(skb, NFTA_EXPR_DATA); + struct nlattr *data = nla_nest_start_noflag(skb, + NFTA_EXPR_DATA); if (data == NULL) goto nla_put_failure; if (expr->ops->dump(skb, expr) < 0) @@ -2079,7 +2080,7 @@ int nft_expr_dump(struct sk_buff *skb, unsigned int attr, { struct nlattr *nest; - nest = nla_nest_start(skb, attr); + nest = nla_nest_start_noflag(skb, attr); if (!nest) goto nla_put_failure; if (nf_tables_fill_expr_info(skb, expr) < 0) @@ -2105,7 +2106,8 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx, struct nlattr *tb[NFTA_EXPR_MAX + 1]; int err; - err = nla_parse_nested(tb, NFTA_EXPR_MAX, nla, nft_expr_policy, NULL); + err = nla_parse_nested_deprecated(tb, NFTA_EXPR_MAX, nla, + nft_expr_policy, NULL); if (err < 0) return err; @@ -2114,8 +2116,9 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx, return PTR_ERR(type); if (tb[NFTA_EXPR_DATA]) { - err = nla_parse_nested(info->tb, type->maxattr, - tb[NFTA_EXPR_DATA], type->policy, NULL); + err = nla_parse_nested_deprecated(info->tb, type->maxattr, + tb[NFTA_EXPR_DATA], + type->policy, NULL); if (err < 0) goto err1; } else @@ -2290,7 +2293,7 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, goto nla_put_failure; } - list = nla_nest_start(skb, NFTA_RULE_EXPRESSIONS); + list = nla_nest_start_noflag(skb, NFTA_RULE_EXPRESSIONS); if (list == NULL) goto nla_put_failure; nft_rule_for_each_expr(expr, next, rule) { @@ -3194,9 +3197,7 @@ static int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result) static __be64 nf_jiffies64_to_msecs(u64 input) { - u64 ms = jiffies64_to_nsecs(input); - - return cpu_to_be64(div_u64(ms, NSEC_PER_MSEC)); + return cpu_to_be64(jiffies64_to_msecs(input)); } static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, @@ -3261,7 +3262,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, if (nla_put(skb, NFTA_SET_USERDATA, set->udlen, set->udata)) goto nla_put_failure; - desc = nla_nest_start(skb, NFTA_SET_DESC); + desc = nla_nest_start_noflag(skb, NFTA_SET_DESC); if (desc == NULL) goto nla_put_failure; if (set->size && @@ -3439,15 +3440,14 @@ err: return err; } -static int nf_tables_set_desc_parse(const struct nft_ctx *ctx, - struct nft_set_desc *desc, +static int nf_tables_set_desc_parse(struct nft_set_desc *desc, const struct nlattr *nla) { struct nlattr *da[NFTA_SET_DESC_MAX + 1]; int err; - err = nla_parse_nested(da, NFTA_SET_DESC_MAX, nla, - nft_set_desc_policy, NULL); + err = nla_parse_nested_deprecated(da, NFTA_SET_DESC_MAX, nla, + nft_set_desc_policy, NULL); if (err < 0) return err; @@ -3566,7 +3566,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY])); if (nla[NFTA_SET_DESC] != NULL) { - err = nf_tables_set_desc_parse(&ctx, &desc, nla[NFTA_SET_DESC]); + 
err = nf_tables_set_desc_parse(&desc, nla[NFTA_SET_DESC]); if (err < 0) return err; } @@ -3786,8 +3786,8 @@ bind: } EXPORT_SYMBOL_GPL(nf_tables_bind_set); -void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_binding *binding, bool event) +static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_binding *binding, bool event) { list_del_rcu(&binding->list); @@ -3798,7 +3798,6 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, GFP_KERNEL); } } -EXPORT_SYMBOL_GPL(nf_tables_unbind_set); void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding, @@ -3913,7 +3912,7 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; - nest = nla_nest_start(skb, NFTA_LIST_ELEM); + nest = nla_nest_start_noflag(skb, NFTA_LIST_ELEM); if (nest == NULL) goto nla_put_failure; @@ -4057,7 +4056,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) if (nla_put_string(skb, NFTA_SET_ELEM_LIST_SET, set->name)) goto nla_put_failure; - nest = nla_nest_start(skb, NFTA_SET_ELEM_LIST_ELEMENTS); + nest = nla_nest_start_noflag(skb, NFTA_SET_ELEM_LIST_ELEMENTS); if (nest == NULL) goto nla_put_failure; @@ -4129,7 +4128,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb, if (nla_put_string(skb, NFTA_SET_NAME, set->name)) goto nla_put_failure; - nest = nla_nest_start(skb, NFTA_SET_ELEM_LIST_ELEMENTS); + nest = nla_nest_start_noflag(skb, NFTA_SET_ELEM_LIST_ELEMENTS); if (nest == NULL) goto nla_put_failure; @@ -4174,8 +4173,8 @@ static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set, void *priv; int err; - err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, - nft_set_elem_policy, NULL); + err = nla_parse_nested_deprecated(nla, NFTA_SET_ELEM_MAX, attr, + nft_set_elem_policy, NULL); if (err < 0) return err; @@ -4406,8 +4405,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, u8 ulen; int err; - err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, - nft_set_elem_policy, NULL); + err = nla_parse_nested_deprecated(nla, NFTA_SET_ELEM_MAX, attr, + nft_set_elem_policy, NULL); if (err < 0) return err; @@ -4700,8 +4699,8 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, void *priv; int err; - err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, - nft_set_elem_policy, NULL); + err = nla_parse_nested_deprecated(nla, NFTA_SET_ELEM_MAX, attr, + nft_set_elem_policy, NULL); if (err < 0) goto err1; @@ -4975,8 +4974,8 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx, goto err1; if (attr) { - err = nla_parse_nested(tb, type->maxattr, attr, type->policy, - NULL); + err = nla_parse_nested_deprecated(tb, type->maxattr, attr, + type->policy, NULL); if (err < 0) goto err2; } else { @@ -5019,7 +5018,7 @@ static int nft_object_dump(struct sk_buff *skb, unsigned int attr, { struct nlattr *nest; - nest = nla_nest_start(skb, attr); + nest = nla_nest_start_noflag(skb, attr); if (!nest) goto nla_put_failure; if (obj->ops->dump(skb, obj, reset) < 0) @@ -5552,8 +5551,8 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx, int hooknum, priority; int err, n = 0, i; - err = nla_parse_nested(tb, NFTA_FLOWTABLE_HOOK_MAX, attr, - nft_flowtable_hook_policy, NULL); + err = nla_parse_nested_deprecated(tb, NFTA_FLOWTABLE_HOOK_MAX, attr, + nft_flowtable_hook_policy, NULL); if (err < 0) return err; @@ -5836,14 +5835,14 @@ 
static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, NFTA_FLOWTABLE_PAD)) goto nla_put_failure; - nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK); + nest = nla_nest_start_noflag(skb, NFTA_FLOWTABLE_HOOK); if (!nest) goto nla_put_failure; if (nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_NUM, htonl(flowtable->hooknum)) || nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->priority))) goto nla_put_failure; - nest_devs = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK_DEVS); + nest_devs = nla_nest_start_noflag(skb, NFTA_FLOWTABLE_HOOK_DEVS); if (!nest_devs) goto nla_put_failure; @@ -7210,8 +7209,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, struct nft_chain *chain; int err; - err = nla_parse_nested(tb, NFTA_VERDICT_MAX, nla, nft_verdict_policy, - NULL); + err = nla_parse_nested_deprecated(tb, NFTA_VERDICT_MAX, nla, + nft_verdict_policy, NULL); if (err < 0) return err; @@ -7269,7 +7268,7 @@ int nft_verdict_dump(struct sk_buff *skb, int type, const struct nft_verdict *v) { struct nlattr *nest; - nest = nla_nest_start(skb, type); + nest = nla_nest_start_noflag(skb, type); if (!nest) goto nla_put_failure; @@ -7341,7 +7340,8 @@ int nft_data_init(const struct nft_ctx *ctx, struct nlattr *tb[NFTA_DATA_MAX + 1]; int err; - err = nla_parse_nested(tb, NFTA_DATA_MAX, nla, nft_data_policy, NULL); + err = nla_parse_nested_deprecated(tb, NFTA_DATA_MAX, nla, + nft_data_policy, NULL); if (err < 0) return err; @@ -7382,7 +7382,7 @@ int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, struct nlattr *nest; int err; - nest = nla_nest_start(skb, attr); + nest = nla_nest_start_noflag(skb, attr); if (nest == NULL) return -1; @@ -7534,6 +7534,7 @@ static int __init nf_tables_module_init(void) if (err < 0) goto err5; + nft_chain_route_init(); return err; err5: rhltable_destroy(&nft_objname_ht); @@ -7553,6 +7554,7 @@ static void __exit nf_tables_module_exit(void) nfnetlink_subsys_unregister(&nf_tables_subsys); unregister_netdevice_notifier(&nf_tables_flowtable_notifier); nft_chain_filter_fini(); + nft_chain_route_fini(); unregister_pernet_subsys(&nf_tables_net_ops); cancel_work_sync(&trans_destroy_work); rcu_barrier(); diff --git a/net/netfilter/nf_tables_set_core.c b/net/netfilter/nf_tables_set_core.c index 814789644bd3..a9fce8d10051 100644 --- a/net/netfilter/nf_tables_set_core.c +++ b/net/netfilter/nf_tables_set_core.c @@ -1,4 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/module.h> #include <net/netfilter/nf_tables_core.h> static int __init nf_tables_set_module_init(void) diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 916913454624..92077d459109 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -206,8 +206,9 @@ replay: return -ENOMEM; } - err = nla_parse(cda, ss->cb[cb_id].attr_count, attr, attrlen, - ss->cb[cb_id].policy, extack); + err = nla_parse_deprecated(cda, ss->cb[cb_id].attr_count, + attr, attrlen, + ss->cb[cb_id].policy, extack); if (err < 0) { rcu_read_unlock(); return err; @@ -421,8 +422,10 @@ replay: goto ack; } - err = nla_parse(cda, ss->cb[cb_id].attr_count, attr, - attrlen, ss->cb[cb_id].policy, NULL); + err = nla_parse_deprecated(cda, + ss->cb[cb_id].attr_count, + attr, attrlen, + ss->cb[cb_id].policy, NULL); if (err < 0) goto ack; @@ -520,8 +523,8 @@ static void nfnetlink_rcv_skb_batch(struct sk_buff *skb, struct nlmsghdr *nlh) if (skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg)) return; - err = nla_parse(cda, NFNL_BATCH_MAX, attr, attrlen, 
nfnl_batch_policy, - NULL); + err = nla_parse_deprecated(cda, NFNL_BATCH_MAX, attr, attrlen, + nfnl_batch_policy, NULL); if (err < 0) { netlink_ack(skb, nlh, err, NULL); return; diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 8fa8bf7c48e6..02c877432d71 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -248,8 +248,8 @@ static int nfnl_acct_start(struct netlink_callback *cb) if (!attr) return 0; - err = nla_parse_nested(tb, NFACCT_FILTER_MAX, attr, filter_policy, - NULL); + err = nla_parse_nested_deprecated(tb, NFACCT_FILTER_MAX, attr, + filter_policy, NULL); if (err < 0) return err; diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index e5d27b2e4eba..17eb473a626b 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -78,8 +78,8 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple, int err; struct nlattr *tb[NFCTH_TUPLE_MAX+1]; - err = nla_parse_nested(tb, NFCTH_TUPLE_MAX, attr, - nfnl_cthelper_tuple_pol, NULL); + err = nla_parse_nested_deprecated(tb, NFCTH_TUPLE_MAX, attr, + nfnl_cthelper_tuple_pol, NULL); if (err < 0) return err; @@ -139,8 +139,8 @@ nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy, int err; struct nlattr *tb[NFCTH_POLICY_MAX+1]; - err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, - nfnl_cthelper_expect_pol, NULL); + err = nla_parse_nested_deprecated(tb, NFCTH_POLICY_MAX, attr, + nfnl_cthelper_expect_pol, NULL); if (err < 0) return err; @@ -176,8 +176,9 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, struct nlattr *tb[NFCTH_POLICY_SET_MAX+1]; unsigned int class_max; - ret = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr, - nfnl_cthelper_expect_policy_set, NULL); + ret = nla_parse_nested_deprecated(tb, NFCTH_POLICY_SET_MAX, attr, + nfnl_cthelper_expect_policy_set, + NULL); if (ret < 0) return ret; @@ -289,8 +290,8 @@ nfnl_cthelper_update_policy_one(const struct nf_conntrack_expect_policy *policy, struct nlattr *tb[NFCTH_POLICY_MAX + 1]; int err; - err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, - nfnl_cthelper_expect_pol, NULL); + err = nla_parse_nested_deprecated(tb, NFCTH_POLICY_MAX, attr, + nfnl_cthelper_expect_pol, NULL); if (err < 0) return err; @@ -361,8 +362,9 @@ static int nfnl_cthelper_update_policy(struct nf_conntrack_helper *helper, unsigned int class_max; int err; - err = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr, - nfnl_cthelper_expect_policy_set, NULL); + err = nla_parse_nested_deprecated(tb, NFCTH_POLICY_SET_MAX, attr, + nfnl_cthelper_expect_policy_set, + NULL); if (err < 0) return err; @@ -462,7 +464,7 @@ nfnl_cthelper_dump_tuple(struct sk_buff *skb, { struct nlattr *nest_parms; - nest_parms = nla_nest_start(skb, NFCTH_TUPLE | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, NFCTH_TUPLE); if (nest_parms == NULL) goto nla_put_failure; @@ -487,7 +489,7 @@ nfnl_cthelper_dump_policy(struct sk_buff *skb, int i; struct nlattr *nest_parms1, *nest_parms2; - nest_parms1 = nla_nest_start(skb, NFCTH_POLICY | NLA_F_NESTED); + nest_parms1 = nla_nest_start(skb, NFCTH_POLICY); if (nest_parms1 == NULL) goto nla_put_failure; @@ -496,8 +498,7 @@ nfnl_cthelper_dump_policy(struct sk_buff *skb, goto nla_put_failure; for (i = 0; i < helper->expect_class_max + 1; i++) { - nest_parms2 = nla_nest_start(skb, - (NFCTH_POLICY_SET+i) | NLA_F_NESTED); + nest_parms2 = nla_nest_start(skb, (NFCTH_POLICY_SET + i)); if (nest_parms2 == NULL) goto nla_put_failure; diff --git 
a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index c69b11ca5aad..427b411c5739 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -59,8 +59,11 @@ ctnl_timeout_parse_policy(void *timeout, if (!tb) return -ENOMEM; - ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max, attr, - l4proto->ctnl_timeout.nla_policy, NULL); + ret = nla_parse_nested_deprecated(tb, + l4proto->ctnl_timeout.nlattr_max, + attr, + l4proto->ctnl_timeout.nla_policy, + NULL); if (ret < 0) goto err; @@ -184,7 +187,7 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, htonl(refcount_read(&timeout->refcnt)))) goto nla_put_failure; - nest_parms = nla_nest_start(skb, CTA_TIMEOUT_DATA | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_TIMEOUT_DATA); if (!nest_parms) goto nla_put_failure; @@ -401,7 +404,7 @@ cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid, nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, l4proto->l4proto)) goto nla_put_failure; - nest_parms = nla_nest_start(skb, CTA_TIMEOUT_DATA | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_TIMEOUT_DATA); if (!nest_parms) goto nla_put_failure; diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c index 1f1d90c1716b..7b827bcb412c 100644 --- a/net/netfilter/nfnetlink_osf.c +++ b/net/netfilter/nfnetlink_osf.c @@ -255,9 +255,9 @@ nf_osf_match(const struct sk_buff *skb, u_int8_t family, } EXPORT_SYMBOL_GPL(nf_osf_match); -const char *nf_osf_find(const struct sk_buff *skb, - const struct list_head *nf_osf_fingers, - const int ttl_check) +bool nf_osf_find(const struct sk_buff *skb, + const struct list_head *nf_osf_fingers, + const int ttl_check, struct nf_osf_data *data) { const struct iphdr *ip = ip_hdr(skb); const struct nf_osf_user_finger *f; @@ -265,24 +265,24 @@ const char *nf_osf_find(const struct sk_buff *skb, const struct nf_osf_finger *kf; struct nf_osf_hdr_ctx ctx; const struct tcphdr *tcp; - const char *genre = NULL; memset(&ctx, 0, sizeof(ctx)); tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts); if (!tcp) - return NULL; + return false; list_for_each_entry_rcu(kf, &nf_osf_fingers[ctx.df], finger_entry) { f = &kf->finger; if (!nf_osf_match_one(skb, f, ttl_check, &ctx)) continue; - genre = f->genre; + data->genre = f->genre; + data->version = f->version; break; } - return genre; + return true; } EXPORT_SYMBOL_GPL(nf_osf_find); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index e057b2961d31..27dac47b29c2 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -351,7 +351,7 @@ static int nfqnl_put_bridge(struct nf_queue_entry *entry, struct sk_buff *skb) if (skb_vlan_tag_present(entskb)) { struct nlattr *nest; - nest = nla_nest_start(skb, NFQA_VLAN | NLA_F_NESTED); + nest = nla_nest_start(skb, NFQA_VLAN); if (!nest) goto nla_put_failure; @@ -1139,8 +1139,9 @@ static int nfqa_parse_bridge(struct nf_queue_entry *entry, struct nlattr *tb[NFQA_VLAN_MAX + 1]; int err; - err = nla_parse_nested(tb, NFQA_VLAN_MAX, nfqa[NFQA_VLAN], - nfqa_vlan_policy, NULL); + err = nla_parse_nested_deprecated(tb, NFQA_VLAN_MAX, + nfqa[NFQA_VLAN], + nfqa_vlan_policy, NULL); if (err < 0) return err; diff --git a/net/netfilter/nft_chain_nat.c b/net/netfilter/nft_chain_nat.c index ee4852088d50..2f89bde3c61c 100644 --- a/net/netfilter/nft_chain_nat.c +++ b/net/netfilter/nft_chain_nat.c @@ -74,6 +74,36 @@ static const struct nft_chain_type nft_chain_nat_ipv6 = { }; #endif +#ifdef 
CONFIG_NF_TABLES_INET +static int nft_nat_inet_reg(struct net *net, const struct nf_hook_ops *ops) +{ + return nf_nat_inet_register_fn(net, ops); +} + +static void nft_nat_inet_unreg(struct net *net, const struct nf_hook_ops *ops) +{ + nf_nat_inet_unregister_fn(net, ops); +} + +static const struct nft_chain_type nft_chain_nat_inet = { + .name = "nat", + .type = NFT_CHAIN_T_NAT, + .family = NFPROTO_INET, + .hook_mask = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING), + .hooks = { + [NF_INET_PRE_ROUTING] = nft_nat_do_chain, + [NF_INET_LOCAL_IN] = nft_nat_do_chain, + [NF_INET_LOCAL_OUT] = nft_nat_do_chain, + [NF_INET_POST_ROUTING] = nft_nat_do_chain, + }, + .ops_register = nft_nat_inet_reg, + .ops_unregister = nft_nat_inet_unreg, +}; +#endif + static int __init nft_chain_nat_init(void) { #ifdef CONFIG_NF_TABLES_IPV6 @@ -82,6 +112,9 @@ static int __init nft_chain_nat_init(void) #ifdef CONFIG_NF_TABLES_IPV4 nft_register_chain_type(&nft_chain_nat_ipv4); #endif +#ifdef CONFIG_NF_TABLES_INET + nft_register_chain_type(&nft_chain_nat_inet); +#endif return 0; } @@ -94,6 +127,9 @@ static void __exit nft_chain_nat_exit(void) #ifdef CONFIG_NF_TABLES_IPV6 nft_unregister_chain_type(&nft_chain_nat_ipv6); #endif +#ifdef CONFIG_NF_TABLES_INET + nft_unregister_chain_type(&nft_chain_nat_inet); +#endif } module_init(nft_chain_nat_init); diff --git a/net/netfilter/nft_chain_route.c b/net/netfilter/nft_chain_route.c new file mode 100644 index 000000000000..8826bbe71136 --- /dev/null +++ b/net/netfilter/nft_chain_route.c @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/skbuff.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter_ipv6.h> +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_ipv4.h> +#include <net/netfilter/nf_tables_ipv6.h> +#include <net/route.h> +#include <net/ip.h> + +#ifdef CONFIG_NF_TABLES_IPV4 +static unsigned int nf_route_table_hook4(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + const struct iphdr *iph; + struct nft_pktinfo pkt; + __be32 saddr, daddr; + unsigned int ret; + u32 mark; + int err; + u8 tos; + + nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_ipv4(&pkt, skb); + + mark = skb->mark; + iph = ip_hdr(skb); + saddr = iph->saddr; + daddr = iph->daddr; + tos = iph->tos; + + ret = nft_do_chain(&pkt, priv); + if (ret == NF_ACCEPT) { + iph = ip_hdr(skb); + + if (iph->saddr != saddr || + iph->daddr != daddr || + skb->mark != mark || + iph->tos != tos) { + err = ip_route_me_harder(state->net, skb, RTN_UNSPEC); + if (err < 0) + ret = NF_DROP_ERR(err); + } + } + return ret; +} + +static const struct nft_chain_type nft_chain_route_ipv4 = { + .name = "route", + .type = NFT_CHAIN_T_ROUTE, + .family = NFPROTO_IPV4, + .hook_mask = (1 << NF_INET_LOCAL_OUT), + .hooks = { + [NF_INET_LOCAL_OUT] = nf_route_table_hook4, + }, +}; +#endif + +#ifdef CONFIG_NF_TABLES_IPV6 +static unsigned int nf_route_table_hook6(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct in6_addr saddr, daddr; + struct nft_pktinfo pkt; + u32 mark, flowlabel; + unsigned int ret; + u8 hop_limit; + int err; + + nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_ipv6(&pkt, skb); + + /* save source/dest address, mark, hoplimit, flowlabel, priority */ + memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); + memcpy(&daddr, 
&ipv6_hdr(skb)->daddr, sizeof(daddr)); + mark = skb->mark; + hop_limit = ipv6_hdr(skb)->hop_limit; + + /* flowlabel and prio (includes version, which shouldn't change either)*/ + flowlabel = *((u32 *)ipv6_hdr(skb)); + + ret = nft_do_chain(&pkt, priv); + if (ret == NF_ACCEPT && + (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || + memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || + skb->mark != mark || + ipv6_hdr(skb)->hop_limit != hop_limit || + flowlabel != *((u32 *)ipv6_hdr(skb)))) { + err = nf_ip6_route_me_harder(state->net, skb); + if (err < 0) + ret = NF_DROP_ERR(err); + } + + return ret; +} + +static const struct nft_chain_type nft_chain_route_ipv6 = { + .name = "route", + .type = NFT_CHAIN_T_ROUTE, + .family = NFPROTO_IPV6, + .hook_mask = (1 << NF_INET_LOCAL_OUT), + .hooks = { + [NF_INET_LOCAL_OUT] = nf_route_table_hook6, + }, +}; +#endif + +#ifdef CONFIG_NF_TABLES_INET +static unsigned int nf_route_table_inet(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nft_pktinfo pkt; + + switch (state->pf) { + case NFPROTO_IPV4: + return nf_route_table_hook4(priv, skb, state); + case NFPROTO_IPV6: + return nf_route_table_hook6(priv, skb, state); + default: + nft_set_pktinfo(&pkt, skb, state); + break; + } + + return nft_do_chain(&pkt, priv); +} + +static const struct nft_chain_type nft_chain_route_inet = { + .name = "route", + .type = NFT_CHAIN_T_ROUTE, + .family = NFPROTO_INET, + .hook_mask = (1 << NF_INET_LOCAL_OUT), + .hooks = { + [NF_INET_LOCAL_OUT] = nf_route_table_inet, + }, +}; +#endif + +void __init nft_chain_route_init(void) +{ +#ifdef CONFIG_NF_TABLES_IPV6 + nft_register_chain_type(&nft_chain_route_ipv6); +#endif +#ifdef CONFIG_NF_TABLES_IPV4 + nft_register_chain_type(&nft_chain_route_ipv4); +#endif +#ifdef CONFIG_NF_TABLES_INET + nft_register_chain_type(&nft_chain_route_inet); +#endif +} + +void __exit nft_chain_route_fini(void) +{ +#ifdef CONFIG_NF_TABLES_IPV6 + nft_unregister_chain_type(&nft_chain_route_ipv6); +#endif +#ifdef CONFIG_NF_TABLES_IPV4 + nft_unregister_chain_type(&nft_chain_route_ipv4); +#endif +#ifdef CONFIG_NF_TABLES_INET + nft_unregister_chain_type(&nft_chain_route_inet); +#endif +} diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 469f9da5073b..276f1f2d6de1 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -198,8 +198,8 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) u32 flags; int err; - err = nla_parse_nested(tb, NFTA_RULE_COMPAT_MAX, attr, - nft_rule_compat_policy, NULL); + err = nla_parse_nested_deprecated(tb, NFTA_RULE_COMPAT_MAX, attr, + nft_rule_compat_policy, NULL); if (err < 0) return err; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 7b717fad6cdc..f043936763f3 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -178,6 +178,11 @@ static void nft_ct_get_eval(const struct nft_expr *expr, return; } #endif + case NFT_CT_ID: + if (!nf_ct_is_confirmed(ct)) + goto err; + *dest = nf_ct_get_id(ct); + return; default: break; } @@ -479,6 +484,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, len = sizeof(u16); break; #endif + case NFT_CT_ID: + len = sizeof(u32); + break; default: return -EOPNOTSUPP; } @@ -797,9 +805,11 @@ nft_ct_timeout_parse_policy(void *timeouts, if (!tb) return -ENOMEM; - ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max, - attr, l4proto->ctnl_timeout.nla_policy, - NULL); + ret = nla_parse_nested_deprecated(tb, + l4proto->ctnl_timeout.nlattr_max, + attr, + 
l4proto->ctnl_timeout.nla_policy, + NULL); if (ret < 0) goto err; @@ -928,7 +938,7 @@ static int nft_ct_timeout_obj_dump(struct sk_buff *skb, nla_put_be16(skb, NFTA_CT_TIMEOUT_L3PROTO, htons(timeout->l3num))) return -1; - nest_params = nla_nest_start(skb, NFTA_CT_TIMEOUT_DATA | NLA_F_NESTED); + nest_params = nla_nest_start(skb, NFTA_CT_TIMEOUT_DATA); if (!nest_params) return -1; diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index e461007558e8..8394560aa695 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -28,6 +28,23 @@ struct nft_dynset { struct nft_set_binding binding; }; +static int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src) +{ + int err; + + if (src->ops->clone) { + dst->ops = src->ops; + err = src->ops->clone(dst, src); + if (err < 0) + return err; + } else { + memcpy(dst, src, src->ops->size); + } + + __module_get(src->ops->type->owner); + return 0; +} + static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr, struct nft_regs *regs) { diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index bee156eaa400..86fd90085eaf 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -14,8 +14,7 @@ #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_nat.h> -#include <net/netfilter/ipv4/nf_nat_masquerade.h> -#include <net/netfilter/ipv6/nf_nat_masquerade.h> +#include <net/netfilter/nf_nat_masquerade.h> struct nft_masq { u32 flags; @@ -196,28 +195,73 @@ static struct nft_expr_type nft_masq_ipv6_type __read_mostly = { static int __init nft_masq_module_init_ipv6(void) { - int ret = nft_register_expr(&nft_masq_ipv6_type); - - if (ret) - return ret; - - ret = nf_nat_masquerade_ipv6_register_notifier(); - if (ret < 0) - nft_unregister_expr(&nft_masq_ipv6_type); - - return ret; + return nft_register_expr(&nft_masq_ipv6_type); } static void nft_masq_module_exit_ipv6(void) { nft_unregister_expr(&nft_masq_ipv6_type); - nf_nat_masquerade_ipv6_unregister_notifier(); } #else static inline int nft_masq_module_init_ipv6(void) { return 0; } static inline void nft_masq_module_exit_ipv6(void) {} #endif +#ifdef CONFIG_NF_TABLES_INET +static void nft_masq_inet_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + switch (nft_pf(pkt)) { + case NFPROTO_IPV4: + return nft_masq_ipv4_eval(expr, regs, pkt); + case NFPROTO_IPV6: + return nft_masq_ipv6_eval(expr, regs, pkt); + } + + WARN_ON_ONCE(1); +} + +static void +nft_masq_inet_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) +{ + nf_ct_netns_put(ctx->net, NFPROTO_INET); +} + +static struct nft_expr_type nft_masq_inet_type; +static const struct nft_expr_ops nft_masq_inet_ops = { + .type = &nft_masq_inet_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)), + .eval = nft_masq_inet_eval, + .init = nft_masq_init, + .destroy = nft_masq_inet_destroy, + .dump = nft_masq_dump, + .validate = nft_masq_validate, +}; + +static struct nft_expr_type nft_masq_inet_type __read_mostly = { + .family = NFPROTO_INET, + .name = "masq", + .ops = &nft_masq_inet_ops, + .policy = nft_masq_policy, + .maxattr = NFTA_MASQ_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_masq_module_init_inet(void) +{ + return nft_register_expr(&nft_masq_inet_type); +} + +static void nft_masq_module_exit_inet(void) +{ + nft_unregister_expr(&nft_masq_inet_type); +} +#else +static inline int nft_masq_module_init_inet(void) { return 0; } +static inline void 
nft_masq_module_exit_inet(void) {} +#endif + static int __init nft_masq_module_init(void) { int ret; @@ -226,15 +270,23 @@ static int __init nft_masq_module_init(void) if (ret < 0) return ret; + ret = nft_masq_module_init_inet(); + if (ret < 0) { + nft_masq_module_exit_ipv6(); + return ret; + } + ret = nft_register_expr(&nft_masq_ipv4_type); if (ret < 0) { + nft_masq_module_exit_inet(); nft_masq_module_exit_ipv6(); return ret; } - ret = nf_nat_masquerade_ipv4_register_notifier(); + ret = nf_nat_masquerade_inet_register_notifiers(); if (ret < 0) { nft_masq_module_exit_ipv6(); + nft_masq_module_exit_inet(); nft_unregister_expr(&nft_masq_ipv4_type); return ret; } @@ -245,8 +297,9 @@ static int __init nft_masq_module_init(void) static void __exit nft_masq_module_exit(void) { nft_masq_module_exit_ipv6(); + nft_masq_module_exit_inet(); nft_unregister_expr(&nft_masq_ipv4_type); - nf_nat_masquerade_ipv4_unregister_notifier(); + nf_nat_masquerade_inet_unregister_notifiers(); } module_init(nft_masq_module_init); diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index e93aed9bda88..d90d421826aa 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -140,7 +140,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return -EINVAL; family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY])); - if (family != ctx->family) + if (ctx->family != NFPROTO_INET && ctx->family != family) return -EOPNOTSUPP; switch (family) { @@ -278,13 +278,67 @@ static struct nft_expr_type nft_nat_type __read_mostly = { .owner = THIS_MODULE, }; +#ifdef CONFIG_NF_TABLES_INET +static void nft_nat_inet_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_nat *priv = nft_expr_priv(expr); + + if (priv->family == nft_pf(pkt)) + nft_nat_eval(expr, regs, pkt); +} + +static const struct nft_expr_ops nft_nat_inet_ops = { + .type = &nft_nat_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_nat)), + .eval = nft_nat_inet_eval, + .init = nft_nat_init, + .destroy = nft_nat_destroy, + .dump = nft_nat_dump, + .validate = nft_nat_validate, +}; + +static struct nft_expr_type nft_inet_nat_type __read_mostly = { + .name = "nat", + .family = NFPROTO_INET, + .ops = &nft_nat_inet_ops, + .policy = nft_nat_policy, + .maxattr = NFTA_NAT_MAX, + .owner = THIS_MODULE, +}; + +static int nft_nat_inet_module_init(void) +{ + return nft_register_expr(&nft_inet_nat_type); +} + +static void nft_nat_inet_module_exit(void) +{ + nft_unregister_expr(&nft_inet_nat_type); +} +#else +static int nft_nat_inet_module_init(void) { return 0; } +static void nft_nat_inet_module_exit(void) { } +#endif + static int __init nft_nat_module_init(void) { - return nft_register_expr(&nft_nat_type); + int ret = nft_nat_inet_module_init(); + + if (ret) + return ret; + + ret = nft_register_expr(&nft_nat_type); + if (ret) + nft_nat_inet_module_exit(); + + return ret; } static void __exit nft_nat_module_exit(void) { + nft_nat_inet_module_exit(); nft_unregister_expr(&nft_nat_type); } diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index b13618c764ec..87b60d6617ef 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -7,11 +7,13 @@ struct nft_osf { enum nft_registers dreg:8; u8 ttl; + u32 flags; }; static const struct nla_policy nft_osf_policy[NFTA_OSF_MAX + 1] = { [NFTA_OSF_DREG] = { .type = NLA_U32 }, [NFTA_OSF_TTL] = { .type = NLA_U8 }, + [NFTA_OSF_FLAGS] = { .type = NLA_U32 }, }; static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs 
*regs, @@ -20,9 +22,10 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs, struct nft_osf *priv = nft_expr_priv(expr); u32 *dest = ®s->data[priv->dreg]; struct sk_buff *skb = pkt->skb; + char os_match[NFT_OSF_MAXGENRELEN + 1]; const struct tcphdr *tcp; + struct nf_osf_data data; struct tcphdr _tcph; - const char *os_name; tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph); @@ -35,11 +38,17 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs, return; } - os_name = nf_osf_find(skb, nf_osf_fingers, priv->ttl); - if (!os_name) + if (!nf_osf_find(skb, nf_osf_fingers, priv->ttl, &data)) { strncpy((char *)dest, "unknown", NFT_OSF_MAXGENRELEN); - else - strncpy((char *)dest, os_name, NFT_OSF_MAXGENRELEN); + } else { + if (priv->flags & NFT_OSF_F_VERSION) + snprintf(os_match, NFT_OSF_MAXGENRELEN, "%s:%s", + data.genre, data.version); + else + strlcpy(os_match, data.genre, NFT_OSF_MAXGENRELEN); + + strncpy((char *)dest, os_match, NFT_OSF_MAXGENRELEN); + } } static int nft_osf_init(const struct nft_ctx *ctx, @@ -47,6 +56,7 @@ static int nft_osf_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_osf *priv = nft_expr_priv(expr); + u32 flags; int err; u8 ttl; @@ -57,6 +67,13 @@ static int nft_osf_init(const struct nft_ctx *ctx, priv->ttl = ttl; } + if (tb[NFTA_OSF_FLAGS]) { + flags = ntohl(nla_get_be32(tb[NFTA_OSF_FLAGS])); + if (flags != NFT_OSF_F_VERSION) + return -EINVAL; + priv->flags = flags; + } + priv->dreg = nft_parse_register(tb[NFTA_OSF_DREG]); err = nft_validate_register_store(ctx, priv->dreg, NULL, NFT_DATA_VALUE, NFT_OSF_MAXGENRELEN); @@ -73,6 +90,9 @@ static int nft_osf_dump(struct sk_buff *skb, const struct nft_expr *expr) if (nla_put_u8(skb, NFTA_OSF_TTL, priv->ttl)) goto nla_put_failure; + if (nla_put_be32(skb, NFTA_OSF_FLAGS, ntohl(priv->flags))) + goto nla_put_failure; + if (nft_dump_register(skb, NFTA_OSF_DREG, priv->dreg)) goto nla_put_failure; diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index a340cd8a751b..da74fdc4a684 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -82,7 +82,7 @@ static int nft_redir_init(const struct nft_ctx *ctx, return nf_ct_netns_get(ctx->net, ctx->family); } -int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_redir *priv = nft_expr_priv(expr); @@ -202,6 +202,55 @@ static struct nft_expr_type nft_redir_ipv6_type __read_mostly = { }; #endif +#ifdef CONFIG_NF_TABLES_INET +static void nft_redir_inet_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + switch (nft_pf(pkt)) { + case NFPROTO_IPV4: + return nft_redir_ipv4_eval(expr, regs, pkt); + case NFPROTO_IPV6: + return nft_redir_ipv6_eval(expr, regs, pkt); + } + + WARN_ON_ONCE(1); +} + +static void +nft_redir_inet_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) +{ + nf_ct_netns_put(ctx->net, NFPROTO_INET); +} + +static struct nft_expr_type nft_redir_inet_type; +static const struct nft_expr_ops nft_redir_inet_ops = { + .type = &nft_redir_inet_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), + .eval = nft_redir_inet_eval, + .init = nft_redir_init, + .destroy = nft_redir_inet_destroy, + .dump = nft_redir_dump, + .validate = nft_redir_validate, +}; + +static struct nft_expr_type nft_redir_inet_type __read_mostly = { + .family = NFPROTO_INET, + .name = "redir", + .ops = 
&nft_redir_inet_ops, + .policy = nft_redir_policy, + .maxattr = NFTA_MASQ_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_redir_module_init_inet(void) +{ + return nft_register_expr(&nft_redir_inet_type); +} +#else +static inline int nft_redir_module_init_inet(void) { return 0; } +#endif + static int __init nft_redir_module_init(void) { int ret = nft_register_expr(&nft_redir_ipv4_type); @@ -217,6 +266,15 @@ static int __init nft_redir_module_init(void) } #endif + ret = nft_redir_module_init_inet(); + if (ret < 0) { + nft_unregister_expr(&nft_redir_ipv4_type); +#ifdef CONFIG_NF_TABLES_IPV6 + nft_unregister_expr(&nft_redir_ipv6_type); +#endif + return ret; + } + return ret; } @@ -226,6 +284,9 @@ static void __exit nft_redir_module_exit(void) #ifdef CONFIG_NF_TABLES_IPV6 nft_unregister_expr(&nft_redir_ipv6_type); #endif +#ifdef CONFIG_NF_TABLES_INET + nft_unregister_expr(&nft_redir_inet_type); +#endif } module_init(nft_redir_module_init); diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index b113fcac94e1..3d4c2ae605a8 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -166,8 +166,8 @@ static int nft_tunnel_obj_ip_init(const struct nft_ctx *ctx, struct nlattr *tb[NFTA_TUNNEL_KEY_IP_MAX + 1]; int err; - err = nla_parse_nested(tb, NFTA_TUNNEL_KEY_IP_MAX, attr, - nft_tunnel_ip_policy, NULL); + err = nla_parse_nested_deprecated(tb, NFTA_TUNNEL_KEY_IP_MAX, attr, + nft_tunnel_ip_policy, NULL); if (err < 0) return err; @@ -195,8 +195,8 @@ static int nft_tunnel_obj_ip6_init(const struct nft_ctx *ctx, struct nlattr *tb[NFTA_TUNNEL_KEY_IP6_MAX + 1]; int err; - err = nla_parse_nested(tb, NFTA_TUNNEL_KEY_IP6_MAX, attr, - nft_tunnel_ip6_policy, NULL); + err = nla_parse_nested_deprecated(tb, NFTA_TUNNEL_KEY_IP6_MAX, attr, + nft_tunnel_ip6_policy, NULL); if (err < 0) return err; @@ -231,8 +231,8 @@ static int nft_tunnel_obj_vxlan_init(const struct nlattr *attr, struct nlattr *tb[NFTA_TUNNEL_KEY_VXLAN_MAX + 1]; int err; - err = nla_parse_nested(tb, NFTA_TUNNEL_KEY_VXLAN_MAX, attr, - nft_tunnel_opts_vxlan_policy, NULL); + err = nla_parse_nested_deprecated(tb, NFTA_TUNNEL_KEY_VXLAN_MAX, attr, + nft_tunnel_opts_vxlan_policy, NULL); if (err < 0) return err; @@ -260,8 +260,9 @@ static int nft_tunnel_obj_erspan_init(const struct nlattr *attr, uint8_t hwid, dir; int err, version; - err = nla_parse_nested(tb, NFTA_TUNNEL_KEY_ERSPAN_MAX, attr, - nft_tunnel_opts_erspan_policy, NULL); + err = nla_parse_nested_deprecated(tb, NFTA_TUNNEL_KEY_ERSPAN_MAX, + attr, nft_tunnel_opts_erspan_policy, + NULL); if (err < 0) return err; @@ -309,8 +310,8 @@ static int nft_tunnel_obj_opts_init(const struct nft_ctx *ctx, struct nlattr *tb[NFTA_TUNNEL_KEY_OPTS_MAX + 1]; int err; - err = nla_parse_nested(tb, NFTA_TUNNEL_KEY_OPTS_MAX, attr, - nft_tunnel_opts_policy, NULL); + err = nla_parse_nested_deprecated(tb, NFTA_TUNNEL_KEY_OPTS_MAX, attr, + nft_tunnel_opts_policy, NULL); if (err < 0) return err; @@ -437,7 +438,7 @@ static int nft_tunnel_ip_dump(struct sk_buff *skb, struct ip_tunnel_info *info) struct nlattr *nest; if (info->mode & IP_TUNNEL_INFO_IPV6) { - nest = nla_nest_start(skb, NFTA_TUNNEL_KEY_IP6); + nest = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_IP6); if (!nest) return -1; @@ -448,7 +449,7 @@ static int nft_tunnel_ip_dump(struct sk_buff *skb, struct ip_tunnel_info *info) nla_nest_end(skb, nest); } else { - nest = nla_nest_start(skb, NFTA_TUNNEL_KEY_IP); + nest = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_IP); if (!nest) return -1; @@ -468,7 +469,7 @@ static int 
nft_tunnel_opts_dump(struct sk_buff *skb, struct nft_tunnel_opts *opts = &priv->opts; struct nlattr *nest; - nest = nla_nest_start(skb, NFTA_TUNNEL_KEY_OPTS); + nest = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS); if (!nest) return -1; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index e5e5c64df8d1..0a6656ed1534 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -227,7 +227,7 @@ xt_request_find_match(uint8_t nfproto, const char *name, uint8_t revision) EXPORT_SYMBOL_GPL(xt_request_find_match); /* Find target, grabs ref. Returns ERR_PTR() on error. */ -struct xt_target *xt_find_target(u8 af, const char *name, u8 revision) +static struct xt_target *xt_find_target(u8 af, const char *name, u8 revision) { struct xt_target *t; int err = -ENOENT; @@ -255,7 +255,6 @@ struct xt_target *xt_find_target(u8 af, const char *name, u8 revision) return ERR_PTR(err); } -EXPORT_SYMBOL(xt_find_target); struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision) { diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 0fa863f57575..d59cb4730fac 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -103,85 +103,24 @@ xt_ct_set_helper(struct nf_conn *ct, const char *helper_name, return 0; } -#ifdef CONFIG_NF_CONNTRACK_TIMEOUT -static void __xt_ct_tg_timeout_put(struct nf_ct_timeout *timeout) -{ - typeof(nf_ct_timeout_put_hook) timeout_put; - - timeout_put = rcu_dereference(nf_ct_timeout_put_hook); - if (timeout_put) - timeout_put(timeout); -} -#endif - static int xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par, const char *timeout_name) { #ifdef CONFIG_NF_CONNTRACK_TIMEOUT - typeof(nf_ct_timeout_find_get_hook) timeout_find_get; const struct nf_conntrack_l4proto *l4proto; - struct nf_ct_timeout *timeout; - struct nf_conn_timeout *timeout_ext; - const char *errmsg = NULL; - int ret = 0; u8 proto; - rcu_read_lock(); - timeout_find_get = rcu_dereference(nf_ct_timeout_find_get_hook); - if (timeout_find_get == NULL) { - ret = -ENOENT; - errmsg = "Timeout policy base is empty"; - goto out; - } - proto = xt_ct_find_proto(par); if (!proto) { - ret = -EINVAL; - errmsg = "You must specify a L4 protocol and not use inversions on it"; - goto out; - } - - timeout = timeout_find_get(par->net, timeout_name); - if (timeout == NULL) { - ret = -ENOENT; - pr_info_ratelimited("No such timeout policy \"%s\"\n", - timeout_name); - goto out; - } - - if (timeout->l3num != par->family) { - ret = -EINVAL; - pr_info_ratelimited("Timeout policy `%s' can only be used by L%d protocol number %d\n", - timeout_name, 3, timeout->l3num); - goto err_put_timeout; + pr_info_ratelimited("You must specify a L4 protocol and not " + "use inversions on it"); + return -EINVAL; } - /* Make sure the timeout policy matches any existing protocol tracker, - * otherwise default to generic. 
- */ l4proto = nf_ct_l4proto_find(proto); - if (timeout->l4proto->l4proto != l4proto->l4proto) { - ret = -EINVAL; - pr_info_ratelimited("Timeout policy `%s' can only be used by L%d protocol number %d\n", - timeout_name, 4, timeout->l4proto->l4proto); - goto err_put_timeout; - } - timeout_ext = nf_ct_timeout_ext_add(ct, timeout, GFP_ATOMIC); - if (!timeout_ext) { - ret = -ENOMEM; - goto err_put_timeout; - } + return nf_ct_set_timeout(par->net, ct, par->family, l4proto->l4proto, + timeout_name); - rcu_read_unlock(); - return ret; - -err_put_timeout: - __xt_ct_tg_timeout_put(timeout); -out: - rcu_read_unlock(); - if (errmsg) - pr_info_ratelimited("%s\n", errmsg); - return ret; #else return -EOPNOTSUPP; #endif @@ -328,26 +267,6 @@ static int xt_ct_tg_check_v2(const struct xt_tgchk_param *par) return xt_ct_tg_check(par, par->targinfo); } -static void xt_ct_destroy_timeout(struct nf_conn *ct) -{ -#ifdef CONFIG_NF_CONNTRACK_TIMEOUT - struct nf_conn_timeout *timeout_ext; - typeof(nf_ct_timeout_put_hook) timeout_put; - - rcu_read_lock(); - timeout_put = rcu_dereference(nf_ct_timeout_put_hook); - - if (timeout_put) { - timeout_ext = nf_ct_timeout_find(ct); - if (timeout_ext) { - timeout_put(timeout_ext->timeout); - RCU_INIT_POINTER(timeout_ext->timeout, NULL); - } - } - rcu_read_unlock(); -#endif -} - static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par, struct xt_ct_target_info_v1 *info) { @@ -361,7 +280,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par, nf_ct_netns_put(par->net, par->family); - xt_ct_destroy_timeout(ct); + nf_ct_destroy_timeout(ct); nf_ct_put(info->ct); } } diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/netfilter/xt_MASQUERADE.c index fd3f9e8a74da..ece20d832adc 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/netfilter/xt_MASQUERADE.c @@ -9,20 +9,10 @@ * published by the Free Software Foundation. 
*/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/types.h> -#include <linux/inetdevice.h> -#include <linux/ip.h> -#include <linux/timer.h> #include <linux/module.h> -#include <linux/netfilter.h> -#include <net/protocol.h> -#include <net/ip.h> -#include <net/checksum.h> -#include <net/route.h> -#include <linux/netfilter_ipv4.h> #include <linux/netfilter/x_tables.h> #include <net/netfilter/nf_nat.h> -#include <net/netfilter/ipv4/nf_nat_masquerade.h> +#include <net/netfilter/nf_nat_masquerade.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); @@ -64,38 +54,78 @@ static void masquerade_tg_destroy(const struct xt_tgdtor_param *par) nf_ct_netns_put(par->net, par->family); } -static struct xt_target masquerade_tg_reg __read_mostly = { - .name = "MASQUERADE", - .family = NFPROTO_IPV4, - .target = masquerade_tg, - .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), - .table = "nat", - .hooks = 1 << NF_INET_POST_ROUTING, - .checkentry = masquerade_tg_check, - .destroy = masquerade_tg_destroy, - .me = THIS_MODULE, +#if IS_ENABLED(CONFIG_IPV6) +static unsigned int +masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par) +{ + return nf_nat_masquerade_ipv6(skb, par->targinfo, xt_out(par)); +} + +static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par) +{ + const struct nf_nat_range2 *range = par->targinfo; + + if (range->flags & NF_NAT_RANGE_MAP_IPS) + return -EINVAL; + + return nf_ct_netns_get(par->net, par->family); +} +#endif + +static struct xt_target masquerade_tg_reg[] __read_mostly = { + { +#if IS_ENABLED(CONFIG_IPV6) + .name = "MASQUERADE", + .family = NFPROTO_IPV6, + .target = masquerade_tg6, + .targetsize = sizeof(struct nf_nat_range), + .table = "nat", + .hooks = 1 << NF_INET_POST_ROUTING, + .checkentry = masquerade_tg6_checkentry, + .destroy = masquerade_tg_destroy, + .me = THIS_MODULE, + }, { +#endif + .name = "MASQUERADE", + .family = NFPROTO_IPV4, + .target = masquerade_tg, + .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), + .table = "nat", + .hooks = 1 << NF_INET_POST_ROUTING, + .checkentry = masquerade_tg_check, + .destroy = masquerade_tg_destroy, + .me = THIS_MODULE, + } }; static int __init masquerade_tg_init(void) { int ret; - ret = xt_register_target(&masquerade_tg_reg); + ret = xt_register_targets(masquerade_tg_reg, + ARRAY_SIZE(masquerade_tg_reg)); if (ret) return ret; - ret = nf_nat_masquerade_ipv4_register_notifier(); - if (ret) - xt_unregister_target(&masquerade_tg_reg); + ret = nf_nat_masquerade_inet_register_notifiers(); + if (ret) { + xt_unregister_targets(masquerade_tg_reg, + ARRAY_SIZE(masquerade_tg_reg)); + return ret; + } return ret; } static void __exit masquerade_tg_exit(void) { - xt_unregister_target(&masquerade_tg_reg); - nf_nat_masquerade_ipv4_unregister_notifier(); + xt_unregister_targets(masquerade_tg_reg, ARRAY_SIZE(masquerade_tg_reg)); + nf_nat_masquerade_inet_unregister_notifiers(); } module_init(masquerade_tg_init); module_exit(masquerade_tg_exit); +#if IS_ENABLED(CONFIG_IPV6) +MODULE_ALIAS("ip6t_MASQUERADE"); +#endif +MODULE_ALIAS("ipt_MASQUERADE"); diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c index 4fa4efd24353..893374ac3758 100644 --- a/net/netfilter/xt_connlabel.c +++ b/net/netfilter/xt_connlabel.c @@ -15,7 +15,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); -MODULE_DESCRIPTION("Xtables: add/match connection trackling labels"); +MODULE_DESCRIPTION("Xtables: add/match connection tracking labels"); 
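A note on the xt_MASQUERADE move just above: the IPv4-only target file is renamed from net/ipv4/netfilter into net/netfilter and gains an IPv6 twin, so both targets live in one masquerade_tg_reg[] array and a single xt_register_targets() call covers them. The init path from that hunk, reflowed as plain C with identifiers exactly as in the diff:

static int __init masquerade_tg_init(void)
{
	int ret;

	ret = xt_register_targets(masquerade_tg_reg,
				  ARRAY_SIZE(masquerade_tg_reg));
	if (ret)
		return ret;

	ret = nf_nat_masquerade_inet_register_notifiers();
	if (ret) {
		xt_unregister_targets(masquerade_tg_reg,
				      ARRAY_SIZE(masquerade_tg_reg));
		return ret;
	}

	return ret;
}

This mirrors the masquerade notifier consolidation earlier in the series: the xt module no longer knows about per-family notifiers, it just registers the shared inet pair and rolls back the target array if that fails.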
MODULE_ALIAS("ipt_connlabel"); MODULE_ALIAS("ip6t_connlabel"); diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 8d86e39d6280..a30536b17ee1 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -288,8 +288,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg, size = 16; } /* FIXME: don't use vmalloc() here or anywhere else -HW */ - hinfo = vmalloc(sizeof(struct xt_hashlimit_htable) + - sizeof(struct hlist_head) * size); + hinfo = vmalloc(struct_size(hinfo, hash, size)); if (hinfo == NULL) return -ENOMEM; *out_hinfo = hinfo; diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c index 4d748975117d..1de87172885d 100644 --- a/net/netlabel/netlabel_calipso.c +++ b/net/netlabel/netlabel_calipso.c @@ -321,29 +321,29 @@ static int netlbl_calipso_remove(struct sk_buff *skb, struct genl_info *info) static const struct genl_ops netlbl_calipso_ops[] = { { .cmd = NLBL_CALIPSO_C_ADD, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = calipso_genl_policy, .doit = netlbl_calipso_add, .dumpit = NULL, }, { .cmd = NLBL_CALIPSO_C_REMOVE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = calipso_genl_policy, .doit = netlbl_calipso_remove, .dumpit = NULL, }, { .cmd = NLBL_CALIPSO_C_LIST, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, - .policy = calipso_genl_policy, .doit = netlbl_calipso_list, .dumpit = NULL, }, { .cmd = NLBL_CALIPSO_C_LISTALL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, - .policy = calipso_genl_policy, .doit = NULL, .dumpit = netlbl_calipso_listall, }, @@ -354,6 +354,7 @@ static struct genl_family netlbl_calipso_gnl_family __ro_after_init = { .name = NETLBL_NLTYPE_CALIPSO_NAME, .version = NETLBL_PROTO_VERSION, .maxattr = NLBL_CALIPSO_A_MAX, + .policy = calipso_genl_policy, .module = THIS_MODULE, .ops = netlbl_calipso_ops, .n_ops = ARRAY_SIZE(netlbl_calipso_ops), diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 9aacf2da3d98..5d1121981d0b 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -99,9 +99,10 @@ static int netlbl_cipsov4_add_common(struct genl_info *info, doi_def->doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); - if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_TAGLST], - NLBL_CIPSOV4_A_MAX, - netlbl_cipsov4_genl_policy, NULL) != 0) + if (nla_validate_nested_deprecated(info->attrs[NLBL_CIPSOV4_A_TAGLST], + NLBL_CIPSOV4_A_MAX, + netlbl_cipsov4_genl_policy, + NULL) != 0) return -EINVAL; nla_for_each_nested(nla, info->attrs[NLBL_CIPSOV4_A_TAGLST], nla_rem) @@ -146,9 +147,10 @@ static int netlbl_cipsov4_add_std(struct genl_info *info, !info->attrs[NLBL_CIPSOV4_A_MLSLVLLST]) return -EINVAL; - if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_MLSLVLLST], - NLBL_CIPSOV4_A_MAX, - netlbl_cipsov4_genl_policy, NULL) != 0) + if (nla_validate_nested_deprecated(info->attrs[NLBL_CIPSOV4_A_MLSLVLLST], + NLBL_CIPSOV4_A_MAX, + netlbl_cipsov4_genl_policy, + NULL) != 0) return -EINVAL; doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL); @@ -170,9 +172,10 @@ static int netlbl_cipsov4_add_std(struct genl_info *info, info->attrs[NLBL_CIPSOV4_A_MLSLVLLST], nla_a_rem) if (nla_type(nla_a) == NLBL_CIPSOV4_A_MLSLVL) { - if (nla_validate_nested(nla_a, NLBL_CIPSOV4_A_MAX, - netlbl_cipsov4_genl_policy, - NULL) != 0) + if (nla_validate_nested_deprecated(nla_a, + 
NLBL_CIPSOV4_A_MAX, + netlbl_cipsov4_genl_policy, + NULL) != 0) goto add_std_failure; nla_for_each_nested(nla_b, nla_a, nla_b_rem) switch (nla_type(nla_b)) { @@ -234,19 +237,20 @@ static int netlbl_cipsov4_add_std(struct genl_info *info, } if (info->attrs[NLBL_CIPSOV4_A_MLSCATLST]) { - if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_MLSCATLST], - NLBL_CIPSOV4_A_MAX, - netlbl_cipsov4_genl_policy, NULL) != 0) + if (nla_validate_nested_deprecated(info->attrs[NLBL_CIPSOV4_A_MLSCATLST], + NLBL_CIPSOV4_A_MAX, + netlbl_cipsov4_genl_policy, + NULL) != 0) goto add_std_failure; nla_for_each_nested(nla_a, info->attrs[NLBL_CIPSOV4_A_MLSCATLST], nla_a_rem) if (nla_type(nla_a) == NLBL_CIPSOV4_A_MLSCAT) { - if (nla_validate_nested(nla_a, - NLBL_CIPSOV4_A_MAX, - netlbl_cipsov4_genl_policy, - NULL) != 0) + if (nla_validate_nested_deprecated(nla_a, + NLBL_CIPSOV4_A_MAX, + netlbl_cipsov4_genl_policy, + NULL) != 0) goto add_std_failure; nla_for_each_nested(nla_b, nla_a, nla_b_rem) switch (nla_type(nla_b)) { @@ -498,7 +502,7 @@ list_start: if (ret_val != 0) goto list_failure_lock; - nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_TAGLST); + nla_a = nla_nest_start_noflag(ans_skb, NLBL_CIPSOV4_A_TAGLST); if (nla_a == NULL) { ret_val = -ENOMEM; goto list_failure_lock; @@ -517,7 +521,8 @@ list_start: switch (doi_def->type) { case CIPSO_V4_MAP_TRANS: - nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSLVLLST); + nla_a = nla_nest_start_noflag(ans_skb, + NLBL_CIPSOV4_A_MLSLVLLST); if (nla_a == NULL) { ret_val = -ENOMEM; goto list_failure_lock; @@ -529,7 +534,8 @@ list_start: CIPSO_V4_INV_LVL) continue; - nla_b = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSLVL); + nla_b = nla_nest_start_noflag(ans_skb, + NLBL_CIPSOV4_A_MLSLVL); if (nla_b == NULL) { ret_val = -ENOMEM; goto list_retry; @@ -548,7 +554,8 @@ list_start: } nla_nest_end(ans_skb, nla_a); - nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSCATLST); + nla_a = nla_nest_start_noflag(ans_skb, + NLBL_CIPSOV4_A_MLSCATLST); if (nla_a == NULL) { ret_val = -ENOMEM; goto list_retry; @@ -560,7 +567,8 @@ list_start: CIPSO_V4_INV_CAT) continue; - nla_b = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSCAT); + nla_b = nla_nest_start_noflag(ans_skb, + NLBL_CIPSOV4_A_MLSCAT); if (nla_b == NULL) { ret_val = -ENOMEM; goto list_retry; @@ -733,29 +741,29 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info) static const struct genl_ops netlbl_cipsov4_ops[] = { { .cmd = NLBL_CIPSOV4_C_ADD, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = netlbl_cipsov4_genl_policy, .doit = netlbl_cipsov4_add, .dumpit = NULL, }, { .cmd = NLBL_CIPSOV4_C_REMOVE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = netlbl_cipsov4_genl_policy, .doit = netlbl_cipsov4_remove, .dumpit = NULL, }, { .cmd = NLBL_CIPSOV4_C_LIST, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, - .policy = netlbl_cipsov4_genl_policy, .doit = netlbl_cipsov4_list, .dumpit = NULL, }, { .cmd = NLBL_CIPSOV4_C_LISTALL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, - .policy = netlbl_cipsov4_genl_policy, .doit = NULL, .dumpit = netlbl_cipsov4_listall, }, @@ -766,6 +774,7 @@ static struct genl_family netlbl_cipsov4_gnl_family __ro_after_init = { .name = NETLBL_NLTYPE_CIPSOV4_NAME, .version = NETLBL_PROTO_VERSION, .maxattr = NLBL_CIPSOV4_A_MAX, + .policy = netlbl_cipsov4_genl_policy, .module = THIS_MODULE, .ops = netlbl_cipsov4_ops, .n_ops = 
ARRAY_SIZE(netlbl_cipsov4_ops), diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 21e0095b1d14..cae04f207782 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -315,7 +315,7 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, switch (entry->def.type) { case NETLBL_NLTYPE_ADDRSELECT: - nla_a = nla_nest_start(skb, NLBL_MGMT_A_SELECTORLIST); + nla_a = nla_nest_start_noflag(skb, NLBL_MGMT_A_SELECTORLIST); if (nla_a == NULL) return -ENOMEM; @@ -323,7 +323,8 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, struct netlbl_domaddr4_map *map4; struct in_addr addr_struct; - nla_b = nla_nest_start(skb, NLBL_MGMT_A_ADDRSELECTOR); + nla_b = nla_nest_start_noflag(skb, + NLBL_MGMT_A_ADDRSELECTOR); if (nla_b == NULL) return -ENOMEM; @@ -357,7 +358,8 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, netlbl_af6list_foreach_rcu(iter6, &entry->def.addrsel->list6) { struct netlbl_domaddr6_map *map6; - nla_b = nla_nest_start(skb, NLBL_MGMT_A_ADDRSELECTOR); + nla_b = nla_nest_start_noflag(skb, + NLBL_MGMT_A_ADDRSELECTOR); if (nla_b == NULL) return -ENOMEM; @@ -772,57 +774,57 @@ version_failure: static const struct genl_ops netlbl_mgmt_genl_ops[] = { { .cmd = NLBL_MGMT_C_ADD, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = netlbl_mgmt_genl_policy, .doit = netlbl_mgmt_add, .dumpit = NULL, }, { .cmd = NLBL_MGMT_C_REMOVE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = netlbl_mgmt_genl_policy, .doit = netlbl_mgmt_remove, .dumpit = NULL, }, { .cmd = NLBL_MGMT_C_LISTALL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, - .policy = netlbl_mgmt_genl_policy, .doit = NULL, .dumpit = netlbl_mgmt_listall, }, { .cmd = NLBL_MGMT_C_ADDDEF, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = netlbl_mgmt_genl_policy, .doit = netlbl_mgmt_adddef, .dumpit = NULL, }, { .cmd = NLBL_MGMT_C_REMOVEDEF, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = netlbl_mgmt_genl_policy, .doit = netlbl_mgmt_removedef, .dumpit = NULL, }, { .cmd = NLBL_MGMT_C_LISTDEF, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, - .policy = netlbl_mgmt_genl_policy, .doit = netlbl_mgmt_listdef, .dumpit = NULL, }, { .cmd = NLBL_MGMT_C_PROTOCOLS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, - .policy = netlbl_mgmt_genl_policy, .doit = NULL, .dumpit = netlbl_mgmt_protocols, }, { .cmd = NLBL_MGMT_C_VERSION, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, - .policy = netlbl_mgmt_genl_policy, .doit = netlbl_mgmt_version, .dumpit = NULL, }, @@ -833,6 +835,7 @@ static struct genl_family netlbl_mgmt_gnl_family __ro_after_init = { .name = NETLBL_NLTYPE_MGMT_NAME, .version = NETLBL_PROTO_VERSION, .maxattr = NLBL_MGMT_A_MAX, + .policy = netlbl_mgmt_genl_policy, .module = THIS_MODULE, .ops = netlbl_mgmt_genl_ops, .n_ops = ARRAY_SIZE(netlbl_mgmt_genl_ops), diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index c92894c3e40a..b87dd34e1835 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1317,57 +1317,57 @@ unlabel_staticlistdef_return: static const struct genl_ops netlbl_unlabel_genl_ops[] = { { .cmd = NLBL_UNLABEL_C_STATICADD, + .validate = GENL_DONT_VALIDATE_STRICT | 
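Each netlabel family above follows the same three-step recipe, repeated across this whole merge: drop .policy from every genl_ops entry, tag the pre-existing commands with GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP so old binaries are not broken by the new strict parsing, and hang the policy off the genl_family itself, where the core can apply it uniformly and export it to userspace. In outline (hypothetical family and names):

static const struct genl_ops demo_genl_ops[] = {
        {
                .cmd      = DEMO_CMD_ADD,
                /* legacy command: keep accepting sloppy requests */
                .validate = GENL_DONT_VALIDATE_STRICT |
                            GENL_DONT_VALIDATE_DUMP,
                .flags    = GENL_ADMIN_PERM,
                .doit     = demo_cmd_add,
        },
};

static struct genl_family demo_genl_family __ro_after_init = {
        .name    = "demo",
        .version = 1,
        .maxattr = DEMO_A_MAX,
        .policy  = demo_policy,         /* per-family, no longer per-op */
        .module  = THIS_MODULE,
        .ops     = demo_genl_ops,
        .n_ops   = ARRAY_SIZE(demo_genl_ops),
};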
GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = netlbl_unlabel_genl_policy, .doit = netlbl_unlabel_staticadd, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_STATICREMOVE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = netlbl_unlabel_genl_policy, .doit = netlbl_unlabel_staticremove, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_STATICLIST, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, - .policy = netlbl_unlabel_genl_policy, .doit = NULL, .dumpit = netlbl_unlabel_staticlist, }, { .cmd = NLBL_UNLABEL_C_STATICADDDEF, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = netlbl_unlabel_genl_policy, .doit = netlbl_unlabel_staticadddef, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_STATICREMOVEDEF, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = netlbl_unlabel_genl_policy, .doit = netlbl_unlabel_staticremovedef, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_STATICLISTDEF, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, - .policy = netlbl_unlabel_genl_policy, .doit = NULL, .dumpit = netlbl_unlabel_staticlistdef, }, { .cmd = NLBL_UNLABEL_C_ACCEPT, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = netlbl_unlabel_genl_policy, .doit = netlbl_unlabel_accept, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_LIST, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, - .policy = netlbl_unlabel_genl_policy, .doit = netlbl_unlabel_list, .dumpit = NULL, }, @@ -1378,6 +1378,7 @@ static struct genl_family netlbl_unlabel_gnl_family __ro_after_init = { .name = NETLBL_NLTYPE_UNLABELED_NAME, .version = NETLBL_PROTO_VERSION, .maxattr = NLBL_UNLABEL_A_MAX, + .policy = netlbl_unlabel_genl_policy, .module = THIS_MODULE, .ops = netlbl_unlabel_genl_ops, .n_ops = ARRAY_SIZE(netlbl_unlabel_genl_ops), diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index cb69d35c8e6a..efccd1ac9a66 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -536,6 +536,28 @@ static int genl_family_rcv_msg(const struct genl_family *family, if (ops->dumpit == NULL) return -EOPNOTSUPP; + if (!(ops->validate & GENL_DONT_VALIDATE_DUMP)) { + int hdrlen = GENL_HDRLEN + family->hdrsize; + + if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) + return -EINVAL; + + if (family->maxattr) { + unsigned int validate = NL_VALIDATE_STRICT; + + if (ops->validate & + GENL_DONT_VALIDATE_DUMP_STRICT) + validate = NL_VALIDATE_LIBERAL; + rc = __nla_validate(nlmsg_attrdata(nlh, hdrlen), + nlmsg_attrlen(nlh, hdrlen), + family->maxattr, + family->policy, + validate, extack); + if (rc) + return rc; + } + } + if (!family->parallel_ops) { struct netlink_dump_control c = { .module = family->module, @@ -577,8 +599,13 @@ static int genl_family_rcv_msg(const struct genl_family *family, attrbuf = family->attrbuf; if (attrbuf) { - err = nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr, - ops->policy, extack); + enum netlink_validation validate = NL_VALIDATE_STRICT; + + if (ops->validate & GENL_DONT_VALIDATE_STRICT) + validate = NL_VALIDATE_LIBERAL; + + err = __nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr, + family->policy, validate, extack); if (err < 0) goto out; } @@ -665,7 +692,7 @@ static int ctrl_fill_info(const struct genl_family *family, u32 portid, u32 seq, struct nlattr *nla_ops; int i; - nla_ops = nla_nest_start(skb, CTRL_ATTR_OPS); + 
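The genl_family_rcv_msg() hunk above is where those per-op flags take effect: the core now validates dump requests (and, just below, doit requests) against the family policy before dispatching, defaulting to strict and downgrading to liberal only when the op asks for it. Distilled from the hunk, error paths trimmed:

unsigned int validate = NL_VALIDATE_STRICT;

if (ops->validate & GENL_DONT_VALIDATE_DUMP_STRICT)
        validate = NL_VALIDATE_LIBERAL;

rc = __nla_validate(nlmsg_attrdata(nlh, hdrlen),
                    nlmsg_attrlen(nlh, hdrlen),
                    family->maxattr, family->policy,
                    validate, extack);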
nla_ops = nla_nest_start_noflag(skb, CTRL_ATTR_OPS); if (nla_ops == NULL) goto nla_put_failure; @@ -678,10 +705,10 @@ static int ctrl_fill_info(const struct genl_family *family, u32 portid, u32 seq, op_flags |= GENL_CMD_CAP_DUMP; if (ops->doit) op_flags |= GENL_CMD_CAP_DO; - if (ops->policy) + if (family->policy) op_flags |= GENL_CMD_CAP_HASPOL; - nest = nla_nest_start(skb, i + 1); + nest = nla_nest_start_noflag(skb, i + 1); if (nest == NULL) goto nla_put_failure; @@ -699,7 +726,7 @@ static int ctrl_fill_info(const struct genl_family *family, u32 portid, u32 seq, struct nlattr *nla_grps; int i; - nla_grps = nla_nest_start(skb, CTRL_ATTR_MCAST_GROUPS); + nla_grps = nla_nest_start_noflag(skb, CTRL_ATTR_MCAST_GROUPS); if (nla_grps == NULL) goto nla_put_failure; @@ -709,7 +736,7 @@ static int ctrl_fill_info(const struct genl_family *family, u32 portid, u32 seq, grp = &family->mcgrps[i]; - nest = nla_nest_start(skb, i + 1); + nest = nla_nest_start_noflag(skb, i + 1); if (nest == NULL) goto nla_put_failure; @@ -749,11 +776,11 @@ static int ctrl_fill_mcgrp_info(const struct genl_family *family, nla_put_u16(skb, CTRL_ATTR_FAMILY_ID, family->id)) goto nla_put_failure; - nla_grps = nla_nest_start(skb, CTRL_ATTR_MCAST_GROUPS); + nla_grps = nla_nest_start_noflag(skb, CTRL_ATTR_MCAST_GROUPS); if (nla_grps == NULL) goto nla_put_failure; - nest = nla_nest_start(skb, 1); + nest = nla_nest_start_noflag(skb, 1); if (nest == NULL) goto nla_put_failure; @@ -938,9 +965,9 @@ static int genl_ctrl_event(int event, const struct genl_family *family, static const struct genl_ops genl_ctrl_ops[] = { { .cmd = CTRL_CMD_GETFAMILY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ctrl_getfamily, .dumpit = ctrl_dumpfamily, - .policy = ctrl_policy, }, }; @@ -958,6 +985,7 @@ static struct genl_family genl_ctrl __ro_after_init = { .name = "nlctrl", .version = 0x2, .maxattr = CTRL_ATTR_MAX, + .policy = ctrl_policy, .netnsok = true, }; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 71ffd1a6dc7c..167c09e1ea90 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1199,7 +1199,6 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; void __user *argp = (void __user *)arg; - int ret; switch (cmd) { case TIOCOUTQ: { @@ -1225,18 +1224,6 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return put_user(amount, (int __user *)argp); } - case SIOCGSTAMP: - lock_sock(sk); - ret = sock_get_timestamp(sk, argp); - release_sock(sk); - return ret; - - case SIOCGSTAMPNS: - lock_sock(sk); - ret = sock_get_timestampns(sk, argp); - release_sock(sk); - return ret; - case SIOCGIFADDR: case SIOCSIFADDR: case SIOCGIFDSTADDR: @@ -1362,6 +1349,7 @@ static const struct proto_ops nr_proto_ops = { .getname = nr_getname, .poll = datagram_poll, .ioctl = nr_ioctl, + .gettstamp = sock_gettstamp, .listen = nr_listen, .shutdown = sock_no_shutdown, .setsockopt = nr_setsockopt, diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 376181cc1def..04a8e47674ec 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -119,9 +119,10 @@ static struct nfc_dev *__get_device_from_cb(struct netlink_callback *cb) int rc; u32 idx; - rc = nlmsg_parse(cb->nlh, GENL_HDRLEN + nfc_genl_family.hdrsize, - attrbuf, nfc_genl_family.maxattr, nfc_genl_policy, - NULL); + rc = nlmsg_parse_deprecated(cb->nlh, + GENL_HDRLEN + nfc_genl_family.hdrsize, + attrbuf, nfc_genl_family.maxattr, + nfc_genl_policy, NULL); if (rc < 0) return 
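The nla_nest_start() to nla_nest_start_noflag() churn in ctrl_fill_info() above recurs through the rest of this merge, and the reason is wire-format compatibility: nla_nest_start() was repurposed this cycle to set NLA_F_NESTED in the attribute type (which strict validation expects of new interfaces), so every pre-existing dump path switches to the explicit _noflag spelling. A sketch, with hypothetical attribute numbers:

static int demo_fill(struct sk_buff *skb)
{
        /* wire format unchanged: nla_type stays exactly DEMO_A_LIST */
        struct nlattr *nest = nla_nest_start_noflag(skb, DEMO_A_LIST);

        if (!nest)
                return -EMSGSIZE;
        if (nla_put_u32(skb, DEMO_A_LEVEL, 1)) {
                nla_nest_cancel(skb, nest);
                return -EMSGSIZE;
        }
        nla_nest_end(skb, nest);
        /* nla_nest_start() would instead have emitted
         * DEMO_A_LIST | NLA_F_NESTED.
         */
        return 0;
}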
ERR_PTR(rc); @@ -392,7 +393,7 @@ int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list) if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) goto nla_put_failure; - sdp_attr = nla_nest_start(msg, NFC_ATTR_LLC_SDP); + sdp_attr = nla_nest_start_noflag(msg, NFC_ATTR_LLC_SDP); if (sdp_attr == NULL) { rc = -ENOMEM; goto nla_put_failure; @@ -402,7 +403,7 @@ int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list) hlist_for_each_entry_safe(sdres, n, sdres_list, node) { pr_debug("uri: %s, sap: %d\n", sdres->uri, sdres->sap); - uri_attr = nla_nest_start(msg, i++); + uri_attr = nla_nest_start_noflag(msg, i++); if (uri_attr == NULL) { rc = -ENOMEM; goto nla_put_failure; @@ -1177,8 +1178,9 @@ static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info) tlvs_len = 0; nla_for_each_nested(attr, info->attrs[NFC_ATTR_LLC_SDP], rem) { - rc = nla_parse_nested(sdp_attrs, NFC_SDP_ATTR_MAX, attr, - nfc_sdp_genl_policy, info->extack); + rc = nla_parse_nested_deprecated(sdp_attrs, NFC_SDP_ATTR_MAX, + attr, nfc_sdp_genl_policy, + info->extack); if (rc != 0) { rc = -EINVAL; @@ -1667,102 +1669,102 @@ EXPORT_SYMBOL(nfc_vendor_cmd_reply); static const struct genl_ops nfc_genl_ops[] = { { .cmd = NFC_CMD_GET_DEVICE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_get_device, .dumpit = nfc_genl_dump_devices, .done = nfc_genl_dump_devices_done, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_DEV_UP, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_dev_up, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_DEV_DOWN, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_dev_down, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_START_POLL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_start_poll, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_STOP_POLL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_stop_poll, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_DEP_LINK_UP, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_dep_link_up, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_DEP_LINK_DOWN, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_dep_link_down, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_GET_TARGET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = nfc_genl_dump_targets, .done = nfc_genl_dump_targets_done, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_LLC_GET_PARAMS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_llc_get_params, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_LLC_SET_PARAMS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_llc_set_params, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_LLC_SDREQ, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_llc_sdreq, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_FW_DOWNLOAD, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_fw_download, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_ENABLE_SE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_enable_se, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_DISABLE_SE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_disable_se, - .policy = nfc_genl_policy, }, 
{ .cmd = NFC_CMD_GET_SE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = nfc_genl_dump_ses, .done = nfc_genl_dump_ses_done, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_SE_IO, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_se_io, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_ACTIVATE_TARGET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_activate_target, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_VENDOR, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_vendor_cmd, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_DEACTIVATE_TARGET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_deactivate_target, - .policy = nfc_genl_policy, }, }; @@ -1771,6 +1773,7 @@ static struct genl_family nfc_genl_family __ro_after_init = { .name = NFC_GENL_NAME, .version = NFC_GENL_VERSION, .maxattr = NFC_ATTR_MAX, + .policy = nfc_genl_policy, .module = THIS_MODULE, .ops = nfc_genl_ops, .n_ops = ARRAY_SIZE(nfc_genl_ops), diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index e47ebbbe71b8..2c151bb322c1 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -169,6 +169,10 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb, const struct nlattr *actions, int len, bool last, bool clone_flow_key); +static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, + struct sw_flow_key *key, + const struct nlattr *attr, int len); + static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr, __be16 ethertype) { @@ -1213,6 +1217,40 @@ static int execute_recirc(struct datapath *dp, struct sk_buff *skb, return clone_execute(dp, skb, key, recirc_id, NULL, 0, last, true); } +static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb, + struct sw_flow_key *key, + const struct nlattr *attr, bool last) +{ + const struct nlattr *actions, *cpl_arg; + const struct check_pkt_len_arg *arg; + int rem = nla_len(attr); + bool clone_flow_key; + + /* The first netlink attribute in 'attr' is always + * 'OVS_CHECK_PKT_LEN_ATTR_ARG'. + */ + cpl_arg = nla_data(attr); + arg = nla_data(cpl_arg); + + if (skb->len <= arg->pkt_len) { + /* Second netlink attribute in 'attr' is always + * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL'. + */ + actions = nla_next(cpl_arg, &rem); + clone_flow_key = !arg->exec_for_lesser_equal; + } else { + /* Third netlink attribute in 'attr' is always + * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER'. + */ + actions = nla_next(cpl_arg, &rem); + actions = nla_next(actions, &rem); + clone_flow_key = !arg->exec_for_greater; + } + + return clone_execute(dp, skb, key, 0, nla_data(actions), + nla_len(actions), last, clone_flow_key); +} + /* Execute a list of actions against 'skb'. 
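execute_check_pkt_len() above relies on a fixed layout inside OVS_ACTION_ATTR_CHECK_PKT_LEN -- the argument struct first, then the two nested action lists in a fixed order -- so branch selection is just one or two nla_next() steps. Condensed from the function (names as in the hunk):

const struct nlattr *arg_attr = nla_data(attr); /* ..._ATTR_ARG */
const struct check_pkt_len_arg *arg = nla_data(arg_attr);
int rem = nla_len(attr);
const struct nlattr *acts;

acts = nla_next(arg_attr, &rem);        /* ..._ACTIONS_IF_LESS_EQUAL */
if (skb->len > arg->pkt_len)
        acts = nla_next(acts, &rem);    /* ..._ACTIONS_IF_GREATER */
/* 'acts' is now the branch handed to clone_execute() */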
*/ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, @@ -1374,6 +1412,16 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, break; } + + case OVS_ACTION_ATTR_CHECK_PKT_LEN: { + bool last = nla_is_last(a, rem); + + err = execute_check_pkt_len(dp, skb, key, a, last); + if (last) + return err; + + break; + } } if (unlikely(err)) { diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 1b6896896fff..333ec5f298fe 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -24,11 +24,12 @@ #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_labels.h> #include <net/netfilter/nf_conntrack_seqadj.h> +#include <net/netfilter/nf_conntrack_timeout.h> #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> #include <net/ipv6_frag.h> -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) #include <net/netfilter/nf_nat.h> #endif @@ -73,7 +74,8 @@ struct ovs_conntrack_info { u32 eventmask; /* Mask of 1 << IPCT_*. */ struct md_mark mark; struct md_labels labels; -#ifdef CONFIG_NF_NAT_NEEDED + char timeout[CTNL_TIMEOUT_NAME_MAX]; +#if IS_ENABLED(CONFIG_NF_NAT) struct nf_nat_range2 range; /* Only present for SRC NAT and DST NAT. */ #endif }; @@ -719,7 +721,7 @@ static bool skb_nfct_cached(struct net *net, return ct_executed; } -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) /* Modelled after nf_nat_ipv[46]_fn(). * range is only used for new, uninitialized NAT state. * Returns either NF_ACCEPT or NF_DROP. @@ -901,7 +903,7 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, return err; } -#else /* !CONFIG_NF_NAT_NEEDED */ +#else /* !CONFIG_NF_NAT */ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, const struct ovs_conntrack_info *info, struct sk_buff *skb, struct nf_conn *ct, @@ -990,6 +992,12 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, GFP_ATOMIC); if (err) return err; + + /* helper installed, add seqadj if NAT is required */ + if (info->nat && !nfct_seqadj(ct)) { + if (!nfct_seqadj_ext_add(ct)) + return -EINVAL; + } } /* Call the helper only if: @@ -1299,6 +1307,7 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name, { struct nf_conntrack_helper *helper; struct nf_conn_help *help; + int ret = 0; helper = nf_conntrack_helper_try_module_get(name, info->family, key->ip.proto); @@ -1313,16 +1322,24 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name, return -ENOMEM; } +#ifdef CONFIG_NF_NAT_NEEDED + if (info->nat) { + ret = nf_nat_helper_try_module_get(name, info->family, + key->ip.proto); + if (ret) { + nf_conntrack_helper_put(helper); + OVS_NLERR(log, "Failed to load \"%s\" NAT helper, error: %d", + name, ret); + return ret; + } + } +#endif rcu_assign_pointer(help->helper, helper); info->helper = helper; - - if (info->nat) - request_module("ip_nat_%s", name); - - return 0; + return ret; } -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) static int parse_nat(const struct nlattr *attr, struct ovs_conntrack_info *info, bool log) { @@ -1459,12 +1476,14 @@ static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { .maxlen = sizeof(struct md_labels) }, [OVS_CT_ATTR_HELPER] = { .minlen = 1, .maxlen = NF_CT_HELPER_NAME_LEN }, -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) /* NAT length is checked when parsing the nested attributes. 
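The CONFIG_NF_NAT_NEEDED to IS_ENABLED(CONFIG_NF_NAT) swaps in conntrack.c appear to track the NAT core becoming a single tristate this cycle: a bare #ifdef CONFIG_NF_NAT only fires for a built-in (=y) NAT, while IS_ENABLED() also covers the modular build. (Two #ifdef CONFIG_NF_NAT_NEEDED blocks survive below, added by a patch that crossed this conversion.) In brief:

#include <linux/kconfig.h>

#if IS_ENABLED(CONFIG_NF_NAT)   /* true for CONFIG_NF_NAT=y and =m */
/* NAT hooks compiled in */
#else
/* NAT compiled out: the ovs_ct_nat() path is stubbed */
#endif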
*/ [OVS_CT_ATTR_NAT] = { .minlen = 0, .maxlen = INT_MAX }, #endif [OVS_CT_ATTR_EVENTMASK] = { .minlen = sizeof(u32), .maxlen = sizeof(u32) }, + [OVS_CT_ATTR_TIMEOUT] = { .minlen = 1, + .maxlen = CTNL_TIMEOUT_NAME_MAX }, }; static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, @@ -1537,7 +1556,7 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, return -EINVAL; } break; -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) case OVS_CT_ATTR_NAT: { int err = parse_nat(a, info, log); @@ -1550,6 +1569,15 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, info->have_eventmask = true; info->eventmask = nla_get_u32(a); break; +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT + case OVS_CT_ATTR_TIMEOUT: + memcpy(info->timeout, nla_data(a), nla_len(a)); + if (!memchr(info->timeout, '\0', nla_len(a))) { + OVS_NLERR(log, "Invalid conntrack helper"); + return -EINVAL; + } + break; +#endif default: OVS_NLERR(log, "Unknown conntrack attr (%d)", @@ -1631,6 +1659,14 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, OVS_NLERR(log, "Failed to allocate conntrack template"); return -ENOMEM; } + + if (ct_info.timeout[0]) { + if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto, + ct_info.timeout)) + pr_info_ratelimited("Failed to associated timeout " + "policy `%s'\n", ct_info.timeout); + } + if (helper) { err = ovs_ct_add_helper(&ct_info, helper, key, log); if (err) @@ -1650,13 +1686,13 @@ err_free_ct: return err; } -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info, struct sk_buff *skb) { struct nlattr *start; - start = nla_nest_start(skb, OVS_CT_ATTR_NAT); + start = nla_nest_start_noflag(skb, OVS_CT_ATTR_NAT); if (!start) return false; @@ -1723,7 +1759,7 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, { struct nlattr *start; - start = nla_nest_start(skb, OVS_ACTION_ATTR_CT); + start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_CT); if (!start) return -EMSGSIZE; @@ -1751,8 +1787,12 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, if (ct_info->have_eventmask && nla_put_u32(skb, OVS_CT_ATTR_EVENTMASK, ct_info->eventmask)) return -EMSGSIZE; + if (ct_info->timeout[0]) { + if (nla_put_string(skb, OVS_CT_ATTR_TIMEOUT, ct_info->timeout)) + return -EMSGSIZE; + } -#ifdef CONFIG_NF_NAT_NEEDED +#if IS_ENABLED(CONFIG_NF_NAT) if (ct_info->nat && !ovs_ct_nat_to_attr(ct_info, skb)) return -EMSGSIZE; #endif @@ -1770,10 +1810,18 @@ void ovs_ct_free_action(const struct nlattr *a) static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info) { - if (ct_info->helper) + if (ct_info->helper) { +#ifdef CONFIG_NF_NAT_NEEDED + if (ct_info->nat) + nf_nat_helper_put(ct_info->helper); +#endif nf_conntrack_helper_put(ct_info->helper); - if (ct_info->ct) + } + if (ct_info->ct) { + if (ct_info->timeout[0]) + nf_ct_destroy_timeout(ct_info->ct); nf_ct_tmpl_free(ct_info->ct); + } } #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) @@ -2126,7 +2174,11 @@ static int ovs_ct_limit_cmd_get(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(reply)) return PTR_ERR(reply); - nla_reply = nla_nest_start(reply, OVS_CT_LIMIT_ATTR_ZONE_LIMIT); + nla_reply = nla_nest_start_noflag(reply, OVS_CT_LIMIT_ATTR_ZONE_LIMIT); + if (!nla_reply) { + err = -EMSGSIZE; + goto exit_err; + } if (a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]) { err = ovs_ct_limit_get_zone_limit( @@ -2152,20 +2204,20 @@ exit_err: static struct genl_ops 
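The new OVS_CT_ATTR_TIMEOUT parsing above shows the usual two-step for accepting a user-supplied name: the length table caps nla_len() at the buffer size, then memchr() proves the payload really contains a terminator before anything treats it as a C string. In isolation:

char timeout[CTNL_TIMEOUT_NAME_MAX];

/* nla_len(a) <= CTNL_TIMEOUT_NAME_MAX is guaranteed by the
 * ovs_ct_attr_lens[] entry added above.
 */
memcpy(timeout, nla_data(a), nla_len(a));
if (!memchr(timeout, '\0', nla_len(a)))
        return -EINVAL;         /* unterminated name, reject */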
ct_limit_genl_ops[] = { { .cmd = OVS_CT_LIMIT_CMD_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN * privilege. */ - .policy = ct_limit_policy, .doit = ovs_ct_limit_cmd_set, }, { .cmd = OVS_CT_LIMIT_CMD_DEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN * privilege. */ - .policy = ct_limit_policy, .doit = ovs_ct_limit_cmd_del, }, { .cmd = OVS_CT_LIMIT_CMD_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, /* OK for unprivileged users. */ - .policy = ct_limit_policy, .doit = ovs_ct_limit_cmd_get, }, }; @@ -2179,6 +2231,7 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = { .name = OVS_CT_LIMIT_FAMILY, .version = OVS_CT_LIMIT_VERSION, .maxattr = OVS_CT_LIMIT_ATTR_MAX, + .policy = ct_limit_policy, .netnsok = true, .parallel_ops = true, .ops = ct_limit_genl_ops, diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 9dd158ab51b3..dc9ff9367221 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -455,7 +455,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, upcall->dp_ifindex = dp_ifindex; err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb); - BUG_ON(err); + if (err) + goto out; if (upcall_info->userdata) __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA, @@ -463,19 +464,22 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, nla_data(upcall_info->userdata)); if (upcall_info->egress_tun_info) { - nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY); + nla = nla_nest_start_noflag(user_skb, + OVS_PACKET_ATTR_EGRESS_TUN_KEY); if (!nla) { err = -EMSGSIZE; goto out; } err = ovs_nla_put_tunnel_info(user_skb, upcall_info->egress_tun_info); - BUG_ON(err); + if (err) + goto out; + nla_nest_end(user_skb, nla); } if (upcall_info->actions_len) { - nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS); + nla = nla_nest_start_noflag(user_skb, OVS_PACKET_ATTR_ACTIONS); if (!nla) { err = -EMSGSIZE; goto out; @@ -638,8 +642,8 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { static const struct genl_ops dp_packet_genl_ops[] = { { .cmd = OVS_PACKET_CMD_EXECUTE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = packet_policy, .doit = ovs_packet_cmd_execute } }; @@ -649,6 +653,7 @@ static struct genl_family dp_packet_genl_family __ro_after_init = { .name = OVS_PACKET_FAMILY, .version = OVS_PACKET_VERSION, .maxattr = OVS_PACKET_ATTR_MAX, + .policy = packet_policy, .netnsok = true, .parallel_ops = true, .ops = dp_packet_genl_ops, @@ -776,7 +781,7 @@ static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow, * This can only fail for dump operations because the skb is always * properly sized for single flows. 
*/ - start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS); + start = nla_nest_start_noflag(skb, OVS_FLOW_ATTR_ACTIONS); if (start) { const struct sw_flow_actions *sf_acts; @@ -1374,8 +1379,8 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) u32 ufid_flags; int err; - err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a, - OVS_FLOW_ATTR_MAX, flow_policy, NULL); + err = genlmsg_parse_deprecated(cb->nlh, &dp_flow_genl_family, a, + OVS_FLOW_ATTR_MAX, flow_policy, NULL); if (err) return err; ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); @@ -1423,24 +1428,24 @@ static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { static const struct genl_ops dp_flow_genl_ops[] = { { .cmd = OVS_FLOW_CMD_NEW, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = flow_policy, .doit = ovs_flow_cmd_new }, { .cmd = OVS_FLOW_CMD_DEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = flow_policy, .doit = ovs_flow_cmd_del }, { .cmd = OVS_FLOW_CMD_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, /* OK for unprivileged users. */ - .policy = flow_policy, .doit = ovs_flow_cmd_get, .dumpit = ovs_flow_cmd_dump }, { .cmd = OVS_FLOW_CMD_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = flow_policy, .doit = ovs_flow_cmd_set, }, }; @@ -1450,6 +1455,7 @@ static struct genl_family dp_flow_genl_family __ro_after_init = { .name = OVS_FLOW_FAMILY, .version = OVS_FLOW_VERSION, .maxattr = OVS_FLOW_ATTR_MAX, + .policy = flow_policy, .netnsok = true, .parallel_ops = true, .ops = dp_flow_genl_ops, @@ -1816,24 +1822,24 @@ static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { static const struct genl_ops dp_datapath_genl_ops[] = { { .cmd = OVS_DP_CMD_NEW, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = datapath_policy, .doit = ovs_dp_cmd_new }, { .cmd = OVS_DP_CMD_DEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = datapath_policy, .doit = ovs_dp_cmd_del }, { .cmd = OVS_DP_CMD_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, /* OK for unprivileged users. */ - .policy = datapath_policy, .doit = ovs_dp_cmd_get, .dumpit = ovs_dp_cmd_dump }, { .cmd = OVS_DP_CMD_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = datapath_policy, .doit = ovs_dp_cmd_set, }, }; @@ -1843,6 +1849,7 @@ static struct genl_family dp_datapath_genl_family __ro_after_init = { .name = OVS_DATAPATH_FAMILY, .version = OVS_DATAPATH_VERSION, .maxattr = OVS_DP_ATTR_MAX, + .policy = datapath_policy, .netnsok = true, .parallel_ops = true, .ops = dp_datapath_genl_ops, @@ -2259,24 +2266,24 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { static const struct genl_ops dp_vport_genl_ops[] = { { .cmd = OVS_VPORT_CMD_NEW, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. 
*/ - .policy = vport_policy, .doit = ovs_vport_cmd_new }, { .cmd = OVS_VPORT_CMD_DEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = vport_policy, .doit = ovs_vport_cmd_del }, { .cmd = OVS_VPORT_CMD_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, /* OK for unprivileged users. */ - .policy = vport_policy, .doit = ovs_vport_cmd_get, .dumpit = ovs_vport_cmd_dump }, { .cmd = OVS_VPORT_CMD_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = vport_policy, .doit = ovs_vport_cmd_set, }, }; @@ -2286,6 +2293,7 @@ struct genl_family dp_vport_genl_family __ro_after_init = { .name = OVS_VPORT_FAMILY, .version = OVS_VPORT_VERSION, .maxattr = OVS_VPORT_ATTR_MAX, + .policy = vport_policy, .netnsok = true, .parallel_ops = true, .ops = dp_vport_genl_ops, diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 4bdf5e3ac208..54eb80dd2dc6 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -91,6 +91,7 @@ static bool actions_may_change_flow(const struct nlattr *actions) case OVS_ACTION_ATTR_SET: case OVS_ACTION_ATTR_SET_MASKED: case OVS_ACTION_ATTR_METER: + case OVS_ACTION_ATTR_CHECK_PKT_LEN: default: return true; } @@ -403,6 +404,7 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) }, [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS] = { .len = OVS_ATTR_VARIABLE }, + [OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE] = { .len = 0 }, }; static const struct ovs_len_tbl @@ -666,6 +668,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, bool log) { bool ttl = false, ipv4 = false, ipv6 = false; + bool info_bridge_mode = false; __be16 tun_flags = 0; int opts_type = 0; struct nlattr *a; @@ -782,6 +785,10 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, tun_flags |= TUNNEL_ERSPAN_OPT; opts_type = type; break; + case OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE: + info_bridge_mode = true; + ipv4 = true; + break; default: OVS_NLERR(log, "Unknown IP tunnel attribute %d", type); @@ -812,16 +819,29 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, OVS_NLERR(log, "IP tunnel dst address not specified"); return -EINVAL; } - if (ipv4 && !match->key->tun_key.u.ipv4.dst) { - OVS_NLERR(log, "IPv4 tunnel dst address is zero"); - return -EINVAL; + if (ipv4) { + if (info_bridge_mode) { + if (match->key->tun_key.u.ipv4.src || + match->key->tun_key.u.ipv4.dst || + match->key->tun_key.tp_src || + match->key->tun_key.tp_dst || + match->key->tun_key.ttl || + match->key->tun_key.tos || + tun_flags & ~TUNNEL_KEY) { + OVS_NLERR(log, "IPv4 tun info is not correct"); + return -EINVAL; + } + } else if (!match->key->tun_key.u.ipv4.dst) { + OVS_NLERR(log, "IPv4 tunnel dst address is zero"); + return -EINVAL; + } } if (ipv6 && ipv6_addr_any(&match->key->tun_key.u.ipv6.dst)) { OVS_NLERR(log, "IPv6 tunnel dst address is zero"); return -EINVAL; } - if (!ttl) { + if (!ttl && !info_bridge_mode) { OVS_NLERR(log, "IP tunnel TTL not specified."); return -EINVAL; } @@ -836,7 +856,7 @@ static int vxlan_opt_to_nlattr(struct sk_buff *skb, const struct vxlan_metadata *opts = tun_opts; struct nlattr *nla; - nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS); + nla = nla_nest_start_noflag(skb, 
OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS); if (!nla) return -EMSGSIZE; @@ -850,12 +870,17 @@ static int vxlan_opt_to_nlattr(struct sk_buff *skb, static int __ip_tun_to_nlattr(struct sk_buff *skb, const struct ip_tunnel_key *output, const void *tun_opts, int swkey_tun_opts_len, - unsigned short tun_proto) + unsigned short tun_proto, u8 mode) { if (output->tun_flags & TUNNEL_KEY && nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id, OVS_TUNNEL_KEY_ATTR_PAD)) return -EMSGSIZE; + + if (mode & IP_TUNNEL_INFO_BRIDGE) + return nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE) + ? -EMSGSIZE : 0; + switch (tun_proto) { case AF_INET: if (output->u.ipv4.src && @@ -918,17 +943,17 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb, static int ip_tun_to_nlattr(struct sk_buff *skb, const struct ip_tunnel_key *output, const void *tun_opts, int swkey_tun_opts_len, - unsigned short tun_proto) + unsigned short tun_proto, u8 mode) { struct nlattr *nla; int err; - nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL); + nla = nla_nest_start_noflag(skb, OVS_KEY_ATTR_TUNNEL); if (!nla) return -EMSGSIZE; err = __ip_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len, - tun_proto); + tun_proto, mode); if (err) return err; @@ -942,7 +967,7 @@ int ovs_nla_put_tunnel_info(struct sk_buff *skb, return __ip_tun_to_nlattr(skb, &tun_info->key, ip_tunnel_info_opts(tun_info), tun_info->options_len, - ip_tunnel_info_af(tun_info)); + ip_tunnel_info_af(tun_info), tun_info->mode); } static int encode_vlan_from_nlattrs(struct sw_flow_match *match, @@ -1932,7 +1957,7 @@ static int nsh_key_to_nlattr(const struct ovs_key_nsh *nsh, bool is_mask, { struct nlattr *start; - start = nla_nest_start(skb, OVS_KEY_ATTR_NSH); + start = nla_nest_start_noflag(skb, OVS_KEY_ATTR_NSH); if (!start) return -EMSGSIZE; @@ -1980,7 +2005,7 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len); if (ip_tun_to_nlattr(skb, &output->tun_key, opts, - swkey->tun_opts_len, swkey->tun_proto)) + swkey->tun_opts_len, swkey->tun_proto, 0)) goto nla_put_failure; } @@ -2015,14 +2040,15 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) { if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask)) goto nla_put_failure; - encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); + encap = nla_nest_start_noflag(skb, OVS_KEY_ATTR_ENCAP); if (!swkey->eth.vlan.tci) goto unencap; if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) { if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask)) goto nla_put_failure; - in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); + in_encap = nla_nest_start_noflag(skb, + OVS_KEY_ATTR_ENCAP); if (!swkey->eth.cvlan.tci) goto unencap; } @@ -2201,7 +2227,7 @@ int ovs_nla_put_key(const struct sw_flow_key *swkey, int err; struct nlattr *nla; - nla = nla_nest_start(skb, attr); + nla = nla_nest_start_noflag(skb, attr); if (!nla) return -EMSGSIZE; err = __ovs_nla_put_key(swkey, output, is_mask, skb); @@ -2605,6 +2631,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, tun_info->mode = IP_TUNNEL_INFO_TX; if (key.tun_proto == AF_INET6) tun_info->mode |= IP_TUNNEL_INFO_IPV6; + else if (key.tun_proto == AF_INET && key.tun_key.u.ipv4.dst == 0) + tun_info->mode |= IP_TUNNEL_INFO_BRIDGE; tun_info->key = key.tun_key; /* We need to store the options in the action itself since @@ -2826,8 +2854,8 @@ static int validate_userspace(const struct nlattr *attr) struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; int error; - 
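The IPV4_INFO_BRIDGE plumbing above runs in both directions: on the set-tunnel path an all-zero IPv4 destination marks the metadata as bridge mode, and on the dump path bridge-mode tunnel info collapses to a single flag instead of the usual address/TTL attributes. The two key branches, condensed from the hunks:

/* validate_and_copy_set_tun(): infer bridge mode from a zero dst */
if (key.tun_proto == AF_INET && key.tun_key.u.ipv4.dst == 0)
        tun_info->mode |= IP_TUNNEL_INFO_BRIDGE;

/* __ip_tun_to_nlattr(): bridge mode serializes as one flag */
if (mode & IP_TUNNEL_INFO_BRIDGE)
        return nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE)
               ? -EMSGSIZE : 0;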
error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, attr, - userspace_policy, NULL); + error = nla_parse_nested_deprecated(a, OVS_USERSPACE_ATTR_MAX, attr, + userspace_policy, NULL); if (error) return error; @@ -2838,6 +2866,88 @@ static int validate_userspace(const struct nlattr *attr) return 0; } +static const struct nla_policy cpl_policy[OVS_CHECK_PKT_LEN_ATTR_MAX + 1] = { + [OVS_CHECK_PKT_LEN_ATTR_PKT_LEN] = {.type = NLA_U16 }, + [OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER] = {.type = NLA_NESTED }, + [OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL] = {.type = NLA_NESTED }, +}; + +static int validate_and_copy_check_pkt_len(struct net *net, + const struct nlattr *attr, + const struct sw_flow_key *key, + struct sw_flow_actions **sfa, + __be16 eth_type, __be16 vlan_tci, + bool log, bool last) +{ + const struct nlattr *acts_if_greater, *acts_if_lesser_eq; + struct nlattr *a[OVS_CHECK_PKT_LEN_ATTR_MAX + 1]; + struct check_pkt_len_arg arg; + int nested_acts_start; + int start, err; + + err = nla_parse_deprecated_strict(a, OVS_CHECK_PKT_LEN_ATTR_MAX, + nla_data(attr), nla_len(attr), + cpl_policy, NULL); + if (err) + return err; + + if (!a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN] || + !nla_get_u16(a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN])) + return -EINVAL; + + acts_if_lesser_eq = a[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL]; + acts_if_greater = a[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER]; + + /* Both the nested action should be present. */ + if (!acts_if_greater || !acts_if_lesser_eq) + return -EINVAL; + + /* validation done, copy the nested actions. */ + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_CHECK_PKT_LEN, + log); + if (start < 0) + return start; + + arg.pkt_len = nla_get_u16(a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN]); + arg.exec_for_lesser_equal = + last || !actions_may_change_flow(acts_if_lesser_eq); + arg.exec_for_greater = + last || !actions_may_change_flow(acts_if_greater); + + err = ovs_nla_add_action(sfa, OVS_CHECK_PKT_LEN_ATTR_ARG, &arg, + sizeof(arg), log); + if (err) + return err; + + nested_acts_start = add_nested_action_start(sfa, + OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL, log); + if (nested_acts_start < 0) + return nested_acts_start; + + err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa, + eth_type, vlan_tci, log); + + if (err) + return err; + + add_nested_action_end(*sfa, nested_acts_start); + + nested_acts_start = add_nested_action_start(sfa, + OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER, log); + if (nested_acts_start < 0) + return nested_acts_start; + + err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa, + eth_type, vlan_tci, log); + + if (err) + return err; + + add_nested_action_end(*sfa, nested_acts_start); + add_nested_action_end(*sfa, start); + return 0; +} + static int copy_action(const struct nlattr *from, struct sw_flow_actions **sfa, bool log) { @@ -2884,6 +2994,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, [OVS_ACTION_ATTR_POP_NSH] = 0, [OVS_ACTION_ATTR_METER] = sizeof(u32), [OVS_ACTION_ATTR_CLONE] = (u32)-1, + [OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1, }; const struct ovs_action_push_vlan *vlan; int type = nla_type(a); @@ -3085,6 +3196,19 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, break; } + case OVS_ACTION_ATTR_CHECK_PKT_LEN: { + bool last = nla_is_last(a, rem); + + err = validate_and_copy_check_pkt_len(net, a, key, sfa, + eth_type, + vlan_tci, log, + last); + if (err) + return err; + skip_copy = true; + break; + } + default: OVS_NLERR(log, "Unknown Action type %d", type); 
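validate_and_copy_check_pkt_len() above uses nla_parse_deprecated_strict(), the middle rung of the new validation ladder. Roughly, as this cycle's include/net/netlink.h defines the tiers (summarized, not verbatim):

/* Loosest to strictest:
 *   NL_VALIDATE_LIBERAL           - pre-existing behaviour: ignore
 *                                   unknown types and trailing data
 *   NL_VALIDATE_DEPRECATED_STRICT - reject trailing data and attribute
 *                                   types above maxtype
 *   NL_VALIDATE_STRICT            - additionally enforce policy ranges,
 *                                   NLA_F_NESTED, and reject the
 *                                   reserved attribute 0
 */
err = nla_parse_deprecated_strict(a, OVS_CHECK_PKT_LEN_ATTR_MAX,
                                  nla_data(attr), nla_len(attr),
                                  cpl_policy, NULL);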
return -EINVAL; @@ -3130,7 +3254,7 @@ static int sample_action_to_attr(const struct nlattr *attr, const struct sample_arg *arg; struct nlattr *actions; - start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE); + start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_SAMPLE); if (!start) return -EMSGSIZE; @@ -3143,7 +3267,7 @@ static int sample_action_to_attr(const struct nlattr *attr, goto out; } - ac_start = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS); + ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS); if (!ac_start) { err = -EMSGSIZE; goto out; @@ -3169,7 +3293,7 @@ static int clone_action_to_attr(const struct nlattr *attr, struct nlattr *start; int err = 0, rem = nla_len(attr); - start = nla_nest_start(skb, OVS_ACTION_ATTR_CLONE); + start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_CLONE); if (!start) return -EMSGSIZE; @@ -3183,6 +3307,75 @@ static int clone_action_to_attr(const struct nlattr *attr, return err; } +static int check_pkt_len_action_to_attr(const struct nlattr *attr, + struct sk_buff *skb) +{ + struct nlattr *start, *ac_start = NULL; + const struct check_pkt_len_arg *arg; + const struct nlattr *a, *cpl_arg; + int err = 0, rem = nla_len(attr); + + start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_CHECK_PKT_LEN); + if (!start) + return -EMSGSIZE; + + /* The first nested attribute in 'attr' is always + * 'OVS_CHECK_PKT_LEN_ATTR_ARG'. + */ + cpl_arg = nla_data(attr); + arg = nla_data(cpl_arg); + + if (nla_put_u16(skb, OVS_CHECK_PKT_LEN_ATTR_PKT_LEN, arg->pkt_len)) { + err = -EMSGSIZE; + goto out; + } + + /* Second nested attribute in 'attr' is always + * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL'. + */ + a = nla_next(cpl_arg, &rem); + ac_start = nla_nest_start_noflag(skb, + OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL); + if (!ac_start) { + err = -EMSGSIZE; + goto out; + } + + err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb); + if (err) { + nla_nest_cancel(skb, ac_start); + goto out; + } else { + nla_nest_end(skb, ac_start); + } + + /* Third nested attribute in 'attr' is always + * OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER. + */ + a = nla_next(a, &rem); + ac_start = nla_nest_start_noflag(skb, + OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER); + if (!ac_start) { + err = -EMSGSIZE; + goto out; + } + + err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb); + if (err) { + nla_nest_cancel(skb, ac_start); + goto out; + } else { + nla_nest_end(skb, ac_start); + } + + nla_nest_end(skb, start); + return 0; + +out: + nla_nest_cancel(skb, start); + return err; +} + static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) { const struct nlattr *ovs_key = nla_data(a); @@ -3195,14 +3388,14 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) struct ovs_tunnel_info *ovs_tun = nla_data(ovs_key); struct ip_tunnel_info *tun_info = &ovs_tun->tun_dst->u.tun_info; - start = nla_nest_start(skb, OVS_ACTION_ATTR_SET); + start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_SET); if (!start) return -EMSGSIZE; err = ip_tun_to_nlattr(skb, &tun_info->key, ip_tunnel_info_opts(tun_info), tun_info->options_len, - ip_tunnel_info_af(tun_info)); + ip_tunnel_info_af(tun_info), tun_info->mode); if (err) return err; nla_nest_end(skb, start); @@ -3227,7 +3420,7 @@ static int masked_set_action_to_set_action_attr(const struct nlattr *a, /* Revert the conversion we did from a non-masked set action to * masked set action. 
*/ - nla = nla_nest_start(skb, OVS_ACTION_ATTR_SET); + nla = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_SET); if (!nla) return -EMSGSIZE; @@ -3277,6 +3470,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) return err; break; + case OVS_ACTION_ATTR_CHECK_PKT_LEN: + err = check_pkt_len_action_to_attr(a, skb); + if (err) + return err; + break; + default: if (nla_put(skb, type, nla_len(a), nla_data(a))) return -EMSGSIZE; diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c index 43849d752a1e..bb67238f0340 100644 --- a/net/openvswitch/meter.c +++ b/net/openvswitch/meter.c @@ -127,7 +127,7 @@ static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id, OVS_METER_ATTR_PAD)) goto error; - nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS); + nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS); if (!nla) goto error; @@ -136,7 +136,7 @@ static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id, for (i = 0; i < meter->n_bands; ++i, ++band) { struct nlattr *band_nla; - band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC); + band_nla = nla_nest_start_noflag(reply, OVS_BAND_ATTR_UNSPEC); if (!band_nla || nla_put(reply, OVS_BAND_ATTR_STATS, sizeof(struct ovs_flow_stats), &band->stats)) @@ -166,11 +166,11 @@ static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info) nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS)) goto nla_put_failure; - nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS); + nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS); if (!nla) goto nla_put_failure; - band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC); + band_nla = nla_nest_start_noflag(reply, OVS_BAND_ATTR_UNSPEC); if (!band_nla) goto nla_put_failure; /* Currently only DROP band type is supported. */ @@ -227,9 +227,9 @@ static struct dp_meter *dp_meter_create(struct nlattr **a) struct nlattr *attr[OVS_BAND_ATTR_MAX + 1]; u32 band_max_delta_t; - err = nla_parse((struct nlattr **)&attr, OVS_BAND_ATTR_MAX, - nla_data(nla), nla_len(nla), band_policy, - NULL); + err = nla_parse_deprecated((struct nlattr **)&attr, + OVS_BAND_ATTR_MAX, nla_data(nla), + nla_len(nla), band_policy, NULL); if (err) goto exit_free_meter; @@ -526,27 +526,27 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb, static struct genl_ops dp_meter_genl_ops[] = { { .cmd = OVS_METER_CMD_FEATURES, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, /* OK for unprivileged users. */ - .policy = meter_policy, .doit = ovs_meter_cmd_features }, { .cmd = OVS_METER_CMD_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN * privilege. */ - .policy = meter_policy, .doit = ovs_meter_cmd_set, }, { .cmd = OVS_METER_CMD_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, /* OK for unprivileged users. */ - .policy = meter_policy, .doit = ovs_meter_cmd_get, }, { .cmd = OVS_METER_CMD_DEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN * privilege. 
*/ - .policy = meter_policy, .doit = ovs_meter_cmd_del }, }; @@ -560,6 +560,7 @@ struct genl_family dp_meter_genl_family __ro_after_init = { .name = OVS_METER_FAMILY, .version = OVS_METER_VERSION, .maxattr = OVS_METER_ATTR_MAX, + .policy = meter_policy, .netnsok = true, .parallel_ops = true, .ops = dp_meter_genl_ops, diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 8f16f11f7ad3..f3c54871f9e1 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -43,7 +43,7 @@ static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb) if (vxlan->cfg.flags & VXLAN_F_GBP) { struct nlattr *exts; - exts = nla_nest_start(skb, OVS_TUNNEL_ATTR_EXTENSION); + exts = nla_nest_start_noflag(skb, OVS_TUNNEL_ATTR_EXTENSION); if (!exts) return -EMSGSIZE; @@ -70,8 +70,8 @@ static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr, if (nla_len(attr) < sizeof(struct nlattr)) return -EINVAL; - err = nla_parse_nested(exts, OVS_VXLAN_EXT_MAX, attr, exts_policy, - NULL); + err = nla_parse_nested_deprecated(exts, OVS_VXLAN_EXT_MAX, attr, + exts_policy, NULL); if (err < 0) return err; diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 19f6765566e7..258ce3b7b452 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -319,7 +319,7 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb) if (!vport->ops->get_options) return 0; - nla = nla_nest_start(skb, OVS_VPORT_ATTR_OPTIONS); + nla = nla_nest_start_noflag(skb, OVS_VPORT_ATTR_OPTIONS); if (!nla) return -EMSGSIZE; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 9b81813dd16a..90d4e3ce00e5 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -275,24 +275,22 @@ static bool packet_use_direct_xmit(const struct packet_sock *po) return po->xmit == packet_direct_xmit; } -static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev) -{ - return dev_pick_tx_cpu_id(dev, skb, sb_dev, NULL); -} - static u16 packet_pick_tx_queue(struct sk_buff *skb) { struct net_device *dev = skb->dev; const struct net_device_ops *ops = dev->netdev_ops; + int cpu = raw_smp_processor_id(); u16 queue_index; +#ifdef CONFIG_XPS + skb->sender_cpu = cpu + 1; +#endif + skb_record_rx_queue(skb, cpu % dev->real_num_tx_queues); if (ops->ndo_select_queue) { - queue_index = ops->ndo_select_queue(dev, skb, NULL, - __packet_pick_tx_queue); + queue_index = ops->ndo_select_queue(dev, skb, NULL); queue_index = netdev_cap_txqueue(dev, queue_index); } else { - queue_index = __packet_pick_tx_queue(dev, skb, NULL); + queue_index = netdev_pick_tx(dev, skb, NULL); } return queue_index; @@ -4090,11 +4088,6 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd, spin_unlock_bh(&sk->sk_receive_queue.lock); return put_user(amount, (int __user *)arg); } - case SIOCGSTAMP: - return sock_get_timestamp(sk, (struct timeval __user *)arg); - case SIOCGSTAMPNS: - return sock_get_timestampns(sk, (struct timespec __user *)arg); - #ifdef CONFIG_INET case SIOCADDRT: case SIOCDELRT: @@ -4470,6 +4463,7 @@ static const struct proto_ops packet_ops_spkt = { .getname = packet_getname_spkt, .poll = datagram_poll, .ioctl = packet_ioctl, + .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = sock_no_setsockopt, @@ -4491,6 +4485,7 @@ static const struct proto_ops packet_ops = { .getname = packet_getname, .poll = packet_poll, .ioctl = packet_ioctl, + .gettstamp = sock_gettstamp, 
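The packet_pick_tx_queue() rework just below rides on this cycle's ndo_select_queue() signature change: the fallback callback parameter is gone, and code that used to invoke the fallback calls the exported netdev_pick_tx() helper instead. A driver-side sketch, with a hypothetical queue-pinning rule:

static u16 demo_select_queue(struct net_device *dev, struct sk_buff *skb,
                             struct net_device *sb_dev)
{
        /* hypothetical driver policy: pin control traffic to queue 0 */
        if (skb->priority == TC_PRIO_CONTROL)
                return 0;

        /* formerly fallback(dev, skb, sb_dev) */
        return netdev_pick_tx(dev, skb, sb_dev);
}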
.listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = packet_setsockopt, diff --git a/net/packet/diag.c b/net/packet/diag.c index 7ef1c881ae74..98abfd8644a4 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -39,7 +39,7 @@ static int pdiag_put_mclist(const struct packet_sock *po, struct sk_buff *nlskb) struct nlattr *mca; struct packet_mclist *ml; - mca = nla_nest_start(nlskb, PACKET_DIAG_MCLIST); + mca = nla_nest_start_noflag(nlskb, PACKET_DIAG_MCLIST); if (!mca) return -EMSGSIZE; diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 871eaf2cb85e..be92d936b5d5 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -79,8 +79,8 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, ASSERT_RTNL(); - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_phonet_policy, - extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, + ifa_phonet_policy, extack); if (err < 0) return err; @@ -246,8 +246,8 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh, ASSERT_RTNL(); - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_phonet_policy, - extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_phonet_policy, extack); if (err < 0) return err; diff --git a/net/psample/psample.c b/net/psample/psample.c index 64f95624f219..a107b2405668 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -100,6 +100,7 @@ static int psample_nl_cmd_get_group_dumpit(struct sk_buff *msg, static const struct genl_ops psample_nl_ops[] = { { .cmd = PSAMPLE_CMD_GET_GROUP, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = psample_nl_cmd_get_group_dumpit, /* can be retrieved by unprivileged users */ } diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index b37e6e0a1026..dd0e97f4f6c0 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -968,9 +968,6 @@ static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) break; } break; - case SIOCGSTAMP: - rc = sock_get_timestamp(sk, argp); - break; case SIOCADDRT: case SIOCDELRT: case SIOCSIFADDR: @@ -1033,6 +1030,7 @@ static const struct proto_ops qrtr_proto_ops = { .recvmsg = qrtr_recvmsg, .getname = qrtr_getname, .ioctl = qrtr_ioctl, + .gettstamp = sock_gettstamp, .poll = datagram_poll, .shutdown = sock_no_shutdown, .setsockopt = sock_no_setsockopt, @@ -1093,7 +1091,8 @@ static int qrtr_addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, ASSERT_RTNL(); - rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, qrtr_policy, extack); + rc = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, + qrtr_policy, extack); if (rc < 0) return rc; diff --git a/net/rds/tcp.c b/net/rds/tcp.c index faf726e00e27..66121bc6f34e 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -551,7 +551,7 @@ static __net_init int rds_tcp_init_net(struct net *net) tbl = kmemdup(rds_tcp_sysctl_table, sizeof(rds_tcp_sysctl_table), GFP_KERNEL); if (!tbl) { - pr_warn("could not set allocate syctl table\n"); + pr_warn("could not set allocate sysctl table\n"); return -ENOMEM; } rtn->ctl_table = tbl; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index c96f63ffe31e..e274bc6e1458 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1301,12 +1301,6 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return put_user(amount, (unsigned int __user *) argp); } - case SIOCGSTAMP: - return sock_get_timestamp(sk, (struct timeval __user *) argp); - - case SIOCGSTAMPNS: - return sock_get_timestampns(sk, (struct timespec 
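packet above, and qrtr and rose here, repeat the netrom conversion seen earlier: the per-protocol SIOCGSTAMP/SIOCGSTAMPNS ioctl cases go away, and the socket core instead dispatches those ioctls through the new proto_ops->gettstamp hook, where the common sock_gettstamp() also handles the 32- and 64-bit time layouts. Abridged, for a hypothetical protocol:

static const struct proto_ops demo_proto_ops = {
        .owner     = THIS_MODULE,
        .ioctl     = demo_ioctl,        /* no SIOCGSTAMP cases left */
        .gettstamp = sock_gettstamp,    /* core routes SIOCGSTAMP and
                                         * SIOCGSTAMPNS here */
        /* ... remaining ops unchanged ... */
};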
__user *) argp); - case SIOCGIFADDR: case SIOCSIFADDR: case SIOCGIFDSTADDR: @@ -1474,6 +1468,7 @@ static const struct proto_ops rose_proto_ops = { .getname = rose_getname, .poll = datagram_poll, .ioctl = rose_ioctl, + .gettstamp = sock_gettstamp, .listen = rose_listen, .shutdown = sock_no_shutdown, .setsockopt = rose_setsockopt, diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 01959db51445..b67dec945498 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -180,7 +180,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) /* Fall through and set IPv4 options too otherwise we don't get * errors from IPv4 packets sent through the IPv6 socket. */ - + /* Fall through */ case AF_INET: /* we want to receive ICMP errors */ opt = 1; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 5a87e271d35a..683fcc00da49 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -242,7 +242,7 @@ static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, (unsigned long)p->tcfa_tm.lastuse)) continue; - nest = nla_nest_start(skb, n_i); + nest = nla_nest_start_noflag(skb, n_i); if (!nest) { index--; goto nla_put_failure; @@ -299,7 +299,7 @@ static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, struct tc_action *p; unsigned long id = 1; - nest = nla_nest_start(skb, 0); + nest = nla_nest_start_noflag(skb, 0); if (nest == NULL) goto nla_put_failure; if (nla_put_string(skb, TCA_KIND, ops->kind)) @@ -776,7 +776,7 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) } rcu_read_unlock(); - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; err = tcf_action_dump_old(skb, a, bind, ref); @@ -800,7 +800,7 @@ int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) { a = actions[i]; - nest = nla_nest_start(skb, a->order); + nest = nla_nest_start_noflag(skb, a->order); if (nest == NULL) goto nla_put_failure; err = tcf_action_dump_1(skb, a, bind, ref); @@ -849,7 +849,8 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, int err; if (name == NULL) { - err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack); + err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, NULL, + extack); if (err < 0) goto err_out; err = -EINVAL; @@ -964,7 +965,8 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, int err; int i; - err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack); + err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX_PRIO, nla, NULL, + extack); if (err < 0) return err; @@ -1052,7 +1054,7 @@ static int tca_get_fill(struct sk_buff *skb, struct tc_action *actions[], t->tca__pad1 = 0; t->tca__pad2 = 0; - nest = nla_nest_start(skb, TCA_ACT_TAB); + nest = nla_nest_start_noflag(skb, TCA_ACT_TAB); if (!nest) goto out_nlmsg_trim; @@ -1099,7 +1101,7 @@ static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla, int index; int err; - err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack); + err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, NULL, extack); if (err < 0) goto err_out; @@ -1153,7 +1155,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, b = skb_tail_pointer(skb); - err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack); + err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, NULL, extack); if (err < 0) goto err_out; @@ -1176,7 
+1178,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, t->tca__pad1 = 0; t->tca__pad2 = 0; - nest = nla_nest_start(skb, TCA_ACT_TAB); + nest = nla_nest_start_noflag(skb, TCA_ACT_TAB); if (!nest) { NL_SET_ERR_MSG(extack, "Failed to add new netlink message"); goto out_module_put; @@ -1282,7 +1284,8 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, size_t attr_size = 0; struct tc_action *actions[TCA_ACT_MAX_PRIO] = {}; - ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack); + ret = nla_parse_nested_deprecated(tb, TCA_ACT_MAX_PRIO, nla, NULL, + extack); if (ret < 0) return ret; @@ -1384,8 +1387,8 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; - ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ROOT_MAX, NULL, - extack); + ret = nlmsg_parse_deprecated(n, sizeof(struct tcamsg), tca, + TCA_ROOT_MAX, NULL, extack); if (ret < 0) return ret; @@ -1436,13 +1439,12 @@ static struct nlattr *find_dump_kind(struct nlattr **nla) if (tb1 == NULL) return NULL; - if (nla_parse(tb, TCA_ACT_MAX_PRIO, nla_data(tb1), - NLMSG_ALIGN(nla_len(tb1)), NULL, NULL) < 0) + if (nla_parse_deprecated(tb, TCA_ACT_MAX_PRIO, nla_data(tb1), NLMSG_ALIGN(nla_len(tb1)), NULL, NULL) < 0) return NULL; if (tb[1] == NULL) return NULL; - if (nla_parse_nested(tb2, TCA_ACT_MAX, tb[1], NULL, NULL) < 0) + if (nla_parse_nested_deprecated(tb2, TCA_ACT_MAX, tb[1], NULL, NULL) < 0) return NULL; kind = tb2[TCA_ACT_KIND]; @@ -1466,8 +1468,8 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) u32 msecs_since = 0; u32 act_count = 0; - ret = nlmsg_parse(cb->nlh, sizeof(struct tcamsg), tb, TCA_ROOT_MAX, - tcaa_policy, cb->extack); + ret = nlmsg_parse_deprecated(cb->nlh, sizeof(struct tcamsg), tb, + TCA_ROOT_MAX, tcaa_policy, cb->extack); if (ret < 0) return ret; @@ -1508,7 +1510,7 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) if (!count_attr) goto out_module_put; - nest = nla_nest_start(skb, TCA_ACT_TAB); + nest = nla_nest_start_noflag(skb, TCA_ACT_TAB); if (nest == NULL) goto out_module_put; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 3841156aa09f..a0c77faca04b 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -293,7 +293,8 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, if (!nla) return -EINVAL; - ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy, NULL); + ret = nla_parse_nested_deprecated(tb, TCA_ACT_BPF_MAX, nla, + act_bpf_policy, NULL); if (ret < 0) return ret; diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 32ae0cd6e31c..8838575cd536 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -111,8 +111,8 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, if (!nla) return -EINVAL; - ret = nla_parse_nested(tb, TCA_CONNMARK_MAX, nla, connmark_policy, - NULL); + ret = nla_parse_nested_deprecated(tb, TCA_CONNMARK_MAX, nla, + connmark_policy, NULL); if (ret < 0) return ret; diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 0c77e7bdf6d5..14bb525e355e 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -61,7 +61,8 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_CSUM_MAX, nla, csum_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_CSUM_MAX, nla, csum_policy, + NULL); if (err < 0) return err; diff --git a/net/sched/act_gact.c 
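All of the act_* conversions above and below follow one mechanical pattern from the strict-netlink-validation work: parsing switches to nla_parse_nested_deprecated(), which keeps the old lenient behaviour (no NLA_F_NESTED requirement on the container, unknown attribute types tolerated), and nesting switches to nla_nest_start_noflag(), which emits the container attribute without NLA_F_NESTED, exactly as nla_nest_start() used to. A minimal sketch of both halves, using hypothetical FOO_* attributes and policy rather than anything from this patch:

#include <net/netlink.h>

/* Hypothetical attribute space; only the helper calls mirror the patch. */
enum {
        FOO_UNSPEC,
        FOO_OPTS,                       /* nested container */
        FOO_RATE,                       /* u32 inside the nest */
        __FOO_MAX
};
#define FOO_MAX (__FOO_MAX - 1)

static const struct nla_policy foo_policy[FOO_MAX + 1] = {
        [FOO_RATE] = { .type = NLA_U32 },
};

static int foo_parse(const struct nlattr *opts, struct netlink_ext_ack *extack)
{
        struct nlattr *tb[FOO_MAX + 1];

        /* Lenient parse: validates against foo_policy but neither rejects
         * unknown attribute types nor demands NLA_F_NESTED on 'opts'. */
        return nla_parse_nested_deprecated(tb, FOO_MAX, opts, foo_policy,
                                           extack);
}

static int foo_dump(struct sk_buff *skb, u32 rate)
{
        /* Old wire format: the container is emitted without NLA_F_NESTED. */
        struct nlattr *nest = nla_nest_start_noflag(skb, FOO_OPTS);

        if (!nest)
                return -EMSGSIZE;
        if (nla_put_u32(skb, FOO_RATE, rate)) {
                nla_nest_cancel(skb, nest);
                return -EMSGSIZE;
        }
        nla_nest_end(skb, nest);
        return 0;
}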
b/net/sched/act_gact.c index e540e31069d7..75492b07f324 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -74,7 +74,8 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_GACT_MAX, nla, gact_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_GACT_MAX, nla, gact_policy, + NULL); if (err < 0) return err; diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 31c6ffb6abe7..12489f60a979 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -387,7 +387,7 @@ static int dump_metalist(struct sk_buff *skb, struct tcf_ife_info *ife) if (list_empty(&ife->metalist)) return 0; - nest = nla_nest_start(skb, TCA_IFE_METALST); + nest = nla_nest_start_noflag(skb, TCA_IFE_METALST); if (!nest) goto out_nlmsg_trim; @@ -486,7 +486,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, int ret = 0; int err; - err = nla_parse_nested(tb, TCA_IFE_MAX, nla, ife_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_IFE_MAX, nla, ife_policy, + NULL); if (err < 0) return err; @@ -567,8 +568,9 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, INIT_LIST_HEAD(&ife->metalist); if (tb[TCA_IFE_METALST]) { - err = nla_parse_nested(tb2, IFE_META_MAX, tb[TCA_IFE_METALST], - NULL, NULL); + err = nla_parse_nested_deprecated(tb2, IFE_META_MAX, + tb[TCA_IFE_METALST], NULL, + NULL); if (err) goto metadata_parse_err; err = populate_metalist(ife, tb2, exists, rtnl_held); diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 04a0b5c61194..ae6e28ab1cd7 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -113,7 +113,8 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_IPT_MAX, nla, ipt_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_IPT_MAX, nla, ipt_policy, + NULL); if (err < 0) return err; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 17cc6bd4c57c..c329390342f4 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -111,7 +111,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, NL_SET_ERR_MSG_MOD(extack, "Mirred requires attributes to be passed"); return -EINVAL; } - ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy, extack); + ret = nla_parse_nested_deprecated(tb, TCA_MIRRED_MAX, nla, + mirred_policy, extack); if (ret < 0) return ret; if (!tb[TCA_MIRRED_PARMS]) { diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index e91bb8eb81ec..51bd1ba02380 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -52,7 +52,8 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_NAT_MAX, nla, nat_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_NAT_MAX, nla, nat_policy, + NULL); if (err < 0) return err; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 287793abfaf9..d790c02b9c6c 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -70,8 +70,9 @@ static struct tcf_pedit_key_ex *tcf_pedit_keys_ex_parse(struct nlattr *nla, goto err_out; } - err = nla_parse_nested(tb, TCA_PEDIT_KEY_EX_MAX, ka, - pedit_key_ex_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_PEDIT_KEY_EX_MAX, + ka, pedit_key_ex_policy, + NULL); if (err) goto err_out; @@ -108,14 +109,15 @@ err_out: static int tcf_pedit_key_ex_dump(struct sk_buff *skb, struct tcf_pedit_key_ex *keys_ex, int n) { - struct nlattr 
*keys_start = nla_nest_start(skb, TCA_PEDIT_KEYS_EX); + struct nlattr *keys_start = nla_nest_start_noflag(skb, + TCA_PEDIT_KEYS_EX); if (!keys_start) goto nla_failure; for (; n > 0; n--) { struct nlattr *key_start; - key_start = nla_nest_start(skb, TCA_PEDIT_KEY_EX); + key_start = nla_nest_start_noflag(skb, TCA_PEDIT_KEY_EX); if (!key_start) goto nla_failure; @@ -157,7 +159,8 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, return -EINVAL; } - err = nla_parse_nested(tb, TCA_PEDIT_MAX, nla, pedit_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_PEDIT_MAX, nla, + pedit_policy, NULL); if (err < 0) return err; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 2b8581f6ab51..61731944742a 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -22,42 +22,7 @@ #include <net/act_api.h> #include <net/netlink.h> #include <net/pkt_cls.h> - -struct tcf_police_params { - int tcfp_result; - u32 tcfp_ewma_rate; - s64 tcfp_burst; - u32 tcfp_mtu; - s64 tcfp_mtu_ptoks; - struct psched_ratecfg rate; - bool rate_present; - struct psched_ratecfg peak; - bool peak_present; - struct rcu_head rcu; -}; - -struct tcf_police { - struct tc_action common; - struct tcf_police_params __rcu *params; - - spinlock_t tcfp_lock ____cacheline_aligned_in_smp; - s64 tcfp_toks; - s64 tcfp_ptoks; - s64 tcfp_t_c; -}; - -#define to_police(pc) ((struct tcf_police *)pc) - -/* old policer structure from before tc actions */ -struct tc_police_compat { - u32 index; - int action; - u32 limit; - u32 burst; - u32 mtu; - struct tc_ratespec rate; - struct tc_ratespec peakrate; -}; +#include <net/tc_act/tc_police.h> /* Each policer is serialized by its individual spinlock */ @@ -100,7 +65,8 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_POLICE_MAX, nla, police_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_POLICE_MAX, nla, + police_policy, NULL); if (err < 0) return err; @@ -316,6 +282,20 @@ static void tcf_police_cleanup(struct tc_action *a) kfree_rcu(p, rcu); } +static void tcf_police_stats_update(struct tc_action *a, + u64 bytes, u32 packets, + u64 lastuse, bool hw) +{ + struct tcf_police *police = to_police(a); + struct tcf_t *tm = &police->tcf_tm; + + _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); + if (hw) + _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw), + bytes, packets); + tm->lastuse = max_t(u64, tm->lastuse, lastuse); +} + static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { @@ -379,6 +359,7 @@ static struct tc_action_ops act_police_ops = { .kind = "police", .id = TCA_ID_POLICE, .owner = THIS_MODULE, + .stats_update = tcf_police_stats_update, .act = tcf_police_act, .dump = tcf_police_dump, .init = tcf_police_init, diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 0f82d50ea232..b2faa43c1ac7 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -53,7 +53,8 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, if (!nla) return -EINVAL; - ret = nla_parse_nested(tb, TCA_SAMPLE_MAX, nla, sample_policy, NULL); + ret = nla_parse_nested_deprecated(tb, TCA_SAMPLE_MAX, nla, + sample_policy, NULL); if (ret < 0) return ret; if (!tb[TCA_SAMPLE_PARMS] || !tb[TCA_SAMPLE_RATE] || diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 23c8ca5615e5..ead480e6014c 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -104,7 +104,8 @@ static int tcf_simp_init(struct net *net, 
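The act_police changes above do two things: the private tcf_police structures move into <net/tc_act/tc_police.h> so that cls_api can translate policers into flow_action entries, and the action gains a .stats_update callback so that counters gathered by offloading hardware flow back into the software action. The delivery path is the existing one: when a classifier issues a stats query, the driver fills in the offload stats and the core fans them out to each action's .stats_update. A hedged sketch of the driver half, assuming the flow_stats_update() helper from <net/flow_offload.h>; hw_query_counters() is a placeholder, not a real API:

/* Hypothetical: reads the counters hardware kept for this flow's cookie. */
extern void hw_query_counters(unsigned long cookie, u64 *bytes, u64 *pkts,
                              u64 *lastused);

static int foo_setup_tc_flower(struct tc_cls_flower_offload *f)
{
        u64 bytes, pkts, lastused;

        switch (f->command) {
        case TC_CLSFLOWER_STATS:
                hw_query_counters(f->cookie, &bytes, &pkts, &lastused);
                /* The core later forwards these via tcf_exts_stats_update(),
                 * which now also reaches tcf_police_stats_update() above. */
                flow_stats_update(&f->stats, bytes, pkts, lastused);
                return 0;
        default:
                return -EOPNOTSUPP;
        }
}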
struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_DEF_MAX, nla, simple_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_DEF_MAX, nla, simple_policy, + NULL); if (err < 0) return err; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 7e1d261a31d2..7ec159b95364 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -114,7 +114,8 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_SKBEDIT_MAX, nla, skbedit_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_SKBEDIT_MAX, nla, + skbedit_policy, NULL); if (err < 0) return err; diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 1d4c324d0a42..186ef98c828f 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -102,7 +102,8 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, if (!nla) return -EINVAL; - err = nla_parse_nested(tb, TCA_SKBMOD_MAX, nla, skbmod_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_SKBMOD_MAX, nla, + skbmod_policy, NULL); if (err < 0) return err; diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index d5aaf90a3971..6a9070511ee8 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -76,8 +76,9 @@ tunnel_key_copy_geneve_opt(const struct nlattr *nla, void *dst, int dst_len, int err, data_len, opt_len; u8 *data; - err = nla_parse_nested(tb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX, - nla, geneve_opt_policy, extack); + err = nla_parse_nested_deprecated(tb, + TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX, + nla, geneve_opt_policy, extack); if (err < 0) return err; @@ -125,8 +126,8 @@ static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst, int err, rem, opt_len, len = nla_len(nla), opts_len = 0; const struct nlattr *attr, *head = nla_data(nla); - err = nla_validate(head, len, TCA_TUNNEL_KEY_ENC_OPTS_MAX, - enc_opts_policy, extack); + err = nla_validate_deprecated(head, len, TCA_TUNNEL_KEY_ENC_OPTS_MAX, + enc_opts_policy, extack); if (err) return err; @@ -235,8 +236,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, return -EINVAL; } - err = nla_parse_nested(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy, - extack); + err = nla_parse_nested_deprecated(tb, TCA_TUNNEL_KEY_MAX, nla, + tunnel_key_policy, extack); if (err < 0) { NL_SET_ERR_MSG(extack, "Failed to parse nested tunnel key attributes"); return err; @@ -426,7 +427,7 @@ static int tunnel_key_geneve_opts_dump(struct sk_buff *skb, u8 *src = (u8 *)(info + 1); struct nlattr *start; - start = nla_nest_start(skb, TCA_TUNNEL_KEY_ENC_OPTS_GENEVE); + start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS_GENEVE); if (!start) return -EMSGSIZE; @@ -460,7 +461,7 @@ static int tunnel_key_opts_dump(struct sk_buff *skb, if (!info->options_len) return 0; - start = nla_nest_start(skb, TCA_TUNNEL_KEY_ENC_OPTS); + start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS); if (!start) return -EMSGSIZE; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 0f40d0a74423..39bd9fa3e455 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -124,7 +124,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, if (!nla) return -EINVAL; - err = nla_parse_nested(tb, TCA_VLAN_MAX, nla, vlan_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_VLAN_MAX, nla, vlan_policy, + NULL); if (err < 0) return err; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 99ae30c177c7..d4699156974a 100644 
--- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -37,6 +37,8 @@ #include <net/tc_act/tc_tunnel_key.h> #include <net/tc_act/tc_csum.h> #include <net/tc_act/tc_gact.h> +#include <net/tc_act/tc_police.h> +#include <net/tc_act/tc_sample.h> #include <net/tc_act/tc_skbedit.h> extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; @@ -2006,7 +2008,8 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, replay: tp_created = 0; - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); + err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, + rtm_tca_policy, extack); if (err < 0) return err; @@ -2217,7 +2220,8 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); + err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, + rtm_tca_policy, extack); if (err < 0) return err; @@ -2366,7 +2370,8 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, int err; bool rtnl_held = false; - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); + err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, + rtm_tca_policy, extack); if (err < 0) return err; @@ -2558,8 +2563,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if (nlmsg_len(cb->nlh) < sizeof(*tcm)) return skb->len; - err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, - cb->extack); + err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX, + NULL, cb->extack); if (err) return err; @@ -2806,7 +2811,8 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, return -EPERM; replay: - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); + err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, + rtm_tca_policy, extack); if (err < 0) return err; @@ -2937,8 +2943,8 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) if (nlmsg_len(cb->nlh) < sizeof(*tcm)) return skb->len; - err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, - cb->extack); + err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX, + rtm_tca_policy, cb->extack); if (err) return err; @@ -3111,7 +3117,7 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts) * tc data even if iproute2 was newer - jhs */ if (exts->type != TCA_OLD_COMPAT) { - nest = nla_nest_start(skb, exts->action); + nest = nla_nest_start_noflag(skb, exts->action); if (nest == NULL) goto nla_put_failure; @@ -3120,7 +3126,7 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts) nla_nest_end(skb, nest); } else if (exts->police) { struct tc_action *act = tcf_exts_first_act(exts); - nest = nla_nest_start(skb, exts->police); + nest = nla_nest_start_noflag(skb, exts->police); if (nest == NULL || !act) goto nla_put_failure; if (tcf_action_dump_old(skb, act, 0, 0) < 0) @@ -3229,7 +3235,6 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->tunnel = tcf_tunnel_info(act); } else if (is_tcf_tunnel_release(act)) { entry->id = FLOW_ACTION_TUNNEL_DECAP; - entry->tunnel = tcf_tunnel_info(act); } else if (is_tcf_pedit(act)) { for (k = 0; k < tcf_pedit_nkeys(act); k++) { switch (tcf_pedit_cmd(act, k)) { @@ -3254,6 +3259,18 @@ int tc_setup_flow_action(struct flow_action *flow_action, } else if (is_tcf_skbedit_mark(act)) { entry->id = FLOW_ACTION_MARK; entry->mark = tcf_skbedit_mark(act); + } else if (is_tcf_sample(act)) { + entry->id = 
FLOW_ACTION_SAMPLE; + entry->sample.psample_group = + tcf_sample_psample_group(act); + entry->sample.trunc_size = tcf_sample_trunc_size(act); + entry->sample.truncate = tcf_sample_truncate(act); + entry->sample.rate = tcf_sample_rate(act); + } else if (is_tcf_police(act)) { + entry->id = FLOW_ACTION_POLICE; + entry->police.burst = tcf_police_tcfp_burst(act); + entry->police.rate_bytes_ps = + tcf_police_rate_bytes_ps(act); } else { goto err_out; } diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 687b0af67878..923863f3b0d8 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -185,8 +185,8 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, if (tca[TCA_OPTIONS] == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_BASIC_MAX, tca[TCA_OPTIONS], - basic_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_BASIC_MAX, tca[TCA_OPTIONS], + basic_policy, NULL); if (err < 0) return err; @@ -288,7 +288,7 @@ static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh, t->tcm_handle = f->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index b4ac58039cb1..27365ed3fe0b 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -157,8 +157,7 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, skip_sw = prog && tc_skip_sw(prog->gen_flags); obj = prog ?: oldprog; - tc_cls_common_offload_init(&cls_bpf.common, tp, obj->gen_flags, - extack); + tc_cls_common_offload_init(&cls_bpf.common, tp, obj->gen_flags, extack); cls_bpf.command = TC_CLSBPF_OFFLOAD; cls_bpf.exts = &obj->exts; cls_bpf.prog = prog ? prog->filter : NULL; @@ -468,8 +467,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, if (tca[TCA_OPTIONS] == NULL) return -EINVAL; - ret = nla_parse_nested(tb, TCA_BPF_MAX, tca[TCA_OPTIONS], bpf_policy, - NULL); + ret = nla_parse_nested_deprecated(tb, TCA_BPF_MAX, tca[TCA_OPTIONS], + bpf_policy, NULL); if (ret < 0) return ret; @@ -591,7 +590,7 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, void *fh, cls_bpf_offload_update_stats(tp, prog); - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 4c1567854f95..35659127e5a3 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -32,6 +32,8 @@ static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct cls_cgroup_head *head = rcu_dereference_bh(tp->root); u32 classid = task_get_classid(skb); + if (unlikely(!head)) + return -1; if (!classid) return -1; if (!tcf_em_tree_match(skb, &head->ematches, NULL)) @@ -104,8 +106,9 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, goto errout; new->handle = handle; new->tp = tp; - err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS], - cgroup_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_CGROUP_MAX, + tca[TCA_OPTIONS], cgroup_policy, + NULL); if (err < 0) goto errout; @@ -176,7 +179,7 @@ static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, void *fh, t->tcm_handle = head->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index eece1ee26930..7bb79ec5b176 100644 --- 
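With the tc_setup_flow_action() additions above, sample and police actions are now expressed in the driver-neutral flow_action representation alongside the existing entries (the stray tunnel assignment dropped from the DECAP case was a fixup: a decap entry carries no tunnel metadata). A consumer then looks roughly like this hypothetical driver, where the hw_program_*() calls are placeholders for device-specific programming:

/* Hypothetical device hooks, not a real API. */
extern void hw_program_policer(u64 rate_bytes_ps, u32 burst);
extern void hw_program_sampler(struct psample_group *grp, u32 rate,
                               u32 trunc_size);

static int foo_parse_flow_actions(const struct flow_action *actions)
{
        const struct flow_action_entry *entry;
        int i;

        flow_action_for_each(i, entry, actions) {
                switch (entry->id) {
                case FLOW_ACTION_POLICE:
                        hw_program_policer(entry->police.rate_bytes_ps,
                                           entry->police.burst);
                        break;
                case FLOW_ACTION_SAMPLE:
                        hw_program_sampler(entry->sample.psample_group,
                                           entry->sample.rate,
                                           entry->sample.trunc_size);
                        break;
                default:
                        /* Reject rules this hardware cannot express. */
                        return -EOPNOTSUPP;
                }
        }
        return 0;
}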
a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -408,7 +408,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (opt == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_FLOW_MAX, opt, flow_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_FLOW_MAX, opt, flow_policy, + NULL); if (err < 0) return err; @@ -629,7 +630,7 @@ static int flow_dump(struct net *net, struct tcf_proto *tp, void *fh, t->tcm_handle = f->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index c04247b403ed..f6685fc53119 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -14,6 +14,7 @@ #include <linux/module.h> #include <linux/rhashtable.h> #include <linux/workqueue.h> +#include <linux/refcount.h> #include <linux/if_ether.h> #include <linux/in6.h> @@ -75,6 +76,7 @@ struct fl_flow_mask { struct list_head filters; struct rcu_work rwork; struct list_head list; + refcount_t refcnt; }; struct fl_flow_tmplt { @@ -86,7 +88,9 @@ struct fl_flow_tmplt { struct cls_fl_head { struct rhashtable ht; + spinlock_t masks_lock; /* Protect masks list */ struct list_head masks; + struct list_head hw_filters; struct rcu_work rwork; struct idr handle_idr; }; @@ -99,11 +103,18 @@ struct cls_fl_filter { struct tcf_result res; struct fl_flow_key key; struct list_head list; + struct list_head hw_list; u32 handle; u32 flags; u32 in_hw_count; struct rcu_work rwork; struct net_device *hw_dev; + /* Flower classifier is unlocked, which means that its reference counter + * can be changed concurrently without any kind of external + * synchronization. Use atomic reference counter to be concurrency-safe. + */ + refcount_t refcnt; + bool deleted; }; static const struct rhashtable_params mask_ht_params = { @@ -304,7 +315,9 @@ static int fl_init(struct tcf_proto *tp) if (!head) return -ENOBUFS; + spin_lock_init(&head->masks_lock); INIT_LIST_HEAD_RCU(&head->masks); + INIT_LIST_HEAD(&head->hw_filters); rcu_assign_pointer(tp->root, head); idr_init(&head->handle_idr); @@ -313,6 +326,7 @@ static int fl_init(struct tcf_proto *tp) static void fl_mask_free(struct fl_flow_mask *mask) { + WARN_ON(!list_empty(&mask->filters)); rhashtable_destroy(&mask->ht); kfree(mask); } @@ -325,22 +339,32 @@ static void fl_mask_free_work(struct work_struct *work) fl_mask_free(mask); } -static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask, - bool async) +static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask) { - if (!list_empty(&mask->filters)) + if (!refcount_dec_and_test(&mask->refcnt)) return false; rhashtable_remove_fast(&head->ht, &mask->ht_node, mask_ht_params); + + spin_lock(&head->masks_lock); list_del_rcu(&mask->list); - if (async) - tcf_queue_work(&mask->rwork, fl_mask_free_work); - else - fl_mask_free(mask); + spin_unlock(&head->masks_lock); + + tcf_queue_work(&mask->rwork, fl_mask_free_work); return true; } +static struct cls_fl_head *fl_head_dereference(struct tcf_proto *tp) +{ + /* Flower classifier only changes root pointer during init and destroy. + * Users must obtain reference to tcf_proto instance before calling its + * API, so tp->root pointer is protected from concurrent call to + * fl_destroy() by reference counting. 
+ */ + return rcu_dereference_raw(tp->root); +} + static void __fl_destroy_filter(struct cls_fl_filter *f) { tcf_exts_destroy(&f->exts); @@ -353,37 +377,50 @@ static void fl_destroy_filter_work(struct work_struct *work) struct cls_fl_filter *f = container_of(to_rcu_work(work), struct cls_fl_filter, rwork); - rtnl_lock(); __fl_destroy_filter(f); - rtnl_unlock(); } static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct tc_cls_flower_offload cls_flower = {}; struct tcf_block *block = tp->chain->block; + if (!rtnl_held) + rtnl_lock(); + tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack); cls_flower.command = TC_CLSFLOWER_DESTROY; cls_flower.cookie = (unsigned long) f; tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); + spin_lock(&tp->lock); + list_del_init(&f->hw_list); tcf_block_offload_dec(block, &f->flags); + spin_unlock(&tp->lock); + + if (!rtnl_held) + rtnl_unlock(); } static int fl_hw_replace_filter(struct tcf_proto *tp, - struct cls_fl_filter *f, + struct cls_fl_filter *f, bool rtnl_held, struct netlink_ext_ack *extack) { + struct cls_fl_head *head = fl_head_dereference(tp); struct tc_cls_flower_offload cls_flower = {}; struct tcf_block *block = tp->chain->block; bool skip_sw = tc_skip_sw(f->flags); - int err; + int err = 0; + + if (!rtnl_held) + rtnl_lock(); cls_flower.rule = flow_rule_alloc(tcf_exts_num_actions(&f->exts)); - if (!cls_flower.rule) - return -ENOMEM; + if (!cls_flower.rule) { + err = -ENOMEM; + goto errout; + } tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack); cls_flower.command = TC_CLSFLOWER_REPLACE; @@ -396,35 +433,51 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts); if (err) { kfree(cls_flower.rule); - if (skip_sw) { + if (skip_sw) NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); - return err; - } - return 0; + else + err = 0; + goto errout; } err = tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw); kfree(cls_flower.rule); if (err < 0) { - fl_hw_destroy_filter(tp, f, NULL); - return err; + fl_hw_destroy_filter(tp, f, true, NULL); + goto errout; } else if (err > 0) { f->in_hw_count = err; + err = 0; + spin_lock(&tp->lock); tcf_block_offload_inc(block, &f->flags); + spin_unlock(&tp->lock); } - if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW)) - return -EINVAL; + if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW)) { + err = -EINVAL; + goto errout; + } - return 0; + spin_lock(&tp->lock); + list_add(&f->hw_list, &head->hw_filters); + spin_unlock(&tp->lock); +errout: + if (!rtnl_held) + rtnl_unlock(); + + return err; } -static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) +static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f, + bool rtnl_held) { struct tc_cls_flower_offload cls_flower = {}; struct tcf_block *block = tp->chain->block; + if (!rtnl_held) + rtnl_lock(); + tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL); cls_flower.command = TC_CLSFLOWER_STATS; cls_flower.cookie = (unsigned long) f; @@ -435,27 +488,81 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) tcf_exts_stats_update(&f->exts, cls_flower.stats.bytes, cls_flower.stats.pkts, cls_flower.stats.lastused); + + if (!rtnl_held) + rtnl_unlock(); } -static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f, - struct netlink_ext_ack *extack) 
+static void __fl_put(struct cls_fl_filter *f) { - struct cls_fl_head *head = rtnl_dereference(tp->root); - bool async = tcf_exts_get_net(&f->exts); - bool last; + if (!refcount_dec_and_test(&f->refcnt)) + return; + + if (tcf_exts_get_net(&f->exts)) + tcf_queue_work(&f->rwork, fl_destroy_filter_work); + else + __fl_destroy_filter(f); +} + +static struct cls_fl_filter *__fl_get(struct cls_fl_head *head, u32 handle) +{ + struct cls_fl_filter *f; + rcu_read_lock(); + f = idr_find(&head->handle_idr, handle); + if (f && !refcount_inc_not_zero(&f->refcnt)) + f = NULL; + rcu_read_unlock(); + + return f; +} + +static struct cls_fl_filter *fl_get_next_filter(struct tcf_proto *tp, + unsigned long *handle) +{ + struct cls_fl_head *head = fl_head_dereference(tp); + struct cls_fl_filter *f; + + rcu_read_lock(); + while ((f = idr_get_next_ul(&head->handle_idr, handle))) { + /* don't return filters that are being deleted */ + if (refcount_inc_not_zero(&f->refcnt)) + break; + ++(*handle); + } + rcu_read_unlock(); + + return f; +} + +static int __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f, + bool *last, bool rtnl_held, + struct netlink_ext_ack *extack) +{ + struct cls_fl_head *head = fl_head_dereference(tp); + + *last = false; + + spin_lock(&tp->lock); + if (f->deleted) { + spin_unlock(&tp->lock); + return -ENOENT; + } + + f->deleted = true; + rhashtable_remove_fast(&f->mask->ht, &f->ht_node, + f->mask->filter_ht_params); idr_remove(&head->handle_idr, f->handle); list_del_rcu(&f->list); - last = fl_mask_put(head, f->mask, async); + spin_unlock(&tp->lock); + + *last = fl_mask_put(head, f->mask); if (!tc_skip_hw(f->flags)) - fl_hw_destroy_filter(tp, f, extack); + fl_hw_destroy_filter(tp, f, rtnl_held, extack); tcf_unbind_filter(tp, &f->res); - if (async) - tcf_queue_work(&f->rwork, fl_destroy_filter_work); - else - __fl_destroy_filter(f); + __fl_put(f); - return last; + return 0; } static void fl_destroy_sleepable(struct work_struct *work) @@ -472,13 +579,15 @@ static void fl_destroy_sleepable(struct work_struct *work) static void fl_destroy(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { - struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_head *head = fl_head_dereference(tp); struct fl_flow_mask *mask, *next_mask; struct cls_fl_filter *f, *next; + bool last; list_for_each_entry_safe(mask, next_mask, &head->masks, list) { list_for_each_entry_safe(f, next, &mask->filters, list) { - if (__fl_delete(tp, f, extack)) + __fl_delete(tp, f, &last, rtnl_held, extack); + if (last) break; } } @@ -488,11 +597,18 @@ static void fl_destroy(struct tcf_proto *tp, bool rtnl_held, tcf_queue_work(&head->rwork, fl_destroy_sleepable); } +static void fl_put(struct tcf_proto *tp, void *arg) +{ + struct cls_fl_filter *f = arg; + + __fl_put(f); +} + static void *fl_get(struct tcf_proto *tp, u32 handle) { - struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_head *head = fl_head_dereference(tp); - return idr_find(&head->handle_idr, handle); + return __fl_get(head, handle); } static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { @@ -768,8 +884,9 @@ static int fl_set_geneve_opt(const struct nlattr *nla, struct fl_flow_key *key, return -EINVAL; } - err = nla_parse_nested(tb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX, - nla, geneve_opt_policy, extack); + err = nla_parse_nested_deprecated(tb, + TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX, + nla, geneve_opt_policy, extack); if (err < 0) return err; @@ -831,18 +948,18 @@ static int fl_set_enc_opt(struct nlattr **tb, 
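The comment above states the lifetime rule that makes the unlocked classifier safe, and __fl_get()/__fl_put() are its two halves: lookups run under RCU and only succeed in taking a reference while the count is still nonzero. The same pattern restated generically, with a hypothetical struct obj:

struct obj {
        refcount_t refcnt;
        struct rcu_head rcu;
};

static struct obj *obj_get(struct idr *table, u32 handle)
{
        struct obj *o;

        rcu_read_lock();
        o = idr_find(table, handle);
        /* Once a deleter has dropped the last reference the count is zero,
         * and refcount_inc_not_zero() refuses to resurrect the object. */
        if (o && !refcount_inc_not_zero(&o->refcnt))
                o = NULL;
        rcu_read_unlock();

        return o;
}

static void obj_put(struct obj *o)
{
        if (refcount_dec_and_test(&o->refcnt))
                kfree_rcu(o, rcu);      /* free deferred past RCU readers */
}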
struct fl_flow_key *key, const struct nlattr *nla_enc_key, *nla_opt_key, *nla_opt_msk = NULL; int err, option_len, key_depth, msk_depth = 0; - err = nla_validate_nested(tb[TCA_FLOWER_KEY_ENC_OPTS], - TCA_FLOWER_KEY_ENC_OPTS_MAX, - enc_opts_policy, extack); + err = nla_validate_nested_deprecated(tb[TCA_FLOWER_KEY_ENC_OPTS], + TCA_FLOWER_KEY_ENC_OPTS_MAX, + enc_opts_policy, extack); if (err) return err; nla_enc_key = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS]); if (tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]) { - err = nla_validate_nested(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK], - TCA_FLOWER_KEY_ENC_OPTS_MAX, - enc_opts_policy, extack); + err = nla_validate_nested_deprecated(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK], + TCA_FLOWER_KEY_ENC_OPTS_MAX, + enc_opts_policy, extack); if (err) return err; @@ -1227,12 +1344,18 @@ static struct fl_flow_mask *fl_create_new_mask(struct cls_fl_head *head, INIT_LIST_HEAD_RCU(&newmask->filters); - err = rhashtable_insert_fast(&head->ht, &newmask->ht_node, - mask_ht_params); + refcount_set(&newmask->refcnt, 1); + err = rhashtable_replace_fast(&head->ht, &mask->ht_node, + &newmask->ht_node, mask_ht_params); if (err) goto errout_destroy; + /* Wait until any potential concurrent users of mask are finished */ + synchronize_rcu(); + + spin_lock(&head->masks_lock); list_add_tail_rcu(&newmask->list, &head->masks); + spin_unlock(&head->masks_lock); return newmask; @@ -1250,41 +1373,77 @@ static int fl_check_assign_mask(struct cls_fl_head *head, struct fl_flow_mask *mask) { struct fl_flow_mask *newmask; + int ret = 0; + + rcu_read_lock(); - fnew->mask = rhashtable_lookup_fast(&head->ht, mask, mask_ht_params); + /* Insert mask as temporary node to prevent concurrent creation of mask + * with same key. Any concurrent lookups with same key will return + * -EAGAIN because mask's refcnt is zero. It is safe to insert + * stack-allocated 'mask' to masks hash table because we call + * synchronize_rcu() before returning from this function (either in case + * of error or after replacing it with heap-allocated mask in + * fl_create_new_mask()). 
+ */ + fnew->mask = rhashtable_lookup_get_insert_fast(&head->ht, + &mask->ht_node, + mask_ht_params); if (!fnew->mask) { - if (fold) - return -EINVAL; + rcu_read_unlock(); + + if (fold) { + ret = -EINVAL; + goto errout_cleanup; + } newmask = fl_create_new_mask(head, mask); - if (IS_ERR(newmask)) - return PTR_ERR(newmask); + if (IS_ERR(newmask)) { + ret = PTR_ERR(newmask); + goto errout_cleanup; + } fnew->mask = newmask; + return 0; + } else if (IS_ERR(fnew->mask)) { + ret = PTR_ERR(fnew->mask); } else if (fold && fold->mask != fnew->mask) { - return -EINVAL; + ret = -EINVAL; + } else if (!refcount_inc_not_zero(&fnew->mask->refcnt)) { + /* Mask was deleted concurrently, try again */ + ret = -EAGAIN; } + rcu_read_unlock(); + return ret; - return 0; +errout_cleanup: + rhashtable_remove_fast(&head->ht, &mask->ht_node, + mask_ht_params); + /* Wait until any potential concurrent users of mask are finished */ + synchronize_rcu(); + return ret; } static int fl_set_parms(struct net *net, struct tcf_proto *tp, struct cls_fl_filter *f, struct fl_flow_mask *mask, unsigned long base, struct nlattr **tb, struct nlattr *est, bool ovr, - struct fl_flow_tmplt *tmplt, + struct fl_flow_tmplt *tmplt, bool rtnl_held, struct netlink_ext_ack *extack) { int err; - err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, + err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, rtnl_held, extack); if (err < 0) return err; if (tb[TCA_FLOWER_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]); + if (!rtnl_held) + rtnl_lock(); tcf_bind_filter(tp, &f->res, base); + if (!rtnl_held) + rtnl_unlock(); } err = fl_set_key(net, tb, &f->key, &mask->key, extack); @@ -1302,25 +1461,52 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, return 0; } +static int fl_ht_insert_unique(struct cls_fl_filter *fnew, + struct cls_fl_filter *fold, + bool *in_ht) +{ + struct fl_flow_mask *mask = fnew->mask; + int err; + + err = rhashtable_lookup_insert_fast(&mask->ht, + &fnew->ht_node, + mask->filter_ht_params); + if (err) { + *in_ht = false; + /* It is okay if filter with same key exists when + * overwriting. + */ + return fold && err == -EEXIST ? 
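The long comment above hinges on one primitive: rhashtable_lookup_get_insert_fast() makes "find an existing mask or atomically publish mine" a single step, and the temporary node's zero refcount keeps it invisible to concurrent users until the real heap-allocated mask replaces it. Its three-way return contract, annotated (a fragment; flower types and variables as in the function above):

struct fl_flow_mask *old;
int err = 0;

old = rhashtable_lookup_get_insert_fast(&head->ht, &mask->ht_node,
                                        mask_ht_params);
if (!old) {
        /* Inserted: 'mask' is visible to concurrent lookups from now on. */
} else if (IS_ERR(old)) {
        err = PTR_ERR(old);             /* e.g. -ENOMEM */
} else if (!refcount_inc_not_zero(&old->refcnt)) {
        err = -EAGAIN;                  /* found, but mid-deletion: retry */
}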
0 : err; + } + + *in_ht = true; + return 0; +} + static int fl_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, bool ovr, bool rtnl_held, struct netlink_ext_ack *extack) { - struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_head *head = fl_head_dereference(tp); struct cls_fl_filter *fold = *arg; struct cls_fl_filter *fnew; struct fl_flow_mask *mask; struct nlattr **tb; + bool in_ht; int err; - if (!tca[TCA_OPTIONS]) - return -EINVAL; + if (!tca[TCA_OPTIONS]) { + err = -EINVAL; + goto errout_fold; + } mask = kzalloc(sizeof(struct fl_flow_mask), GFP_KERNEL); - if (!mask) - return -ENOBUFS; + if (!mask) { + err = -ENOBUFS; + goto errout_fold; + } tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL); if (!tb) { @@ -1328,8 +1514,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, goto errout_mask_alloc; } - err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], - fl_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_FLOWER_MAX, + tca[TCA_OPTIONS], fl_policy, NULL); if (err < 0) goto errout_tb; @@ -1343,6 +1529,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, err = -ENOBUFS; goto errout_tb; } + INIT_LIST_HEAD(&fnew->hw_list); + refcount_set(&fnew->refcnt, 1); err = tcf_exts_init(&fnew->exts, net, TCA_FLOWER_ACT, 0); if (err < 0) @@ -1358,7 +1546,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, } err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], ovr, - tp->chain->tmplt_priv, extack); + tp->chain->tmplt_priv, rtnl_held, extack); if (err) goto errout; @@ -1366,169 +1554,247 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, if (err) goto errout; - if (!handle) { - handle = 1; - err = idr_alloc_u32(&head->handle_idr, fnew, &handle, - INT_MAX, GFP_KERNEL); - } else if (!fold) { - /* user specifies a handle and it doesn't exist */ - err = idr_alloc_u32(&head->handle_idr, fnew, &handle, - handle, GFP_KERNEL); - } + err = fl_ht_insert_unique(fnew, fold, &in_ht); if (err) goto errout_mask; - fnew->handle = handle; - - if (!fold && __fl_lookup(fnew->mask, &fnew->mkey)) { - err = -EEXIST; - goto errout_idr; - } - - err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node, - fnew->mask->filter_ht_params); - if (err) - goto errout_idr; if (!tc_skip_hw(fnew->flags)) { - err = fl_hw_replace_filter(tp, fnew, extack); + err = fl_hw_replace_filter(tp, fnew, rtnl_held, extack); if (err) - goto errout_mask_ht; + goto errout_ht; } if (!tc_in_hw(fnew->flags)) fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW; + spin_lock(&tp->lock); + + /* tp was deleted concurrently. -EAGAIN will cause caller to lookup + * proto again or create new one, if necessary. + */ + if (tp->deleting) { + err = -EAGAIN; + goto errout_hw; + } + if (fold) { + /* Fold filter was deleted concurrently. Retry lookup. 
*/ + if (fold->deleted) { + err = -EAGAIN; + goto errout_hw; + } + + fnew->handle = handle; + + if (!in_ht) { + struct rhashtable_params params = + fnew->mask->filter_ht_params; + + err = rhashtable_insert_fast(&fnew->mask->ht, + &fnew->ht_node, + params); + if (err) + goto errout_hw; + in_ht = true; + } + + refcount_inc(&fnew->refcnt); rhashtable_remove_fast(&fold->mask->ht, &fold->ht_node, fold->mask->filter_ht_params); - if (!tc_skip_hw(fold->flags)) - fl_hw_destroy_filter(tp, fold, NULL); - } - - *arg = fnew; - - if (fold) { idr_replace(&head->handle_idr, fnew, fnew->handle); list_replace_rcu(&fold->list, &fnew->list); + fold->deleted = true; + + spin_unlock(&tp->lock); + + fl_mask_put(head, fold->mask); + if (!tc_skip_hw(fold->flags)) + fl_hw_destroy_filter(tp, fold, rtnl_held, NULL); tcf_unbind_filter(tp, &fold->res); - tcf_exts_get_net(&fold->exts); - tcf_queue_work(&fold->rwork, fl_destroy_filter_work); + /* Caller holds reference to fold, so refcnt is always > 0 + * after this. + */ + refcount_dec(&fold->refcnt); + __fl_put(fold); } else { + if (handle) { + /* user specifies a handle and it doesn't exist */ + err = idr_alloc_u32(&head->handle_idr, fnew, &handle, + handle, GFP_ATOMIC); + + /* Filter with specified handle was concurrently + * inserted after initial check in cls_api. This is not + * necessarily an error if NLM_F_EXCL is not set in + * message flags. Returning EAGAIN will cause cls_api to + * try to update concurrently inserted rule. + */ + if (err == -ENOSPC) + err = -EAGAIN; + } else { + handle = 1; + err = idr_alloc_u32(&head->handle_idr, fnew, &handle, + INT_MAX, GFP_ATOMIC); + } + if (err) + goto errout_hw; + + refcount_inc(&fnew->refcnt); + fnew->handle = handle; list_add_tail_rcu(&fnew->list, &fnew->mask->filters); + spin_unlock(&tp->lock); } + *arg = fnew; + kfree(tb); kfree(mask); return 0; -errout_mask_ht: - rhashtable_remove_fast(&fnew->mask->ht, &fnew->ht_node, - fnew->mask->filter_ht_params); - -errout_idr: - if (!fold) - idr_remove(&head->handle_idr, fnew->handle); - +errout_ht: + spin_lock(&tp->lock); +errout_hw: + fnew->deleted = true; + spin_unlock(&tp->lock); + if (!tc_skip_hw(fnew->flags)) + fl_hw_destroy_filter(tp, fnew, rtnl_held, NULL); + if (in_ht) + rhashtable_remove_fast(&fnew->mask->ht, &fnew->ht_node, + fnew->mask->filter_ht_params); errout_mask: - fl_mask_put(head, fnew->mask, false); - + fl_mask_put(head, fnew->mask); errout: - tcf_exts_destroy(&fnew->exts); - kfree(fnew); + __fl_put(fnew); errout_tb: kfree(tb); errout_mask_alloc: kfree(mask); +errout_fold: + if (fold) + __fl_put(fold); return err; } static int fl_delete(struct tcf_proto *tp, void *arg, bool *last, bool rtnl_held, struct netlink_ext_ack *extack) { - struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_head *head = fl_head_dereference(tp); struct cls_fl_filter *f = arg; + bool last_on_mask; + int err = 0; - rhashtable_remove_fast(&f->mask->ht, &f->ht_node, - f->mask->filter_ht_params); - __fl_delete(tp, f, extack); + err = __fl_delete(tp, f, &last_on_mask, rtnl_held, extack); *last = list_empty(&head->masks); - return 0; + __fl_put(f); + + return err; } static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg, bool rtnl_held) { - struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *f; arg->count = arg->skip; - while ((f = idr_get_next_ul(&head->handle_idr, - &arg->cookie)) != NULL) { + while ((f = fl_get_next_filter(tp, &arg->cookie)) != NULL) { if (arg->fn(tp, f, arg) < 0) { + __fl_put(f); arg->stop = 1; break; } - 
arg->cookie = f->handle + 1; + __fl_put(f); + arg->cookie++; arg->count++; } } +static struct cls_fl_filter * +fl_get_next_hw_filter(struct tcf_proto *tp, struct cls_fl_filter *f, bool add) +{ + struct cls_fl_head *head = fl_head_dereference(tp); + + spin_lock(&tp->lock); + if (list_empty(&head->hw_filters)) { + spin_unlock(&tp->lock); + return NULL; + } + + if (!f) + f = list_entry(&head->hw_filters, struct cls_fl_filter, + hw_list); + list_for_each_entry_continue(f, &head->hw_filters, hw_list) { + if (!(add && f->deleted) && refcount_inc_not_zero(&f->refcnt)) { + spin_unlock(&tp->lock); + return f; + } + } + + spin_unlock(&tp->lock); + return NULL; +} + static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, void *cb_priv, struct netlink_ext_ack *extack) { - struct cls_fl_head *head = rtnl_dereference(tp->root); struct tc_cls_flower_offload cls_flower = {}; struct tcf_block *block = tp->chain->block; - struct fl_flow_mask *mask; - struct cls_fl_filter *f; + struct cls_fl_filter *f = NULL; int err; - list_for_each_entry(mask, &head->masks, list) { - list_for_each_entry(f, &mask->filters, list) { - if (tc_skip_hw(f->flags)) - continue; - - cls_flower.rule = - flow_rule_alloc(tcf_exts_num_actions(&f->exts)); - if (!cls_flower.rule) - return -ENOMEM; - - tc_cls_common_offload_init(&cls_flower.common, tp, - f->flags, extack); - cls_flower.command = add ? - TC_CLSFLOWER_REPLACE : TC_CLSFLOWER_DESTROY; - cls_flower.cookie = (unsigned long)f; - cls_flower.rule->match.dissector = &mask->dissector; - cls_flower.rule->match.mask = &mask->key; - cls_flower.rule->match.key = &f->mkey; - - err = tc_setup_flow_action(&cls_flower.rule->action, - &f->exts); - if (err) { - kfree(cls_flower.rule); - if (tc_skip_sw(f->flags)) { - NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); - return err; - } - continue; + /* hw_filters list can only be changed by hw offload functions after + * obtaining rtnl lock. Make sure it is not changed while reoffload is + * iterating it. + */ + ASSERT_RTNL(); + + while ((f = fl_get_next_hw_filter(tp, f, add))) { + cls_flower.rule = + flow_rule_alloc(tcf_exts_num_actions(&f->exts)); + if (!cls_flower.rule) { + __fl_put(f); + return -ENOMEM; + } + + tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, + extack); + cls_flower.command = add ? 
+ TC_CLSFLOWER_REPLACE : TC_CLSFLOWER_DESTROY; + cls_flower.cookie = (unsigned long)f; + cls_flower.rule->match.dissector = &f->mask->dissector; + cls_flower.rule->match.mask = &f->mask->key; + cls_flower.rule->match.key = &f->mkey; + + err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts); + if (err) { + kfree(cls_flower.rule); + if (tc_skip_sw(f->flags)) { + NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); + __fl_put(f); + return err; } + goto next_flow; + } - cls_flower.classid = f->res.classid; + cls_flower.classid = f->res.classid; - err = cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv); - kfree(cls_flower.rule); + err = cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv); + kfree(cls_flower.rule); - if (err) { - if (add && tc_skip_sw(f->flags)) - return err; - continue; + if (err) { + if (add && tc_skip_sw(f->flags)) { + __fl_put(f); + return err; } - - tc_cls_offload_cnt_update(block, &f->in_hw_count, - &f->flags, add); + goto next_flow; } + + spin_lock(&tp->lock); + tc_cls_offload_cnt_update(block, &f->in_hw_count, &f->flags, + add); + spin_unlock(&tp->lock); +next_flow: + __fl_put(f); } return 0; @@ -1587,8 +1853,8 @@ static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain, tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL); if (!tb) return ERR_PTR(-ENOBUFS); - err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], - fl_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_FLOWER_MAX, + tca[TCA_OPTIONS], fl_policy, NULL); if (err) goto errout_tb; @@ -1786,7 +2052,7 @@ static int fl_dump_key_geneve_opt(struct sk_buff *skb, struct nlattr *nest; int opt_off = 0; - nest = nla_nest_start(skb, TCA_FLOWER_KEY_ENC_OPTS_GENEVE); + nest = nla_nest_start_noflag(skb, TCA_FLOWER_KEY_ENC_OPTS_GENEVE); if (!nest) goto nla_put_failure; @@ -1822,7 +2088,7 @@ static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type, if (!enc_opts->len) return 0; - nest = nla_nest_start(skb, enc_opt_type); + nest = nla_nest_start_noflag(skb, enc_opt_type); if (!nest) goto nla_put_failure; @@ -2061,31 +2327,37 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh, struct cls_fl_filter *f = fh; struct nlattr *nest; struct fl_flow_key *key, *mask; + bool skip_hw; if (!f) return skb->len; t->tcm_handle = f->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (!nest) goto nla_put_failure; + spin_lock(&tp->lock); + if (f->res.classid && nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid)) - goto nla_put_failure; + goto nla_put_failure_locked; key = &f->key; mask = &f->mask->key; + skip_hw = tc_skip_hw(f->flags); if (fl_dump_key(skb, net, key, mask)) - goto nla_put_failure; - - if (!tc_skip_hw(f->flags)) - fl_hw_update_stats(tp, f); + goto nla_put_failure_locked; if (f->flags && nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags)) - goto nla_put_failure; + goto nla_put_failure_locked; + + spin_unlock(&tp->lock); + + if (!skip_hw) + fl_hw_update_stats(tp, f, rtnl_held); if (nla_put_u32(skb, TCA_FLOWER_IN_HW_COUNT, f->in_hw_count)) goto nla_put_failure; @@ -2100,6 +2372,8 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh, return skb->len; +nla_put_failure_locked: + spin_unlock(&tp->lock); nla_put_failure: nla_nest_cancel(skb, nest); return -1; @@ -2111,7 +2385,7 @@ static int fl_tmplt_dump(struct sk_buff *skb, struct net *net, void *tmplt_priv) struct fl_flow_key *key, *mask; struct nlattr *nest; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, 
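fl_walk() and fl_reoffload() above share an iteration discipline: each step pins the next live filter with a reference before the RCU section or tp->lock is dropped, runs the callback, then releases it, so traversal no longer depends on RTNL keeping the list frozen. The generic shape of the IDR variant used by fl_walk(), with obj_get()/obj_put() as sketched earlier (names hypothetical):

static void walk_objects(struct idr *table,
                         int (*fn)(struct obj *o, void *priv), void *priv)
{
        unsigned long cookie = 0;
        struct obj *o;

        rcu_read_lock();
        while ((o = idr_get_next_ul(table, &cookie)) != NULL) {
                if (!refcount_inc_not_zero(&o->refcnt)) {
                        cookie++;       /* skip an object being deleted */
                        continue;
                }
                rcu_read_unlock();

                /* Object is pinned: fn() may sleep or take locks. */
                if (fn(o, priv) < 0) {
                        obj_put(o);
                        return;
                }
                obj_put(o);
                cookie++;

                rcu_read_lock();
        }
        rcu_read_unlock();
}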
TCA_OPTIONS); if (!nest) goto nla_put_failure; @@ -2144,6 +2418,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = { .init = fl_init, .destroy = fl_destroy, .get = fl_get, + .put = fl_put, .change = fl_change, .delete = fl_delete, .walk = fl_walk, @@ -2154,6 +2429,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = { .tmplt_destroy = fl_tmplt_destroy, .tmplt_dump = fl_tmplt_dump, .owner = THIS_MODULE, + .flags = TCF_PROTO_OPS_DOIT_UNLOCKED, }; static int __init cls_fl_init(void) diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index ad036b00427d..1d0b39c3932f 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -263,7 +263,8 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, if (!opt) return handle ? -EINVAL : 0; /* Succeed if it is old method. */ - err = nla_parse_nested(tb, TCA_FW_MAX, opt, fw_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_FW_MAX, opt, fw_policy, + NULL); if (err < 0) return err; @@ -402,7 +403,7 @@ static int fw_dump(struct net *net, struct tcf_proto *tp, void *fh, if (!f->res.classid && !tcf_exts_has_actions(&f->exts)) return skb->len; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index a13bc351a414..1e98a517fb0b 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -32,6 +32,9 @@ static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp, { struct cls_mall_head *head = rcu_dereference_bh(tp->root); + if (unlikely(!head)) + return -1; + if (tc_skip_sw(head->flags)) return -1; @@ -89,12 +92,29 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, bool skip_sw = tc_skip_sw(head->flags); int err; + cls_mall.rule = flow_rule_alloc(tcf_exts_num_actions(&head->exts)); + if (!cls_mall.rule) + return -ENOMEM; + tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack); cls_mall.command = TC_CLSMATCHALL_REPLACE; - cls_mall.exts = &head->exts; cls_mall.cookie = cookie; + err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts); + if (err) { + kfree(cls_mall.rule); + mall_destroy_hw_filter(tp, head, cookie, NULL); + if (skip_sw) + NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); + else + err = 0; + + return err; + } + err = tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, skip_sw); + kfree(cls_mall.rule); + if (err < 0) { mall_destroy_hw_filter(tp, head, cookie, NULL); return err; @@ -181,8 +201,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, if (head) return -EEXIST; - err = nla_parse_nested(tb, TCA_MATCHALL_MAX, tca[TCA_OPTIONS], - mall_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_MATCHALL_MAX, + tca[TCA_OPTIONS], mall_policy, NULL); if (err < 0) return err; @@ -272,13 +292,27 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, if (tc_skip_hw(head->flags)) return 0; + cls_mall.rule = flow_rule_alloc(tcf_exts_num_actions(&head->exts)); + if (!cls_mall.rule) + return -ENOMEM; + tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack); cls_mall.command = add ? 
TC_CLSMATCHALL_REPLACE : TC_CLSMATCHALL_DESTROY; - cls_mall.exts = &head->exts; cls_mall.cookie = (unsigned long)head; + err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts); + if (err) { + kfree(cls_mall.rule); + if (add && tc_skip_sw(head->flags)) { + NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); + return err; + } + } + err = cb(TC_SETUP_CLSMATCHALL, &cls_mall, cb_priv); + kfree(cls_mall.rule); + if (err) { if (add && tc_skip_sw(head->flags)) return err; @@ -290,6 +324,23 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, return 0; } +static void mall_stats_hw_filter(struct tcf_proto *tp, + struct cls_mall_head *head, + unsigned long cookie) +{ + struct tc_cls_matchall_offload cls_mall = {}; + struct tcf_block *block = tp->chain->block; + + tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, NULL); + cls_mall.command = TC_CLSMATCHALL_STATS; + cls_mall.cookie = cookie; + + tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false); + + tcf_exts_stats_update(&head->exts, cls_mall.stats.bytes, + cls_mall.stats.pkts, cls_mall.stats.lastused); +} + static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { @@ -301,9 +352,12 @@ static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh, if (!head) return skb->len; + if (!tc_skip_hw(head->flags)) + mall_stats_hw_filter(tp, head, (unsigned long)head); + t->tcm_handle = head->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (!nest) goto nla_put_failure; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index f006af23b64a..eeff5bbfb912 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -484,7 +484,8 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, if (opt == NULL) return handle ? -EINVAL : 0; - err = nla_parse_nested(tb, TCA_ROUTE4_MAX, opt, route4_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_ROUTE4_MAX, opt, + route4_policy, NULL); if (err < 0) return err; @@ -607,7 +608,7 @@ static int route4_dump(struct net *net, struct tcf_proto *tp, void *fh, t->tcm_handle = f->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 0719a21d9c41..a4688bb92f43 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -497,7 +497,8 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, if (opt == NULL) return handle ? 
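After the matchall changes above, flower and matchall hand hardware the same intermediate representation instead of raw tcf_exts: allocate a flow_rule sized for the action list, translate with tc_setup_flow_action(), run the block callbacks, free the rule. The rule is only a messenger and is never retained. Condensed from the hunks above (block, head and skip_sw as in mall_replace_hw_filter(); the destroy-on-error step is elided):

struct tc_cls_matchall_offload cls_mall = {};
int err;

cls_mall.rule = flow_rule_alloc(tcf_exts_num_actions(&head->exts));
if (!cls_mall.rule)
        return -ENOMEM;

err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts);
if (err) {
        kfree(cls_mall.rule);
        return skip_sw ? err : 0;       /* untranslatable action */
}

err = tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, skip_sw);
kfree(cls_mall.rule);                   /* the IR is not kept around */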
-EINVAL : 0; - err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_RSVP_MAX, opt, rsvp_policy, + NULL); if (err < 0) return err; @@ -706,7 +707,7 @@ static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh, t->tcm_handle = f->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 24e0a62a65cc..9f4f4203c388 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -510,7 +510,8 @@ tcindex_change(struct net *net, struct sk_buff *in_skb, if (!opt) return 0; - err = nla_parse_nested(tb, TCA_TCINDEX_MAX, opt, tcindex_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_TCINDEX_MAX, opt, + tcindex_policy, NULL); if (err < 0) return err; @@ -601,7 +602,7 @@ static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh, tp, fh, skb, t, p, r); pr_debug("p->perfect %p p->h %p\n", p->perfect, p->h); - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 48e76a3acf8a..4b8710a266cc 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -847,7 +847,7 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp, /* Similarly success statistics must be moved as pointers */ new->pcpu_success = n->pcpu_success; #endif - memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key)); + memcpy(&new->sel, s, struct_size(s, keys, s->nkeys)); if (tcf_exts_init(&new->exts, net, TCA_U32_ACT, TCA_U32_POLICE)) { kfree(new); @@ -884,7 +884,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, } } - err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_U32_MAX, opt, u32_policy, + extack); if (err < 0) return err; @@ -1294,7 +1295,7 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh, t->tcm_handle = n->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/em_ipt.c b/net/sched/em_ipt.c index a5f34e930eff..60c26b8294b5 100644 --- a/net/sched/em_ipt.c +++ b/net/sched/em_ipt.c @@ -120,8 +120,8 @@ static int em_ipt_change(struct net *net, void *data, int data_len, struct xt_match *match; int mdata_len, ret; - ret = nla_parse(tb, TCA_EM_IPT_MAX, data, data_len, em_ipt_policy, - NULL); + ret = nla_parse_deprecated(tb, TCA_EM_IPT_MAX, data, data_len, + em_ipt_policy, NULL); if (ret < 0) return ret; diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index d6e97115500b..28dfa8f2a4ea 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -912,7 +912,8 @@ static int em_meta_change(struct net *net, void *data, int len, struct tcf_meta_hdr *hdr; struct meta_match *meta = NULL; - err = nla_parse(tb, TCA_EM_META_MAX, data, len, meta_policy, NULL); + err = nla_parse_deprecated(tb, TCA_EM_META_MAX, data, len, + meta_policy, NULL); if (err < 0) goto errout; diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 1331a4c2d8ff..7b86c2a44746 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -314,7 +314,8 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla, if (!nla) return 0; - err = nla_parse_nested(tb, TCA_EMATCH_TREE_MAX, nla, em_policy, NULL); + err = 
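The cls_u32 hunk above replaces an open-coded flexible-array size computation with struct_size() from <linux/overflow.h>, which checks the multiply-and-add for overflow and saturates to SIZE_MAX instead of wrapping to a small value. Equivalent, for a selector whose keys[] is a trailing flexible array:

#include <linux/overflow.h>

static size_t u32_sel_size(const struct tc_u32_sel *s)
{
        /* Same value as sizeof(*s) + s->nkeys * sizeof(struct tc_u32_key)
         * when nothing overflows; SIZE_MAX (a guaranteed-failing allocation
         * size) when it would. */
        return struct_size(s, keys, s->nkeys);
}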
nla_parse_nested_deprecated(tb, TCA_EMATCH_TREE_MAX, nla, + em_policy, NULL); if (err < 0) goto errout; @@ -440,14 +441,14 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv) struct nlattr *top_start; struct nlattr *list_start; - top_start = nla_nest_start(skb, tlv); + top_start = nla_nest_start_noflag(skb, tlv); if (top_start == NULL) goto nla_put_failure; if (nla_put(skb, TCA_EMATCH_TREE_HDR, sizeof(tree->hdr), &tree->hdr)) goto nla_put_failure; - list_start = nla_nest_start(skb, TCA_EMATCH_TREE_LIST); + list_start = nla_nest_start_noflag(skb, TCA_EMATCH_TREE_LIST); if (list_start == NULL) goto nla_put_failure; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index fb8f138b9776..607e84d67c33 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -479,7 +479,8 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt, u16 *tab = NULL; int err; - err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy, + extack); if (err < 0) return ERR_PTR(err); if (!tb[TCA_STAB_BASE]) { @@ -542,7 +543,7 @@ static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab) { struct nlattr *nest; - nest = nla_nest_start(skb, TCA_STAB); + nest = nla_nest_start_noflag(skb, TCA_STAB); if (nest == NULL) goto nla_put_failure; if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts)) @@ -998,6 +999,19 @@ static void notify_and_destroy(struct net *net, struct sk_buff *skb, qdisc_put(old); } +static void qdisc_clear_nolock(struct Qdisc *sch) +{ + sch->flags &= ~TCQ_F_NOLOCK; + if (!(sch->flags & TCQ_F_CPUSTATS)) + return; + + free_percpu(sch->cpu_bstats); + free_percpu(sch->cpu_qstats); + sch->cpu_bstats = NULL; + sch->cpu_qstats = NULL; + sch->flags &= ~TCQ_F_CPUSTATS; +} + /* Graft qdisc "new" to class "classid" of qdisc "parent" or * to device "dev". * @@ -1076,7 +1090,7 @@ skip: /* Only support running class lockless if parent is lockless */ if (new && (new->flags & TCQ_F_NOLOCK) && parent && !(parent->flags & TCQ_F_NOLOCK)) - new->flags &= ~TCQ_F_NOLOCK; + qdisc_clear_nolock(new); if (!cops || !cops->graft) return -EOPNOTSUPP; @@ -1410,8 +1424,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, - extack); + err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, + rtm_tca_policy, extack); if (err < 0) return err; @@ -1495,8 +1509,8 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, replay: /* Reinit, just in case something touches this. 
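Worth pausing on the qdisc_clear_nolock() helper introduced in the sch_api.c hunk above: the stats helpers choose between per-CPU and shared counters based on TCQ_F_CPUSTATS, so when a lockless child is grafted under a locked parent, clearing TCQ_F_NOLOCK alone would leave the qdisc accounting on the per-CPU path while running locked. Below is a minimal userspace sketch of that invariant; the sketch_ names, the flag values, and calloc()/free() standing in for the kernel's per-CPU allocators are illustrative assumptions, not the kernel API.

/* Minimal userspace sketch: once NOLOCK is dropped, the per-CPU stats
 * must be released and CPUSTATS cleared too, or later stats code would
 * keep taking the per-CPU path. Flag values are illustrative. */
#include <stdio.h>
#include <stdlib.h>

#define SKETCH_F_NOLOCK   0x100
#define SKETCH_F_CPUSTATS 0x004

struct sketch_qdisc {
	unsigned int flags;
	long *cpu_bstats;	/* stands in for per-CPU byte counters */
	long *cpu_qstats;	/* stands in for per-CPU queue counters */
};

static void sketch_clear_nolock(struct sketch_qdisc *sch)
{
	sch->flags &= ~SKETCH_F_NOLOCK;
	if (!(sch->flags & SKETCH_F_CPUSTATS))
		return;

	/* mirrors the hunk above: free both stat arrays, clear the flag */
	free(sch->cpu_bstats);
	free(sch->cpu_qstats);
	sch->cpu_bstats = NULL;
	sch->cpu_qstats = NULL;
	sch->flags &= ~SKETCH_F_CPUSTATS;
}

int main(void)
{
	struct sketch_qdisc q = {
		.flags = SKETCH_F_NOLOCK | SKETCH_F_CPUSTATS,
		.cpu_bstats = calloc(4, sizeof(long)),
		.cpu_qstats = calloc(4, sizeof(long)),
	};

	sketch_clear_nolock(&q);
	printf("flags=%#x, bstats=%p, qstats=%p\n",
	       q.flags, (void *)q.cpu_bstats, (void *)q.cpu_qstats);
	return 0;
}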
*/ - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, - extack); + err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, + rtm_tca_policy, extack); if (err < 0) return err; @@ -1730,8 +1744,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) idx = 0; ASSERT_RTNL(); - err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, - rtm_tca_policy, cb->extack); + err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX, + rtm_tca_policy, cb->extack); if (err < 0) return err; @@ -1959,8 +1973,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, - extack); + err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, + rtm_tca_policy, extack); if (err < 0) return err; diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index d714d3747bcb..ae506c7906cd 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -223,7 +223,8 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, if (opt == NULL) return -EINVAL; - error = nla_parse_nested(tb, TCA_ATM_MAX, opt, atm_policy, NULL); + error = nla_parse_nested_deprecated(tb, TCA_ATM_MAX, opt, atm_policy, + NULL); if (error < 0) return error; @@ -609,7 +610,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, tcm->tcm_handle = flow->common.classid; tcm->tcm_info = flow->q->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 259d97bc2abd..53a80bc6b13a 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -2531,7 +2531,8 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt, if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_CAKE_MAX, opt, cake_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_CAKE_MAX, opt, cake_policy, + extack); if (err < 0) return err; @@ -2735,7 +2736,7 @@ static int cake_dump(struct Qdisc *sch, struct sk_buff *skb) struct cake_sched_data *q = qdisc_priv(sch); struct nlattr *opts; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (!opts) goto nla_put_failure; @@ -2806,7 +2807,7 @@ nla_put_failure: static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { - struct nlattr *stats = nla_nest_start(d->skb, TCA_STATS_APP); + struct nlattr *stats = nla_nest_start_noflag(d->skb, TCA_STATS_APP); struct cake_sched_data *q = qdisc_priv(sch); struct nlattr *tstats, *ts; int i; @@ -2836,7 +2837,7 @@ static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d) #undef PUT_STAT_U32 #undef PUT_STAT_U64 - tstats = nla_nest_start(d->skb, TCA_CAKE_STATS_TIN_STATS); + tstats = nla_nest_start_noflag(d->skb, TCA_CAKE_STATS_TIN_STATS); if (!tstats) goto nla_put_failure; @@ -2853,7 +2854,7 @@ static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d) for (i = 0; i < q->tin_cnt; i++) { struct cake_tin_data *b = &q->tins[q->tin_order[i]]; - ts = nla_nest_start(d->skb, i + 1); + ts = nla_nest_start_noflag(d->skb, i + 1); if (!ts) goto nla_put_failure; @@ -2973,7 +2974,7 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl, if (flow) { ktime_t now = ktime_get(); - stats = nla_nest_start(d->skb, TCA_STATS_APP); + stats = nla_nest_start_noflag(d->skb, TCA_STATS_APP); if (!stats) return -1; diff --git a/net/sched/sch_cbq.c 
b/net/sched/sch_cbq.c index 114b9048ea7e..ba4b33b74dd8 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1149,7 +1149,8 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt, return -EINVAL; } - err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_CBQ_MAX, opt, cbq_policy, + extack); if (err < 0) return err; @@ -1305,7 +1306,7 @@ static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb) struct cbq_sched_data *q = qdisc_priv(sch); struct nlattr *nest; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; if (cbq_dump_attr(skb, &q->link) < 0) @@ -1340,7 +1341,7 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg, tcm->tcm_handle = cl->common.classid; tcm->tcm_info = cl->q->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; if (cbq_dump_attr(skb, cl) < 0) @@ -1473,7 +1474,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t return -EINVAL; } - err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_CBQ_MAX, opt, cbq_policy, + extack); if (err < 0) return err; diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index c6a502933fe7..8077c846f5bf 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -61,16 +61,20 @@ #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> +#include <net/netevent.h> #include <net/netlink.h> #include <net/sch_generic.h> #include <net/pkt_sched.h> +static LIST_HEAD(cbs_list); +static DEFINE_SPINLOCK(cbs_list_lock); + #define BYTES_PER_KBIT (1000LL / 8) struct cbs_sched_data { bool offload; int queue; - s64 port_rate; /* in bytes/s */ + atomic64_t port_rate; /* in bytes/s */ s64 last; /* timestamp in ns */ s64 credits; /* in bytes */ s32 locredit; /* in bytes */ @@ -82,6 +86,7 @@ struct cbs_sched_data { struct sk_buff **to_free); struct sk_buff *(*dequeue)(struct Qdisc *sch); struct Qdisc *qdisc; + struct list_head cbs_list; }; static int cbs_child_enqueue(struct sk_buff *skb, struct Qdisc *sch, @@ -181,6 +186,11 @@ static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch) s64 credits; int len; + if (atomic64_read(&q->port_rate) == -1) { + WARN_ONCE(1, "cbs: dequeue() called with unknown port rate."); + return NULL; + } + if (q->credits < 0) { credits = timediff_to_credits(now - q->last, q->idleslope); @@ -207,7 +217,8 @@ static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch) /* As sendslope is a negative number, this will decrease the * amount of q->credits. 
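Since the sch_cbs.c hunks that follow mostly swap direct q->port_rate reads for atomic64_read(), it helps to recall what the surrounding arithmetic does: CBS earns credits at idleslope while a frame waits and, as the sendslope comment above notes, burns them at the negative sendslope for as long as a frame occupies the wire at port_rate. A compilable userspace sketch of that bookkeeping follows; the helper names mirror timediff_to_credits()/credits_from_len() from the patch, but the plain 64-bit division and the example rates are simplifying assumptions (the kernel uses div64 helpers and scaled units).

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000LL

/* credits gained while waiting for delta_ns at idleslope bytes/s */
static int64_t timediff_to_credits(int64_t delta_ns, int64_t slope)
{
	return delta_ns * slope / NSEC_PER_SEC;
}

/* credits consumed sending len bytes: time-on-wire charged at sendslope,
 * which is negative (idleslope - port_rate) */
static int64_t credits_from_len(int64_t len, int64_t slope, int64_t port_rate)
{
	return len * slope / port_rate;
}

int main(void)
{
	int64_t port_rate = 125000000;		/* 1 Gb/s in bytes/s */
	int64_t idleslope = 25000000 / 8;	/* 25 Mb/s reservation */
	int64_t sendslope = idleslope - port_rate;
	int64_t credits = 0;

	credits += timediff_to_credits(100000, idleslope); /* 100 us idle */
	if (credits >= 0)				   /* gate opens */
		credits += credits_from_len(1500, sendslope, port_rate);

	printf("credits after one MTU frame: %lld\n", (long long)credits);
	return 0;
}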
*/ - credits = credits_from_len(len, q->sendslope, q->port_rate); + credits = credits_from_len(len, q->sendslope, + atomic64_read(&q->port_rate)); credits += q->credits; q->credits = max_t(s64, credits, q->locredit); @@ -294,6 +305,50 @@ static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q, return 0; } +static void cbs_set_port_rate(struct net_device *dev, struct cbs_sched_data *q) +{ + struct ethtool_link_ksettings ecmd; + int port_rate = -1; + + if (!__ethtool_get_link_ksettings(dev, &ecmd) && + ecmd.base.speed != SPEED_UNKNOWN) + port_rate = ecmd.base.speed * 1000 * BYTES_PER_KBIT; + + atomic64_set(&q->port_rate, port_rate); + netdev_dbg(dev, "cbs: set %s's port_rate to: %lld, linkspeed: %d\n", + dev->name, (long long)atomic64_read(&q->port_rate), + ecmd.base.speed); +} + +static int cbs_dev_notifier(struct notifier_block *nb, unsigned long event, + void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct cbs_sched_data *q; + struct net_device *qdev; + bool found = false; + + ASSERT_RTNL(); + + if (event != NETDEV_UP && event != NETDEV_CHANGE) + return NOTIFY_DONE; + + spin_lock(&cbs_list_lock); + list_for_each_entry(q, &cbs_list, cbs_list) { + qdev = qdisc_dev(q->qdisc); + if (qdev == dev) { + found = true; + break; + } + } + spin_unlock(&cbs_list_lock); + + if (found) + cbs_set_port_rate(dev, q); + + return NOTIFY_DONE; +} + static int cbs_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { @@ -303,7 +358,8 @@ static int cbs_change(struct Qdisc *sch, struct nlattr *opt, struct tc_cbs_qopt *qopt; int err; - err = nla_parse_nested(tb, TCA_CBS_MAX, opt, cbs_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_CBS_MAX, opt, cbs_policy, + extack); if (err < 0) return err; @@ -315,16 +371,7 @@ static int cbs_change(struct Qdisc *sch, struct nlattr *opt, qopt = nla_data(tb[TCA_CBS_PARMS]); if (!qopt->offload) { - struct ethtool_link_ksettings ecmd; - s64 link_speed; - - if (!__ethtool_get_link_ksettings(dev, &ecmd)) - link_speed = ecmd.base.speed; - else - link_speed = SPEED_1000; - - q->port_rate = link_speed * 1000 * BYTES_PER_KBIT; - + cbs_set_port_rate(dev, q); cbs_disable_offload(dev, q); } else { err = cbs_enable_offload(dev, q, qopt, extack); @@ -347,6 +394,7 @@ static int cbs_init(struct Qdisc *sch, struct nlattr *opt, { struct cbs_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); + int err; if (!opt) { NL_SET_ERR_MSG(extack, "Missing CBS qdisc options which are mandatory"); @@ -367,7 +415,17 @@ static int cbs_init(struct Qdisc *sch, struct nlattr *opt, qdisc_watchdog_init(&q->watchdog, sch); - return cbs_change(sch, opt, extack); + err = cbs_change(sch, opt, extack); + if (err) + return err; + + if (!q->offload) { + spin_lock(&cbs_list_lock); + list_add(&q->cbs_list, &cbs_list); + spin_unlock(&cbs_list_lock); + } + + return 0; } static void cbs_destroy(struct Qdisc *sch) @@ -375,8 +433,11 @@ static void cbs_destroy(struct Qdisc *sch) struct cbs_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); - qdisc_watchdog_cancel(&q->watchdog); + spin_lock(&cbs_list_lock); + list_del(&q->cbs_list); + spin_unlock(&cbs_list_lock); + qdisc_watchdog_cancel(&q->watchdog); cbs_disable_offload(dev, q); if (q->qdisc) @@ -389,7 +450,7 @@ static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb) struct tc_cbs_qopt opt = { }; struct nlattr *nest; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (!nest) goto nla_put_failure; 
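One pattern worth calling out, since it accounts for most of the churn across these files: as part of the strict netlink validation rework, nla_nest_start() now sets NLA_F_NESTED on the container attribute and nla_parse_nested() validates strictly, so every legacy tc entry point is mechanically moved to nla_nest_start_noflag()/nla_parse_nested_deprecated() to preserve the existing wire format and permissive parsing. The userspace sketch below only demonstrates the NLA_F_NESTED bit itself; sketch_nlattr and nest_start() are illustrative stand-ins, not kernel or libnl API.

#include <stdio.h>
#include <stdint.h>

#define NLA_F_NESTED (1 << 15)	/* matches include/uapi/linux/netlink.h */

struct sketch_nlattr {
	uint16_t nla_len;
	uint16_t nla_type;
};

/* flagged=1 models nla_nest_start(), flagged=0 nla_nest_start_noflag() */
static void nest_start(struct sketch_nlattr *nla, uint16_t type, int flagged)
{
	nla->nla_len = 4;	/* header only, no payload yet */
	nla->nla_type = flagged ? (type | NLA_F_NESTED) : type;
}

int main(void)
{
	struct sketch_nlattr legacy, strict;

	nest_start(&legacy, 2 /* TCA_OPTIONS */, 0);
	nest_start(&strict, 2 /* TCA_OPTIONS */, 1);

	printf("noflag container type=%#x, flagged type=%#x\n",
	       legacy.nla_type, strict.nla_type);
	return 0;
}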
@@ -487,14 +548,24 @@ static struct Qdisc_ops cbs_qdisc_ops __read_mostly = { .owner = THIS_MODULE, }; +static struct notifier_block cbs_device_notifier = { + .notifier_call = cbs_dev_notifier, +}; + static int __init cbs_module_init(void) { + int err = register_netdevice_notifier(&cbs_device_notifier); + + if (err) + return err; + return register_qdisc(&cbs_qdisc_ops); } static void __exit cbs_module_exit(void) { unregister_qdisc(&cbs_qdisc_ops); + unregister_netdevice_notifier(&cbs_device_notifier); } module_init(cbs_module_init) module_exit(cbs_module_exit) diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index eafc0d17d174..370dbcf49e8b 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -358,7 +358,8 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt, if (opt == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_CHOKE_MAX, opt, + choke_policy, NULL); if (err < 0) return err; @@ -452,7 +453,7 @@ static int choke_dump(struct Qdisc *sch, struct sk_buff *skb) .Scell_log = q->parms.Scell_log, }; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index 17cd81f84b5d..25ef172c23df 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -141,7 +141,8 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt, if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_CODEL_MAX, opt, codel_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_CODEL_MAX, opt, + codel_policy, NULL); if (err < 0) return err; @@ -217,7 +218,7 @@ static int codel_dump(struct Qdisc *sch, struct sk_buff *skb) struct codel_sched_data *q = qdisc_priv(sch); struct nlattr *opts; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 430df9a55ec4..ffcd6654c39d 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -70,7 +70,8 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, return -EINVAL; } - err = nla_parse_nested(tb, TCA_DRR_MAX, opt, drr_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_DRR_MAX, opt, drr_policy, + extack); if (err < 0) return err; @@ -244,7 +245,7 @@ static int drr_dump_class(struct Qdisc *sch, unsigned long arg, tcm->tcm_handle = cl->common.classid; tcm->tcm_info = cl->qdisc->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; if (nla_put_u32(skb, TCA_DRR_QUANTUM, cl->quantum)) diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 42471464ded3..3deeb06eaecf 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -132,7 +132,8 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent, if (!opt) goto errout; - err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_DSMARK_MAX, opt, + dsmark_policy, NULL); if (err < 0) goto errout; @@ -353,7 +354,8 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt, if (err) return err; - err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_DSMARK_MAX, opt, + dsmark_policy, NULL); if (err < 0) goto errout; @@ -432,7 +434,7 @@ static int 
dsmark_dump_class(struct Qdisc *sch, unsigned long cl, tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl - 1); tcm->tcm_info = p->q->handle; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; if (nla_put_u8(skb, TCA_DSMARK_MASK, p->mv[cl - 1].mask) || @@ -451,7 +453,7 @@ static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb) struct dsmark_qdisc_data *p = qdisc_priv(sch); struct nlattr *opts = NULL; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; if (nla_put_u16(skb, TCA_DSMARK_INDICES, p->indices)) diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c index 1150f22983df..db0c2ba1d156 100644 --- a/net/sched/sch_etf.c +++ b/net/sched/sch_etf.c @@ -351,7 +351,8 @@ static int etf_init(struct Qdisc *sch, struct nlattr *opt, return -EINVAL; } - err = nla_parse_nested(tb, TCA_ETF_MAX, opt, etf_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_ETF_MAX, opt, etf_policy, + extack); if (err < 0) return err; @@ -460,7 +461,7 @@ static int etf_dump(struct Qdisc *sch, struct sk_buff *skb) struct tc_etf_qopt opt = { }; struct nlattr *nest; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (!nest) goto nla_put_failure; diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 1a662f2bb7bb..26a94e5cd5df 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -54,10 +54,23 @@ #include <net/tcp_states.h> #include <net/tcp.h> +struct fq_skb_cb { + u64 time_to_send; +}; + +static inline struct fq_skb_cb *fq_skb_cb(struct sk_buff *skb) +{ + qdisc_cb_private_validate(skb, sizeof(struct fq_skb_cb)); + return (struct fq_skb_cb *)qdisc_skb_cb(skb)->data; +} + /* - * Per flow structure, dynamically allocated + * Per flow structure, dynamically allocated. + * If packets have monotonically increasing time_to_send, they are placed in O(1) + * in a linear list (head,tail); otherwise they are placed in an rbtree (t_root). */ struct fq_flow { + struct rb_root t_root; struct sk_buff *head; /* list of skbs for this flow : first skb */ union { struct sk_buff *tail; /* last skb in the list */ @@ -257,6 +270,17 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) */ sk = (struct sock *)((hash << 1) | 1UL); skb_orphan(skb); + } else if (sk->sk_state == TCP_CLOSE) { + unsigned long hash = skb_get_hash(skb) & q->orphan_mask; + /* + * Sockets in TCP_CLOSE are not connected. + * Typical use case is UDP sockets; they can send packets + * with sendto() to many different destinations. + * We probably could use a generic bit advertising + * unconnected sockets, instead of sk_state == TCP_CLOSE, + * if we care enough. 
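The fq_flow comment earlier in this hunk captures the new sch_fq design: timestamps that arrive in non-decreasing order stay on the O(1) head/tail list, only out-of-order packets pay for sorted insertion into t_root, and dequeue peeks the smaller of the two heads (see the flow_queue_add()/fq_peek() hunks further below). Here is a self-contained userspace sketch of that split; a sorted singly linked list stands in for the kernel's rbtree, so the fallback path is O(n) here instead of O(log n).

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

struct pkt {
	uint64_t time_to_send;
	struct pkt *next;
};

struct flow {
	struct pkt *head, *tail;	/* in-order fast path */
	struct pkt *t_root;		/* sorted fallback (rbtree in fq) */
};

static void flow_queue_add(struct flow *f, struct pkt *p)
{
	if (!f->head || p->time_to_send >= f->tail->time_to_send) {
		p->next = NULL;
		if (f->tail)
			f->tail->next = p;
		else
			f->head = p;
		f->tail = p;
		return;
	}

	/* out of order: sorted insert (rb_link_node/rb_insert_color in fq) */
	struct pkt **pp = &f->t_root;

	while (*pp && (*pp)->time_to_send <= p->time_to_send)
		pp = &(*pp)->next;
	p->next = *pp;
	*pp = p;
}

static struct pkt *flow_peek(struct flow *f)
{
	if (!f->t_root)
		return f->head;
	if (!f->head)
		return f->t_root;
	return f->t_root->time_to_send < f->head->time_to_send ?
	       f->t_root : f->head;
}

int main(void)
{
	static const uint64_t stamps[] = { 10, 20, 15, 30 };
	struct flow f = { 0 };

	for (int i = 0; i < 4; i++) {
		struct pkt *p = calloc(1, sizeof(*p));

		p->time_to_send = stamps[i];
		flow_queue_add(&f, p);	/* 15 lands in t_root */
	}
	printf("next time_to_send: %llu\n",
	       (unsigned long long)flow_peek(&f)->time_to_send);
	return 0;
}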
+ */ + sk = (struct sock *)((hash << 1) | 1UL); } root = &q->fq_root[hash_ptr(sk, q->fq_trees_log)]; @@ -277,7 +301,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) * It not, we need to refill credit with * initial quantum */ - if (unlikely(skb->sk && + if (unlikely(skb->sk == sk && f->socket_hash != sk->sk_hash)) { f->credit = q->initial_quantum; f->socket_hash = sk->sk_hash; @@ -298,9 +322,11 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) q->stat_allocation_errors++; return &q->internal; } + /* f->t_root is already zeroed after kmem_cache_zalloc() */ + fq_flow_set_detached(f); f->sk = sk; - if (skb->sk) + if (skb->sk == sk) f->socket_hash = sk->sk_hash; f->credit = q->initial_quantum; @@ -312,14 +338,40 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) return f; } +static struct sk_buff *fq_peek(struct fq_flow *flow) +{ + struct sk_buff *skb = skb_rb_first(&flow->t_root); + struct sk_buff *head = flow->head; + + if (!skb) + return head; + + if (!head) + return skb; + + if (fq_skb_cb(skb)->time_to_send < fq_skb_cb(head)->time_to_send) + return skb; + return head; +} + +static void fq_erase_head(struct Qdisc *sch, struct fq_flow *flow, + struct sk_buff *skb) +{ + if (skb == flow->head) { + flow->head = skb->next; + } else { + rb_erase(&skb->rbnode, &flow->t_root); + skb->dev = qdisc_dev(sch); + } +} /* remove one skb from head of flow queue */ static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow) { - struct sk_buff *skb = flow->head; + struct sk_buff *skb = fq_peek(flow); if (skb) { - flow->head = skb->next; + fq_erase_head(sch, flow, skb); skb_mark_not_on_list(skb); flow->qlen--; qdisc_qstats_backlog_dec(sch, skb); @@ -330,15 +382,36 @@ static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow) static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb) { - struct sk_buff *head = flow->head; + struct rb_node **p, *parent; + struct sk_buff *head, *aux; - skb->next = NULL; - if (!head) - flow->head = skb; - else - flow->tail->next = skb; + fq_skb_cb(skb)->time_to_send = skb->tstamp ?: ktime_get_ns(); + + head = flow->head; + if (!head || + fq_skb_cb(skb)->time_to_send >= fq_skb_cb(flow->tail)->time_to_send) { + if (!head) + flow->head = skb; + else + flow->tail->next = skb; + flow->tail = skb; + skb->next = NULL; + return; + } + + p = &flow->t_root.rb_node; + parent = NULL; - flow->tail = skb; + while (*p) { + parent = *p; + aux = rb_to_skb(parent); + if (fq_skb_cb(skb)->time_to_send >= fq_skb_cb(aux)->time_to_send) + p = &parent->rb_right; + else + p = &parent->rb_left; + } + rb_link_node(&skb->rbnode, parent, p); + rb_insert_color(&skb->rbnode, &flow->t_root); } static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, @@ -450,9 +523,9 @@ begin: goto begin; } - skb = f->head; + skb = fq_peek(f); if (skb) { - u64 time_next_packet = max_t(u64, ktime_to_ns(skb->tstamp), + u64 time_next_packet = max_t(u64, fq_skb_cb(skb)->time_to_send, f->time_next_packet); if (now < time_next_packet) { @@ -533,6 +606,15 @@ out: static void fq_flow_purge(struct fq_flow *flow) { + struct rb_node *p = rb_first(&flow->t_root); + + while (p) { + struct sk_buff *skb = rb_to_skb(p); + + p = rb_next(p); + rb_erase(&skb->rbnode, &flow->t_root); + rtnl_kfree_skbs(skb, skb); + } rtnl_kfree_skbs(flow->head, flow->tail); flow->head = NULL; flow->qlen = 0; @@ -684,7 +766,8 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt, if (!opt) return 
-EINVAL; - err = nla_parse_nested(tb, TCA_FQ_MAX, opt, fq_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_FQ_MAX, opt, fq_policy, + NULL); if (err < 0) return err; @@ -823,7 +906,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) u64 ce_threshold = q->ce_threshold; struct nlattr *opts; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index cd04d40c30b6..08d85370b97c 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -387,8 +387,8 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt, if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_FQ_CODEL_MAX, opt, fq_codel_policy, - NULL); + err = nla_parse_nested_deprecated(tb, TCA_FQ_CODEL_MAX, opt, + fq_codel_policy, NULL); if (err < 0) return err; if (tb[TCA_FQ_CODEL_FLOWS]) { @@ -527,7 +527,7 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb) struct fq_codel_sched_data *q = qdisc_priv(sch); struct nlattr *opts; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index a117d9260558..cce1e9ee85af 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -32,6 +32,7 @@ #include <net/pkt_sched.h> #include <net/dst.h> #include <trace/events/qdisc.h> +#include <trace/events/net.h> #include <net/xfrm.h> /* Qdisc to use by default */ @@ -68,7 +69,7 @@ static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q) skb = __skb_dequeue(&q->skb_bad_txq); if (qdisc_is_percpu_stats(q)) { qdisc_qstats_cpu_backlog_dec(q, skb); - qdisc_qstats_atomic_qlen_dec(q); + qdisc_qstats_cpu_qlen_dec(q); } else { qdisc_qstats_backlog_dec(q, skb); q->q.qlen--; @@ -108,7 +109,7 @@ static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q, if (qdisc_is_percpu_stats(q)) { qdisc_qstats_cpu_backlog_inc(q, skb); - qdisc_qstats_atomic_qlen_inc(q); + qdisc_qstats_cpu_qlen_inc(q); } else { qdisc_qstats_backlog_inc(q, skb); q->q.qlen++; @@ -118,52 +119,36 @@ static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q, spin_unlock(lock); } -static inline int __dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) +static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { - while (skb) { - struct sk_buff *next = skb->next; - - __skb_queue_tail(&q->gso_skb, skb); - q->qstats.requeues++; - qdisc_qstats_backlog_inc(q, skb); - q->q.qlen++; /* it's still part of the queue */ + spinlock_t *lock = NULL; - skb = next; + if (q->flags & TCQ_F_NOLOCK) { + lock = qdisc_lock(q); + spin_lock(lock); } - __netif_schedule(q); - return 0; -} - -static inline int dev_requeue_skb_locked(struct sk_buff *skb, struct Qdisc *q) -{ - spinlock_t *lock = qdisc_lock(q); - - spin_lock(lock); while (skb) { struct sk_buff *next = skb->next; __skb_queue_tail(&q->gso_skb, skb); - qdisc_qstats_cpu_requeues_inc(q); - qdisc_qstats_cpu_backlog_inc(q, skb); - qdisc_qstats_atomic_qlen_inc(q); + /* it's still part of the queue */ + if (qdisc_is_percpu_stats(q)) { + qdisc_qstats_cpu_requeues_inc(q); + qdisc_qstats_cpu_backlog_inc(q, skb); + qdisc_qstats_cpu_qlen_inc(q); + } else { + q->qstats.requeues++; + qdisc_qstats_backlog_inc(q, skb); + q->q.qlen++; + } skb = next; } - spin_unlock(lock); - + if (lock) + spin_unlock(lock); __netif_schedule(q); - - return 0; -} - -static inline int dev_requeue_skb(struct sk_buff 
*skb, struct Qdisc *q) -{ - if (q->flags & TCQ_F_NOLOCK) - return dev_requeue_skb_locked(skb, q); - else - return __dev_requeue_skb(skb, q); } static void try_bulk_dequeue_skb(struct Qdisc *q, @@ -252,7 +237,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, skb = __skb_dequeue(&q->gso_skb); if (qdisc_is_percpu_stats(q)) { qdisc_qstats_cpu_backlog_dec(q, skb); - qdisc_qstats_atomic_qlen_dec(q); + qdisc_qstats_cpu_qlen_dec(q); } else { qdisc_qstats_backlog_dec(q, skb); q->q.qlen--; @@ -457,6 +442,7 @@ static void dev_watchdog(struct timer_list *t) } if (some_queue_timedout) { + trace_net_dev_xmit_timeout(dev, i); WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n", dev->name, netdev_drivername(dev), i); dev->netdev_ops->ndo_tx_timeout(dev); @@ -645,11 +631,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, if (unlikely(err)) return qdisc_drop_cpu(skb, qdisc, to_free); - qdisc_qstats_atomic_qlen_inc(qdisc); - /* Note: skb can not be used after skb_array_produce(), - * so we better not use qdisc_qstats_cpu_backlog_inc() - */ - this_cpu_add(qdisc->cpu_qstats->backlog, pkt_len); + qdisc_update_stats_at_enqueue(qdisc, pkt_len); return NET_XMIT_SUCCESS; } @@ -668,9 +650,9 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) skb = __skb_array_consume(q); } if (likely(skb)) { - qdisc_qstats_cpu_backlog_dec(qdisc, skb); - qdisc_bstats_cpu_update(qdisc, skb); - qdisc_qstats_atomic_qlen_dec(qdisc); + qdisc_update_stats_at_dequeue(qdisc, skb); + } else { + qdisc->empty = true; } return skb; @@ -714,6 +696,7 @@ static void pfifo_fast_reset(struct Qdisc *qdisc) struct gnet_stats_queue *q = per_cpu_ptr(qdisc->cpu_qstats, i); q->backlog = 0; + q->qlen = 0; } } @@ -880,6 +863,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch->enqueue = ops->enqueue; sch->dequeue = ops->dequeue; sch->dev_queue = dev_queue; + sch->empty = true; dev_hold(dev); refcount_set(&sch->refcnt, 1); diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 234afbf9115b..dfa657da100f 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -538,7 +538,8 @@ static void gred_vq_apply(struct gred_sched *table, const struct nlattr *entry) struct nlattr *tb[TCA_GRED_VQ_MAX + 1]; u32 dp; - nla_parse_nested(tb, TCA_GRED_VQ_MAX, entry, gred_vq_policy, NULL); + nla_parse_nested_deprecated(tb, TCA_GRED_VQ_MAX, entry, + gred_vq_policy, NULL); dp = nla_get_u32(tb[TCA_GRED_VQ_DP]); @@ -568,8 +569,8 @@ static int gred_vq_validate(struct gred_sched *table, u32 cdp, int err; u32 dp; - err = nla_parse_nested(tb, TCA_GRED_VQ_MAX, entry, gred_vq_policy, - extack); + err = nla_parse_nested_deprecated(tb, TCA_GRED_VQ_MAX, entry, + gred_vq_policy, extack); if (err < 0) return err; @@ -610,8 +611,8 @@ static int gred_vqs_validate(struct gred_sched *table, u32 cdp, const struct nlattr *attr; int rem, err; - err = nla_validate_nested(vqs, TCA_GRED_VQ_ENTRY_MAX, - gred_vqe_policy, extack); + err = nla_validate_nested_deprecated(vqs, TCA_GRED_VQ_ENTRY_MAX, + gred_vqe_policy, extack); if (err < 0) return err; @@ -650,7 +651,8 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt, if (opt == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_GRED_MAX, opt, gred_policy, + extack); if (err < 0) return err; @@ -737,7 +739,8 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt, if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_GRED_MAX, opt, 
gred_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_GRED_MAX, opt, gred_policy, + extack); if (err < 0) return err; @@ -772,7 +775,7 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) if (gred_offload_dump_stats(sch)) goto nla_put_failure; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; if (nla_put(skb, TCA_GRED_DPS, sizeof(sopt), &sopt)) @@ -790,7 +793,7 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) goto nla_put_failure; /* Old style all-in-one dump of VQs */ - parms = nla_nest_start(skb, TCA_GRED_PARMS); + parms = nla_nest_start_noflag(skb, TCA_GRED_PARMS); if (parms == NULL) goto nla_put_failure; @@ -841,7 +844,7 @@ append_opt: nla_nest_end(skb, parms); /* Dump the VQs again, in more structured way */ - vqs = nla_nest_start(skb, TCA_GRED_VQ_LIST); + vqs = nla_nest_start_noflag(skb, TCA_GRED_VQ_LIST); if (!vqs) goto nla_put_failure; @@ -852,7 +855,7 @@ append_opt: if (!q) continue; - vq = nla_nest_start(skb, TCA_GRED_VQ_ENTRY); + vq = nla_nest_start_noflag(skb, TCA_GRED_VQ_ENTRY); if (!vq) goto nla_put_failure; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index d2ab463f22ae..433f2190960f 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -926,7 +926,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (opt == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_HFSC_MAX, opt, hfsc_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_HFSC_MAX, opt, hfsc_policy, + NULL); if (err < 0) return err; @@ -1300,7 +1301,7 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, if (cl->level == 0) tcm->tcm_info = cl->qdisc->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; if (hfsc_dump_curves(skb, cl) < 0) diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 9d6a47697406..a28e09b1609c 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -518,7 +518,8 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt, if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_HHF_MAX, opt, hhf_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_HHF_MAX, opt, hhf_policy, + NULL); if (err < 0) return err; @@ -654,7 +655,7 @@ static int hhf_dump(struct Qdisc *sch, struct sk_buff *skb) struct hhf_sched_data *q = qdisc_priv(sch); struct nlattr *opts; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 2f9883b196e8..909370049fca 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -165,7 +165,8 @@ struct htb_sched { /* non shaped skbs; let them go directly thru */ struct qdisc_skb_head direct_queue; - long direct_pkts; + u32 direct_pkts; + u32 overlimits; struct qdisc_watchdog watchdog; @@ -533,8 +534,10 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff) if (new_mode == cl->cmode) return; - if (new_mode == HTB_CANT_SEND) + if (new_mode == HTB_CANT_SEND) { cl->overlimits++; + q->overlimits++; + } if (cl->prio_activity) { /* not necessary: speed optimization */ if (cl->cmode != HTB_CANT_SEND) @@ -937,7 +940,6 @@ ok: goto ok; } } - qdisc_qstats_overlimit(sch); if (likely(next_event > q->now)) qdisc_watchdog_schedule_ns(&q->watchdog, next_event); else @@ -1012,7 +1014,8 @@ static int htb_init(struct Qdisc *sch, struct 
nlattr *opt, if (err) return err; - err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy, + NULL); if (err < 0) return err; @@ -1047,6 +1050,7 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) struct nlattr *nest; struct tc_htb_glob gopt; + sch->qstats.overlimits = q->overlimits; /* Its safe to not acquire qdisc lock. As we hold RTNL, * no change can happen on the qdisc parameters. */ @@ -1057,7 +1061,7 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) gopt.defcls = q->defcls; gopt.debug = 0; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) || @@ -1086,7 +1090,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, if (!cl->level && cl->leaf.q) tcm->tcm_info = cl->leaf.q->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; @@ -1310,7 +1314,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!opt) goto failure; - err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy, + NULL); if (err < 0) goto failure; diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index ce3f55259d0d..0bac926b46c7 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -106,7 +106,7 @@ static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb) { struct nlattr *nest; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index ea0dc112b38d..d05086dc3866 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -125,8 +125,9 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, int nested_len = nla_len(nla) - NLA_ALIGN(len); if (nested_len >= nla_attr_size(0)) - return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), - nested_len, policy, NULL); + return nla_parse_deprecated(tb, maxtype, + nla_data(nla) + NLA_ALIGN(len), + nested_len, policy, NULL); memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); return 0; @@ -349,7 +350,7 @@ static int dump_rates(struct mqprio_sched *priv, int i; if (priv->flags & TC_MQPRIO_F_MIN_RATE) { - nest = nla_nest_start(skb, TCA_MQPRIO_MIN_RATE64); + nest = nla_nest_start_noflag(skb, TCA_MQPRIO_MIN_RATE64); if (!nest) goto nla_put_failure; @@ -363,7 +364,7 @@ static int dump_rates(struct mqprio_sched *priv, } if (priv->flags & TC_MQPRIO_F_MAX_RATE) { - nest = nla_nest_start(skb, TCA_MQPRIO_MAX_RATE64); + nest = nla_nest_start_noflag(skb, TCA_MQPRIO_MAX_RATE64); if (!nest) goto nla_put_failure; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index cc9d8133afcd..78aa76b0da2e 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -935,8 +935,9 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, } if (nested_len >= nla_attr_size(0)) - return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), - nested_len, policy, NULL); + return nla_parse_deprecated(tb, maxtype, + nla_data(nla) + NLA_ALIGN(len), + nested_len, policy, NULL); memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); return 0; @@ -1079,7 +1080,7 @@ static int dump_loss_model(const struct netem_sched_data *q, { struct nlattr *nest; - nest = 
nla_nest_start(skb, TCA_NETEM_LOSS); + nest = nla_nest_start_noflag(skb, TCA_NETEM_LOSS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 1cc0c7b74aa3..8fa129d3943e 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -216,7 +216,8 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt, if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_PIE_MAX, opt, pie_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_PIE_MAX, opt, pie_policy, + NULL); if (err < 0) return err; @@ -491,7 +492,7 @@ static int pie_dump(struct Qdisc *sch, struct sk_buff *skb) struct pie_sched_data *q = qdisc_priv(sch); struct nlattr *opts; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (!opts) goto nla_put_failure; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 1589364b54da..3f9e8b425ac6 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -410,8 +410,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, return -EINVAL; } - err = nla_parse_nested(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], qfq_policy, - NULL); + err = nla_parse_nested_deprecated(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], + qfq_policy, NULL); if (err < 0) return err; @@ -619,7 +619,7 @@ static int qfq_dump_class(struct Qdisc *sch, unsigned long arg, tcm->tcm_handle = cl->common.classid; tcm->tcm_info = cl->qdisc->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; if (nla_put_u32(skb, TCA_QFQ_WEIGHT, cl->agg->class_weight) || diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 4e8c0abf6194..1e68a13bb66b 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -205,7 +205,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, if (opt == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy, + NULL); if (err < 0) return err; @@ -318,7 +319,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb) if (err) goto nla_put_failure; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) || diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 2419fdb75966..b245d6a2068d 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -499,7 +499,8 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt, int err; if (opt) { - err = nla_parse_nested(tb, TCA_SFB_MAX, opt, sfb_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_SFB_MAX, opt, + sfb_policy, NULL); if (err < 0) return -EINVAL; @@ -580,7 +581,7 @@ static int sfb_dump(struct Qdisc *sch, struct sk_buff *skb) }; sch->qstats.backlog = q->qdisc->qstats.backlog; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; if (nla_put(skb, TCA_SFB_PARMS, sizeof(opt), &opt)) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index c7041999eb5d..9ecfb8f5902a 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -13,13 +13,18 @@ #include <linux/list.h> #include <linux/errno.h> #include <linux/skbuff.h> +#include <linux/math64.h> #include <linux/module.h> #include <linux/spinlock.h> +#include <linux/rcupdate.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> #include 
<net/sch_generic.h> +static LIST_HEAD(taprio_list); +static DEFINE_SPINLOCK(taprio_list_lock); + #define TAPRIO_ALL_GATES_OPEN -1 struct sched_entry { @@ -37,24 +42,88 @@ struct sched_entry { u8 command; }; +struct sched_gate_list { + struct rcu_head rcu; + struct list_head entries; + size_t num_entries; + ktime_t cycle_close_time; + s64 cycle_time; + s64 cycle_time_extension; + s64 base_time; +}; + struct taprio_sched { struct Qdisc **qdiscs; struct Qdisc *root; - s64 base_time; int clockid; - int picos_per_byte; /* Using picoseconds because for 10Gbps+ - * speeds it's sub-nanoseconds per byte - */ - size_t num_entries; + atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+ + * speeds it's sub-nanoseconds per byte + */ /* Protects the update side of the RCU protected current_entry */ spinlock_t current_entry_lock; struct sched_entry __rcu *current_entry; - struct list_head entries; + struct sched_gate_list __rcu *oper_sched; + struct sched_gate_list __rcu *admin_sched; ktime_t (*get_time)(void); struct hrtimer advance_timer; + struct list_head taprio_list; }; +static ktime_t sched_base_time(const struct sched_gate_list *sched) +{ + if (!sched) + return KTIME_MAX; + + return ns_to_ktime(sched->base_time); +} + +static void taprio_free_sched_cb(struct rcu_head *head) +{ + struct sched_gate_list *sched = container_of(head, struct sched_gate_list, rcu); + struct sched_entry *entry, *n; + + if (!sched) + return; + + list_for_each_entry_safe(entry, n, &sched->entries, list) { + list_del(&entry->list); + kfree(entry); + } + + kfree(sched); +} + +static void switch_schedules(struct taprio_sched *q, + struct sched_gate_list **admin, + struct sched_gate_list **oper) +{ + rcu_assign_pointer(q->oper_sched, *admin); + rcu_assign_pointer(q->admin_sched, NULL); + + if (*oper) + call_rcu(&(*oper)->rcu, taprio_free_sched_cb); + + *oper = *admin; + *admin = NULL; +} + +static ktime_t get_cycle_time(struct sched_gate_list *sched) +{ + struct sched_entry *entry; + ktime_t cycle = 0; + + if (sched->cycle_time != 0) + return sched->cycle_time; + + list_for_each_entry(entry, &sched->entries, list) + cycle = ktime_add_ns(cycle, entry->interval); + + sched->cycle_time = cycle; + + return cycle; +} + static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { @@ -85,7 +154,7 @@ static struct sk_buff *taprio_peek(struct Qdisc *sch) rcu_read_lock(); entry = rcu_dereference(q->current_entry); - gate_mask = entry ? entry->gate_mask : -1; + gate_mask = entry ? 
entry->gate_mask : TAPRIO_ALL_GATES_OPEN; rcu_read_unlock(); if (!gate_mask) @@ -107,7 +176,7 @@ static struct sk_buff *taprio_peek(struct Qdisc *sch) tc = netdev_get_prio_tc_map(dev, prio); if (!(gate_mask & BIT(tc))) - return NULL; + continue; return skb; } @@ -117,18 +186,30 @@ static struct sk_buff *taprio_peek(struct Qdisc *sch) static inline int length_to_duration(struct taprio_sched *q, int len) { - return (len * q->picos_per_byte) / 1000; + return div_u64(len * atomic64_read(&q->picos_per_byte), 1000); +} + +static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry) +{ + atomic_set(&entry->budget, + div64_u64((u64)entry->interval * 1000, + atomic64_read(&q->picos_per_byte))); } static struct sk_buff *taprio_dequeue(struct Qdisc *sch) { struct taprio_sched *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); + struct sk_buff *skb = NULL; struct sched_entry *entry; - struct sk_buff *skb; u32 gate_mask; int i; + if (atomic64_read(&q->picos_per_byte) == -1) { + WARN_ONCE(1, "taprio: dequeue() called with unknown picos per byte."); + return NULL; + } + rcu_read_lock(); entry = rcu_dereference(q->current_entry); /* if there's no entry, it means that the schedule didn't @@ -137,10 +218,9 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch) * "AdminGateSates" */ gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN; - rcu_read_unlock(); if (!gate_mask) - return NULL; + goto done; for (i = 0; i < dev->num_tx_queues; i++) { struct Qdisc *child = q->qdiscs[i]; @@ -171,39 +251,81 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch) */ if (gate_mask != TAPRIO_ALL_GATES_OPEN && ktime_after(guard, entry->close_time)) - return NULL; + continue; /* ... and no budget. */ if (gate_mask != TAPRIO_ALL_GATES_OPEN && atomic_sub_return(len, &entry->budget) < 0) - return NULL; + continue; skb = child->ops->dequeue(child); if (unlikely(!skb)) - return NULL; + goto done; qdisc_bstats_update(sch, skb); qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; - return skb; + goto done; } - return NULL; +done: + rcu_read_unlock(); + + return skb; } -static bool should_restart_cycle(const struct taprio_sched *q, +static bool should_restart_cycle(const struct sched_gate_list *oper, const struct sched_entry *entry) { - WARN_ON(!entry); + if (list_is_last(&entry->list, &oper->entries)) + return true; + + if (ktime_compare(entry->close_time, oper->cycle_close_time) == 0) + return true; + + return false; +} + +static bool should_change_schedules(const struct sched_gate_list *admin, + const struct sched_gate_list *oper, + ktime_t close_time) +{ + ktime_t next_base_time, extension_time; + + if (!admin) + return false; - return list_is_last(&entry->list, &q->entries); + next_base_time = sched_base_time(admin); + + /* This is the simple case, the close_time would fall after + * the next schedule base_time. + */ + if (ktime_compare(next_base_time, close_time) <= 0) + return true; + + /* This is the cycle_time_extension case, if the close_time + * plus the amount that can be extended would fall after the + * next schedule base_time, we can extend the current schedule + * for that amount. + */ + extension_time = ktime_add_ns(close_time, oper->cycle_time_extension); + + /* FIXME: the IEEE 802.1Q-2018 Specification isn't clear about + * how precisely the extension should be made. So after + * conformance testing, this logic may change. 
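should_change_schedules() reduces to two comparisons: the admin schedule takes over if its base_time lands at or before the tentative close_time, or at or before close_time stretched by the operational schedule's cycle_time_extension, as the comparison just below the FIXME completes. The same test with plain signed nanosecond counters assumed in place of ktime_t:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

static bool should_change_schedules(int64_t admin_base_time,
				    int64_t cycle_time_extension,
				    int64_t close_time)
{
	/* simple case: the admin schedule starts before the entry closes */
	if (admin_base_time <= close_time)
		return true;

	/* extension case: the running schedule may be stretched this far */
	return admin_base_time <= close_time + cycle_time_extension;
}

int main(void)
{
	/* admin base at 1000 ns, current entry closes at 900 ns */
	printf("no extension: %d\n",
	       should_change_schedules(1000, 0, 900));	 /* 0: keep oper */
	printf("200 ns extension: %d\n",
	       should_change_schedules(1000, 200, 900)); /* 1: switch */
	return 0;
}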
+ */ + if (ktime_compare(next_base_time, extension_time) <= 0) + return true; + + return false; } static enum hrtimer_restart advance_sched(struct hrtimer *timer) { struct taprio_sched *q = container_of(timer, struct taprio_sched, advance_timer); + struct sched_gate_list *oper, *admin; struct sched_entry *entry, *next; struct Qdisc *sch = q->root; ktime_t close_time; @@ -211,29 +333,49 @@ static enum hrtimer_restart advance_sched(struct hrtimer *timer) spin_lock(&q->current_entry_lock); entry = rcu_dereference_protected(q->current_entry, lockdep_is_held(&q->current_entry_lock)); + oper = rcu_dereference_protected(q->oper_sched, + lockdep_is_held(&q->current_entry_lock)); + admin = rcu_dereference_protected(q->admin_sched, + lockdep_is_held(&q->current_entry_lock)); - /* This is the case that it's the first time that the schedule - * runs, so it only happens once per schedule. The first entry - * is pre-calculated during the schedule initialization. + if (!oper) + switch_schedules(q, &admin, &oper); + + /* This can happen in two cases: 1. this is the very first run + * of this function (i.e. we weren't running any schedule + * previously); 2. The previous schedule just ended. The first + * entry of all schedules are pre-calculated during the + * schedule initialization. */ - if (unlikely(!entry)) { - next = list_first_entry(&q->entries, struct sched_entry, + if (unlikely(!entry || entry->close_time == oper->base_time)) { + next = list_first_entry(&oper->entries, struct sched_entry, list); close_time = next->close_time; goto first_run; } - if (should_restart_cycle(q, entry)) - next = list_first_entry(&q->entries, struct sched_entry, + if (should_restart_cycle(oper, entry)) { + next = list_first_entry(&oper->entries, struct sched_entry, list); - else + oper->cycle_close_time = ktime_add_ns(oper->cycle_close_time, + oper->cycle_time); + } else { next = list_next_entry(entry, list); + } close_time = ktime_add_ns(entry->close_time, next->interval); + close_time = min_t(ktime_t, close_time, oper->cycle_close_time); + + if (should_change_schedules(admin, oper, close_time)) { + /* Set things so the next time this runs, the new + * schedule runs. 
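taprio_set_budget(), armed for the next entry just below, converts a gate's open interval into a byte budget through the cached picos_per_byte value (picoseconds so that 10 Gb/s and faster links still resolve to a nonzero per-byte cost). The arithmetic is easy to check in isolation; the sketch below assumes plain 64-bit integers where the kernel uses atomics and div64 helpers.

#include <stdio.h>
#include <stdint.h>

/* at speed Mbit/s a bit takes 10^6/speed ps, so a byte takes 8e6/speed */
static int64_t picos_per_byte(int64_t speed_mbps)
{
	return (8 * 1000000LL) / speed_mbps;
}

static int64_t gate_budget(int64_t interval_ns, int64_t ppb)
{
	/* interval in ns -> ps, divided by the per-byte cost */
	return interval_ns * 1000 / ppb;
}

int main(void)
{
	int64_t ppb = picos_per_byte(1000);	   /* 1 Gb/s -> 8000 ps/byte */
	int64_t budget = gate_budget(100000, ppb); /* 100 us gate window */

	printf("budget: %lld bytes per window\n", (long long)budget);
	return 0;
}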
+ */ + close_time = sched_base_time(admin); + switch_schedules(q, &admin, &oper); + } next->close_time = close_time; - atomic_set(&next->budget, - (next->interval * 1000) / q->picos_per_byte); + taprio_set_budget(q, next); first_run: rcu_assign_pointer(q->current_entry, next); @@ -263,10 +405,12 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { [TCA_TAPRIO_ATTR_PRIOMAP] = { .len = sizeof(struct tc_mqprio_qopt) }, - [TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] = { .type = NLA_NESTED }, - [TCA_TAPRIO_ATTR_SCHED_BASE_TIME] = { .type = NLA_S64 }, - [TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] = { .type = NLA_NESTED }, - [TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 }, + [TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] = { .type = NLA_NESTED }, + [TCA_TAPRIO_ATTR_SCHED_BASE_TIME] = { .type = NLA_S64 }, + [TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] = { .type = NLA_NESTED }, + [TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 }, + [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] = { .type = NLA_S64 }, + [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 }, }; static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry, @@ -302,8 +446,8 @@ static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry, struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { }; int err; - err = nla_parse_nested(tb, TCA_TAPRIO_SCHED_ENTRY_MAX, n, - entry_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_SCHED_ENTRY_MAX, n, + entry_policy, NULL); if (err < 0) { NL_SET_ERR_MSG(extack, "Could not parse nested entry"); return -EINVAL; @@ -314,70 +458,8 @@ static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry, return fill_sched_entry(tb, entry, extack); } -/* Returns the number of entries in case of success */ -static int parse_sched_single_entry(struct nlattr *n, - struct taprio_sched *q, - struct netlink_ext_ack *extack) -{ - struct nlattr *tb_entry[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { }; - struct nlattr *tb_list[TCA_TAPRIO_SCHED_MAX + 1] = { }; - struct sched_entry *entry; - bool found = false; - u32 index; - int err; - - err = nla_parse_nested(tb_list, TCA_TAPRIO_SCHED_MAX, - n, entry_list_policy, NULL); - if (err < 0) { - NL_SET_ERR_MSG(extack, "Could not parse nested entry"); - return -EINVAL; - } - - if (!tb_list[TCA_TAPRIO_SCHED_ENTRY]) { - NL_SET_ERR_MSG(extack, "Single-entry must include an entry"); - return -EINVAL; - } - - err = nla_parse_nested(tb_entry, TCA_TAPRIO_SCHED_ENTRY_MAX, - tb_list[TCA_TAPRIO_SCHED_ENTRY], - entry_policy, NULL); - if (err < 0) { - NL_SET_ERR_MSG(extack, "Could not parse nested entry"); - return -EINVAL; - } - - if (!tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]) { - NL_SET_ERR_MSG(extack, "Entry must specify an index\n"); - return -EINVAL; - } - - index = nla_get_u32(tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]); - if (index >= q->num_entries) { - NL_SET_ERR_MSG(extack, "Index for single entry exceeds number of entries in schedule"); - return -EINVAL; - } - - list_for_each_entry(entry, &q->entries, list) { - if (entry->index == index) { - found = true; - break; - } - } - - if (!found) { - NL_SET_ERR_MSG(extack, "Could not find entry"); - return -ENOENT; - } - - err = fill_sched_entry(tb_entry, entry, extack); - if (err < 0) - return err; - - return q->num_entries; -} - static int parse_sched_list(struct nlattr *list, - struct taprio_sched *q, + struct sched_gate_list *sched, struct netlink_ext_ack *extack) { struct nlattr *n; @@ -407,64 +489,42 @@ static int parse_sched_list(struct nlattr *list, return err; } - list_add_tail(&entry->list, 
&q->entries); + list_add_tail(&entry->list, &sched->entries); i++; } - q->num_entries = i; + sched->num_entries = i; return i; } -/* Returns the number of entries in case of success */ -static int parse_taprio_opt(struct nlattr **tb, struct taprio_sched *q, - struct netlink_ext_ack *extack) +static int parse_taprio_schedule(struct nlattr **tb, + struct sched_gate_list *new, + struct netlink_ext_ack *extack) { int err = 0; - int clockid; - if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] && - tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) - return -EINVAL; - - if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] && q->num_entries == 0) - return -EINVAL; - - if (q->clockid == -1 && !tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) - return -EINVAL; + if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) { + NL_SET_ERR_MSG(extack, "Adding a single entry is not supported"); + return -ENOTSUPP; + } if (tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]) - q->base_time = nla_get_s64( - tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]); + new->base_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]); - if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) { - clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]); + if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION]) + new->cycle_time_extension = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION]); - /* We only support static clockids and we don't allow - * for it to be modified after the first init. - */ - if (clockid < 0 || (q->clockid != -1 && q->clockid != clockid)) - return -EINVAL; - - q->clockid = clockid; - } + if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]) + new->cycle_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]); if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST]) err = parse_sched_list( - tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], q, extack); - else if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) - err = parse_sched_single_entry( - tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY], q, extack); - - /* parse_sched_* return the number of entries in the schedule, - * a schedule with zero entries is an error. - */ - if (err == 0) { - NL_SET_ERR_MSG(extack, "The schedule should contain at least one entry"); - return -EINVAL; - } + tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], new, extack); + if (err < 0) + return err; - return err; + return 0; } static int taprio_parse_mqprio_opt(struct net_device *dev, @@ -473,11 +533,17 @@ static int taprio_parse_mqprio_opt(struct net_device *dev, { int i, j; - if (!qopt) { + if (!qopt && !dev->num_tc) { NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary"); return -EINVAL; } + /* If num_tc is already set, it means that the user already + * configured the mqprio part + */ + if (dev->num_tc) + return 0; + /* Verify num_tc is not out of max range */ if (qopt->num_tc > TC_MAX_QUEUE) { NL_SET_ERR_MSG(extack, "Number of traffic classes is outside valid range"); @@ -523,76 +589,141 @@ static int taprio_parse_mqprio_opt(struct net_device *dev, return 0; } -static ktime_t taprio_get_start_time(struct Qdisc *sch) +static int taprio_get_start_time(struct Qdisc *sch, + struct sched_gate_list *sched, + ktime_t *start) { struct taprio_sched *q = qdisc_priv(sch); - struct sched_entry *entry; ktime_t now, base, cycle; s64 n; - base = ns_to_ktime(q->base_time); - cycle = 0; - - /* Calculate the cycle_time, by summing all the intervals. 
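The removed comment that closes just below used to describe summing the intervals inline; that derivation now lives in get_cycle_time() (added earlier in this file's hunks), which prefers an explicitly configured cycle_time and caches the computed sum. A sketch with a plain array assumed in place of the kernel's linked list of sched_entry structs:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

struct sketch_sched {
	int64_t cycle_time;		/* 0 = derive from the entries */
	const int64_t *intervals;	/* ns per entry */
	size_t num_entries;
};

static int64_t get_cycle_time(struct sketch_sched *sched)
{
	if (sched->cycle_time != 0)
		return sched->cycle_time;

	int64_t cycle = 0;

	for (size_t i = 0; i < sched->num_entries; i++)
		cycle += sched->intervals[i];

	sched->cycle_time = cycle;	/* cache, as the kernel does */
	return cycle;
}

int main(void)
{
	const int64_t intervals[] = { 300000, 300000, 400000 };
	struct sketch_sched s = { 0, intervals, 3 };

	printf("cycle: %lld ns\n", (long long)get_cycle_time(&s));
	return 0;
}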
- */ - list_for_each_entry(entry, &q->entries, list) - cycle = ktime_add_ns(cycle, entry->interval); + base = sched_base_time(sched); + now = q->get_time(); - if (!cycle) - return base; + if (ktime_after(base, now)) { + *start = base; + return 0; + } - now = q->get_time(); + cycle = get_cycle_time(sched); - if (ktime_after(base, now)) - return base; + /* The qdisc is expected to have at least one sched_entry. Moreover, + * any entry must have 'interval' > 0. Thus if the cycle time is zero, + * something went really wrong. In that case, we should warn about this + * inconsistent state and return error. + */ + if (WARN_ON(!cycle)) + return -EFAULT; /* Schedule the start time for the beginning of the next * cycle. */ n = div64_s64(ktime_sub_ns(now, base), cycle); - - return ktime_add_ns(base, (n + 1) * cycle); + *start = ktime_add_ns(base, (n + 1) * cycle); + return 0; } -static void taprio_start_sched(struct Qdisc *sch, ktime_t start) +static void setup_first_close_time(struct taprio_sched *q, + struct sched_gate_list *sched, ktime_t base) { - struct taprio_sched *q = qdisc_priv(sch); struct sched_entry *first; - unsigned long flags; + ktime_t cycle; - spin_lock_irqsave(&q->current_entry_lock, flags); + first = list_first_entry(&sched->entries, + struct sched_entry, list); + + cycle = get_cycle_time(sched); - first = list_first_entry(&q->entries, struct sched_entry, - list); + /* FIXME: find a better place to do this */ + sched->cycle_close_time = ktime_add_ns(base, cycle); - first->close_time = ktime_add_ns(start, first->interval); - atomic_set(&first->budget, - (first->interval * 1000) / q->picos_per_byte); + first->close_time = ktime_add_ns(base, first->interval); + taprio_set_budget(q, first); rcu_assign_pointer(q->current_entry, NULL); +} - spin_unlock_irqrestore(&q->current_entry_lock, flags); +static void taprio_start_sched(struct Qdisc *sch, + ktime_t start, struct sched_gate_list *new) +{ + struct taprio_sched *q = qdisc_priv(sch); + ktime_t expires; + + expires = hrtimer_get_expires(&q->advance_timer); + if (expires == 0) + expires = KTIME_MAX; + + /* If the new schedule starts before the next expiration, we + * reprogram it to the earliest one, so we change the admin + * schedule to the operational one at the right time. 
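taprio_get_start_time(), reworked above, aligns a base_time that already lies in the past to the start of the next full cycle: n = (now - base) / cycle whole cycles have elapsed, so the schedule starts at base + (n + 1) * cycle. The same rounding in plain integer nanoseconds, with the WARN_ON(!cycle) guard omitted and int64 division assumed where the kernel uses div64_s64 and ktime helpers:

#include <stdio.h>
#include <stdint.h>

static int64_t next_cycle_start(int64_t now, int64_t base, int64_t cycle)
{
	if (base > now)		/* schedule starts in the future: use base */
		return base;

	/* whole cycles elapsed since base; start at the next boundary */
	int64_t n = (now - base) / cycle;

	return base + (n + 1) * cycle;
}

int main(void)
{
	/* base 1000 ns, 500 ns cycle, now 2300 ns -> next start 2500 ns */
	printf("%lld\n", (long long)next_cycle_start(2300, 1000, 500));
	return 0;
}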
+static void taprio_start_sched(struct Qdisc *sch,
+			       ktime_t start, struct sched_gate_list *new)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+	ktime_t expires;
+
+	expires = hrtimer_get_expires(&q->advance_timer);
+	if (expires == 0)
+		expires = KTIME_MAX;
+
+	/* If the new schedule starts before the next expiration, we
+	 * reprogram it to the earliest one, so we change the admin
+	 * schedule to the operational one at the right time.
+	 */
+	start = min_t(ktime_t, start, expires);
 
 	hrtimer_start(&q->advance_timer, start, HRTIMER_MODE_ABS);
 }
 
+static void taprio_set_picos_per_byte(struct net_device *dev,
+				      struct taprio_sched *q)
+{
+	struct ethtool_link_ksettings ecmd;
+	int picos_per_byte = -1;
+
+	if (!__ethtool_get_link_ksettings(dev, &ecmd) &&
+	    ecmd.base.speed != SPEED_UNKNOWN)
+		picos_per_byte = div64_s64(NSEC_PER_SEC * 1000LL * 8,
+					   ecmd.base.speed * 1000 * 1000);
+
+	atomic64_set(&q->picos_per_byte, picos_per_byte);
+	netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n",
+		   dev->name, (long long)atomic64_read(&q->picos_per_byte),
+		   ecmd.base.speed);
+}
+
+static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event,
+			       void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct net_device *qdev;
+	struct taprio_sched *q;
+	bool found = false;
+
+	ASSERT_RTNL();
+
+	if (event != NETDEV_UP && event != NETDEV_CHANGE)
+		return NOTIFY_DONE;
+
+	spin_lock(&taprio_list_lock);
+	list_for_each_entry(q, &taprio_list, taprio_list) {
+		qdev = qdisc_dev(q->root);
+		if (qdev == dev) {
+			found = true;
+			break;
+		}
+	}
+	spin_unlock(&taprio_list_lock);
+
+	if (found)
+		taprio_set_picos_per_byte(dev, q);
+
+	return NOTIFY_DONE;
+}
+
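taprio_set_picos_per_byte() and the netdev notifier above keep the per-byte transmit time in sync with the link: speeds can renegotiate at runtime, so NETDEV_UP/NETDEV_CHANGE events re-derive picos_per_byte, which in turn sizes each entry's byte budget (the expression visible in the deleted lines earlier: interval * 1000 / picos_per_byte). A back-of-the-envelope sketch with illustrative helper names:

/* One byte takes 8 bit-times: at 1000 Mb/s that is
 * 8e12 / 1e9 = 8000 picoseconds per byte.
 */
static long long picos_per_byte(long long speed_mbps)
{
	return (1000000000LL * 1000 * 8) / (speed_mbps * 1000 * 1000);
}

/* Bytes that fit in one gate interval (interval in ns, scaled to ps). */
static long long entry_budget(long long interval_ns, long long ppb)
{
	return (interval_ns * 1000) / ppb;
}

At 1 Gb/s a 300 us gate interval therefore admits 37500 bytes; if the link drops to 100 Mb/s, the notifier path shrinks that same budget to 3750 bytes.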
 static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
 			 struct netlink_ext_ack *extack)
 {
 	struct nlattr *tb[TCA_TAPRIO_ATTR_MAX + 1] = { };
+	struct sched_gate_list *oper, *admin, *new_admin;
 	struct taprio_sched *q = qdisc_priv(sch);
 	struct net_device *dev = qdisc_dev(sch);
 	struct tc_mqprio_qopt *mqprio = NULL;
-	struct ethtool_link_ksettings ecmd;
-	int i, err, size;
-	s64 link_speed;
+	int i, err, clockid;
+	unsigned long flags;
 	ktime_t start;
 
-	err = nla_parse_nested(tb, TCA_TAPRIO_ATTR_MAX, opt,
-			       taprio_policy, extack);
+	err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_ATTR_MAX, opt,
+					  taprio_policy, extack);
 	if (err < 0)
 		return err;
 
-	err = -EINVAL;
 	if (tb[TCA_TAPRIO_ATTR_PRIOMAP])
 		mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]);
 
@@ -600,13 +731,78 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
 	if (err < 0)
 		return err;
 
-	/* A schedule with less than one entry is an error */
-	size = parse_taprio_opt(tb, q, extack);
-	if (size < 0)
-		return size;
+	new_admin = kzalloc(sizeof(*new_admin), GFP_KERNEL);
+	if (!new_admin) {
+		NL_SET_ERR_MSG(extack, "Not enough memory for a new schedule");
+		return -ENOMEM;
+	}
+	INIT_LIST_HEAD(&new_admin->entries);
 
-	hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS);
-	q->advance_timer.function = advance_sched;
+	rcu_read_lock();
+	oper = rcu_dereference(q->oper_sched);
+	admin = rcu_dereference(q->admin_sched);
+	rcu_read_unlock();
+
+	if (mqprio && (oper || admin)) {
+		NL_SET_ERR_MSG(extack, "Changing the traffic mapping of a running schedule is not supported");
+		err = -ENOTSUPP;
+		goto free_sched;
+	}
+
+	err = parse_taprio_schedule(tb, new_admin, extack);
+	if (err < 0)
+		goto free_sched;
+
+	if (new_admin->num_entries == 0) {
+		NL_SET_ERR_MSG(extack, "There should be at least one entry in the schedule");
+		err = -EINVAL;
+		goto free_sched;
+	}
+
+	if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
+		clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]);
+
+		/* We only support static clockids and we don't allow
+		 * for it to be modified after the first init.
+		 */
+		if (clockid < 0 ||
+		    (q->clockid != -1 && q->clockid != clockid)) {
+			NL_SET_ERR_MSG(extack, "Changing the 'clockid' of a running schedule is not supported");
+			err = -ENOTSUPP;
+			goto free_sched;
+		}
+
+		q->clockid = clockid;
+	}
+
+	if (q->clockid == -1 && !tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
+		NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory");
+		err = -EINVAL;
+		goto free_sched;
+	}
+
+	taprio_set_picos_per_byte(dev, q);
+
+	/* Protects against enqueue()/dequeue() */
+	spin_lock_bh(qdisc_lock(sch));
+
+	if (!hrtimer_active(&q->advance_timer)) {
+		hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS);
+		q->advance_timer.function = advance_sched;
+	}
+
+	if (mqprio) {
+		netdev_set_num_tc(dev, mqprio->num_tc);
+		for (i = 0; i < mqprio->num_tc; i++)
+			netdev_set_tc_queue(dev, i,
+					    mqprio->count[i],
+					    mqprio->offset[i]);
+
+		/* Always use supplied priority mappings */
+		for (i = 0; i < TC_BITMASK + 1; i++)
+			netdev_set_prio_tc_map(dev, i,
+					       mqprio->prio_tc_map[i]);
+	}
 
 	switch (q->clockid) {
 	case CLOCK_REALTIME:
@@ -622,65 +818,52 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
 		q->get_time = ktime_get_clocktai;
 		break;
 	default:
-		return -ENOTSUPP;
+		NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
+		err = -EINVAL;
+		goto unlock;
 	}
 
-	for (i = 0; i < dev->num_tx_queues; i++) {
-		struct netdev_queue *dev_queue;
-		struct Qdisc *qdisc;
-
-		dev_queue = netdev_get_tx_queue(dev, i);
-		qdisc = qdisc_create_dflt(dev_queue,
-					  &pfifo_qdisc_ops,
-					  TC_H_MAKE(TC_H_MAJ(sch->handle),
-						    TC_H_MIN(i + 1)),
-					  extack);
-		if (!qdisc)
-			return -ENOMEM;
+	err = taprio_get_start_time(sch, new_admin, &start);
+	if (err < 0) {
+		NL_SET_ERR_MSG(extack, "Internal error: failed get start time");
+		goto unlock;
+	}
 
-		if (i < dev->real_num_tx_queues)
-			qdisc_hash_add(qdisc, false);
+	setup_first_close_time(q, new_admin, start);
 
-		q->qdiscs[i] = qdisc;
-	}
+	/* Protects against advance_sched() */
+	spin_lock_irqsave(&q->current_entry_lock, flags);
 
-	if (mqprio) {
-		netdev_set_num_tc(dev, mqprio->num_tc);
-		for (i = 0; i < mqprio->num_tc; i++)
-			netdev_set_tc_queue(dev, i,
-					    mqprio->count[i],
-					    mqprio->offset[i]);
+	taprio_start_sched(sch, start, new_admin);
 
-		/* Always use supplied priority mappings */
-		for (i = 0; i < TC_BITMASK + 1; i++)
-			netdev_set_prio_tc_map(dev, i,
-					       mqprio->prio_tc_map[i]);
-	}
+	rcu_assign_pointer(q->admin_sched, new_admin);
+	if (admin)
+		call_rcu(&admin->rcu, taprio_free_sched_cb);
+	new_admin = NULL;
 
-	if (!__ethtool_get_link_ksettings(dev, &ecmd))
-		link_speed = ecmd.base.speed;
-	else
-		link_speed = SPEED_1000;
+	spin_unlock_irqrestore(&q->current_entry_lock, flags);
 
-	q->picos_per_byte = div64_s64(NSEC_PER_SEC * 1000LL * 8,
-				      link_speed * 1000 * 1000);
+	err = 0;
 
-	start = taprio_get_start_time(sch);
-	if (!start)
-		return 0;
+unlock:
+	spin_unlock_bh(qdisc_lock(sch));
 
-	taprio_start_sched(sch, start);
+free_sched:
+	kfree(new_admin);
 
-	return 0;
+	return err;
 }
 
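taprio_change() is what iproute2's tc talks to. With the schedule now kept in a sched_gate_list, re-running tc against a live qdisc no longer tears it down: the new schedule is parked as the "admin" schedule and swapped in, via RCU and the advance-timer reprogramming above, at the computed start time, while the mqprio mapping and clockid stay immutable once set, as the extack messages spell out. A typical invocation (tc-taprio syntax, values purely illustrative) looks like:

# tc qdisc replace dev eth0 parent root handle 100 taprio \
      num_tc 3 map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 \
      queues 1@0 1@1 2@2 \
      base-time 1000000000 \
      sched-entry S 01 300000 \
      sched-entry S 02 300000 \
      sched-entry S 04 400000 \
      clockid CLOCK_TAI

Each sched-entry is <command> <gate mask> <interval in ns>; repeating the command with new sched-entry lines while the qdisc is running exercises the admin/oper switch added in this patch.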
 static void taprio_destroy(struct Qdisc *sch)
 {
 	struct taprio_sched *q = qdisc_priv(sch);
 	struct net_device *dev = qdisc_dev(sch);
-	struct sched_entry *entry, *n;
 	unsigned int i;
 
+	spin_lock(&taprio_list_lock);
+	list_del(&q->taprio_list);
+	spin_unlock(&taprio_list_lock);
+
 	hrtimer_cancel(&q->advance_timer);
 
 	if (q->qdiscs) {
@@ -693,10 +876,11 @@ static void taprio_destroy(struct Qdisc *sch)
 
 	netdev_set_num_tc(dev, 0);
 
-	list_for_each_entry_safe(entry, n, &q->entries, list) {
-		list_del(&entry->list);
-		kfree(entry);
-	}
+	if (q->oper_sched)
+		call_rcu(&q->oper_sched->rcu, taprio_free_sched_cb);
+
+	if (q->admin_sched)
+		call_rcu(&q->admin_sched->rcu, taprio_free_sched_cb);
 }
 
 static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
@@ -704,12 +888,12 @@
 {
 	struct taprio_sched *q = qdisc_priv(sch);
 	struct net_device *dev = qdisc_dev(sch);
+	int i;
 
-	INIT_LIST_HEAD(&q->entries);
 	spin_lock_init(&q->current_entry_lock);
 
-	/* We may overwrite the configuration later */
 	hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS);
+	q->advance_timer.function = advance_sched;
 
 	q->root = sch;
 
@@ -735,6 +919,29 @@
 	if (!opt)
 		return -EINVAL;
 
+	spin_lock(&taprio_list_lock);
+	list_add(&q->taprio_list, &taprio_list);
+	spin_unlock(&taprio_list_lock);
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		struct netdev_queue *dev_queue;
+		struct Qdisc *qdisc;
+
+		dev_queue = netdev_get_tx_queue(dev, i);
+		qdisc = qdisc_create_dflt(dev_queue,
+					  &pfifo_qdisc_ops,
+					  TC_H_MAKE(TC_H_MAJ(sch->handle),
+						    TC_H_MIN(i + 1)),
+					  extack);
+		if (!qdisc)
+			return -ENOMEM;
+
+		if (i < dev->real_num_tx_queues)
+			qdisc_hash_add(qdisc, false);
+
+		q->qdiscs[i] = qdisc;
+	}
+
 	return taprio_change(sch, opt, extack);
 }
 
@@ -781,7 +988,7 @@ static int dump_entry(struct sk_buff *msg,
 {
 	struct nlattr *item;
 
-	item = nla_nest_start(msg, TCA_TAPRIO_SCHED_ENTRY);
+	item = nla_nest_start_noflag(msg, TCA_TAPRIO_SCHED_ENTRY);
 	if (!item)
 		return -ENOSPC;
 
@@ -806,15 +1013,55 @@ nla_put_failure:
 	return -1;
 }
 
+static int dump_schedule(struct sk_buff *msg,
+			 const struct sched_gate_list *root)
+{
+	struct nlattr *entry_list;
+	struct sched_entry *entry;
+
+	if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_BASE_TIME,
+			root->base_time, TCA_TAPRIO_PAD))
+		return -1;
+
+	if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME,
+			root->cycle_time, TCA_TAPRIO_PAD))
+		return -1;
+
+	if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION,
+			root->cycle_time_extension, TCA_TAPRIO_PAD))
+		return -1;
+
+	entry_list = nla_nest_start_noflag(msg,
+					   TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST);
+	if (!entry_list)
+		goto error_nest;
+
+	list_for_each_entry(entry, &root->entries, list) {
+		if (dump_entry(msg, entry) < 0)
+			goto error_nest;
+	}
+
+	nla_nest_end(msg, entry_list);
+	return 0;
+
+error_nest:
+	nla_nest_cancel(msg, entry_list);
+	return -1;
+}
+
 static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct taprio_sched *q = qdisc_priv(sch);
 	struct net_device *dev = qdisc_dev(sch);
+	struct sched_gate_list *oper, *admin;
 	struct tc_mqprio_qopt opt = { 0 };
-	struct nlattr *nest, *entry_list;
-	struct sched_entry *entry;
+	struct nlattr *nest, *sched_nest;
 	unsigned int i;
 
+	rcu_read_lock();
+	oper = rcu_dereference(q->oper_sched);
+	admin = rcu_dereference(q->admin_sched);
+
 	opt.num_tc = netdev_get_num_tc(dev);
 	memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
 
@@ -823,36 +1070,45 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
 		opt.offset[i] = dev->tc_to_txq[i].offset;
 	}
 
-	nest = nla_nest_start(skb, TCA_OPTIONS);
+	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (!nest)
-		return -ENOSPC;
+		goto start_error;
 
 	if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(opt), &opt))
 		goto options_error;
 
-	if (nla_put_s64(skb, TCA_TAPRIO_ATTR_SCHED_BASE_TIME,
-			q->base_time, TCA_TAPRIO_PAD))
+	if (nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid))
 		goto options_error;
 
-	if (nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid))
+	if (oper && dump_schedule(skb,
oper)) goto options_error; - entry_list = nla_nest_start(skb, TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST); - if (!entry_list) + if (!admin) + goto done; + + sched_nest = nla_nest_start_noflag(skb, TCA_TAPRIO_ATTR_ADMIN_SCHED); + if (!sched_nest) goto options_error; - list_for_each_entry(entry, &q->entries, list) { - if (dump_entry(skb, entry) < 0) - goto options_error; - } + if (dump_schedule(skb, admin)) + goto admin_error; + + nla_nest_end(skb, sched_nest); - nla_nest_end(skb, entry_list); +done: + rcu_read_unlock(); return nla_nest_end(skb, nest); +admin_error: + nla_nest_cancel(skb, sched_nest); + options_error: nla_nest_cancel(skb, nest); - return -1; + +start_error: + rcu_read_unlock(); + return -ENOSPC; } static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl) @@ -939,6 +1195,7 @@ static struct Qdisc_ops taprio_qdisc_ops __read_mostly = { .id = "taprio", .priv_size = sizeof(struct taprio_sched), .init = taprio_init, + .change = taprio_change, .destroy = taprio_destroy, .peek = taprio_peek, .dequeue = taprio_dequeue, @@ -947,14 +1204,24 @@ static struct Qdisc_ops taprio_qdisc_ops __read_mostly = { .owner = THIS_MODULE, }; +static struct notifier_block taprio_device_notifier = { + .notifier_call = taprio_dev_notifier, +}; + static int __init taprio_module_init(void) { + int err = register_netdevice_notifier(&taprio_device_notifier); + + if (err) + return err; + return register_qdisc(&taprio_qdisc_ops); } static void __exit taprio_module_exit(void) { unregister_qdisc(&taprio_qdisc_ops); + unregister_netdevice_notifier(&taprio_device_notifier); } module_init(taprio_module_init); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index f71578dbb9e3..c09c0d855846 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -308,7 +308,8 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt, s64 buffer, mtu; u64 rate64 = 0, prate64 = 0; - err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_TBF_MAX, opt, tbf_policy, + NULL); if (err < 0) return err; @@ -448,7 +449,7 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) struct tc_tbf_qopt opt; sch->qstats.backlog = q->qdisc->qstats.backlog; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 6200cd2b4b99..188c47eb206e 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -1030,6 +1030,7 @@ static const struct proto_ops inet6_seqpacket_ops = { .getname = sctp_getname, .poll = sctp_poll, .ioctl = inet6_ioctl, + .gettstamp = sock_gettstamp, .listen = sctp_inet_listen, .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 951afdeea5e9..f0631bf486b6 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1026,6 +1026,7 @@ static const struct proto_ops inet_seqpacket_ops = { .getname = inet_getname, /* Semantics are different. */ .poll = sctp_poll, .ioctl = inet_ioctl, + .gettstamp = sock_gettstamp, .listen = sctp_inet_listen, .shutdown = inet_shutdown, /* Looks harmless. 
*/ .setsockopt = sock_common_setsockopt, /* IP_SOL IP_OPTION is a problem */ diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 713a669d2058..e3f4abe6134e 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -6431,13 +6431,15 @@ static int sctp_eat_data(const struct sctp_association *asoc, * in sctp_ulpevent_make_rcvmsg will drop the frame if we grow our * memory usage too much */ - if (*sk->sk_prot_creator->memory_pressure) { + if (sk_under_memory_pressure(sk)) { if (sctp_tsnmap_has_gap(map) && (sctp_tsnmap_get_ctsn(map) + 1) == tsn) { pr_debug("%s: under pressure, reneging for tsn:%u\n", __func__, tsn); deliver = SCTP_CMD_RENEGE; - } + } else { + sk_mem_reclaim(sk); + } } /* diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 4583fa914e62..e4e892cc5644 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1913,7 +1913,10 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc, if (sctp_wspace(asoc) < (int)msg_len) sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc)); - if (sctp_wspace(asoc) <= 0) { + if (sk_under_memory_pressure(sk)) + sk_mem_reclaim(sk); + + if (sctp_wspace(asoc) <= 0 || !sk_wmem_schedule(sk, msg_len)) { timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); if (err) @@ -8931,7 +8934,10 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, goto do_error; if (signal_pending(current)) goto do_interrupted; - if ((int)msg_len <= sctp_wspace(asoc)) + if (sk_under_memory_pressure(sk)) + sk_mem_reclaim(sk); + if ((int)msg_len <= sctp_wspace(asoc) && + sk_wmem_schedule(sk, msg_len)) break; /* Let another process have a go. Since we are going diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c index 102c6fefe38c..25e0b7e5189c 100644 --- a/net/sctp/stream_interleave.c +++ b/net/sctp/stream_interleave.c @@ -484,14 +484,15 @@ static struct sctp_ulpevent *sctp_intl_order(struct sctp_ulpq *ulpq, } static int sctp_enqueue_event(struct sctp_ulpq *ulpq, - struct sctp_ulpevent *event) + struct sk_buff_head *skb_list) { - struct sk_buff *skb = sctp_event2skb(event); struct sock *sk = ulpq->asoc->base.sk; struct sctp_sock *sp = sctp_sk(sk); - struct sk_buff_head *skb_list; + struct sctp_ulpevent *event; + struct sk_buff *skb; - skb_list = (struct sk_buff_head *)skb->prev; + skb = __skb_peek(skb_list); + event = sctp_skb2event(skb); if (sk->sk_shutdown & RCV_SHUTDOWN && (sk->sk_shutdown & SEND_SHUTDOWN || @@ -858,19 +859,24 @@ static int sctp_ulpevent_idata(struct sctp_ulpq *ulpq, if (!(event->msg_flags & SCTP_DATA_UNORDERED)) { event = sctp_intl_reasm(ulpq, event); - if (event && event->msg_flags & MSG_EOR) { + if (event) { skb_queue_head_init(&temp); __skb_queue_tail(&temp, sctp_event2skb(event)); - event = sctp_intl_order(ulpq, event); + if (event->msg_flags & MSG_EOR) + event = sctp_intl_order(ulpq, event); } } else { event = sctp_intl_reasm_uo(ulpq, event); + if (event) { + skb_queue_head_init(&temp); + __skb_queue_tail(&temp, sctp_event2skb(event)); + } } if (event) { event_eor = (event->msg_flags & MSG_EOR) ? 
1 : 0; - sctp_enqueue_event(ulpq, event); + sctp_enqueue_event(ulpq, &temp); } return event_eor; @@ -944,20 +950,27 @@ out: static void sctp_intl_start_pd(struct sctp_ulpq *ulpq, gfp_t gfp) { struct sctp_ulpevent *event; + struct sk_buff_head temp; if (!skb_queue_empty(&ulpq->reasm)) { do { event = sctp_intl_retrieve_first(ulpq); - if (event) - sctp_enqueue_event(ulpq, event); + if (event) { + skb_queue_head_init(&temp); + __skb_queue_tail(&temp, sctp_event2skb(event)); + sctp_enqueue_event(ulpq, &temp); + } } while (event); } if (!skb_queue_empty(&ulpq->reasm_uo)) { do { event = sctp_intl_retrieve_first_uo(ulpq); - if (event) - sctp_enqueue_event(ulpq, event); + if (event) { + skb_queue_head_init(&temp); + __skb_queue_tail(&temp, sctp_event2skb(event)); + sctp_enqueue_event(ulpq, &temp); + } } while (event); } } @@ -1059,7 +1072,7 @@ static void sctp_intl_reap_ordered(struct sctp_ulpq *ulpq, __u16 sid) if (event) { sctp_intl_retrieve_ordered(ulpq, event); - sctp_enqueue_event(ulpq, event); + sctp_enqueue_event(ulpq, &temp); } } @@ -1298,6 +1311,15 @@ static void sctp_handle_iftsn(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk) ntohl(skip->mid), skip->flags); } +static int do_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) +{ + struct sk_buff_head temp; + + skb_queue_head_init(&temp); + __skb_queue_tail(&temp, sctp_event2skb(event)); + return sctp_ulpq_tail_event(ulpq, &temp); +} + static struct sctp_stream_interleave sctp_stream_interleave_0 = { .data_chunk_len = sizeof(struct sctp_data_chunk), .ftsn_chunk_len = sizeof(struct sctp_fwdtsn_chunk), @@ -1306,7 +1328,7 @@ static struct sctp_stream_interleave sctp_stream_interleave_0 = { .assign_number = sctp_chunk_assign_ssn, .validate_data = sctp_validate_data, .ulpevent_data = sctp_ulpq_tail_data, - .enqueue_event = sctp_ulpq_tail_event, + .enqueue_event = do_ulpq_tail_event, .renege_events = sctp_ulpq_renege, .start_pd = sctp_ulpq_partial_delivery, .abort_pd = sctp_ulpq_abort_pd, @@ -1317,6 +1339,16 @@ static struct sctp_stream_interleave sctp_stream_interleave_0 = { .handle_ftsn = sctp_handle_fwdtsn, }; +static int do_sctp_enqueue_event(struct sctp_ulpq *ulpq, + struct sctp_ulpevent *event) +{ + struct sk_buff_head temp; + + skb_queue_head_init(&temp); + __skb_queue_tail(&temp, sctp_event2skb(event)); + return sctp_enqueue_event(ulpq, &temp); +} + static struct sctp_stream_interleave sctp_stream_interleave_1 = { .data_chunk_len = sizeof(struct sctp_idata_chunk), .ftsn_chunk_len = sizeof(struct sctp_ifwdtsn_chunk), @@ -1325,7 +1357,7 @@ static struct sctp_stream_interleave sctp_stream_interleave_1 = { .assign_number = sctp_chunk_assign_mid, .validate_data = sctp_validate_idata, .ulpevent_data = sctp_ulpevent_idata, - .enqueue_event = sctp_enqueue_event, + .enqueue_event = do_sctp_enqueue_event, .renege_events = sctp_renege_events, .start_pd = sctp_intl_start_pd, .abort_pd = sctp_intl_abort_pd, diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 8cb7d9858270..c2a7478587ab 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -634,8 +634,9 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, gfp_t gfp) { struct sctp_ulpevent *event = NULL; - struct sk_buff *skb; - size_t padding, len; + struct sk_buff *skb = chunk->skb; + struct sock *sk = asoc->base.sk; + size_t padding, datalen; int rx_count; /* @@ -646,15 +647,12 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, if (asoc->ep->rcvbuf_policy) rx_count = atomic_read(&asoc->rmem_alloc); else - 
rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc);
+		rx_count = atomic_read(&sk->sk_rmem_alloc);
 
-	if (rx_count >= asoc->base.sk->sk_rcvbuf) {
+	datalen = ntohs(chunk->chunk_hdr->length);
 
-		if ((asoc->base.sk->sk_userlocks & SOCK_RCVBUF_LOCK) ||
-		    (!sk_rmem_schedule(asoc->base.sk, chunk->skb,
-				       chunk->skb->truesize)))
-			goto fail;
-	}
+	if (rx_count >= sk->sk_rcvbuf || !sk_rmem_schedule(sk, skb, datalen))
+		goto fail;
 
 	/* Clone the original skb, sharing the data. */
 	skb = skb_clone(chunk->skb, gfp);
@@ -681,8 +679,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
 	 * The sender should never pad with more than 3 bytes. The receiver
 	 * MUST ignore the padding bytes.
 	 */
-	len = ntohs(chunk->chunk_hdr->length);
-	padding = SCTP_PAD4(len) - len;
+	padding = SCTP_PAD4(datalen) - datalen;
 
 	/* Fixup cloned skb with just this chunks data. */
 	skb_trim(skb, chunk->chunk_end - padding - skb->data);
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 5dde92101743..a212fe079c07 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -116,12 +116,13 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
 	event = sctp_ulpq_reasm(ulpq, event);
 
 	/* Do ordering if needed. */
-	if ((event) && (event->msg_flags & MSG_EOR)) {
+	if (event) {
 		/* Create a temporary list to collect chunks on. */
 		skb_queue_head_init(&temp);
 		__skb_queue_tail(&temp, sctp_event2skb(event));
 
-		event = sctp_ulpq_order(ulpq, event);
+		if (event->msg_flags & MSG_EOR)
+			event = sctp_ulpq_order(ulpq, event);
 	}
 
 	/* Send event to the ULP.  'event' is the sctp_ulpevent for
@@ -129,7 +130,7 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
 	 */
 	if (event) {
 		event_eor = (event->msg_flags & MSG_EOR) ? 1 : 0;
-		sctp_ulpq_tail_event(ulpq, event);
+		sctp_ulpq_tail_event(ulpq, &temp);
 	}
 
 	return event_eor;
@@ -193,18 +194,17 @@ static int sctp_ulpq_clear_pd(struct sctp_ulpq *ulpq)
 	return sctp_clear_pd(ulpq->asoc->base.sk, ulpq->asoc);
 }
 
-/* If the SKB of 'event' is on a list, it is the first such member
- * of that list.
- */
-int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
+int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sk_buff_head *skb_list)
 {
 	struct sock *sk = ulpq->asoc->base.sk;
 	struct sctp_sock *sp = sctp_sk(sk);
-	struct sk_buff_head *queue, *skb_list;
-	struct sk_buff *skb = sctp_event2skb(event);
+	struct sctp_ulpevent *event;
+	struct sk_buff_head *queue;
+	struct sk_buff *skb;
 	int clear_pd = 0;
 
-	skb_list = (struct sk_buff_head *) skb->prev;
+	skb = __skb_peek(skb_list);
+	event = sctp_skb2event(skb);
 
 	/* If the socket is just going to throw this away, do not
 	 * even try to deliver it.
@@ -257,13 +257,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
 		}
 	}
 
-	/* If we are harvesting multiple skbs they will be
-	 * collected on a list.
-	 */
-	if (skb_list)
-		skb_queue_splice_tail_init(skb_list, queue);
-	else
-		__skb_queue_tail(queue, skb);
+	skb_queue_splice_tail_init(skb_list, queue);
 
 	/* Did we just complete partial delivery and need to get
 	 * rolling again?  Move pending data to the receive
@@ -738,25 +732,25 @@ void sctp_ulpq_reasm_flushtsn(struct sctp_ulpq *ulpq, __u32 fwd_tsn)
 static void sctp_ulpq_reasm_drain(struct sctp_ulpq *ulpq)
 {
 	struct sctp_ulpevent *event = NULL;
-	struct sk_buff_head temp;
 
 	if (skb_queue_empty(&ulpq->reasm))
 		return;
 
 	while ((event = sctp_ulpq_retrieve_reassembled(ulpq)) != NULL) {
-		/* Do ordering if needed. */
-		if ((event) && (event->msg_flags & MSG_EOR)) {
-			skb_queue_head_init(&temp);
-			__skb_queue_tail(&temp, sctp_event2skb(event));
+		struct sk_buff_head temp;
+
+		skb_queue_head_init(&temp);
+		__skb_queue_tail(&temp, sctp_event2skb(event));
 
+		/* Do ordering if needed. */
+		if (event->msg_flags & MSG_EOR)
 			event = sctp_ulpq_order(ulpq, event);
-		}
 
 		/* Send event to the ULP.  'event' is the
 		 * sctp_ulpevent for very first SKB on the  temp' list.
 		 */
 		if (event)
-			sctp_ulpq_tail_event(ulpq, event);
+			sctp_ulpq_tail_event(ulpq, &temp);
 	}
 }
 
@@ -956,7 +950,7 @@ static void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq, __u16 sid)
 	if (event) {
 		/* see if we have more ordered that we can deliver */
 		sctp_ulpq_retrieve_ordered(ulpq, event);
-		sctp_ulpq_tail_event(ulpq, event);
+		sctp_ulpq_tail_event(ulpq, &temp);
 	}
 }
 
@@ -1082,7 +1076,11 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq,
 	event = sctp_ulpq_retrieve_first(ulpq);
 	/* Send event to the ULP. */
 	if (event) {
-		sctp_ulpq_tail_event(ulpq, event);
+		struct sk_buff_head temp;
+
+		skb_queue_head_init(&temp);
+		__skb_queue_tail(&temp, sctp_event2skb(event));
+		sctp_ulpq_tail_event(ulpq, &temp);
 		sctp_ulpq_set_pd(ulpq);
 		return;
 	}
@@ -1106,7 +1104,8 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
 		freed += sctp_ulpq_renege_frags(ulpq, needed - freed);
 	}
 	/* If able to free enough room, accept this chunk. */
-	if (freed >= needed) {
+	if (sk_rmem_schedule(asoc->base.sk, chunk->skb, needed) &&
+	    freed >= needed) {
 		int retval = sctp_ulpq_tail_data(ulpq, chunk, gfp);
 		/*
 		 * Enter partial delivery if chunk has not been
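The SCTP changes above all follow one pattern: sctp_ulpq_tail_event() (and sctp_enqueue_event() in stream_interleave.c) now take an sk_buff_head, and callers that used to hand over a bare event wrap it in an on-stack one-element list. This replaces the old trick of smuggling the list pointer through skb->prev, which assumed SKB-list internals. A kernel-style sketch of the conversion the do_ulpq_tail_event() wrapper earlier in this diff performs (not standalone code; types come from the sctp headers):

/* Deliver a single event through the list-based API: build a
 * one-element on-stack queue and let sctp_ulpq_tail_event()
 * splice it onto the socket's receive queue.
 */
static void deliver_one_event(struct sctp_ulpq *ulpq,
			      struct sctp_ulpevent *event)
{
	struct sk_buff_head temp;

	skb_queue_head_init(&temp);
	__skb_queue_tail(&temp, sctp_event2skb(event));
	sctp_ulpq_tail_event(ulpq, &temp);
}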
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 6f869ef49b32..086d9913975d 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -134,11 +134,9 @@ static int smc_release(struct socket *sock)
 	smc = smc_sk(sk);
 
 	/* cleanup for a dangling non-blocking connect */
-	if (smc->connect_info && sk->sk_state == SMC_INIT)
+	if (smc->connect_nonblock && sk->sk_state == SMC_INIT)
 		tcp_abort(smc->clcsock->sk, ECONNABORTED);
 	flush_work(&smc->connect_work);
-	kfree(smc->connect_info);
-	smc->connect_info = NULL;
 
 	if (sk->sk_state == SMC_LISTEN)
 		/* smc_close_non_accepted() is called and acquires
@@ -460,6 +458,7 @@ static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
 	smc_switch_to_fallback(smc);
 	smc->fallback_rsn = reason_code;
 	smc_copy_sock_settings_to_clc(smc);
+	smc->connect_nonblock = 0;
 	if (smc->sk.sk_state == SMC_INIT)
 		smc->sk.sk_state = SMC_ACTIVE;
 	return 0;
@@ -499,46 +498,41 @@ static int smc_connect_abort(struct smc_sock *smc, int reason_code,
 		mutex_unlock(&smc_client_lgr_pending);
 
 	smc_conn_free(&smc->conn);
+	smc->connect_nonblock = 0;
 	return reason_code;
 }
 
 /* check if there is a rdma device available for this connection.
*/ /* called for connect and listen */ -static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev, - u8 *ibport, unsigned short vlan_id, u8 gid[]) +static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini) { - int reason_code = 0; - /* PNET table look up: search active ib_device and port * within same PNETID that also contains the ethernet device * used for the internal TCP socket */ - smc_pnet_find_roce_resource(smc->clcsock->sk, ibdev, ibport, vlan_id, - gid); - if (!(*ibdev)) - reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ - - return reason_code; + smc_pnet_find_roce_resource(smc->clcsock->sk, ini); + if (!ini->ib_dev) + return SMC_CLC_DECL_NOSMCRDEV; + return 0; } /* check if there is an ISM device available for this connection. */ /* called for connect and listen */ -static int smc_check_ism(struct smc_sock *smc, struct smcd_dev **ismdev) +static int smc_find_ism_device(struct smc_sock *smc, struct smc_init_info *ini) { /* Find ISM device with same PNETID as connecting interface */ - smc_pnet_find_ism_resource(smc->clcsock->sk, ismdev); - if (!(*ismdev)) - return SMC_CLC_DECL_CNFERR; /* configuration error */ + smc_pnet_find_ism_resource(smc->clcsock->sk, ini); + if (!ini->ism_dev) + return SMC_CLC_DECL_NOSMCDDEV; return 0; } /* Check for VLAN ID and register it on ISM device just for CLC handshake */ static int smc_connect_ism_vlan_setup(struct smc_sock *smc, - struct smcd_dev *ismdev, - unsigned short vlan_id) + struct smc_init_info *ini) { - if (vlan_id && smc_ism_get_vlan(ismdev, vlan_id)) - return SMC_CLC_DECL_CNFERR; + if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) + return SMC_CLC_DECL_ISMVLANERR; return 0; } @@ -546,12 +540,11 @@ static int smc_connect_ism_vlan_setup(struct smc_sock *smc, * used, the VLAN ID will be registered again during the connection setup. 
*/ static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd, - struct smcd_dev *ismdev, - unsigned short vlan_id) + struct smc_init_info *ini) { if (!is_smcd) return 0; - if (vlan_id && smc_ism_put_vlan(ismdev, vlan_id)) + if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev, ini->vlan_id)) return SMC_CLC_DECL_CNFERR; return 0; } @@ -559,13 +552,12 @@ static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd, /* CLC handshake during connect */ static int smc_connect_clc(struct smc_sock *smc, int smc_type, struct smc_clc_msg_accept_confirm *aclc, - struct smc_ib_device *ibdev, u8 ibport, - u8 gid[], struct smcd_dev *ismdev) + struct smc_init_info *ini) { int rc = 0; /* do inband token exchange */ - rc = smc_clc_send_proposal(smc, smc_type, ibdev, ibport, gid, ismdev); + rc = smc_clc_send_proposal(smc, smc_type, ini); if (rc) return rc; /* receive SMC Accept CLC message */ @@ -576,23 +568,19 @@ static int smc_connect_clc(struct smc_sock *smc, int smc_type, /* setup for RDMA connection of client */ static int smc_connect_rdma(struct smc_sock *smc, struct smc_clc_msg_accept_confirm *aclc, - struct smc_ib_device *ibdev, u8 ibport) + struct smc_init_info *ini) { - int local_contact = SMC_FIRST_CONTACT; struct smc_link *link; int reason_code = 0; + ini->is_smcd = false; + ini->ib_lcl = &aclc->lcl; + ini->ib_clcqpn = ntoh24(aclc->qpn); + ini->srv_first_contact = aclc->hdr.flag; + mutex_lock(&smc_client_lgr_pending); - local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev, - ibport, ntoh24(aclc->qpn), &aclc->lcl, - NULL, 0); - if (local_contact < 0) { - if (local_contact == -ENOMEM) - reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/ - else if (local_contact == -ENOLINK) - reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. 
error */ - else - reason_code = SMC_CLC_DECL_INTERR; /* other error */ + reason_code = smc_conn_create(smc, ini); + if (reason_code) { mutex_unlock(&smc_client_lgr_pending); return reason_code; } @@ -602,45 +590,48 @@ static int smc_connect_rdma(struct smc_sock *smc, /* create send buffer and rmb */ if (smc_buf_create(smc, false)) - return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact); + return smc_connect_abort(smc, SMC_CLC_DECL_MEM, + ini->cln_first_contact); - if (local_contact == SMC_FIRST_CONTACT) + if (ini->cln_first_contact == SMC_FIRST_CONTACT) smc_link_save_peer_info(link, aclc); if (smc_rmb_rtoken_handling(&smc->conn, aclc)) return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK, - local_contact); + ini->cln_first_contact); smc_close_init(smc); smc_rx_init(smc); - if (local_contact == SMC_FIRST_CONTACT) { + if (ini->cln_first_contact == SMC_FIRST_CONTACT) { if (smc_ib_ready_link(link)) return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK, - local_contact); + ini->cln_first_contact); } else { if (smc_reg_rmb(link, smc->conn.rmb_desc, true)) return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB, - local_contact); + ini->cln_first_contact); } smc_rmb_sync_sg_for_device(&smc->conn); reason_code = smc_clc_send_confirm(smc); if (reason_code) - return smc_connect_abort(smc, reason_code, local_contact); + return smc_connect_abort(smc, reason_code, + ini->cln_first_contact); smc_tx_init(smc); - if (local_contact == SMC_FIRST_CONTACT) { + if (ini->cln_first_contact == SMC_FIRST_CONTACT) { /* QP confirmation over RoCE fabric */ reason_code = smc_clnt_conf_first_link(smc); if (reason_code) return smc_connect_abort(smc, reason_code, - local_contact); + ini->cln_first_contact); } mutex_unlock(&smc_client_lgr_pending); smc_copy_sock_settings_to_clc(smc); + smc->connect_nonblock = 0; if (smc->sk.sk_state == SMC_INIT) smc->sk.sk_state = SMC_ACTIVE; @@ -650,23 +641,26 @@ static int smc_connect_rdma(struct smc_sock *smc, /* setup for ISM connection of client */ static int smc_connect_ism(struct smc_sock *smc, struct smc_clc_msg_accept_confirm *aclc, - struct smcd_dev *ismdev) + struct smc_init_info *ini) { - int local_contact = SMC_FIRST_CONTACT; int rc = 0; + ini->is_smcd = true; + ini->ism_gid = aclc->gid; + ini->srv_first_contact = aclc->hdr.flag; + /* there is only one lgr role for SMC-D; use server lock */ mutex_lock(&smc_server_lgr_pending); - local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0, 0, - NULL, ismdev, aclc->gid); - if (local_contact < 0) { + rc = smc_conn_create(smc, ini); + if (rc) { mutex_unlock(&smc_server_lgr_pending); - return SMC_CLC_DECL_MEM; + return rc; } /* Create send and receive buffers */ if (smc_buf_create(smc, true)) - return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact); + return smc_connect_abort(smc, SMC_CLC_DECL_MEM, + ini->cln_first_contact); smc_conn_save_peer_info(smc, aclc); smc_close_init(smc); @@ -675,10 +669,11 @@ static int smc_connect_ism(struct smc_sock *smc, rc = smc_clc_send_confirm(smc); if (rc) - return smc_connect_abort(smc, rc, local_contact); + return smc_connect_abort(smc, rc, ini->cln_first_contact); mutex_unlock(&smc_server_lgr_pending); smc_copy_sock_settings_to_clc(smc); + smc->connect_nonblock = 0; if (smc->sk.sk_state == SMC_INIT) smc->sk.sk_state = SMC_ACTIVE; @@ -690,13 +685,9 @@ static int __smc_connect(struct smc_sock *smc) { bool ism_supported = false, rdma_supported = false; struct smc_clc_msg_accept_confirm aclc; - struct smc_ib_device *ibdev; - struct smcd_dev *ismdev; - u8 gid[SMC_GID_SIZE]; - 
unsigned short vlan; + struct smc_init_info ini = {0}; int smc_type; int rc = 0; - u8 ibport; sock_hold(&smc->sk); /* sock put in passive closing */ @@ -711,20 +702,21 @@ static int __smc_connect(struct smc_sock *smc) if (using_ipsec(smc)) return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC); - /* check for VLAN ID */ - if (smc_vlan_by_tcpsk(smc->clcsock, &vlan)) - return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR); + /* get vlan id from IP device */ + if (smc_vlan_by_tcpsk(smc->clcsock, &ini)) + return smc_connect_decline_fallback(smc, + SMC_CLC_DECL_GETVLANERR); /* check if there is an ism device available */ - if (!smc_check_ism(smc, &ismdev) && - !smc_connect_ism_vlan_setup(smc, ismdev, vlan)) { + if (!smc_find_ism_device(smc, &ini) && + !smc_connect_ism_vlan_setup(smc, &ini)) { /* ISM is supported for this connection */ ism_supported = true; smc_type = SMC_TYPE_D; } /* check if there is a rdma device available */ - if (!smc_check_rdma(smc, &ibdev, &ibport, vlan, gid)) { + if (!smc_find_rdma_device(smc, &ini)) { /* RDMA is supported for this connection */ rdma_supported = true; if (ism_supported) @@ -738,25 +730,25 @@ static int __smc_connect(struct smc_sock *smc) return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV); /* perform CLC handshake */ - rc = smc_connect_clc(smc, smc_type, &aclc, ibdev, ibport, gid, ismdev); + rc = smc_connect_clc(smc, smc_type, &aclc, &ini); if (rc) { - smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan); + smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini); return smc_connect_decline_fallback(smc, rc); } /* depending on previous steps, connect using rdma or ism */ if (rdma_supported && aclc.hdr.path == SMC_TYPE_R) - rc = smc_connect_rdma(smc, &aclc, ibdev, ibport); + rc = smc_connect_rdma(smc, &aclc, &ini); else if (ism_supported && aclc.hdr.path == SMC_TYPE_D) - rc = smc_connect_ism(smc, &aclc, ismdev); + rc = smc_connect_ism(smc, &aclc, &ini); else rc = SMC_CLC_DECL_MODEUNSUPP; if (rc) { - smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan); + smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini); return smc_connect_decline_fallback(smc, rc); } - smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan); + smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini); return 0; } @@ -764,17 +756,30 @@ static void smc_connect_work(struct work_struct *work) { struct smc_sock *smc = container_of(work, struct smc_sock, connect_work); - int rc; + long timeo = smc->sk.sk_sndtimeo; + int rc = 0; - lock_sock(&smc->sk); - rc = kernel_connect(smc->clcsock, &smc->connect_info->addr, - smc->connect_info->alen, smc->connect_info->flags); + if (!timeo) + timeo = MAX_SCHEDULE_TIMEOUT; + lock_sock(smc->clcsock->sk); if (smc->clcsock->sk->sk_err) { smc->sk.sk_err = smc->clcsock->sk->sk_err; - goto out; - } - if (rc < 0) { - smc->sk.sk_err = -rc; + } else if ((1 << smc->clcsock->sk->sk_state) & + (TCPF_SYN_SENT | TCP_SYN_RECV)) { + rc = sk_stream_wait_connect(smc->clcsock->sk, &timeo); + if ((rc == -EPIPE) && + ((1 << smc->clcsock->sk->sk_state) & + (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))) + rc = 0; + } + release_sock(smc->clcsock->sk); + lock_sock(&smc->sk); + if (rc != 0 || smc->sk.sk_err) { + smc->sk.sk_state = SMC_CLOSED; + if (rc == -EPIPE || rc == -EAGAIN) + smc->sk.sk_err = EPIPE; + else if (signal_pending(current)) + smc->sk.sk_err = -sock_intr_errno(timeo); goto out; } @@ -791,8 +796,6 @@ out: smc->sk.sk_write_space(&smc->sk); } } - kfree(smc->connect_info); - smc->connect_info = NULL; release_sock(&smc->sk); } 
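The reworked nonblocking connect above drops the saved connect_info entirely: kernel_connect() is now issued directly from smc_connect(), and connect_work merely waits for the TCP handshake via sk_stream_wait_connect() before running the CLC handshake, with the connect_nonblock flag guarding against a second connect racing the first. From userspace the socket behaves like any nonblocking TCP connect; a hedged sketch of the calling pattern (error handling trimmed, AF_SMC value from linux/socket.h):

#include <errno.h>
#include <poll.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef AF_SMC
#define AF_SMC 43
#endif

/* Illustrative caller of the path patched above: connect() returns
 * -EINPROGRESS, completion is signalled by POLLOUT, as with TCP.
 */
static int smc_connect_nonblock(const struct sockaddr *addr, socklen_t alen)
{
	int fd = socket(AF_SMC, SOCK_STREAM | SOCK_NONBLOCK, 0);

	if (fd < 0)
		return -1;
	if (connect(fd, addr, alen) == 0)
		return fd;			/* connected immediately */
	if (errno != EINPROGRESS) {
		close(fd);
		return -1;
	}
	struct pollfd pfd = { .fd = fd, .events = POLLOUT };
	if (poll(&pfd, 1, 5000) == 1 && (pfd.revents & POLLOUT))
		return fd;			/* check SO_ERROR before use */
	close(fd);
	return -1;
}

The smc_poll() change later in this patch is the kernel half of the same contract: while the connect is in flight the socket must not report writability, and once SMC falls back to TCP the poll state is delegated to the clcsock.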
@@ -825,26 +828,18 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr, smc_copy_sock_settings_to_clc(smc); tcp_sk(smc->clcsock->sk)->syn_smc = 1; + if (smc->connect_nonblock) { + rc = -EALREADY; + goto out; + } + rc = kernel_connect(smc->clcsock, addr, alen, flags); + if (rc && rc != -EINPROGRESS) + goto out; if (flags & O_NONBLOCK) { - if (smc->connect_info) { - rc = -EALREADY; - goto out; - } - smc->connect_info = kzalloc(alen + 2 * sizeof(int), GFP_KERNEL); - if (!smc->connect_info) { - rc = -ENOMEM; - goto out; - } - smc->connect_info->alen = alen; - smc->connect_info->flags = flags ^ O_NONBLOCK; - memcpy(&smc->connect_info->addr, addr, alen); - schedule_work(&smc->connect_work); + if (schedule_work(&smc->connect_work)) + smc->connect_nonblock = 1; rc = -EINPROGRESS; } else { - rc = kernel_connect(smc->clcsock, addr, alen, flags); - if (rc) - goto out; - rc = __smc_connect(smc); if (rc < 0) goto out; @@ -1116,7 +1111,7 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code, } /* listen worker: check prefixes */ -static int smc_listen_rdma_check(struct smc_sock *new_smc, +static int smc_listen_prfx_check(struct smc_sock *new_smc, struct smc_clc_msg_proposal *pclc) { struct smc_clc_msg_proposal_prefix *pclc_prfx; @@ -1124,25 +1119,21 @@ static int smc_listen_rdma_check(struct smc_sock *new_smc, pclc_prfx = smc_clc_proposal_get_prefix(pclc); if (smc_clc_prfx_match(newclcsock, pclc_prfx)) - return SMC_CLC_DECL_CNFERR; + return SMC_CLC_DECL_DIFFPREFIX; return 0; } /* listen worker: initialize connection and buffers */ static int smc_listen_rdma_init(struct smc_sock *new_smc, - struct smc_clc_msg_proposal *pclc, - struct smc_ib_device *ibdev, u8 ibport, - int *local_contact) + struct smc_init_info *ini) { + int rc; + /* allocate connection / link group */ - *local_contact = smc_conn_create(new_smc, false, 0, ibdev, ibport, 0, - &pclc->lcl, NULL, 0); - if (*local_contact < 0) { - if (*local_contact == -ENOMEM) - return SMC_CLC_DECL_MEM;/* insufficient memory*/ - return SMC_CLC_DECL_INTERR; /* other error */ - } + rc = smc_conn_create(new_smc, ini); + if (rc) + return rc; /* create send buffer and rmb */ if (smc_buf_create(new_smc, false)) @@ -1154,33 +1145,30 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc, /* listen worker: initialize connection and buffers for SMC-D */ static int smc_listen_ism_init(struct smc_sock *new_smc, struct smc_clc_msg_proposal *pclc, - struct smcd_dev *ismdev, - int *local_contact) + struct smc_init_info *ini) { struct smc_clc_msg_smcd *pclc_smcd; + int rc; pclc_smcd = smc_get_clc_msg_smcd(pclc); - *local_contact = smc_conn_create(new_smc, true, 0, NULL, 0, 0, NULL, - ismdev, pclc_smcd->gid); - if (*local_contact < 0) { - if (*local_contact == -ENOMEM) - return SMC_CLC_DECL_MEM;/* insufficient memory*/ - return SMC_CLC_DECL_INTERR; /* other error */ - } + ini->ism_gid = pclc_smcd->gid; + rc = smc_conn_create(new_smc, ini); + if (rc) + return rc; /* Check if peer can be reached via ISM device */ if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid, new_smc->conn.lgr->vlan_id, new_smc->conn.lgr->smcd)) { - if (*local_contact == SMC_FIRST_CONTACT) + if (ini->cln_first_contact == SMC_FIRST_CONTACT) smc_lgr_forget(new_smc->conn.lgr); smc_conn_free(&new_smc->conn); - return SMC_CLC_DECL_CNFERR; + return SMC_CLC_DECL_SMCDNOTALK; } /* Create send and receive buffers */ if (smc_buf_create(new_smc, true)) { - if (*local_contact == SMC_FIRST_CONTACT) + if (ini->cln_first_contact == SMC_FIRST_CONTACT) 
smc_lgr_forget(new_smc->conn.lgr); smc_conn_free(&new_smc->conn); return SMC_CLC_DECL_MEM; @@ -1244,15 +1232,10 @@ static void smc_listen_work(struct work_struct *work) struct socket *newclcsock = new_smc->clcsock; struct smc_clc_msg_accept_confirm cclc; struct smc_clc_msg_proposal *pclc; - struct smc_ib_device *ibdev; + struct smc_init_info ini = {0}; bool ism_supported = false; - struct smcd_dev *ismdev; u8 buf[SMC_CLC_MAX_LEN]; - int local_contact = 0; - unsigned short vlan; - int reason_code = 0; int rc = 0; - u8 ibport; if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN) return smc_listen_out_err(new_smc); @@ -1274,17 +1257,26 @@ static void smc_listen_work(struct work_struct *work) * wait for and receive SMC Proposal CLC message */ pclc = (struct smc_clc_msg_proposal *)&buf; - reason_code = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN, - SMC_CLC_PROPOSAL, CLC_WAIT_TIME); - if (reason_code) { - smc_listen_decline(new_smc, reason_code, 0); - return; - } + rc = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN, + SMC_CLC_PROPOSAL, CLC_WAIT_TIME); + if (rc) + goto out_decl; /* IPSec connections opt out of SMC-R optimizations */ if (using_ipsec(new_smc)) { - smc_listen_decline(new_smc, SMC_CLC_DECL_IPSEC, 0); - return; + rc = SMC_CLC_DECL_IPSEC; + goto out_decl; + } + + /* check for matching IP prefix and subnet length */ + rc = smc_listen_prfx_check(new_smc, pclc); + if (rc) + goto out_decl; + + /* get vlan id from IP device */ + if (smc_vlan_by_tcpsk(new_smc->clcsock, &ini)) { + rc = SMC_CLC_DECL_GETVLANERR; + goto out_decl; } mutex_lock(&smc_server_lgr_pending); @@ -1293,59 +1285,73 @@ static void smc_listen_work(struct work_struct *work) smc_tx_init(new_smc); /* check if ISM is available */ - if ((pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) && - !smc_check_ism(new_smc, &ismdev) && - !smc_listen_ism_init(new_smc, pclc, ismdev, &local_contact)) { - ism_supported = true; + if (pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) { + ini.is_smcd = true; /* prepare ISM check */ + rc = smc_find_ism_device(new_smc, &ini); + if (!rc) + rc = smc_listen_ism_init(new_smc, pclc, &ini); + if (!rc) + ism_supported = true; + else if (pclc->hdr.path == SMC_TYPE_D) + goto out_unlock; /* skip RDMA and decline */ } /* check if RDMA is available */ - if (!ism_supported && - ((pclc->hdr.path != SMC_TYPE_R && pclc->hdr.path != SMC_TYPE_B) || - smc_vlan_by_tcpsk(new_smc->clcsock, &vlan) || - smc_check_rdma(new_smc, &ibdev, &ibport, vlan, NULL) || - smc_listen_rdma_check(new_smc, pclc) || - smc_listen_rdma_init(new_smc, pclc, ibdev, ibport, - &local_contact) || - smc_listen_rdma_reg(new_smc, local_contact))) { - /* SMC not supported, decline */ - mutex_unlock(&smc_server_lgr_pending); - smc_listen_decline(new_smc, SMC_CLC_DECL_MODEUNSUPP, - local_contact); - return; + if (!ism_supported) { /* SMC_TYPE_R or SMC_TYPE_B */ + /* prepare RDMA check */ + memset(&ini, 0, sizeof(ini)); + ini.is_smcd = false; + ini.ib_lcl = &pclc->lcl; + rc = smc_find_rdma_device(new_smc, &ini); + if (rc) { + /* no RDMA device found */ + if (pclc->hdr.path == SMC_TYPE_B) + /* neither ISM nor RDMA device found */ + rc = SMC_CLC_DECL_NOSMCDEV; + goto out_unlock; + } + rc = smc_listen_rdma_init(new_smc, &ini); + if (rc) + goto out_unlock; + rc = smc_listen_rdma_reg(new_smc, ini.cln_first_contact); + if (rc) + goto out_unlock; } /* send SMC Accept CLC message */ - rc = smc_clc_send_accept(new_smc, local_contact); - if (rc) { - mutex_unlock(&smc_server_lgr_pending); - smc_listen_decline(new_smc, rc, 
local_contact); - return; - } + rc = smc_clc_send_accept(new_smc, ini.cln_first_contact); + if (rc) + goto out_unlock; /* SMC-D does not need this lock any more */ if (ism_supported) mutex_unlock(&smc_server_lgr_pending); /* receive SMC Confirm CLC message */ - reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc), - SMC_CLC_CONFIRM, CLC_WAIT_TIME); - if (reason_code) { + rc = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc), + SMC_CLC_CONFIRM, CLC_WAIT_TIME); + if (rc) { if (!ism_supported) - mutex_unlock(&smc_server_lgr_pending); - smc_listen_decline(new_smc, reason_code, local_contact); - return; + goto out_unlock; + goto out_decl; } /* finish worker */ if (!ism_supported) { - rc = smc_listen_rdma_finish(new_smc, &cclc, local_contact); + rc = smc_listen_rdma_finish(new_smc, &cclc, + ini.cln_first_contact); mutex_unlock(&smc_server_lgr_pending); if (rc) return; } smc_conn_save_peer_info(new_smc, &cclc); smc_listen_out_connected(new_smc); + return; + +out_unlock: + mutex_unlock(&smc_server_lgr_pending); +out_decl: + smc_listen_decline(new_smc, rc, ini.cln_first_contact); } static void smc_tcp_listen_work(struct work_struct *work) @@ -1591,8 +1597,8 @@ static __poll_t smc_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; - __poll_t mask = 0; struct smc_sock *smc; + __poll_t mask = 0; if (!sk) return EPOLLNVAL; @@ -1602,8 +1608,6 @@ static __poll_t smc_poll(struct file *file, struct socket *sock, /* delegate to CLC child sock */ mask = smc->clcsock->ops->poll(file, smc->clcsock, wait); sk->sk_err = smc->clcsock->sk->sk_err; - if (sk->sk_err) - mask |= EPOLLERR; } else { if (sk->sk_state != SMC_CLOSED) sock_poll_wait(file, sock, wait); @@ -1614,9 +1618,14 @@ static __poll_t smc_poll(struct file *file, struct socket *sock, mask |= EPOLLHUP; if (sk->sk_state == SMC_LISTEN) { /* woken up by sk_data_ready in smc_listen_work() */ - mask = smc_accept_poll(sk); + mask |= smc_accept_poll(sk); + } else if (smc->use_fallback) { /* as result of connect_work()*/ + mask |= smc->clcsock->ops->poll(file, smc->clcsock, + wait); + sk->sk_err = smc->clcsock->sk->sk_err; } else { - if (atomic_read(&smc->conn.sndbuf_space) || + if ((sk->sk_state != SMC_INIT && + atomic_read(&smc->conn.sndbuf_space)) || sk->sk_shutdown & SEND_SHUTDOWN) { mask |= EPOLLOUT | EPOLLWRNORM; } else { diff --git a/net/smc/smc.h b/net/smc/smc.h index adbdf195eb08..878313f8d6c1 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -190,18 +190,11 @@ struct smc_connection { u64 peer_token; /* SMC-D token of peer */ }; -struct smc_connect_info { - int flags; - int alen; - struct sockaddr addr; -}; - struct smc_sock { /* smc sock container */ struct sock sk; struct socket *clcsock; /* internal tcp socket */ struct smc_connection conn; /* smc connection */ struct smc_sock *listen_smc; /* listen parent */ - struct smc_connect_info *connect_info; /* connect address & flags */ struct work_struct connect_work; /* handle non-blocking connect*/ struct work_struct tcp_listen_work;/* handle tcp socket accepts */ struct work_struct smc_listen_work;/* prepare new accept socket */ @@ -219,6 +212,10 @@ struct smc_sock { /* smc sock container */ * started, waiting for unsent * data to be sent */ + u8 connect_nonblock : 1; + /* non-blocking connect in + * flight + */ struct mutex clcsock_release_lock; /* protects clcsock of a listen * socket diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index d53fd588d1f5..745afd82f281 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -385,8 +385,7 @@ int 
smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info) /* send CLC PROPOSAL message across internal TCP socket */ int smc_clc_send_proposal(struct smc_sock *smc, int smc_type, - struct smc_ib_device *ibdev, u8 ibport, u8 gid[], - struct smcd_dev *ismdev) + struct smc_init_info *ini) { struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX]; struct smc_clc_msg_proposal_prefix pclc_prfx; @@ -416,8 +415,9 @@ int smc_clc_send_proposal(struct smc_sock *smc, int smc_type, /* add SMC-R specifics */ memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); - memcpy(&pclc.lcl.gid, gid, SMC_GID_SIZE); - memcpy(&pclc.lcl.mac, &ibdev->mac[ibport - 1], ETH_ALEN); + memcpy(&pclc.lcl.gid, ini->ib_gid, SMC_GID_SIZE); + memcpy(&pclc.lcl.mac, &ini->ib_dev->mac[ini->ib_port - 1], + ETH_ALEN); pclc.iparea_offset = htons(0); } if (smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B) { @@ -425,7 +425,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, int smc_type, memset(&pclc_smcd, 0, sizeof(pclc_smcd)); plen += sizeof(pclc_smcd); pclc.iparea_offset = htons(SMC_CLC_PROPOSAL_MAX_OFFSET); - pclc_smcd.gid = ismdev->local_gid; + pclc_smcd.gid = ini->ism_dev->local_gid; } pclc.hdr.length = htons(plen); diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 24658e8c0de4..ca209272e5fa 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -34,16 +34,22 @@ #define SMC_CLC_DECL_CNFERR 0x03000000 /* configuration error */ #define SMC_CLC_DECL_PEERNOSMC 0x03010000 /* peer did not indicate SMC */ #define SMC_CLC_DECL_IPSEC 0x03020000 /* IPsec usage */ -#define SMC_CLC_DECL_NOSMCDEV 0x03030000 /* no SMC device found */ +#define SMC_CLC_DECL_NOSMCDEV 0x03030000 /* no SMC device found (R or D) */ +#define SMC_CLC_DECL_NOSMCDDEV 0x03030001 /* no SMC-D device found */ +#define SMC_CLC_DECL_NOSMCRDEV 0x03030002 /* no SMC-R device found */ +#define SMC_CLC_DECL_SMCDNOTALK 0x03030003 /* SMC-D dev can't talk to peer */ #define SMC_CLC_DECL_MODEUNSUPP 0x03040000 /* smc modes do not match (R or D)*/ #define SMC_CLC_DECL_RMBE_EC 0x03050000 /* peer has eyecatcher in RMBE */ #define SMC_CLC_DECL_OPTUNSUPP 0x03060000 /* fastopen sockopt not supported */ +#define SMC_CLC_DECL_DIFFPREFIX 0x03070000 /* IP prefix / subnet mismatch */ +#define SMC_CLC_DECL_GETVLANERR 0x03080000 /* err to get vlan id of ip device*/ +#define SMC_CLC_DECL_ISMVLANERR 0x03090000 /* err to reg vlan id on ism dev */ #define SMC_CLC_DECL_SYNCERR 0x04000000 /* synchronization error */ #define SMC_CLC_DECL_PEERDECL 0x05000000 /* peer declined during handshake */ -#define SMC_CLC_DECL_INTERR 0x99990000 /* internal error */ -#define SMC_CLC_DECL_ERR_RTOK 0x99990001 /* rtoken handling failed */ -#define SMC_CLC_DECL_ERR_RDYLNK 0x99990002 /* ib ready link failed */ -#define SMC_CLC_DECL_ERR_REGRMB 0x99990003 /* reg rmb failed */ +#define SMC_CLC_DECL_INTERR 0x09990000 /* internal error */ +#define SMC_CLC_DECL_ERR_RTOK 0x09990001 /* rtoken handling failed */ +#define SMC_CLC_DECL_ERR_RDYLNK 0x09990002 /* ib ready link failed */ +#define SMC_CLC_DECL_ERR_REGRMB 0x09990003 /* reg rmb failed */ struct smc_clc_msg_hdr { /* header1 of clc messages */ u8 eyecatcher[4]; /* eye catcher */ @@ -179,6 +185,7 @@ smc_get_clc_msg_smcd(struct smc_clc_msg_proposal *prop) } struct smcd_dev; +struct smc_init_info; int smc_clc_prfx_match(struct socket *clcsock, struct smc_clc_msg_proposal_prefix *prop); @@ -186,8 +193,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, u8 expected_type, unsigned long timeout); int smc_clc_send_decline(struct 
smc_sock *smc, u32 peer_diag_info); int smc_clc_send_proposal(struct smc_sock *smc, int smc_type, - struct smc_ib_device *smcibdev, u8 ibport, u8 gid[], - struct smcd_dev *ismdev); + struct smc_init_info *ini); int smc_clc_send_confirm(struct smc_sock *smc); int smc_clc_send_accept(struct smc_sock *smc, int srv_first_contact); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 53a17cfa61af..2d2850adc2a3 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -195,10 +195,7 @@ static void smc_lgr_free_work(struct work_struct *work) } /* create a new SMC link group */ -static int smc_lgr_create(struct smc_sock *smc, bool is_smcd, - struct smc_ib_device *smcibdev, u8 ibport, - char *peer_systemid, unsigned short vlan_id, - struct smcd_dev *smcismdev, u64 peer_gid) +static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) { struct smc_link_group *lgr; struct smc_link *lnk; @@ -206,20 +203,21 @@ static int smc_lgr_create(struct smc_sock *smc, bool is_smcd, int rc = 0; int i; - if (is_smcd && vlan_id) { - rc = smc_ism_get_vlan(smcismdev, vlan_id); - if (rc) + if (ini->is_smcd && ini->vlan_id) { + if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) { + rc = SMC_CLC_DECL_ISMVLANERR; goto out; + } } lgr = kzalloc(sizeof(*lgr), GFP_KERNEL); if (!lgr) { - rc = -ENOMEM; + rc = SMC_CLC_DECL_MEM; goto out; } - lgr->is_smcd = is_smcd; + lgr->is_smcd = ini->is_smcd; lgr->sync_err = 0; - lgr->vlan_id = vlan_id; + lgr->vlan_id = ini->vlan_id; rwlock_init(&lgr->sndbufs_lock); rwlock_init(&lgr->rmbs_lock); rwlock_init(&lgr->conns_lock); @@ -231,29 +229,32 @@ static int smc_lgr_create(struct smc_sock *smc, bool is_smcd, memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE); INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work); lgr->conns_all = RB_ROOT; - if (is_smcd) { + if (ini->is_smcd) { /* SMC-D specific settings */ - lgr->peer_gid = peer_gid; - lgr->smcd = smcismdev; + lgr->peer_gid = ini->ism_gid; + lgr->smcd = ini->ism_dev; } else { /* SMC-R specific settings */ lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT; - memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN); + memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer, + SMC_SYSTEMID_LEN); lnk = &lgr->lnk[SMC_SINGLE_LINK]; /* initialize link */ lnk->state = SMC_LNK_ACTIVATING; lnk->link_id = SMC_SINGLE_LINK; - lnk->smcibdev = smcibdev; - lnk->ibport = ibport; - lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu; - if (!smcibdev->initialized) - smc_ib_setup_per_ibdev(smcibdev); + lnk->smcibdev = ini->ib_dev; + lnk->ibport = ini->ib_port; + lnk->path_mtu = + ini->ib_dev->pattr[ini->ib_port - 1].active_mtu; + if (!ini->ib_dev->initialized) + smc_ib_setup_per_ibdev(ini->ib_dev); get_random_bytes(rndvec, sizeof(rndvec)); lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + (rndvec[2] << 16); rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport, - vlan_id, lnk->gid, &lnk->sgid_index); + ini->vlan_id, lnk->gid, + &lnk->sgid_index); if (rc) goto free_lgr; rc = smc_llc_link_init(lnk); @@ -289,6 +290,12 @@ clear_llc_lnk: free_lgr: kfree(lgr); out: + if (rc < 0) { + if (rc == -ENOMEM) + rc = SMC_CLC_DECL_MEM; + else + rc = SMC_CLC_DECL_INTERR; + } return rc; } @@ -528,13 +535,13 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan) /* Determine vlan of internal TCP socket. 
* @vlan_id: address to store the determined vlan id into */ -int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id) +int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini) { struct dst_entry *dst = sk_dst_get(clcsock->sk); struct net_device *ndev; int i, nest_lvl, rc = 0; - *vlan_id = 0; + ini->vlan_id = 0; if (!dst) { rc = -ENOTCONN; goto out; @@ -546,7 +553,7 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id) ndev = dst->dev; if (is_vlan_dev(ndev)) { - *vlan_id = vlan_dev_vlan_id(ndev); + ini->vlan_id = vlan_dev_vlan_id(ndev); goto out_rel; } @@ -560,7 +567,7 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id) lower = lower->next; ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower); if (is_vlan_dev(ndev)) { - *vlan_id = vlan_dev_vlan_id(ndev); + ini->vlan_id = vlan_dev_vlan_id(ndev); break; } } @@ -594,24 +601,16 @@ static bool smcd_lgr_match(struct smc_link_group *lgr, } /* create a new SMC connection (and a new link group if necessary) */ -int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact, - struct smc_ib_device *smcibdev, u8 ibport, u32 clcqpn, - struct smc_clc_msg_local *lcl, struct smcd_dev *smcd, - u64 peer_gid) +int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) { struct smc_connection *conn = &smc->conn; - int local_contact = SMC_FIRST_CONTACT; struct smc_link_group *lgr; - unsigned short vlan_id; enum smc_lgr_role role; int rc = 0; + ini->cln_first_contact = SMC_FIRST_CONTACT; role = smc->listen_smc ? SMC_SERV : SMC_CLNT; - rc = smc_vlan_by_tcpsk(smc->clcsock, &vlan_id); - if (rc) - return rc; - - if ((role == SMC_CLNT) && srv_first_contact) + if (role == SMC_CLNT && ini->srv_first_contact) /* create new link group as well */ goto create; @@ -619,14 +618,15 @@ int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact, spin_lock_bh(&smc_lgr_list.lock); list_for_each_entry(lgr, &smc_lgr_list.list, list) { write_lock_bh(&lgr->conns_lock); - if ((is_smcd ? smcd_lgr_match(lgr, smcd, peer_gid) : - smcr_lgr_match(lgr, lcl, role, clcqpn)) && + if ((ini->is_smcd ? + smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) : + smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) && !lgr->sync_err && - lgr->vlan_id == vlan_id && + lgr->vlan_id == ini->vlan_id && (role == SMC_CLNT || lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) { /* link group found */ - local_contact = SMC_REUSE_CONTACT; + ini->cln_first_contact = SMC_REUSE_CONTACT; conn->lgr = lgr; smc_lgr_register_conn(conn); /* add smc conn to lgr */ if (delayed_work_pending(&lgr->free_work)) @@ -638,19 +638,18 @@ int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact, } spin_unlock_bh(&smc_lgr_list.lock); - if (role == SMC_CLNT && !srv_first_contact && - (local_contact == SMC_FIRST_CONTACT)) { + if (role == SMC_CLNT && !ini->srv_first_contact && + ini->cln_first_contact == SMC_FIRST_CONTACT) { /* Server reuses a link group, but Client wants to start * a new one * send out_of_sync decline, reason synchr. 
error */ - return -ENOLINK; + return SMC_CLC_DECL_SYNCERR; } create: - if (local_contact == SMC_FIRST_CONTACT) { - rc = smc_lgr_create(smc, is_smcd, smcibdev, ibport, - lcl->id_for_peer, vlan_id, smcd, peer_gid); + if (ini->cln_first_contact == SMC_FIRST_CONTACT) { + rc = smc_lgr_create(smc, ini); if (rc) goto out; smc_lgr_register_conn(conn); /* add smc conn to lgr */ @@ -658,7 +657,7 @@ create: conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; conn->urg_state = SMC_URG_READ; - if (is_smcd) { + if (ini->is_smcd) { conn->rx_off = sizeof(struct smcd_cdc_msg); smcd_cdc_rx_init(conn); /* init tasklet for this conn */ } @@ -667,7 +666,7 @@ create: #endif out: - return rc ? rc : local_contact; + return rc; } /* convert the RMB size into the compressed notation - minimum 16K. diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 8806d2afa6ed..c00ac61dc129 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -229,6 +229,24 @@ struct smc_link_group { }; }; +struct smc_clc_msg_local; + +struct smc_init_info { + u8 is_smcd; + unsigned short vlan_id; + int srv_first_contact; + int cln_first_contact; + /* SMC-R */ + struct smc_clc_msg_local *ib_lcl; + struct smc_ib_device *ib_dev; + u8 ib_gid[SMC_GID_SIZE]; + u8 ib_port; + u32 ib_clcqpn; + /* SMC-D */ + u64 ism_gid; + struct smcd_dev *ism_dev; +}; + /* Find the connection associated with the given alert token in the link group. * To use rbtrees we have to implement our own search core. * Requires @conns_lock @@ -281,13 +299,10 @@ void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn); void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn); void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn); void smc_rmb_sync_sg_for_device(struct smc_connection *conn); -int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id); +int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini); void smc_conn_free(struct smc_connection *conn); -int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact, - struct smc_ib_device *smcibdev, u8 ibport, u32 clcqpn, - struct smc_clc_msg_local *lcl, struct smcd_dev *smcd, - u64 peer_gid); +int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini); void smcd_conn_free(struct smc_connection *conn); void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr); void smc_core_exit(void); diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 0285c7f9e79b..bab2da8cf17a 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -26,6 +26,7 @@ #include "smc_pnet.h" #include "smc_ib.h" #include "smc_ism.h" +#include "smc_core.h" #define SMC_ASCII_BLANK 32 @@ -611,28 +612,28 @@ static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info) static const struct genl_ops smc_pnet_ops[] = { { .cmd = SMC_PNETID_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = smc_pnet_policy, .doit = smc_pnet_get, .dumpit = smc_pnet_dump, .start = smc_pnet_dump_start }, { .cmd = SMC_PNETID_ADD, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = smc_pnet_policy, .doit = smc_pnet_add }, { .cmd = SMC_PNETID_DEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = smc_pnet_policy, .doit = smc_pnet_del }, { .cmd = SMC_PNETID_FLUSH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = smc_pnet_policy, .doit 
= smc_pnet_flush } }; @@ -643,6 +644,7 @@ static struct genl_family smc_pnet_nl_family __ro_after_init = { .name = SMCR_GENL_FAMILY_NAME, .version = SMCR_GENL_FAMILY_VERSION, .maxattr = SMC_PNETID_MAX, + .policy = smc_pnet_policy, .netnsok = true, .module = THIS_MODULE, .ops = smc_pnet_ops, @@ -759,8 +761,7 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev, * IB device and port */ static void smc_pnet_find_rdma_dev(struct net_device *netdev, - struct smc_ib_device **smcibdev, - u8 *ibport, unsigned short vlan_id, u8 gid[]) + struct smc_init_info *ini) { struct smc_ib_device *ibdev; @@ -780,10 +781,10 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev, dev_put(ndev); if (netdev == ndev && smc_ib_port_active(ibdev, i) && - !smc_ib_determine_gid(ibdev, i, vlan_id, gid, - NULL)) { - *smcibdev = ibdev; - *ibport = i; + !smc_ib_determine_gid(ibdev, i, ini->vlan_id, + ini->ib_gid, NULL)) { + ini->ib_dev = ibdev; + ini->ib_port = i; break; } } @@ -798,9 +799,7 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev, * If nothing found, try to use handshake device */ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, - struct smc_ib_device **smcibdev, - u8 *ibport, unsigned short vlan_id, - u8 gid[]) + struct smc_init_info *ini) { u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; struct smc_ib_device *ibdev; @@ -810,7 +809,7 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, ndev_pnetid) && smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) { - smc_pnet_find_rdma_dev(ndev, smcibdev, ibport, vlan_id, gid); + smc_pnet_find_rdma_dev(ndev, ini); return; /* pnetid could not be determined */ } @@ -821,10 +820,10 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, continue; if (smc_pnet_match(ibdev->pnetid[i - 1], ndev_pnetid) && smc_ib_port_active(ibdev, i) && - !smc_ib_determine_gid(ibdev, i, vlan_id, gid, - NULL)) { - *smcibdev = ibdev; - *ibport = i; + !smc_ib_determine_gid(ibdev, i, ini->vlan_id, + ini->ib_gid, NULL)) { + ini->ib_dev = ibdev; + ini->ib_port = i; goto out; } } @@ -834,7 +833,7 @@ out: } static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, - struct smcd_dev **smcismdev) + struct smc_init_info *ini) { u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; struct smcd_dev *ismdev; @@ -848,7 +847,7 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, spin_lock(&smcd_dev_list.lock); list_for_each_entry(ismdev, &smcd_dev_list.list, list) { if (smc_pnet_match(ismdev->pnetid, ndev_pnetid)) { - *smcismdev = ismdev; + ini->ism_dev = ismdev; break; } } @@ -859,21 +858,18 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, * determine ib_device and port belonging to used internal TCP socket * ethernet interface. 
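With the init-info refactor, the PNET lookup helpers above write their result (device and port) into the struct instead of a pile of output parameters. A toy model of the device walk in smc_pnet_find_roce_by_pnetid(), using made-up types and PNET IDs: pick the first active IB port whose PNET ID matches the netdev's.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define PNETID_LEN 16
#define MAX_PORTS  2

struct ib_dev {                     /* toy stand-in for smc_ib_device */
	const char *name;
	char pnetid[MAX_PORTS][PNETID_LEN];
	bool port_active[MAX_PORTS];
};

struct init_info { const struct ib_dev *ib_dev; int ib_port; };

static void find_roce_by_pnetid(const char *ndev_pnetid,
				const struct ib_dev *devs, int ndevs,
				struct init_info *ini)
{
	for (int d = 0; d < ndevs; d++) {
		for (int i = 0; i < MAX_PORTS; i++) {
			if (!devs[d].port_active[i])
				continue;
			if (strncmp(devs[d].pnetid[i], ndev_pnetid,
				    PNETID_LEN))
				continue;
			ini->ib_dev = &devs[d];
			ini->ib_port = i + 1;   /* IB ports are 1-based */
			return;
		}
	}
}

int main(void)
{
	struct ib_dev devs[2] = {
		{ "mlx_0", { "NET1", "NET2" }, { false, true } },
		{ "mlx_1", { "NET2", "NET3" }, { true,  true } },
	};
	struct init_info ini = { 0 };

	find_roce_by_pnetid("NET2", devs, 2, &ini);
	if (ini.ib_dev)
		printf("matched %s port %d\n", ini.ib_dev->name, ini.ib_port);
	return 0;
}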
*/ -void smc_pnet_find_roce_resource(struct sock *sk, - struct smc_ib_device **smcibdev, u8 *ibport, - unsigned short vlan_id, u8 gid[]) +void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini) { struct dst_entry *dst = sk_dst_get(sk); - *smcibdev = NULL; - *ibport = 0; - + ini->ib_dev = NULL; + ini->ib_port = 0; if (!dst) goto out; if (!dst->dev) goto out_rel; - smc_pnet_find_roce_by_pnetid(dst->dev, smcibdev, ibport, vlan_id, gid); + smc_pnet_find_roce_by_pnetid(dst->dev, ini); out_rel: dst_release(dst); @@ -881,17 +877,17 @@ out: return; } -void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev) +void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini) { struct dst_entry *dst = sk_dst_get(sk); - *smcismdev = NULL; + ini->ism_dev = NULL; if (!dst) goto out; if (!dst->dev) goto out_rel; - smc_pnet_find_ism_by_pnetid(dst->dev, smcismdev); + smc_pnet_find_ism_by_pnetid(dst->dev, ini); out_rel: dst_release(dst); diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h index 5eac42fb45d0..4564e4d69c2e 100644 --- a/net/smc/smc_pnet.h +++ b/net/smc/smc_pnet.h @@ -18,6 +18,7 @@ struct smc_ib_device; struct smcd_dev; +struct smc_init_info; /** * struct smc_pnettable - SMC PNET table anchor @@ -43,9 +44,7 @@ int smc_pnet_init(void) __init; int smc_pnet_net_init(struct net *net); void smc_pnet_exit(void); void smc_pnet_net_exit(struct net *net); -void smc_pnet_find_roce_resource(struct sock *sk, - struct smc_ib_device **smcibdev, u8 *ibport, - unsigned short vlan_id, u8 gid[]); -void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev); +void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini); +void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini); #endif diff --git a/net/socket.c b/net/socket.c index 8255f5bda0aa..472fbefa5d9b 100644 --- a/net/socket.c +++ b/net/socket.c @@ -90,6 +90,7 @@ #include <linux/slab.h> #include <linux/xattr.h> #include <linux/nospec.h> +#include <linux/indirect_call_wrapper.h> #include <linux/uaccess.h> #include <asm/unistd.h> @@ -108,6 +109,13 @@ #include <net/busy_poll.h> #include <linux/errqueue.h> +/* proto_ops for ipv4 and ipv6 use the same {recv,send}msg function */ +#if IS_ENABLED(CONFIG_INET) +#define INDIRECT_CALL_INET4(f, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__) +#else +#define INDIRECT_CALL_INET4(f, f1, ...) f(__VA_ARGS__) +#endif + #ifdef CONFIG_NET_RX_BUSY_POLL unsigned int sysctl_net_busy_read __read_mostly; unsigned int sysctl_net_busy_poll __read_mostly; @@ -645,10 +653,12 @@ EXPORT_SYMBOL(__sock_tx_timestamp); * Sends @msg through @sock, passing through LSM. * Returns the number of bytes sent, or an error code. */ - +INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *, + size_t)); static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg) { - int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg)); + int ret = INDIRECT_CALL_INET4(sock->ops->sendmsg, inet_sendmsg, sock, + msg, msg_data_left(msg)); BUG_ON(ret == -EIOCBQUEUED); return ret; } @@ -874,11 +884,13 @@ EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); * Receives @msg from @sock, passing through LSM. Returns the total number * of bytes received, or an error. 
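The INDIRECT_CALL_INET4() macro added to net/socket.c builds on INDIRECT_CALL_1() from <linux/indirect_call_wrapper.h>: when the overwhelmingly likely callee is known at compile time, compare the function pointer against it and branch to a direct call, so the fast path avoids a retpoline-protected indirect call. A freestanding sketch of the same shape (function names are illustrative):

#include <stdio.h>

/* If the pointer equals the expected target, the compiler can emit a
 * direct call; otherwise fall back to the normal indirect call. */
#define INDIRECT_CALL_1(f, f1, ...) \
	((f) == (f1) ? f1(__VA_ARGS__) : (f)(__VA_ARGS__))

static int inet_sendmsg_model(int len) { return len; }
static int unix_sendmsg_model(int len) { return -len; }

static int do_send(int (*sendmsg)(int), int len)
{
	return INDIRECT_CALL_1(sendmsg, inet_sendmsg_model, len);
}

int main(void)
{
	printf("%d\n", do_send(inet_sendmsg_model, 42)); /* direct path */
	printf("%d\n", do_send(unix_sendmsg_model, 42)); /* indirect path */
	return 0;
}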
*/ - +INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *, + size_t , int )); static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, int flags) { - return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags); + return INDIRECT_CALL_INET4(sock->ops->recvmsg, inet_recvmsg, sock, msg, + msg_data_left(msg), flags); } int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags) @@ -1164,6 +1176,26 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) err = open_related_ns(&net->ns, get_net_ns); break; + case SIOCGSTAMP_OLD: + case SIOCGSTAMPNS_OLD: + if (!sock->ops->gettstamp) { + err = -ENOIOCTLCMD; + break; + } + err = sock->ops->gettstamp(sock, argp, + cmd == SIOCGSTAMP_OLD, + !IS_ENABLED(CONFIG_64BIT)); + break; + case SIOCGSTAMP_NEW: + case SIOCGSTAMPNS_NEW: + if (!sock->ops->gettstamp) { + err = -ENOIOCTLCMD; + break; + } + err = sock->ops->gettstamp(sock, argp, + cmd == SIOCGSTAMP_NEW, + false); + break; default: err = sock_do_ioctl(net, sock, cmd, arg); break; @@ -2916,38 +2948,6 @@ void socket_seq_show(struct seq_file *seq) #endif /* CONFIG_PROC_FS */ #ifdef CONFIG_COMPAT -static int do_siocgstamp(struct net *net, struct socket *sock, - unsigned int cmd, void __user *up) -{ - mm_segment_t old_fs = get_fs(); - struct timeval ktv; - int err; - - set_fs(KERNEL_DS); - err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv); - set_fs(old_fs); - if (!err) - err = compat_put_timeval(&ktv, up); - - return err; -} - -static int do_siocgstampns(struct net *net, struct socket *sock, - unsigned int cmd, void __user *up) -{ - mm_segment_t old_fs = get_fs(); - struct timespec kts; - int err; - - set_fs(KERNEL_DS); - err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts); - set_fs(old_fs); - if (!err) - err = compat_put_timespec(&kts, up); - - return err; -} - static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) { struct compat_ifconf ifc32; @@ -3347,10 +3347,13 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCADDRT: case SIOCDELRT: return routing_ioctl(net, sock, cmd, argp); - case SIOCGSTAMP: - return do_siocgstamp(net, sock, cmd, argp); - case SIOCGSTAMPNS: - return do_siocgstampns(net, sock, cmd, argp); + case SIOCGSTAMP_OLD: + case SIOCGSTAMPNS_OLD: + if (!sock->ops->gettstamp) + return -ENOIOCTLCMD; + return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD, + !COMPAT_USE_64BIT_TIME); + case SIOCBONDSLAVEINFOQUERY: case SIOCBONDINFOQUERY: case SIOCSHWTSTAMP: @@ -3368,6 +3371,8 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCADDDLCI: case SIOCDELDLCI: case SIOCGSKNS: + case SIOCGSTAMP_NEW: + case SIOCGSTAMPNS_NEW: return sock_ioctl(file, cmd, arg); case SIOCGIFFLAGS: diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index fa6c977b4c41..e137698e8aef 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -14,7 +14,8 @@ #include <linux/file.h> #include <linux/in.h> #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/export.h> +#include <linux/init.h> #include <linux/net.h> #include <linux/netdevice.h> #include <linux/poll.h> @@ -297,7 +298,7 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, break; } - /* Positive extra indicates ore bytes than needed for the + /* Positive extra indicates more bytes than needed for the * message */ @@ -545,7 +546,7 @@ void strp_check_rcv(struct strparser *strp) } 
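The timestamp-ioctl rework above replaces the old set_fs()-based compat shims with a proto_ops->gettstamp() callback, and splits the commands into _OLD/_NEW variants to keep 32-bit time_t and 64-bit layouts apart. From userspace, classic SIOCGSTAMP usage is unchanged; a minimal sketch (error handling elided, UDP loopback assumed):

#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <netinet/in.h>
#include <linux/sockios.h>   /* SIOCGSTAMP */

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	struct sockaddr_in sa = { .sin_family = AF_INET,
				  .sin_port = htons(50505),
				  .sin_addr.s_addr = htonl(INADDR_LOOPBACK) };
	struct timeval tv;
	char buf[16];

	bind(fd, (struct sockaddr *)&sa, sizeof(sa));
	sendto(fd, "ping", 4, 0, (struct sockaddr *)&sa, sizeof(sa));
	recv(fd, buf, sizeof(buf), 0);

	/* Timestamp of the last packet received on this socket */
	if (ioctl(fd, SIOCGSTAMP, &tv) == 0)
		printf("rx at %lld.%06ld\n",
		       (long long)tv.tv_sec, (long)tv.tv_usec);
	close(fd);
	return 0;
}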
EXPORT_SYMBOL_GPL(strp_check_rcv); -static int __init strp_mod_init(void) +static int __init strp_dev_init(void) { strp_wq = create_singlethread_workqueue("kstrp"); if (unlikely(!strp_wq)) @@ -553,11 +554,4 @@ static int __init strp_mod_init(void) return 0; } - -static void __exit strp_mod_exit(void) -{ - destroy_workqueue(strp_wq); -} -module_init(strp_mod_init); -module_exit(strp_mod_exit); -MODULE_LICENSE("GPL"); +device_initcall(strp_dev_init); diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index d8026543bf4c..6c997d4a6218 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -54,7 +54,9 @@ const char tipc_bclink_name[] = "broadcast-link"; * @dests: array keeping number of reachable destinations per bearer * @primary_bearer: a bearer having links to all broadcast destinations, if any * @bcast_support: indicates if primary bearer, if any, supports broadcast + * @force_bcast: forces broadcast for multicast traffic * @rcast_support: indicates if all peer nodes support replicast + * @force_rcast: forces replicast for multicast traffic * @rc_ratio: dest count as percentage of cluster size where send method changes * @bc_threshold: calculated from rc_ratio; if dests > threshold use broadcast */ @@ -64,7 +66,9 @@ struct tipc_bc_base { int dests[MAX_BEARERS]; int primary_bearer; bool bcast_support; + bool force_bcast; bool rcast_support; + bool force_rcast; int rc_ratio; int bc_threshold; }; @@ -216,9 +220,24 @@ static void tipc_bcast_select_xmit_method(struct net *net, int dests, } /* Can current method be changed ? */ method->expires = jiffies + TIPC_METHOD_EXPIRE; - if (method->mandatory || time_before(jiffies, exp)) + if (method->mandatory) return; + if (!(tipc_net(net)->capabilities & TIPC_MCAST_RBCTL) && + time_before(jiffies, exp)) + return; + + /* Configuration as force 'broadcast' method */ + if (bb->force_bcast) { + method->rcast = false; + return; + } + /* Configuration as force 'replicast' method */ + if (bb->force_rcast) { + method->rcast = true; + return; + } + /* Configuration as 'autoselect' or default method */ /* Determine method to use now */ method->rcast = dests <= bb->bc_threshold; } @@ -281,6 +300,63 @@ static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts, return 0; } +/* tipc_mcast_send_sync - deliver a dummy message with SYN bit + * @net: the applicable net namespace + * @skb: socket buffer to copy + * @method: send method to be used + * @dests: destination nodes for message. + * @cong_link_cnt: returns number of encountered congested destination links + * Returns 0 if success, otherwise errno + */ +static int tipc_mcast_send_sync(struct net *net, struct sk_buff *skb, + struct tipc_mc_method *method, + struct tipc_nlist *dests, + u16 *cong_link_cnt) +{ + struct tipc_msg *hdr, *_hdr; + struct sk_buff_head tmpq; + struct sk_buff *_skb; + + /* Is a cluster supporting with new capabilities ? 
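The TIPC changes in this area let a forced broadcast or replicast mode override the destination-count heuristic in tipc_bcast_select_xmit_method(). A toy model of the decision follows; the threshold formula is assumed from rc_ratio's role as a percentage of cluster size and is not copied from the kernel:

#include <stdbool.h>
#include <stdio.h>

struct bc_base {            /* subset of tipc_bc_base */
	bool force_bcast;
	bool force_rcast;
	int rc_ratio;       /* percent of cluster size */
};

/* Returns true when replicast (one unicast copy per destination)
 * should be used instead of true broadcast. */
static bool select_rcast(const struct bc_base *bb, int dests,
			 int cluster_size)
{
	int bc_threshold = 1 + cluster_size * bb->rc_ratio / 100;

	if (bb->force_bcast)
		return false;
	if (bb->force_rcast)
		return true;
	return dests <= bc_threshold;   /* 'autoselect' */
}

int main(void)
{
	struct bc_base bb = { false, false, 10 };

	printf("%d dests -> %s\n", 3,
	       select_rcast(&bb, 3, 100) ? "replicast" : "broadcast");
	printf("%d dests -> %s\n", 40,
	       select_rcast(&bb, 40, 100) ? "replicast" : "broadcast");
	return 0;
}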
*/ + if (!(tipc_net(net)->capabilities & TIPC_MCAST_RBCTL)) + return 0; + + hdr = buf_msg(skb); + if (msg_user(hdr) == MSG_FRAGMENTER) + hdr = msg_get_wrapped(hdr); + if (msg_type(hdr) != TIPC_MCAST_MSG) + return 0; + + /* Allocate dummy message */ + _skb = tipc_buf_acquire(MCAST_H_SIZE, GFP_KERNEL); + if (!_skb) + return -ENOMEM; + + /* Preparing for 'synching' header */ + msg_set_syn(hdr, 1); + + /* Copy skb's header into a dummy header */ + skb_copy_to_linear_data(_skb, hdr, MCAST_H_SIZE); + skb_orphan(_skb); + + /* Reverse method for dummy message */ + _hdr = buf_msg(_skb); + msg_set_size(_hdr, MCAST_H_SIZE); + msg_set_is_rcast(_hdr, !msg_is_rcast(hdr)); + + skb_queue_head_init(&tmpq); + __skb_queue_tail(&tmpq, _skb); + if (method->rcast) + tipc_bcast_xmit(net, &tmpq, cong_link_cnt); + else + tipc_rcast_xmit(net, &tmpq, dests, cong_link_cnt); + + /* This queue should normally be empty by now */ + __skb_queue_purge(&tmpq); + + return 0; +} + /* tipc_mcast_xmit - deliver message to indicated destination nodes * and to identified node local sockets * @net: the applicable net namespace @@ -296,6 +372,9 @@ int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts, u16 *cong_link_cnt) { struct sk_buff_head inputq, localq; + bool rcast = method->rcast; + struct tipc_msg *hdr; + struct sk_buff *skb; int rc = 0; skb_queue_head_init(&inputq); @@ -309,6 +388,18 @@ int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts, /* Send according to determined transmit method */ if (dests->remote) { tipc_bcast_select_xmit_method(net, dests->remote, method); + + skb = skb_peek(pkts); + hdr = buf_msg(skb); + if (msg_user(hdr) == MSG_FRAGMENTER) + hdr = msg_get_wrapped(hdr); + msg_set_is_rcast(hdr, method->rcast); + + /* Switch method ? */ + if (rcast != method->rcast) + tipc_mcast_send_sync(net, skb, method, + dests, cong_link_cnt); + if (method->rcast) rc = tipc_rcast_xmit(net, pkts, dests, cong_link_cnt); else @@ -485,10 +576,63 @@ static int tipc_bc_link_set_queue_limits(struct net *net, u32 limit) return 0; } +static int tipc_bc_link_set_broadcast_mode(struct net *net, u32 bc_mode) +{ + struct tipc_bc_base *bb = tipc_bc_base(net); + + switch (bc_mode) { + case BCLINK_MODE_BCAST: + if (!bb->bcast_support) + return -ENOPROTOOPT; + + bb->force_bcast = true; + bb->force_rcast = false; + break; + case BCLINK_MODE_RCAST: + if (!bb->rcast_support) + return -ENOPROTOOPT; + + bb->force_bcast = false; + bb->force_rcast = true; + break; + case BCLINK_MODE_SEL: + if (!bb->bcast_support || !bb->rcast_support) + return -ENOPROTOOPT; + + bb->force_bcast = false; + bb->force_rcast = false; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int tipc_bc_link_set_broadcast_ratio(struct net *net, u32 bc_ratio) +{ + struct tipc_bc_base *bb = tipc_bc_base(net); + + if (!bb->bcast_support || !bb->rcast_support) + return -ENOPROTOOPT; + + if (bc_ratio > 100 || bc_ratio <= 0) + return -EINVAL; + + bb->rc_ratio = bc_ratio; + tipc_bcast_lock(net); + tipc_bcbase_calc_bc_threshold(net); + tipc_bcast_unlock(net); + + return 0; +} + int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]) { int err; u32 win; + u32 bc_mode; + u32 bc_ratio; struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; if (!attrs[TIPC_NLA_LINK_PROP]) @@ -498,12 +642,28 @@ int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]) if (err) return err; - if (!props[TIPC_NLA_PROP_WIN]) + if (!props[TIPC_NLA_PROP_WIN] && + !props[TIPC_NLA_PROP_BROADCAST] && + !props[TIPC_NLA_PROP_BROADCAST_RATIO]) { return -EOPNOTSUPP; + } + + if 
(props[TIPC_NLA_PROP_BROADCAST]) { + bc_mode = nla_get_u32(props[TIPC_NLA_PROP_BROADCAST]); + err = tipc_bc_link_set_broadcast_mode(net, bc_mode); + } + + if (!err && props[TIPC_NLA_PROP_BROADCAST_RATIO]) { + bc_ratio = nla_get_u32(props[TIPC_NLA_PROP_BROADCAST_RATIO]); + err = tipc_bc_link_set_broadcast_ratio(net, bc_ratio); + } - win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + if (!err && props[TIPC_NLA_PROP_WIN]) { + win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + err = tipc_bc_link_set_queue_limits(net, win); + } - return tipc_bc_link_set_queue_limits(net, win); + return err; } int tipc_bcast_init(struct net *net) @@ -529,7 +689,7 @@ int tipc_bcast_init(struct net *net) goto enomem; bb->link = l; tn->bcl = l; - bb->rc_ratio = 25; + bb->rc_ratio = 10; bb->rcast_support = true; return 0; enomem: @@ -576,3 +736,108 @@ void tipc_nlist_purge(struct tipc_nlist *nl) nl->remote = 0; nl->local = false; } + +u32 tipc_bcast_get_broadcast_mode(struct net *net) +{ + struct tipc_bc_base *bb = tipc_bc_base(net); + + if (bb->force_bcast) + return BCLINK_MODE_BCAST; + + if (bb->force_rcast) + return BCLINK_MODE_RCAST; + + if (bb->bcast_support && bb->rcast_support) + return BCLINK_MODE_SEL; + + return 0; +} + +u32 tipc_bcast_get_broadcast_ratio(struct net *net) +{ + struct tipc_bc_base *bb = tipc_bc_base(net); + + return bb->rc_ratio; +} + +void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq, + struct sk_buff_head *inputq) +{ + struct sk_buff *skb, *_skb, *tmp; + struct tipc_msg *hdr, *_hdr; + bool match = false; + u32 node, port; + + skb = skb_peek(inputq); + if (!skb) + return; + + hdr = buf_msg(skb); + + if (likely(!msg_is_syn(hdr) && skb_queue_empty(defq))) + return; + + node = msg_orignode(hdr); + if (node == tipc_own_addr(net)) + return; + + port = msg_origport(hdr); + + /* Has the twin SYN message already arrived ? 
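When the send method switches, tipc_mcast_send_sync() emits a dummy SYN by the opposite method so receivers can pair the two copies and drop duplicates. A reduced model of the pairing rule in tipc_mcast_filter_msg(), with toy types; the real code additionally walks a deferred queue per peer:

#include <stdbool.h>
#include <stdio.h>

struct msg { unsigned node, port; bool syn, rcast; int data_sz; };

/* A SYN matches a deferred twin from the same (node, port) that
 * travelled by the other method. */
static bool is_twin(const struct msg *a, const struct msg *b)
{
	return a->node == b->node && a->port == b->port &&
	       a->syn && b->syn && a->rcast != b->rcast;
}

/* Of two matched SYNs, the one carrying data is delivered. */
static const struct msg *keep(const struct msg *a, const struct msg *b)
{
	return a->data_sz ? a : b;
}

int main(void)
{
	struct msg deferred = { 1, 7, true, true,  0 };   /* dummy SYN */
	struct msg arrived  = { 1, 7, true, false, 64 };  /* SYN + data */

	if (is_twin(&deferred, &arrived))
		printf("deliver msg with %d bytes\n",
		       keep(&deferred, &arrived)->data_sz);
	return 0;
}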
*/ + skb_queue_walk(defq, _skb) { + _hdr = buf_msg(_skb); + if (msg_orignode(_hdr) != node) + continue; + if (msg_origport(_hdr) != port) + continue; + match = true; + break; + } + + if (!match) { + if (!msg_is_syn(hdr)) + return; + __skb_dequeue(inputq); + __skb_queue_tail(defq, skb); + return; + } + + /* Deliver non-SYN message from other link, otherwise queue it */ + if (!msg_is_syn(hdr)) { + if (msg_is_rcast(hdr) != msg_is_rcast(_hdr)) + return; + __skb_dequeue(inputq); + __skb_queue_tail(defq, skb); + return; + } + + /* Queue non-SYN/SYN message from same link */ + if (msg_is_rcast(hdr) == msg_is_rcast(_hdr)) { + __skb_dequeue(inputq); + __skb_queue_tail(defq, skb); + return; + } + + /* Matching SYN messages => return the one with data, if any */ + __skb_unlink(_skb, defq); + if (msg_data_sz(hdr)) { + kfree_skb(_skb); + } else { + __skb_dequeue(inputq); + kfree_skb(skb); + __skb_queue_tail(inputq, _skb); + } + + /* Deliver subsequent non-SYN messages from same peer */ + skb_queue_walk_safe(defq, _skb, tmp) { + _hdr = buf_msg(_skb); + if (msg_orignode(_hdr) != node) + continue; + if (msg_origport(_hdr) != port) + continue; + if (msg_is_syn(_hdr)) + break; + __skb_unlink(_skb, defq); + __skb_queue_tail(inputq, _skb); + } +} diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 751530ab0c49..dadad953e2be 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -48,6 +48,10 @@ extern const char tipc_bclink_name[]; #define TIPC_METHOD_EXPIRE msecs_to_jiffies(5000) +#define BCLINK_MODE_BCAST 0x1 +#define BCLINK_MODE_RCAST 0x2 +#define BCLINK_MODE_SEL 0x4 + struct tipc_nlist { struct list_head list; u32 self; @@ -63,11 +67,13 @@ void tipc_nlist_del(struct tipc_nlist *nl, u32 node); /* Cookie to be used between socket and broadcast layer * @rcast: replicast (instead of broadcast) was used at previous xmit * @mandatory: broadcast/replicast indication was set by user + * @deferredq: defer queue to make message in order * @expires: re-evaluate non-mandatory transmit method if we are past this */ struct tipc_mc_method { bool rcast; bool mandatory; + struct sk_buff_head deferredq; unsigned long expires; }; @@ -92,6 +98,12 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg); int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]); int tipc_bclink_reset_stats(struct net *net); +u32 tipc_bcast_get_broadcast_mode(struct net *net); +u32 tipc_bcast_get_broadcast_ratio(struct net *net); + +void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq, + struct sk_buff_head *inputq); + static inline void tipc_bcast_lock(struct net *net) { spin_lock_bh(&tipc_net(net)->bclock); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index d27f30a9a01d..2bed6589f41e 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -687,14 +687,14 @@ static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg, if (!hdr) return -EMSGSIZE; - attrs = nla_nest_start(msg->skb, TIPC_NLA_BEARER); + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_BEARER); if (!attrs) goto msg_full; if (nla_put_string(msg->skb, TIPC_NLA_BEARER_NAME, bearer->name)) goto attr_msg_full; - prop = nla_nest_start(msg->skb, TIPC_NLA_BEARER_PROP); + prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_BEARER_PROP); if (!prop) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, bearer->priority)) @@ -776,9 +776,9 @@ int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, - info->attrs[TIPC_NLA_BEARER], 
- tipc_nl_bearer_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy, info->extack); if (err) return err; @@ -825,9 +825,9 @@ int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, - info->attrs[TIPC_NLA_BEARER], - tipc_nl_bearer_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy, info->extack); if (err) return err; @@ -870,9 +870,9 @@ int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, - info->attrs[TIPC_NLA_BEARER], - tipc_nl_bearer_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy, info->extack); if (err) return err; @@ -921,9 +921,9 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, - info->attrs[TIPC_NLA_BEARER], - tipc_nl_bearer_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy, info->extack); if (err) return err; @@ -964,9 +964,9 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, - info->attrs[TIPC_NLA_BEARER], - tipc_nl_bearer_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy, info->extack); if (err) return err; @@ -1033,14 +1033,14 @@ static int __tipc_nl_add_media(struct tipc_nl_msg *msg, if (!hdr) return -EMSGSIZE; - attrs = nla_nest_start(msg->skb, TIPC_NLA_MEDIA); + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_MEDIA); if (!attrs) goto msg_full; if (nla_put_string(msg->skb, TIPC_NLA_MEDIA_NAME, media->name)) goto attr_msg_full; - prop = nla_nest_start(msg->skb, TIPC_NLA_MEDIA_PROP); + prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_MEDIA_PROP); if (!prop) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, media->priority)) @@ -1107,9 +1107,9 @@ int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_MEDIA]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_MEDIA_MAX, - info->attrs[TIPC_NLA_MEDIA], - tipc_nl_media_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_MEDIA_MAX, + info->attrs[TIPC_NLA_MEDIA], + tipc_nl_media_policy, info->extack); if (err) return err; @@ -1155,9 +1155,9 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_MEDIA]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_MEDIA_MAX, - info->attrs[TIPC_NLA_MEDIA], - tipc_nl_media_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_MEDIA_MAX, + info->attrs[TIPC_NLA_MEDIA], + tipc_nl_media_policy, info->extack); if (!attrs[TIPC_NLA_MEDIA_NAME]) return -EINVAL; diff --git a/net/tipc/core.c b/net/tipc/core.c index 5b38f5164281..27cccd101ef6 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -43,6 +43,7 @@ #include "net.h" #include "socket.h" #include "bcast.h" +#include "node.h" #include <linux/module.h> @@ 
-59,6 +60,7 @@ static int __net_init tipc_init_net(struct net *net) tn->node_addr = 0; tn->trial_addr = 0; tn->addr_trial_end = 0; + tn->capabilities = TIPC_NODE_CAPABILITIES; memset(tn->node_id, 0, sizeof(tn->node_id)); memset(tn->node_id_string, 0, sizeof(tn->node_id_string)); tn->mon_threshold = TIPC_DEF_MON_THRESHOLD; diff --git a/net/tipc/core.h b/net/tipc/core.h index 8020a6c360ff..7a68e1b6a066 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -122,6 +122,9 @@ struct tipc_net { /* Topology subscription server */ struct tipc_topsrv *topsrv; atomic_t subscription_count; + + /* Cluster capabilities */ + u16 capabilities; }; static inline struct tipc_net *tipc_net(struct net *net) diff --git a/net/tipc/group.c b/net/tipc/group.c index 63f39201e41e..992be6113676 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -917,7 +917,7 @@ void tipc_group_member_evt(struct tipc_group *grp, int tipc_group_fill_sock_diag(struct tipc_group *grp, struct sk_buff *skb) { - struct nlattr *group = nla_nest_start(skb, TIPC_NLA_SOCK_GROUP); + struct nlattr *group = nla_nest_start_noflag(skb, TIPC_NLA_SOCK_GROUP); if (!group) return -EMSGSIZE; diff --git a/net/tipc/link.c b/net/tipc/link.c index 131aa2f0fd27..f5cd986e1e50 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -151,6 +151,7 @@ struct tipc_link { /* Failover/synch */ u16 drop_point; struct sk_buff *failover_reasm_skb; + struct sk_buff_head failover_deferdq; /* Max packet negotiation */ u16 mtu; @@ -209,6 +210,7 @@ enum { }; #define TIPC_BC_RETR_LIM msecs_to_jiffies(10) /* [ms] */ +#define TIPC_UC_RETR_TIME (jiffies + msecs_to_jiffies(1)) /* * Interval between NACKs when packets arrive out of order @@ -246,6 +248,10 @@ static int tipc_link_build_nack_msg(struct tipc_link *l, static void tipc_link_build_bc_init_msg(struct tipc_link *l, struct sk_buff_head *xmitq); static bool tipc_link_release_pkts(struct tipc_link *l, u16 to); +static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data); +static void tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap, + struct tipc_gap_ack_blks *ga, + struct sk_buff_head *xmitq); /* * Simple non-static link routines (i.e. 
referenced outside this file) @@ -493,6 +499,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, __skb_queue_head_init(&l->transmq); __skb_queue_head_init(&l->backlogq); __skb_queue_head_init(&l->deferdq); + __skb_queue_head_init(&l->failover_deferdq); skb_queue_head_init(&l->wakeupq); skb_queue_head_init(l->inputq); return true; @@ -885,6 +892,7 @@ void tipc_link_reset(struct tipc_link *l) __skb_queue_purge(&l->transmq); __skb_queue_purge(&l->deferdq); __skb_queue_purge(&l->backlogq); + __skb_queue_purge(&l->failover_deferdq); l->backlog[TIPC_LOW_IMPORTANCE].len = 0; l->backlog[TIPC_MEDIUM_IMPORTANCE].len = 0; l->backlog[TIPC_HIGH_IMPORTANCE].len = 0; @@ -1156,34 +1164,14 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, * Consumes buffer */ static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb, - struct sk_buff_head *inputq) + struct sk_buff_head *inputq, + struct sk_buff **reasm_skb) { struct tipc_msg *hdr = buf_msg(skb); - struct sk_buff **reasm_skb = &l->reasm_buf; struct sk_buff *iskb; struct sk_buff_head tmpq; int usr = msg_user(hdr); - int rc = 0; int pos = 0; - int ipos = 0; - - if (unlikely(usr == TUNNEL_PROTOCOL)) { - if (msg_type(hdr) == SYNCH_MSG) { - __skb_queue_purge(&l->deferdq); - goto drop; - } - if (!tipc_msg_extract(skb, &iskb, &ipos)) - return rc; - kfree_skb(skb); - skb = iskb; - hdr = buf_msg(skb); - if (less(msg_seqno(hdr), l->drop_point)) - goto drop; - if (tipc_data_input(l, skb, inputq)) - return rc; - usr = msg_user(hdr); - reasm_skb = &l->failover_reasm_skb; - } if (usr == MSG_BUNDLER) { skb_queue_head_init(&tmpq); @@ -1208,11 +1196,66 @@ static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb, tipc_link_bc_init_rcv(l->bc_rcvlink, hdr); tipc_bcast_unlock(l->net); } -drop: + kfree_skb(skb); return 0; } +/* tipc_link_tnl_rcv() - receive TUNNEL_PROTOCOL message, drop or process the + * inner message along with the ones in the old link's + * deferdq + * @l: tunnel link + * @skb: TUNNEL_PROTOCOL message + * @inputq: queue to put messages ready for delivery + */ +static int tipc_link_tnl_rcv(struct tipc_link *l, struct sk_buff *skb, + struct sk_buff_head *inputq) +{ + struct sk_buff **reasm_skb = &l->failover_reasm_skb; + struct sk_buff_head *fdefq = &l->failover_deferdq; + struct tipc_msg *hdr = buf_msg(skb); + struct sk_buff *iskb; + int ipos = 0; + int rc = 0; + u16 seqno; + + /* SYNCH_MSG */ + if (msg_type(hdr) == SYNCH_MSG) + goto drop; + + /* FAILOVER_MSG */ + if (!tipc_msg_extract(skb, &iskb, &ipos)) { + pr_warn_ratelimited("Cannot extract FAILOVER_MSG, defq: %d\n", + skb_queue_len(fdefq)); + return rc; + } + + do { + seqno = buf_seqno(iskb); + + if (unlikely(less(seqno, l->drop_point))) { + kfree_skb(iskb); + continue; + } + + if (unlikely(seqno != l->drop_point)) { + __tipc_skb_queue_sorted(fdefq, seqno, iskb); + continue; + } + + l->drop_point++; + + if (!tipc_data_input(l, iskb, inputq)) + rc |= tipc_link_input(l, iskb, inputq, reasm_skb); + if (unlikely(rc)) + break; + } while ((iskb = __tipc_skb_dequeue(fdefq, l->drop_point))); + +drop: + kfree_skb(skb); + return rc; +} + static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked) { bool released = false; @@ -1228,6 +1271,106 @@ static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked) return released; } +/* tipc_build_gap_ack_blks - build Gap ACK blocks + * @l: tipc link that data have come with gaps in sequence if any + * @data: data buffer to store the Gap ACK blocks after built + * + * returns the actual allocated 
memory size + */ +static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data) +{ + struct sk_buff *skb = skb_peek(&l->deferdq); + struct tipc_gap_ack_blks *ga = data; + u16 len, expect, seqno = 0; + u8 n = 0; + + if (!skb) + goto exit; + + expect = buf_seqno(skb); + skb_queue_walk(&l->deferdq, skb) { + seqno = buf_seqno(skb); + if (unlikely(more(seqno, expect))) { + ga->gacks[n].ack = htons(expect - 1); + ga->gacks[n].gap = htons(seqno - expect); + if (++n >= MAX_GAP_ACK_BLKS) { + pr_info_ratelimited("Too few Gap ACK blocks!\n"); + goto exit; + } + } else if (unlikely(less(seqno, expect))) { + pr_warn("Unexpected skb in deferdq!\n"); + continue; + } + expect = seqno + 1; + } + + /* last block */ + ga->gacks[n].ack = htons(seqno); + ga->gacks[n].gap = 0; + n++; + +exit: + len = tipc_gap_ack_blks_sz(n); + ga->len = htons(len); + ga->gack_cnt = n; + return len; +} + +/* tipc_link_advance_transmq - advance TIPC link transmq queue by releasing + * acked packets, also doing retransmissions if + * gaps found + * @l: tipc link with transmq queue to be advanced + * @acked: seqno of last packet acked by peer without any gaps before + * @gap: # of gap packets + * @ga: buffer pointer to Gap ACK blocks from peer + * @xmitq: queue for accumulating the retransmitted packets if any + */ +static void tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap, + struct tipc_gap_ack_blks *ga, + struct sk_buff_head *xmitq) +{ + struct sk_buff *skb, *_skb, *tmp; + struct tipc_msg *hdr; + u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; + u16 ack = l->rcv_nxt - 1; + u16 seqno; + u16 n = 0; + + skb_queue_walk_safe(&l->transmq, skb, tmp) { + seqno = buf_seqno(skb); + +next_gap_ack: + if (less_eq(seqno, acked)) { + /* release skb */ + __skb_unlink(skb, &l->transmq); + kfree_skb(skb); + } else if (less_eq(seqno, acked + gap)) { + /* retransmit skb */ + if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr)) + continue; + TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME; + + _skb = __pskb_copy(skb, MIN_H_SIZE, GFP_ATOMIC); + if (!_skb) + continue; + hdr = buf_msg(_skb); + msg_set_ack(hdr, ack); + msg_set_bcast_ack(hdr, bc_ack); + _skb->priority = TC_PRIO_CONTROL; + __skb_queue_tail(xmitq, _skb); + l->stats.retransmitted++; + } else { + /* retry with Gap ACK blocks if any */ + if (!ga || n >= ga->gack_cnt) + break; + acked = ntohs(ga->gacks[n].ack); + gap = ntohs(ga->gacks[n].gap); + n++; + goto next_gap_ack; + } + } +} + /* tipc_link_build_state_msg: prepare link state message for transmission * * Note that sending of broadcast ack is coordinated among nodes, to reduce @@ -1282,6 +1425,7 @@ static int tipc_link_build_nack_msg(struct tipc_link *l, struct sk_buff_head *xmitq) { u32 def_cnt = ++l->stats.deferred_recv; + u32 defq_len = skb_queue_len(&l->deferdq); int match1, match2; if (link_is_bc_rcvlink(l)) { @@ -1292,7 +1436,7 @@ static int tipc_link_build_nack_msg(struct tipc_link *l, return 0; } - if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV)) + if (defq_len >= 3 && !((defq_len - 3) % 16)) tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq); return 0; } @@ -1306,29 +1450,29 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq) { struct sk_buff_head *defq = &l->deferdq; - struct tipc_msg *hdr; + struct tipc_msg *hdr = buf_msg(skb); u16 seqno, rcv_nxt, win_lim; int rc = 0; + /* Verify and update link state */ + if (unlikely(msg_user(hdr) == LINK_PROTOCOL)) + return tipc_link_proto_rcv(l, skb, xmitq); + + /* Don't send probe at next timeout 
expiration */ + l->silent_intv_cnt = 0; + do { hdr = buf_msg(skb); seqno = msg_seqno(hdr); rcv_nxt = l->rcv_nxt; win_lim = rcv_nxt + TIPC_MAX_LINK_WIN; - /* Verify and update link state */ - if (unlikely(msg_user(hdr) == LINK_PROTOCOL)) - return tipc_link_proto_rcv(l, skb, xmitq); - if (unlikely(!link_is_up(l))) { if (l->state == LINK_ESTABLISHING) rc = TIPC_LINK_UP_EVT; goto drop; } - /* Don't send probe at next timeout expiration */ - l->silent_intv_cnt = 0; - /* Drop if outside receive window */ if (unlikely(less(seqno, rcv_nxt) || more(seqno, win_lim))) { l->stats.duplicates++; @@ -1353,13 +1497,16 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, /* Deliver packet */ l->rcv_nxt++; l->stats.recv_pkts++; - if (!tipc_data_input(l, skb, l->inputq)) - rc |= tipc_link_input(l, skb, l->inputq); + + if (unlikely(msg_user(hdr) == TUNNEL_PROTOCOL)) + rc |= tipc_link_tnl_rcv(l, skb, l->inputq); + else if (!tipc_data_input(l, skb, l->inputq)) + rc |= tipc_link_input(l, skb, l->inputq, &l->reasm_buf); if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) rc |= tipc_link_build_state_msg(l, xmitq); if (unlikely(rc & ~TIPC_LINK_SND_STATE)) break; - } while ((skb = __skb_dequeue(defq))); + } while ((skb = __tipc_skb_dequeue(defq, l->rcv_nxt))); return rc; drop: @@ -1380,6 +1527,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, struct tipc_mon_state *mstate = &l->mon_state; int dlen = 0; void *data; + u16 glen = 0; /* Don't send protocol message during reset or link failover */ if (tipc_link_is_blocked(l)) @@ -1392,8 +1540,8 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, rcvgap = buf_seqno(skb_peek(dfq)) - l->rcv_nxt; skb = tipc_msg_create(LINK_PROTOCOL, mtyp, INT_H_SIZE, - tipc_max_domain_size, l->addr, - tipc_own_addr(l->net), 0, 0, 0); + tipc_max_domain_size + MAX_GAP_ACK_BLKS_SZ, + l->addr, tipc_own_addr(l->net), 0, 0, 0); if (!skb) return; @@ -1420,9 +1568,11 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl)); msg_set_probe(hdr, probe); msg_set_is_keepalive(hdr, probe || probe_reply); - tipc_mon_prep(l->net, data, &dlen, mstate, l->bearer_id); - msg_set_size(hdr, INT_H_SIZE + dlen); - skb_trim(skb, INT_H_SIZE + dlen); + if (l->peer_caps & TIPC_GAP_ACK_BLOCK) + glen = tipc_build_gap_ack_blks(l, data); + tipc_mon_prep(l->net, data + glen, &dlen, mstate, l->bearer_id); + msg_set_size(hdr, INT_H_SIZE + glen + dlen); + skb_trim(skb, INT_H_SIZE + glen + dlen); l->stats.sent_states++; l->rcv_unacked = 0; } else { @@ -1481,6 +1631,7 @@ void tipc_link_create_dummy_tnl_msg(struct tipc_link *l, void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, int mtyp, struct sk_buff_head *xmitq) { + struct sk_buff_head *fdefq = &tnl->failover_deferdq; struct sk_buff *skb, *tnlskb; struct tipc_msg *hdr, tnlhdr; struct sk_buff_head *queue = &l->transmq; @@ -1508,7 +1659,11 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, /* Initialize reusable tunnel packet header */ tipc_msg_init(tipc_own_addr(l->net), &tnlhdr, TUNNEL_PROTOCOL, mtyp, INT_H_SIZE, l->addr); - pktcnt = skb_queue_len(&l->transmq) + skb_queue_len(&l->backlogq); + if (mtyp == SYNCH_MSG) + pktcnt = l->snd_nxt - buf_seqno(skb_peek(&l->transmq)); + else + pktcnt = skb_queue_len(&l->transmq); + pktcnt += skb_queue_len(&l->backlogq); msg_set_msgcnt(&tnlhdr, pktcnt); msg_set_bearer_id(&tnlhdr, l->peer_bearer_id); tnl: @@ -1539,6 +1694,49 @@ tnl: tnl->drop_point = 
l->rcv_nxt; tnl->failover_reasm_skb = l->reasm_buf; l->reasm_buf = NULL; + + /* Failover the link's deferdq */ + if (unlikely(!skb_queue_empty(fdefq))) { + pr_warn("Link failover deferdq not empty: %d!\n", + skb_queue_len(fdefq)); + __skb_queue_purge(fdefq); + } + skb_queue_splice_init(&l->deferdq, fdefq); + } +} + +/** + * tipc_link_failover_prepare() - prepare tnl for link failover + * + * This is a special version of the precursor - tipc_link_tnl_prepare(), + * see the tipc_node_link_failover() for details + * + * @l: failover link + * @tnl: tunnel link + * @xmitq: queue for messages to be xmited + */ +void tipc_link_failover_prepare(struct tipc_link *l, struct tipc_link *tnl, + struct sk_buff_head *xmitq) +{ + struct sk_buff_head *fdefq = &tnl->failover_deferdq; + + tipc_link_create_dummy_tnl_msg(tnl, xmitq); + + /* This failover link enpoint was never established before, + * so it has not received anything from peer. + * Otherwise, it must be a normal failover situation or the + * node has entered SELF_DOWN_PEER_LEAVING and both peer nodes + * would have to start over from scratch instead. + */ + WARN_ON(l && tipc_link_is_up(l)); + tnl->drop_point = 1; + tnl->failover_reasm_skb = NULL; + + /* Initiate the link's failover deferdq */ + if (unlikely(!skb_queue_empty(fdefq))) { + pr_warn("Link failover deferdq not empty: %d!\n", + skb_queue_len(fdefq)); + __skb_queue_purge(fdefq); } } @@ -1592,6 +1790,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq) { struct tipc_msg *hdr = buf_msg(skb); + struct tipc_gap_ack_blks *ga = NULL; u16 rcvgap = 0; u16 ack = msg_ack(hdr); u16 gap = msg_seq_gap(hdr); @@ -1602,6 +1801,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, u16 dlen = msg_data_sz(hdr); int mtyp = msg_type(hdr); bool reply = msg_probe(hdr); + u16 glen = 0; void *data; char *if_name; int rc = 0; @@ -1699,7 +1899,17 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, rc = TIPC_LINK_UP_EVT; break; } - tipc_mon_rcv(l->net, data, dlen, l->addr, + + /* Receive Gap ACK blocks from peer if any */ + if (l->peer_caps & TIPC_GAP_ACK_BLOCK) { + ga = (struct tipc_gap_ack_blks *)data; + glen = ntohs(ga->len); + /* sanity check: if failed, ignore Gap ACK blocks */ + if (glen != tipc_gap_ack_blks_sz(ga->gack_cnt)) + ga = NULL; + } + + tipc_mon_rcv(l->net, data + glen, dlen - glen, l->addr, &l->mon_state, l->bearer_id); /* Send NACK if peer has sent pkts we haven't received yet */ @@ -1708,13 +1918,12 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, if (rcvgap || reply) tipc_link_build_proto_msg(l, STATE_MSG, 0, reply, rcvgap, 0, 0, xmitq); - tipc_link_release_pkts(l, ack); + + tipc_link_advance_transmq(l, ack, gap, ga, xmitq); /* If NACK, retransmit will now start at right position */ - if (gap) { - rc = tipc_link_retrans(l, l, ack + 1, ack + gap, xmitq); + if (gap) l->stats.recv_nacks++; - } tipc_link_advance_backlog(l, xmitq); if (unlikely(!skb_queue_empty(&l->wakeupq))) @@ -1974,8 +2183,8 @@ int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]) { int err; - err = nla_parse_nested(props, TIPC_NLA_PROP_MAX, prop, - tipc_nl_prop_policy, NULL); + err = nla_parse_nested_deprecated(props, TIPC_NLA_PROP_MAX, prop, + tipc_nl_prop_policy, NULL); if (err) return err; @@ -2054,7 +2263,7 @@ static int __tipc_nl_add_stats(struct sk_buff *skb, struct tipc_stats *s) (s->accu_queue_sz / s->queue_sz_counts) : 0} }; - stats = nla_nest_start(skb, TIPC_NLA_LINK_STATS); 
+ stats = nla_nest_start_noflag(skb, TIPC_NLA_LINK_STATS); if (!stats) return -EMSGSIZE; @@ -2086,7 +2295,7 @@ int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, if (!hdr) return -EMSGSIZE; - attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK); + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK); if (!attrs) goto msg_full; @@ -2108,7 +2317,7 @@ int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, if (nla_put_flag(msg->skb, TIPC_NLA_LINK_ACTIVE)) goto attr_msg_full; - prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP); + prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK_PROP); if (!prop) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, link->priority)) @@ -2175,7 +2384,7 @@ static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb, (stats->accu_queue_sz / stats->queue_sz_counts) : 0} }; - nest = nla_nest_start(skb, TIPC_NLA_LINK_STATS); + nest = nla_nest_start_noflag(skb, TIPC_NLA_LINK_STATS); if (!nest) return -EMSGSIZE; @@ -2199,6 +2408,8 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) struct nlattr *attrs; struct nlattr *prop; struct tipc_net *tn = net_generic(net, tipc_net_id); + u32 bc_mode = tipc_bcast_get_broadcast_mode(net); + u32 bc_ratio = tipc_bcast_get_broadcast_ratio(net); struct tipc_link *bcl = tn->bcl; if (!bcl) @@ -2213,7 +2424,7 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) return -EMSGSIZE; } - attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK); + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK); if (!attrs) goto msg_full; @@ -2230,11 +2441,17 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, 0)) goto attr_msg_full; - prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP); + prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK_PROP); if (!prop) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->window)) goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_BROADCAST, bc_mode)) + goto prop_msg_full; + if (bc_mode & BCLINK_MODE_SEL) + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_BROADCAST_RATIO, + bc_ratio)) + goto prop_msg_full; nla_nest_end(msg->skb, prop); err = __tipc_nl_add_bc_link_stat(msg->skb, &bcl->stats); diff --git a/net/tipc/link.h b/net/tipc/link.h index 8439e0ee53a8..adcad65e761c 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -90,6 +90,8 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, int mtyp, struct sk_buff_head *xmitq); void tipc_link_create_dummy_tnl_msg(struct tipc_link *tnl, struct sk_buff_head *xmitq); +void tipc_link_failover_prepare(struct tipc_link *l, struct tipc_link *tnl, + struct sk_buff_head *xmitq); void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq); int tipc_link_fsm_evt(struct tipc_link *l, int evt); bool tipc_link_is_up(struct tipc_link *l); diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index 67f69389ec17..6a6eae88442f 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -696,7 +696,7 @@ static int __tipc_nl_add_monitor_peer(struct tipc_peer *peer, if (!hdr) return -EMSGSIZE; - attrs = nla_nest_start(msg->skb, TIPC_NLA_MON_PEER); + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_MON_PEER); if (!attrs) goto msg_full; @@ -785,7 +785,7 @@ int __tipc_nl_add_monitor(struct net *net, struct tipc_nl_msg *msg, if (!hdr) return -EMSGSIZE; - attrs = nla_nest_start(msg->skb, TIPC_NLA_MON); + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_MON); if (!attrs) goto msg_full; diff --git a/net/tipc/msg.h 
b/net/tipc/msg.h index d7e4b8b93f9d..8de02ad6e352 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -117,6 +117,37 @@ struct tipc_msg { __be32 hdr[15]; }; +/* struct tipc_gap_ack - TIPC Gap ACK block + * @ack: seqno of the last consecutive packet in link deferdq + * @gap: number of gap packets since the last ack + * + * E.g: + * link deferdq: 1 2 3 4 10 11 13 14 15 20 + * --> Gap ACK blocks: <4, 5>, <11, 1>, <15, 4>, <20, 0> + */ +struct tipc_gap_ack { + __be16 ack; + __be16 gap; +}; + +/* struct tipc_gap_ack_blks + * @len: actual length of the record + * @gack_cnt: number of Gap ACK blocks in the record + * @gacks: array of Gap ACK blocks + */ +struct tipc_gap_ack_blks { + __be16 len; + u8 gack_cnt; + u8 reserved; + struct tipc_gap_ack gacks[]; +}; + +#define tipc_gap_ack_blks_sz(n) (sizeof(struct tipc_gap_ack_blks) + \ + sizeof(struct tipc_gap_ack) * (n)) + +#define MAX_GAP_ACK_BLKS 32 +#define MAX_GAP_ACK_BLKS_SZ tipc_gap_ack_blks_sz(MAX_GAP_ACK_BLKS) + static inline struct tipc_msg *buf_msg(struct sk_buff *skb) { return (struct tipc_msg *)skb->data; @@ -257,6 +288,16 @@ static inline void msg_set_src_droppable(struct tipc_msg *m, u32 d) msg_set_bits(m, 0, 18, 1, d); } +static inline bool msg_is_rcast(struct tipc_msg *m) +{ + return msg_bits(m, 0, 18, 0x1); +} + +static inline void msg_set_is_rcast(struct tipc_msg *m, bool d) +{ + msg_set_bits(m, 0, 18, 0x1, d); +} + static inline void msg_set_size(struct tipc_msg *m, u32 sz) { m->hdr[0] = htonl((msg_word(m, 0) & ~0x1ffff) | sz); @@ -1110,4 +1151,25 @@ static inline void tipc_skb_queue_splice_tail_init(struct sk_buff_head *list, tipc_skb_queue_splice_tail(&tmp, head); } +/* __tipc_skb_dequeue() - dequeue the head skb according to expected seqno + * @list: list to be dequeued from + * @seqno: seqno of the expected msg + * + * returns skb dequeued from the list if its seqno is less than or equal to + * the expected one, otherwise the skb is still hold + * + * Note: must be used with appropriate locks held only + */ +static inline struct sk_buff *__tipc_skb_dequeue(struct sk_buff_head *list, + u16 seqno) +{ + struct sk_buff *skb = skb_peek(list); + + if (skb && less_eq(buf_seqno(skb), seqno)) { + __skb_unlink(skb, list); + return skb; + } + return NULL; +} + #endif diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 89993afe0fbd..66a65c2cdb23 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -829,11 +829,11 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg, if (!hdr) return -EMSGSIZE; - attrs = nla_nest_start(msg->skb, TIPC_NLA_NAME_TABLE); + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_NAME_TABLE); if (!attrs) goto msg_full; - b = nla_nest_start(msg->skb, TIPC_NLA_NAME_TABLE_PUBL); + b = nla_nest_start_noflag(msg->skb, TIPC_NLA_NAME_TABLE_PUBL); if (!b) goto attr_msg_full; diff --git a/net/tipc/net.c b/net/tipc/net.c index 7ce1e86b024f..85707c185360 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -187,7 +187,7 @@ static int __tipc_nl_add_net(struct net *net, struct tipc_nl_msg *msg) if (!hdr) return -EMSGSIZE; - attrs = nla_nest_start(msg->skb, TIPC_NLA_NET); + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_NET); if (!attrs) goto msg_full; @@ -245,9 +245,9 @@ int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_NET]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX, - info->attrs[TIPC_NLA_NET], tipc_nl_net_policy, - info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_NET_MAX, + info->attrs[TIPC_NLA_NET], + 
tipc_nl_net_policy, info->extack); if (err) return err; diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 99ee419210ba..99bd166bccec 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -110,7 +110,9 @@ const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { [TIPC_NLA_PROP_UNSPEC] = { .type = NLA_UNSPEC }, [TIPC_NLA_PROP_PRIO] = { .type = NLA_U32 }, [TIPC_NLA_PROP_TOL] = { .type = NLA_U32 }, - [TIPC_NLA_PROP_WIN] = { .type = NLA_U32 } + [TIPC_NLA_PROP_WIN] = { .type = NLA_U32 }, + [TIPC_NLA_PROP_BROADCAST] = { .type = NLA_U32 }, + [TIPC_NLA_PROP_BROADCAST_RATIO] = { .type = NLA_U32 } }; const struct nla_policy tipc_nl_bearer_policy[TIPC_NLA_BEARER_MAX + 1] = { @@ -141,115 +143,115 @@ const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = { static const struct genl_ops tipc_genl_v2_ops[] = { { .cmd = TIPC_NL_BEARER_DISABLE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_bearer_disable, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_BEARER_ENABLE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_bearer_enable, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_BEARER_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_bearer_get, .dumpit = tipc_nl_bearer_dump, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_BEARER_ADD, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_bearer_add, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_BEARER_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_bearer_set, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_SOCK_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .start = tipc_dump_start, .dumpit = tipc_nl_sk_dump, .done = tipc_dump_done, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_PUBL_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = tipc_nl_publ_dump, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_LINK_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_node_get_link, .dumpit = tipc_nl_node_dump_link, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_LINK_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_node_set_link, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_LINK_RESET_STATS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_node_reset_link_stats, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_MEDIA_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_media_get, .dumpit = tipc_nl_media_dump, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_MEDIA_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_media_set, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_NODE_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = tipc_nl_node_dump, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_NET_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = tipc_nl_net_dump, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_NET_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_net_set, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_NAME_TABLE_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = tipc_nl_name_table_dump, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_MON_SET, + .validate = 
GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_node_set_monitor, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_MON_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_node_get_monitor, .dumpit = tipc_nl_node_dump_monitor, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_MON_PEER_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = tipc_nl_node_dump_monitor_peer, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_PEER_REMOVE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_peer_rm, - .policy = tipc_nl_policy, }, #ifdef CONFIG_TIPC_MEDIA_UDP { .cmd = TIPC_NL_UDP_GET_REMOTEIP, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = tipc_udp_nl_dump_remoteip, - .policy = tipc_nl_policy, }, #endif }; @@ -259,6 +261,7 @@ struct genl_family tipc_genl_family __ro_after_init = { .version = TIPC_GENL_V2_VERSION, .hdrsize = 0, .maxattr = TIPC_NLA_MAX, + .policy = tipc_nl_policy, .netnsok = true, .module = THIS_MODULE, .ops = tipc_genl_v2_ops, @@ -273,8 +276,8 @@ int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr) if (!*attr) return -EOPNOTSUPP; - return nlmsg_parse(nlh, GENL_HDRLEN, *attr, maxattr, tipc_nl_policy, - NULL); + return nlmsg_parse_deprecated(nlh, GENL_HDRLEN, *attr, maxattr, + tipc_nl_policy, NULL); } int __init tipc_netlink_start(void) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 340a6e7c43a7..c6a04c09d075 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -328,9 +328,9 @@ static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, if (err) goto doit_out; - err = nla_parse(attrbuf, tipc_genl_family.maxattr, - (const struct nlattr *)trans_buf->data, - trans_buf->len, NULL, NULL); + err = nla_parse_deprecated(attrbuf, tipc_genl_family.maxattr, + (const struct nlattr *)trans_buf->data, + trans_buf->len, NULL, NULL); if (err) goto doit_out; @@ -378,8 +378,8 @@ static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg, if (!attrs[TIPC_NLA_BEARER]) return -EINVAL; - err = nla_parse_nested(bearer, TIPC_NLA_BEARER_MAX, - attrs[TIPC_NLA_BEARER], NULL, NULL); + err = nla_parse_nested_deprecated(bearer, TIPC_NLA_BEARER_MAX, + attrs[TIPC_NLA_BEARER], NULL, NULL); if (err) return err; @@ -399,7 +399,7 @@ static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd, b = (struct tipc_bearer_config *)TLV_DATA(msg->req); - bearer = nla_nest_start(skb, TIPC_NLA_BEARER); + bearer = nla_nest_start_noflag(skb, TIPC_NLA_BEARER); if (!bearer) return -EMSGSIZE; @@ -419,7 +419,7 @@ static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd, return -EMSGSIZE; if (ntohl(b->priority) <= TIPC_MAX_LINK_PRI) { - prop = nla_nest_start(skb, TIPC_NLA_BEARER_PROP); + prop = nla_nest_start_noflag(skb, TIPC_NLA_BEARER_PROP); if (!prop) return -EMSGSIZE; if (nla_put_u32(skb, TIPC_NLA_PROP_PRIO, ntohl(b->priority))) @@ -441,7 +441,7 @@ static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd, name = (char *)TLV_DATA(msg->req); - bearer = nla_nest_start(skb, TIPC_NLA_BEARER); + bearer = nla_nest_start_noflag(skb, TIPC_NLA_BEARER); if (!bearer) return -EMSGSIZE; @@ -514,24 +514,26 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, if (!attrs[TIPC_NLA_LINK]) return -EINVAL; - err = nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], - NULL, NULL); + err = nla_parse_nested_deprecated(link, TIPC_NLA_LINK_MAX, + 
attrs[TIPC_NLA_LINK], NULL, NULL); if (err) return err; if (!link[TIPC_NLA_LINK_PROP]) return -EINVAL; - err = nla_parse_nested(prop, TIPC_NLA_PROP_MAX, - link[TIPC_NLA_LINK_PROP], NULL, NULL); + err = nla_parse_nested_deprecated(prop, TIPC_NLA_PROP_MAX, + link[TIPC_NLA_LINK_PROP], NULL, + NULL); if (err) return err; if (!link[TIPC_NLA_LINK_STATS]) return -EINVAL; - err = nla_parse_nested(stats, TIPC_NLA_STATS_MAX, - link[TIPC_NLA_LINK_STATS], NULL, NULL); + err = nla_parse_nested_deprecated(stats, TIPC_NLA_STATS_MAX, + link[TIPC_NLA_LINK_STATS], NULL, + NULL); if (err) return err; @@ -645,8 +647,8 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg, if (!attrs[TIPC_NLA_LINK]) return -EINVAL; - err = nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], - NULL, NULL); + err = nla_parse_nested_deprecated(link, TIPC_NLA_LINK_MAX, + attrs[TIPC_NLA_LINK], NULL, NULL); if (err) return err; @@ -685,7 +687,7 @@ static int tipc_nl_compat_media_set(struct sk_buff *skb, lc = (struct tipc_link_config *)TLV_DATA(msg->req); - media = nla_nest_start(skb, TIPC_NLA_MEDIA); + media = nla_nest_start_noflag(skb, TIPC_NLA_MEDIA); if (!media) return -EMSGSIZE; @@ -696,7 +698,7 @@ static int tipc_nl_compat_media_set(struct sk_buff *skb, if (nla_put_string(skb, TIPC_NLA_MEDIA_NAME, lc->name)) return -EMSGSIZE; - prop = nla_nest_start(skb, TIPC_NLA_MEDIA_PROP); + prop = nla_nest_start_noflag(skb, TIPC_NLA_MEDIA_PROP); if (!prop) return -EMSGSIZE; @@ -717,7 +719,7 @@ static int tipc_nl_compat_bearer_set(struct sk_buff *skb, lc = (struct tipc_link_config *)TLV_DATA(msg->req); - bearer = nla_nest_start(skb, TIPC_NLA_BEARER); + bearer = nla_nest_start_noflag(skb, TIPC_NLA_BEARER); if (!bearer) return -EMSGSIZE; @@ -728,7 +730,7 @@ static int tipc_nl_compat_bearer_set(struct sk_buff *skb, if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, lc->name)) return -EMSGSIZE; - prop = nla_nest_start(skb, TIPC_NLA_BEARER_PROP); + prop = nla_nest_start_noflag(skb, TIPC_NLA_BEARER_PROP); if (!prop) return -EMSGSIZE; @@ -748,14 +750,14 @@ static int __tipc_nl_compat_link_set(struct sk_buff *skb, lc = (struct tipc_link_config *)TLV_DATA(msg->req); - link = nla_nest_start(skb, TIPC_NLA_LINK); + link = nla_nest_start_noflag(skb, TIPC_NLA_LINK); if (!link) return -EMSGSIZE; if (nla_put_string(skb, TIPC_NLA_LINK_NAME, lc->name)) return -EMSGSIZE; - prop = nla_nest_start(skb, TIPC_NLA_LINK_PROP); + prop = nla_nest_start_noflag(skb, TIPC_NLA_LINK_PROP); if (!prop) return -EMSGSIZE; @@ -811,7 +813,7 @@ static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd, name = (char *)TLV_DATA(msg->req); - link = nla_nest_start(skb, TIPC_NLA_LINK); + link = nla_nest_start_noflag(skb, TIPC_NLA_LINK); if (!link) return -EMSGSIZE; @@ -869,16 +871,18 @@ static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg, if (!attrs[TIPC_NLA_NAME_TABLE]) return -EINVAL; - err = nla_parse_nested(nt, TIPC_NLA_NAME_TABLE_MAX, - attrs[TIPC_NLA_NAME_TABLE], NULL, NULL); + err = nla_parse_nested_deprecated(nt, TIPC_NLA_NAME_TABLE_MAX, + attrs[TIPC_NLA_NAME_TABLE], NULL, + NULL); if (err) return err; if (!nt[TIPC_NLA_NAME_TABLE_PUBL]) return -EINVAL; - err = nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, - nt[TIPC_NLA_NAME_TABLE_PUBL], NULL, NULL); + err = nla_parse_nested_deprecated(publ, TIPC_NLA_PUBL_MAX, + nt[TIPC_NLA_NAME_TABLE_PUBL], NULL, + NULL); if (err) return err; @@ -937,8 +941,8 @@ static int __tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, if (!attrs[TIPC_NLA_PUBL]) return -EINVAL; - err = 
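The nla_nest_start() to nla_nest_start_noflag() churn in netlink_compat.c above is mechanical: the old helper never set NLA_F_NESTED on the container attribute, the renamed helper keeps that wire format byte for byte, and the new nla_nest_start() now sets the flag so strict validation can insist on it. A sketch under those assumptions, with hypothetical DEMO_* attribute names:

static int demo_fill_nest(struct sk_buff *skb)
{
	struct nlattr *nest;

	/* old behaviour, preserved by the rename: the container's type
	 * is plain DEMO_ATTR_NEST, NLA_F_NESTED is not set, so existing
	 * userspace keeps parsing the message unchanged */
	nest = nla_nest_start_noflag(skb, DEMO_ATTR_NEST);
	if (!nest)
		return -EMSGSIZE;
	if (nla_put_u32(skb, DEMO_ATTR_VALUE, 42)) {
		nla_nest_cancel(skb, nest);
		return -EMSGSIZE;
	}
	nla_nest_end(skb, nest);

	/* the new nla_nest_start(skb, DEMO_ATTR_NEST) is equivalent to
	 * nla_nest_start_noflag(skb, DEMO_ATTR_NEST | NLA_F_NESTED) */
	return 0;
}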
nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, attrs[TIPC_NLA_PUBL], - NULL, NULL); + err = nla_parse_nested_deprecated(publ, TIPC_NLA_PUBL_MAX, + attrs[TIPC_NLA_PUBL], NULL, NULL); if (err) return err; @@ -973,7 +977,7 @@ static int tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, u32 sock) return -EMSGSIZE; } - nest = nla_nest_start(args, TIPC_NLA_SOCK); + nest = nla_nest_start_noflag(args, TIPC_NLA_SOCK); if (!nest) { kfree_skb(args); return -EMSGSIZE; @@ -1007,8 +1011,8 @@ static int tipc_nl_compat_sk_dump(struct tipc_nl_compat_msg *msg, if (!attrs[TIPC_NLA_SOCK]) return -EINVAL; - err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, attrs[TIPC_NLA_SOCK], - NULL, NULL); + err = nla_parse_nested_deprecated(sock, TIPC_NLA_SOCK_MAX, + attrs[TIPC_NLA_SOCK], NULL, NULL); if (err) return err; @@ -1019,8 +1023,9 @@ static int tipc_nl_compat_sk_dump(struct tipc_nl_compat_msg *msg, u32 node; struct nlattr *con[TIPC_NLA_CON_MAX + 1]; - err = nla_parse_nested(con, TIPC_NLA_CON_MAX, - sock[TIPC_NLA_SOCK_CON], NULL, NULL); + err = nla_parse_nested_deprecated(con, TIPC_NLA_CON_MAX, + sock[TIPC_NLA_SOCK_CON], + NULL, NULL); if (err) return err; @@ -1059,8 +1064,8 @@ static int tipc_nl_compat_media_dump(struct tipc_nl_compat_msg *msg, if (!attrs[TIPC_NLA_MEDIA]) return -EINVAL; - err = nla_parse_nested(media, TIPC_NLA_MEDIA_MAX, - attrs[TIPC_NLA_MEDIA], NULL, NULL); + err = nla_parse_nested_deprecated(media, TIPC_NLA_MEDIA_MAX, + attrs[TIPC_NLA_MEDIA], NULL, NULL); if (err) return err; @@ -1079,8 +1084,8 @@ static int tipc_nl_compat_node_dump(struct tipc_nl_compat_msg *msg, if (!attrs[TIPC_NLA_NODE]) return -EINVAL; - err = nla_parse_nested(node, TIPC_NLA_NODE_MAX, attrs[TIPC_NLA_NODE], - NULL, NULL); + err = nla_parse_nested_deprecated(node, TIPC_NLA_NODE_MAX, + attrs[TIPC_NLA_NODE], NULL, NULL); if (err) return err; @@ -1100,7 +1105,7 @@ static int tipc_nl_compat_net_set(struct tipc_nl_compat_cmd_doit *cmd, val = ntohl(*(__be32 *)TLV_DATA(msg->req)); - net = nla_nest_start(skb, TIPC_NLA_NET); + net = nla_nest_start_noflag(skb, TIPC_NLA_NET); if (!net) return -EMSGSIZE; @@ -1126,8 +1131,8 @@ static int tipc_nl_compat_net_dump(struct tipc_nl_compat_msg *msg, if (!attrs[TIPC_NLA_NET]) return -EINVAL; - err = nla_parse_nested(net, TIPC_NLA_NET_MAX, attrs[TIPC_NLA_NET], - NULL, NULL); + err = nla_parse_nested_deprecated(net, TIPC_NLA_NET_MAX, + attrs[TIPC_NLA_NET], NULL, NULL); if (err) return err; @@ -1300,6 +1305,7 @@ send: static const struct genl_ops tipc_genl_compat_ops[] = { { .cmd = TIPC_GENL_CMD, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_compat_recv, }, }; diff --git a/net/tipc/node.c b/net/tipc/node.c index dd3b6dc17662..9e106d3ed187 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -375,14 +375,20 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr, if (n->capabilities == capabilities) goto exit; /* Same node may come back with new capabilities */ - write_lock_bh(&n->lock); + tipc_node_write_lock(n); n->capabilities = capabilities; for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { l = n->links[bearer_id].link; if (l) tipc_link_update_caps(l, capabilities); } - write_unlock_bh(&n->lock); + tipc_node_write_unlock_fast(n); + + /* Calculate cluster capabilities */ + tn->capabilities = TIPC_NODE_CAPABILITIES; + list_for_each_entry_rcu(temp_node, &tn->node_list, list) { + tn->capabilities &= temp_node->capabilities; + } goto exit; } n = kzalloc(sizeof(*n), GFP_ATOMIC); @@ -433,6 +439,11 @@ static struct tipc_node 
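The nla_parse_nested() to nla_parse_nested_deprecated() conversions are the read-side counterpart of the same series: the _deprecated variants keep the historical liberal semantics, tolerating unknown attribute types and trailing data rather than rejecting them, for interfaces grandfathered out of strict validation. A hedged sketch, reusing the hypothetical demo_policy from the fragment above:

static int demo_parse_nest(const struct nlattr *nest,
			   struct netlink_ext_ack *extack)
{
	struct nlattr *tb[DEMO_ATTR_MAX + 1];
	int err;

	/* liberal parse: attribute types above DEMO_ATTR_MAX are
	 * silently skipped instead of failing the request */
	err = nla_parse_nested_deprecated(tb, DEMO_ATTR_MAX, nest,
					  demo_policy, extack);
	if (err)
		return err;

	if (!tb[DEMO_ATTR_VALUE])
		return -EINVAL;
	return nla_get_u32(tb[DEMO_ATTR_VALUE]);
}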
*tipc_node_create(struct net *net, u32 addr, break; } list_add_tail_rcu(&n->list, &temp_node->list); + /* Calculate cluster capabilities */ + tn->capabilities = TIPC_NODE_CAPABILITIES; + list_for_each_entry_rcu(temp_node, &tn->node_list, list) { + tn->capabilities &= temp_node->capabilities; + } trace_tipc_node_create(n, true, " "); exit: spin_unlock_bh(&tn->node_list_lock); @@ -589,6 +600,7 @@ static void tipc_node_clear_links(struct tipc_node *node) */ static bool tipc_node_cleanup(struct tipc_node *peer) { + struct tipc_node *temp_node; struct tipc_net *tn = tipc_net(peer->net); bool deleted = false; @@ -604,6 +616,13 @@ static bool tipc_node_cleanup(struct tipc_node *peer) deleted = true; } tipc_node_write_unlock(peer); + + /* Calculate cluster capabilities */ + tn->capabilities = TIPC_NODE_CAPABILITIES; + list_for_each_entry_rcu(temp_node, &tn->node_list, list) { + tn->capabilities &= temp_node->capabilities; + } + spin_unlock_bh(&tn->node_list_lock); return deleted; } @@ -695,7 +714,6 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, *slot0 = bearer_id; *slot1 = bearer_id; tipc_node_fsm_evt(n, SELF_ESTABL_CONTACT_EVT); - n->failover_sent = false; n->action_flags |= TIPC_NOTIFY_NODE_UP; tipc_link_set_active(nl, true); tipc_bcast_add_peer(n->net, nl, xmitq); @@ -738,6 +756,45 @@ static void tipc_node_link_up(struct tipc_node *n, int bearer_id, } /** + * tipc_node_link_failover() - start failover in case of "half-failover" + * + * This function is only called in a very special situation where link + * failover may already have started on the peer node but not on this node. + * This can happen when, e.g.: + * 1. Both links <1A-2A>, <1B-2B> down + * 2. Link endpoint 2A up, but 1A still down (e.g. due to network + * disturbance, wrong session, etc.) + * 3. Link <1B-2B> up + * 4. Link endpoint 2A down (e.g. due to link tolerance timeout) + * 5. Node B starts failover onto link <1B-2B> + * + * ==> Node A never starts link/node failover! + * + * @n: tipc node structure + * @l: peer link endpoint failing over (can be NULL) + * @tnl: tunnel link + * @xmitq: queue for messages to be transmitted on the tunnel link later + */ +static void tipc_node_link_failover(struct tipc_node *n, struct tipc_link *l, + struct tipc_link *tnl, + struct sk_buff_head *xmitq) +{ + /* Avoid a "self-failover" that can never end */ + if (!tipc_link_is_up(tnl)) + return; + + tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT); + tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT); + + n->sync_point = tipc_link_rcv_nxt(tnl) + (U16_MAX / 2 - 1); + tipc_link_failover_prepare(l, tnl, xmitq); + + if (l) + tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); + tipc_node_fsm_evt(n, NODE_FAILOVER_BEGIN_EVT); +} + +/** + * __tipc_node_link_down - handle loss of link */ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, @@ -1340,7 +1397,7 @@ static int __tipc_nl_add_node(struct tipc_nl_msg *msg, struct tipc_node *node) if (!hdr) return -EMSGSIZE; - attrs = nla_nest_start(msg->skb, TIPC_NLA_NODE); + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_NODE); if (!attrs) goto msg_full; @@ -1656,14 +1713,16 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, tipc_skb_queue_splice_tail_init(tipc_link_inputq(pl), tipc_link_inputq(l)); } + /* If parallel link was already down, and this happened before - * the tunnel link came up, FAILOVER was never sent. Ensure that - * FAILOVER is sent to get peer out of NODE_FAILINGOVER state. + * the tunnel link came up, node failover was never started.
+ * Ensure that a FAILOVER_MSG is sent to get peer out of + * NODE_FAILINGOVER state, also this node must accept + * TUNNEL_MSGs from peer. */ - if (n->state != NODE_FAILINGOVER && !n->failover_sent) { - tipc_link_create_dummy_tnl_msg(l, xmitq); - n->failover_sent = true; - } + if (n->state != NODE_FAILINGOVER) + tipc_node_link_failover(n, pl, l, xmitq); + /* If pkts arrive out of order, use lowest calculated syncpt */ if (less(syncpt, n->sync_point)) n->sync_point = syncpt; @@ -1866,9 +1925,9 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_NET]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX, - info->attrs[TIPC_NLA_NET], tipc_nl_net_policy, - info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_NET_MAX, + info->attrs[TIPC_NLA_NET], + tipc_nl_net_policy, info->extack); if (err) return err; @@ -2024,9 +2083,9 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_LINK]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, - info->attrs[TIPC_NLA_LINK], - tipc_nl_link_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_LINK_MAX, + info->attrs[TIPC_NLA_LINK], + tipc_nl_link_policy, info->extack); if (err) return err; @@ -2100,9 +2159,9 @@ int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_LINK]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, - info->attrs[TIPC_NLA_LINK], - tipc_nl_link_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_LINK_MAX, + info->attrs[TIPC_NLA_LINK], + tipc_nl_link_policy, info->extack); if (err) return err; @@ -2165,9 +2224,9 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_LINK]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, - info->attrs[TIPC_NLA_LINK], - tipc_nl_link_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_LINK_MAX, + info->attrs[TIPC_NLA_LINK], + tipc_nl_link_policy, info->extack); if (err) return err; @@ -2305,9 +2364,10 @@ int tipc_nl_node_set_monitor(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_MON]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_MON_MAX, - info->attrs[TIPC_NLA_MON], - tipc_nl_monitor_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_MON_MAX, + info->attrs[TIPC_NLA_MON], + tipc_nl_monitor_policy, + info->extack); if (err) return err; @@ -2334,7 +2394,7 @@ static int __tipc_nl_add_monitor_prop(struct net *net, struct tipc_nl_msg *msg) if (!hdr) return -EMSGSIZE; - attrs = nla_nest_start(msg->skb, TIPC_NLA_MON); + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_MON); if (!attrs) goto msg_full; @@ -2425,9 +2485,10 @@ int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb, if (!attrs[TIPC_NLA_MON]) return -EINVAL; - err = nla_parse_nested(mon, TIPC_NLA_MON_MAX, - attrs[TIPC_NLA_MON], - tipc_nl_monitor_policy, NULL); + err = nla_parse_nested_deprecated(mon, TIPC_NLA_MON_MAX, + attrs[TIPC_NLA_MON], + tipc_nl_monitor_policy, + NULL); if (err) return err; diff --git a/net/tipc/node.h b/net/tipc/node.h index 4f59a30e989a..c0bf49ea3de4 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -51,7 +51,9 @@ enum { TIPC_BLOCK_FLOWCTL = (1 << 3), TIPC_BCAST_RCAST = (1 << 4), TIPC_NODE_ID128 = (1 << 5), - TIPC_LINK_PROTO_SEQNO = (1 << 6) + TIPC_LINK_PROTO_SEQNO = (1 << 6), + TIPC_MCAST_RBCTL = (1 << 7), + TIPC_GAP_ACK_BLOCK = (1 << 8) }; 
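The three node.c hunks above keep tn->capabilities equal to the bitwise AND of every peer's advertised capability bits, recomputed whenever a node is created, rejoins with changed capabilities, or is cleaned up, so a new feature such as TIPC_GAP_ACK_BLOCK is only exercised once the whole cluster supports it. A small userspace model of that reduction (bit values copied from the capability enum above; the 0x1ff full mask is illustrative):

#include <stdio.h>

#define TIPC_MCAST_RBCTL	(1 << 7)
#define TIPC_GAP_ACK_BLOCK	(1 << 8)

int main(void)
{
	/* one mask per cluster node; the last peer predates bit 8 */
	unsigned int node_caps[] = { 0x1ff, 0x1ff, 0x0ff };
	unsigned int cluster = 0x1ff;	/* this node's own capabilities */
	unsigned int i;

	for (i = 0; i < sizeof(node_caps) / sizeof(node_caps[0]); i++)
		cluster &= node_caps[i];	/* the kernel's AND loop */

	printf("GAP_ACK_BLOCK cluster-wide: %s\n",
	       cluster & TIPC_GAP_ACK_BLOCK ? "on" : "off");
	printf("MCAST_RBCTL cluster-wide:   %s\n",
	       cluster & TIPC_MCAST_RBCTL ? "on" : "off");
	return 0;
}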
#define TIPC_NODE_CAPABILITIES (TIPC_SYN_BIT | \ @@ -60,7 +62,9 @@ enum { TIPC_BCAST_RCAST | \ TIPC_BLOCK_FLOWCTL | \ TIPC_NODE_ID128 | \ - TIPC_LINK_PROTO_SEQNO) + TIPC_LINK_PROTO_SEQNO | \ + TIPC_MCAST_RBCTL | \ + TIPC_GAP_ACK_BLOCK) #define INVALID_BEARER_ID -1 void tipc_node_stop(struct net *net); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b542f14ed444..145e4decb0c9 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -485,6 +485,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, tsk_set_unreturnable(tsk, true); if (sock->type == SOCK_DGRAM) tsk_set_unreliable(tsk, true); + __skb_queue_head_init(&tsk->mc_method.deferredq); } trace_tipc_sk_create(sk, NULL, TIPC_DUMP_NONE, " "); @@ -582,6 +583,7 @@ static int tipc_release(struct socket *sock) sk->sk_shutdown = SHUTDOWN_MASK; tipc_sk_leave(tsk); tipc_sk_withdraw(tsk, 0, NULL); + __skb_queue_purge(&tsk->mc_method.deferredq); sk_stop_timer(sk, &sk->sk_timer); tipc_sk_remove(tsk); @@ -2149,6 +2151,7 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb, struct tipc_msg *hdr = buf_msg(skb); struct net *net = sock_net(sk); struct sk_buff_head inputq; + int mtyp = msg_type(hdr); int limit, err = TIPC_OK; trace_tipc_sk_filter_rcv(sk, skb, TIPC_DUMP_ALL, " "); @@ -2162,6 +2165,9 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb, if (unlikely(grp)) tipc_group_filter_msg(grp, &inputq, xmitq); + if (unlikely(!grp) && mtyp == TIPC_MCAST_MSG) + tipc_mcast_filter_msg(net, &tsk->mc_method.deferredq, &inputq); + /* Validate and add to receive buffer if there is space */ while ((skb = __skb_dequeue(&inputq))) { hdr = buf_msg(skb); @@ -3064,6 +3070,9 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt, case TIPC_SOCK_RECVQ_DEPTH: value = skb_queue_len(&sk->sk_receive_queue); break; + case TIPC_SOCK_RECVQ_USED: + value = sk_rmem_alloc_get(sk); + break; case TIPC_GROUP_JOIN: seq.type = 0; if (tsk->group) @@ -3264,7 +3273,7 @@ static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk) peer_node = tsk_peer_node(tsk); peer_port = tsk_peer_port(tsk); - nest = nla_nest_start(skb, TIPC_NLA_SOCK_CON); + nest = nla_nest_start_noflag(skb, TIPC_NLA_SOCK_CON); if (!nest) return -EMSGSIZE; @@ -3323,7 +3332,7 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, if (!hdr) goto msg_cancel; - attrs = nla_nest_start(skb, TIPC_NLA_SOCK); + attrs = nla_nest_start_noflag(skb, TIPC_NLA_SOCK); if (!attrs) goto genlmsg_cancel; @@ -3428,7 +3437,7 @@ int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb, if (!(sk_filter_state & (1 << sk->sk_state))) return 0; - attrs = nla_nest_start(skb, TIPC_NLA_SOCK); + attrs = nla_nest_start_noflag(skb, TIPC_NLA_SOCK); if (!attrs) goto msg_cancel; @@ -3446,7 +3455,7 @@ int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb, TIPC_NLA_SOCK_PAD)) goto attr_msg_cancel; - stat = nla_nest_start(skb, TIPC_NLA_SOCK_STAT); + stat = nla_nest_start_noflag(skb, TIPC_NLA_SOCK_STAT); if (!stat) goto attr_msg_cancel; @@ -3503,7 +3512,7 @@ static int __tipc_nl_add_sk_publ(struct sk_buff *skb, if (!hdr) goto msg_cancel; - attrs = nla_nest_start(skb, TIPC_NLA_PUBL); + attrs = nla_nest_start_noflag(skb, TIPC_NLA_PUBL); if (!attrs) goto genlmsg_cancel; @@ -3590,9 +3599,9 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!attrs[TIPC_NLA_SOCK]) return -EINVAL; - err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, - attrs[TIPC_NLA_SOCK], - tipc_nl_sock_policy, NULL); 
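The tipc_getsockopt() hunk above adds TIPC_SOCK_RECVQ_USED, which reports sk_rmem_alloc (bytes of receive-queue memory actually in use) alongside the older TIPC_SOCK_RECVQ_DEPTH buffer count. A userspace sketch; it assumes uapi headers from a kernel that carries this change, and the SOL_TIPC fallback value mirrors linux/socket.h:

#include <stdio.h>
#include <sys/socket.h>
#include <linux/tipc.h>

#ifndef SOL_TIPC
#define SOL_TIPC 271			/* from linux/socket.h */
#endif

int main(void)
{
	unsigned int used = 0, depth = 0;
	socklen_t len = sizeof(used);
	int fd = socket(AF_TIPC, SOCK_RDM, 0);

	if (fd < 0) {
		perror("socket(AF_TIPC)");
		return 1;
	}
	if (!getsockopt(fd, SOL_TIPC, TIPC_SOCK_RECVQ_USED, &used, &len) &&
	    !getsockopt(fd, SOL_TIPC, TIPC_SOCK_RECVQ_DEPTH, &depth, &len))
		printf("rx queue: %u bytes across %u buffers\n", used, depth);
	return 0;
}

The distinction matters for tuning: DEPTH counts queued buffers, while USED tracks the memory accounting the kernel compares against the receive buffer limit.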
+ err = nla_parse_nested_deprecated(sock, TIPC_NLA_SOCK_MAX, + attrs[TIPC_NLA_SOCK], + tipc_nl_sock_policy, NULL); if (err) return err; diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 4d85d71f16e2..7fc02d84c4f1 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -44,7 +44,7 @@ #include <net/sock.h> #include <net/ip.h> #include <net/udp_tunnel.h> -#include <net/addrconf.h> +#include <net/ipv6_stubs.h> #include <linux/tipc_netlink.h> #include "core.h" #include "addr.h" @@ -354,25 +354,21 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) skb_pull(skb, sizeof(struct udphdr)); hdr = buf_msg(skb); - rcu_read_lock(); - b = rcu_dereference_rtnl(ub->bearer); + b = rcu_dereference(ub->bearer); if (!b) - goto rcu_out; + goto out; if (b && test_bit(0, &b->up)) { tipc_rcv(sock_net(sk), skb, b); - rcu_read_unlock(); return 0; } if (unlikely(msg_user(hdr) == LINK_CONFIG)) { err = tipc_udp_rcast_disc(b, skb); if (err) - goto rcu_out; + goto out; } -rcu_out: - rcu_read_unlock(); out: kfree_skb(skb); return 0; @@ -451,9 +447,9 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb) if (!attrs[TIPC_NLA_BEARER]) return -EINVAL; - err = nla_parse_nested(battrs, TIPC_NLA_BEARER_MAX, - attrs[TIPC_NLA_BEARER], - tipc_nl_bearer_policy, NULL); + err = nla_parse_nested_deprecated(battrs, TIPC_NLA_BEARER_MAX, + attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy, NULL); if (err) return err; @@ -527,7 +523,7 @@ int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, struct tipc_bearer *b) if (!ub) return -ENODEV; - nest = nla_nest_start(msg->skb, TIPC_NLA_BEARER_UDP_OPTS); + nest = nla_nest_start_noflag(msg->skb, TIPC_NLA_BEARER_UDP_OPTS); if (!nest) goto msg_full; @@ -605,8 +601,7 @@ int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr) struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; struct udp_media_addr *dst; - if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, attr, - tipc_nl_udp_policy, NULL)) + if (nla_parse_nested_deprecated(opts, TIPC_NLA_UDP_MAX, attr, tipc_nl_udp_policy, NULL)) return -EINVAL; if (!opts[TIPC_NLA_UDP_REMOTE]) @@ -659,9 +654,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, if (!attrs[TIPC_NLA_BEARER_UDP_OPTS]) goto err; - if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, - attrs[TIPC_NLA_BEARER_UDP_OPTS], - tipc_nl_udp_policy, NULL)) + if (nla_parse_nested_deprecated(opts, TIPC_NLA_UDP_MAX, attrs[TIPC_NLA_BEARER_UDP_OPTS], tipc_nl_udp_policy, NULL)) goto err; if (!opts[TIPC_NLA_UDP_LOCAL] || !opts[TIPC_NLA_UDP_REMOTE]) { diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 14dedb24fa7b..e225c81e6b35 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -89,22 +89,6 @@ static void tls_device_gc_task(struct work_struct *work) } } -static void tls_device_attach(struct tls_context *ctx, struct sock *sk, - struct net_device *netdev) -{ - if (sk->sk_destruct != tls_device_sk_destruct) { - refcount_set(&ctx->refcount, 1); - dev_hold(netdev); - ctx->netdev = netdev; - spin_lock_irq(&tls_device_lock); - list_add_tail(&ctx->list, &tls_device_list); - spin_unlock_irq(&tls_device_lock); - - ctx->sk_destruct = sk->sk_destruct; - sk->sk_destruct = tls_device_sk_destruct; - } -} - static void tls_device_queue_ctx_destruction(struct tls_context *ctx) { unsigned long flags; @@ -199,7 +183,7 @@ static void tls_icsk_clean_acked(struct sock *sk, u32 acked_seq) * socket and no in-flight SKBs associated with this * socket, so it is safe to free all the resources. 
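The tipc_udp_recv() rework above drops an explicit rcu_read_lock()/rcu_read_unlock() pair: UDP tunnel ->encap_rcv() callbacks already run from the softirq receive path inside an RCU read-side section, and rcu_dereference_rtnl() wrongly suggested RTNL might be held here. A sketch of the resulting shape, with invented demo_* types:

struct demo_priv;
struct demo_bearer {
	struct demo_priv __rcu *priv;
};

static int demo_encap_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct demo_bearer *ub = rcu_dereference_sk_user_data(sk);
	struct demo_priv *b;

	b = rcu_dereference(ub->priv);	/* caller already holds RCU */
	if (!b)
		goto out;
	/* ... hand skb to the bearer and return ... */
	return 0;
out:
	kfree_skb(skb);
	return 0;
}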
*/ -void tls_device_sk_destruct(struct sock *sk) +static void tls_device_sk_destruct(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx); @@ -217,7 +201,6 @@ void tls_device_sk_destruct(struct sock *sk) if (refcount_dec_and_test(&tls_ctx->refcount)) tls_device_queue_ctx_destruction(tls_ctx); } -EXPORT_SYMBOL(tls_device_sk_destruct); void tls_device_free_resources_tx(struct sock *sk) { @@ -584,7 +567,7 @@ void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn) rx_ctx = tls_offload_ctx_rx(tls_ctx); resync_req = atomic64_read(&rx_ctx->resync_req); - req_seq = ntohl(resync_req >> 32) - ((u32)TLS_HEADER_SIZE - 1); + req_seq = (resync_req >> 32) - ((u32)TLS_HEADER_SIZE - 1); is_req_pending = resync_req; if (unlikely(is_req_pending) && req_seq == seq && @@ -699,6 +682,22 @@ int tls_device_decrypted(struct sock *sk, struct sk_buff *skb) tls_device_reencrypt(sk, skb); } +static void tls_device_attach(struct tls_context *ctx, struct sock *sk, + struct net_device *netdev) +{ + if (sk->sk_destruct != tls_device_sk_destruct) { + refcount_set(&ctx->refcount, 1); + dev_hold(netdev); + ctx->netdev = netdev; + spin_lock_irq(&tls_device_lock); + list_add_tail(&ctx->list, &tls_device_list); + spin_unlock_irq(&tls_device_lock); + + ctx->sk_destruct = sk->sk_destruct; + sk->sk_destruct = tls_device_sk_destruct; + } +} + int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) { u16 nonce_size, tag_size, iv_size, rec_seq_size; @@ -882,8 +881,6 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) } if (!(netdev->features & NETIF_F_HW_TLS_RX)) { - pr_err_ratelimited("%s: netdev %s with no TLS offload\n", - __func__, netdev->name); rc = -ENOTSUPP; goto release_netdev; } @@ -911,11 +908,8 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_RX, &ctx->crypto_recv.info, tcp_sk(sk)->copied_seq); - if (rc) { - pr_err_ratelimited("%s: The netdev has refused to offload this socket\n", - __func__); + if (rc) goto free_sw_resources; - } tls_device_attach(ctx, sk, netdev); goto release_netdev; diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 478603f43964..fc81ae18cc44 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -490,27 +490,32 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval, switch (crypto_info->cipher_type) { case TLS_CIPHER_AES_GCM_128: + optsize = sizeof(struct tls12_crypto_info_aes_gcm_128); + break; case TLS_CIPHER_AES_GCM_256: { - optsize = crypto_info->cipher_type == TLS_CIPHER_AES_GCM_128 ? 
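The req_seq line above is a byte-order fix rather than a cleanup: as this series has it, the resync request word carries the TCP sequence already in host order in its upper 32 bits, with bit 0 serving as the pending flag, so applying ntohl() a second time scrambled the value on little-endian machines. A standalone model of that packing (values arbitrary):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t seq = 0x12345678;			/* host-order TCP seq */
	uint64_t resync_req = ((uint64_t)seq << 32) | 1;	/* bit 0: pending */

	uint32_t req_seq = resync_req >> 32;		/* no ntohl() here */
	int pending = resync_req & 1;

	printf("pending=%d req_seq=0x%08x\n", pending, req_seq);
	return 0;
}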
- sizeof(struct tls12_crypto_info_aes_gcm_128) : - sizeof(struct tls12_crypto_info_aes_gcm_256); - if (optlen != optsize) { - rc = -EINVAL; - goto err_crypto_info; - } - rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info), - optlen - sizeof(*crypto_info)); - if (rc) { - rc = -EFAULT; - goto err_crypto_info; - } + optsize = sizeof(struct tls12_crypto_info_aes_gcm_256); break; } + case TLS_CIPHER_AES_CCM_128: + optsize = sizeof(struct tls12_crypto_info_aes_ccm_128); + break; default: rc = -EINVAL; goto err_crypto_info; } + if (optlen != optsize) { + rc = -EINVAL; + goto err_crypto_info; + } + + rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info), + optlen - sizeof(*crypto_info)); + if (rc) { + rc = -EFAULT; + goto err_crypto_info; + } + if (tx) { #ifdef CONFIG_TLS_DEVICE rc = tls_set_device_offload(sk, ctx); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 29d6af43dd24..c02293fb10e6 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -42,8 +42,6 @@ #include <net/strparser.h> #include <net/tls.h> -#define MAX_IV_SIZE TLS_CIPHER_AES_GCM_128_IV_SIZE - static int __skb_nsg(struct sk_buff *skb, int offset, int len, unsigned int recursion_level) { @@ -225,7 +223,7 @@ static int tls_do_decryption(struct sock *sk, /* Using skb->sk to push sk through to crypto async callback * handler. This allows propagating errors up to the socket * if needed. It _must_ be cleared in the async handler - * before kfree_skb is called. We _know_ skb->sk is NULL + * before consume_skb is called. We _know_ skb->sk is NULL * because it is a clone from strparser. */ skb->sk = sk; @@ -479,11 +477,18 @@ static int tls_do_encryption(struct sock *sk, struct tls_rec *rec = ctx->open_rec; struct sk_msg *msg_en = &rec->msg_encrypted; struct scatterlist *sge = sk_msg_elem(msg_en, start); - int rc; + int rc, iv_offset = 0; + + /* For CCM based ciphers, first byte of IV is a constant */ + if (prot->cipher_type == TLS_CIPHER_AES_CCM_128) { + rec->iv_data[0] = TLS_AES_CCM_IV_B0_BYTE; + iv_offset = 1; + } + + memcpy(&rec->iv_data[iv_offset], tls_ctx->tx.iv, + prot->iv_size + prot->salt_size); - memcpy(rec->iv_data, tls_ctx->tx.iv, sizeof(rec->iv_data)); - xor_iv_with_seq(prot->version, rec->iv_data, - tls_ctx->tx.rec_seq); + xor_iv_with_seq(prot->version, rec->iv_data, tls_ctx->tx.rec_seq); sge->offset += prot->prepend_size; sge->length -= prot->prepend_size; @@ -1344,6 +1349,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, struct scatterlist *sgout = NULL; const int data_len = rxm->full_len - prot->overhead_size + prot->tail_size; + int iv_offset = 0; if (*zc && (out_iov || out_sg)) { if (out_iov) @@ -1386,18 +1392,25 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, aad = (u8 *)(sgout + n_sgout); iv = aad + prot->aad_size; + /* For CCM based ciphers, first byte of nonce+iv is always '2' */ + if (prot->cipher_type == TLS_CIPHER_AES_CCM_128) { + iv[0] = 2; + iv_offset = 1; + } + /* Prepare IV */ err = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE, - iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, + iv + iv_offset + prot->salt_size, prot->iv_size); if (err < 0) { kfree(mem); return err; } if (prot->version == TLS_1_3_VERSION) - memcpy(iv, tls_ctx->rx.iv, crypto_aead_ivsize(ctx->aead_recv)); + memcpy(iv + iv_offset, tls_ctx->rx.iv, + crypto_aead_ivsize(ctx->aead_recv)); else - memcpy(iv, tls_ctx->rx.iv, TLS_CIPHER_AES_GCM_128_SALT_SIZE); + memcpy(iv + iv_offset, tls_ctx->rx.iv, prot->salt_size); xor_iv_with_seq(prot->version, iv, tls_ctx->rx.rec_seq); @@ -1524,7 
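With the unified optlen check above in place, do_tls_setsockopt_conf() accepts AES128-CCM alongside the two GCM variants, which is the userspace-visible half of the kTLS CCM support. A sketch of enabling it on the transmit side; fd is assumed to be a connected TCP socket whose TLS 1.2 handshake already derived this key block, the key material here is a placeholder, and the SOL_TLS/TCP_ULP fallback values mirror linux/socket.h and newer libc headers:

#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/tls.h>

#ifndef SOL_TLS
#define SOL_TLS 282
#endif
#ifndef TCP_ULP
#define TCP_ULP 31
#endif

static int enable_ktls_ccm_tx(int fd,
			      const unsigned char *key,		/* 16 bytes */
			      const unsigned char *iv,		/* 8 bytes */
			      const unsigned char *salt,	/* 4 bytes */
			      const unsigned char *rec_seq)	/* 8 bytes */
{
	struct tls12_crypto_info_aes_ccm_128 ci;

	memset(&ci, 0, sizeof(ci));
	ci.info.version = TLS_1_2_VERSION;
	ci.info.cipher_type = TLS_CIPHER_AES_CCM_128;
	memcpy(ci.key, key, TLS_CIPHER_AES_CCM_128_KEY_SIZE);
	memcpy(ci.iv, iv, TLS_CIPHER_AES_CCM_128_IV_SIZE);
	memcpy(ci.salt, salt, TLS_CIPHER_AES_CCM_128_SALT_SIZE);
	memcpy(ci.rec_seq, rec_seq, TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE);

	if (setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")))
		return -1;
	/* optlen must be exactly sizeof(ci); anything else is -EINVAL */
	return setsockopt(fd, SOL_TLS, TLS_TX, &ci, sizeof(ci));
}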
+1537,7 @@ static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb, rxm->full_len -= len; return false; } - kfree_skb(skb); + consume_skb(skb); } /* Finished with message */ @@ -1633,7 +1646,7 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, if (!is_peek) { skb_unlink(skb, &ctx->rx_list); - kfree_skb(skb); + consume_skb(skb); } skb = next_skb; @@ -2144,14 +2157,15 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) struct tls_crypto_info *crypto_info; struct tls12_crypto_info_aes_gcm_128 *gcm_128_info; struct tls12_crypto_info_aes_gcm_256 *gcm_256_info; + struct tls12_crypto_info_aes_ccm_128 *ccm_128_info; struct tls_sw_context_tx *sw_ctx_tx = NULL; struct tls_sw_context_rx *sw_ctx_rx = NULL; struct cipher_context *cctx; struct crypto_aead **aead; struct strp_callbacks cb; - u16 nonce_size, tag_size, iv_size, rec_seq_size; + u16 nonce_size, tag_size, iv_size, rec_seq_size, salt_size; struct crypto_tfm *tfm; - char *iv, *rec_seq, *key, *salt; + char *iv, *rec_seq, *key, *salt, *cipher_name; size_t keysize; int rc = 0; @@ -2216,6 +2230,8 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) keysize = TLS_CIPHER_AES_GCM_128_KEY_SIZE; key = gcm_128_info->key; salt = gcm_128_info->salt; + salt_size = TLS_CIPHER_AES_GCM_128_SALT_SIZE; + cipher_name = "gcm(aes)"; break; } case TLS_CIPHER_AES_GCM_256: { @@ -2231,6 +2247,25 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) keysize = TLS_CIPHER_AES_GCM_256_KEY_SIZE; key = gcm_256_info->key; salt = gcm_256_info->salt; + salt_size = TLS_CIPHER_AES_GCM_256_SALT_SIZE; + cipher_name = "gcm(aes)"; + break; + } + case TLS_CIPHER_AES_CCM_128: { + nonce_size = TLS_CIPHER_AES_CCM_128_IV_SIZE; + tag_size = TLS_CIPHER_AES_CCM_128_TAG_SIZE; + iv_size = TLS_CIPHER_AES_CCM_128_IV_SIZE; + iv = ((struct tls12_crypto_info_aes_ccm_128 *)crypto_info)->iv; + rec_seq_size = TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE; + rec_seq = + ((struct tls12_crypto_info_aes_ccm_128 *)crypto_info)->rec_seq; + ccm_128_info = + (struct tls12_crypto_info_aes_ccm_128 *)crypto_info; + keysize = TLS_CIPHER_AES_CCM_128_KEY_SIZE; + key = ccm_128_info->key; + salt = ccm_128_info->salt; + salt_size = TLS_CIPHER_AES_CCM_128_SALT_SIZE; + cipher_name = "ccm(aes)"; break; } default: @@ -2260,16 +2295,16 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) prot->overhead_size = prot->prepend_size + prot->tag_size + prot->tail_size; prot->iv_size = iv_size; - cctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, - GFP_KERNEL); + prot->salt_size = salt_size; + cctx->iv = kmalloc(iv_size + salt_size, GFP_KERNEL); if (!cctx->iv) { rc = -ENOMEM; goto free_priv; } /* Note: 128 & 256 bit salt are the same size */ - memcpy(cctx->iv, salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE); - memcpy(cctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size); prot->rec_seq_size = rec_seq_size; + memcpy(cctx->iv, salt, salt_size); + memcpy(cctx->iv + salt_size, iv, iv_size); cctx->rec_seq = kmemdup(rec_seq, rec_seq_size, GFP_KERNEL); if (!cctx->rec_seq) { rc = -ENOMEM; @@ -2277,7 +2312,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) } if (!*aead) { - *aead = crypto_alloc_aead("gcm(aes)", 0, 0); + *aead = crypto_alloc_aead(cipher_name, 0, 0); if (IS_ERR(*aead)) { rc = PTR_ERR(*aead); *aead = NULL; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index ddb838a1b74c..e68d7454f2e3 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2040,8 +2040,8 @@ static int 
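The iv_offset handling above gives CCM the nonce layout its AEAD expects: byte 0 carries the constant B0/flags byte (TLS_AES_CCM_IV_B0_BYTE, i.e. 2), followed by the 4-byte salt and the 8-byte per-record IV, so the GCM layout is simply shifted right by one. A standalone model of that buffer assembly, with sizes hard-coded to the AES-CCM-128 values:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define TLS_AES_CCM_IV_B0_BYTE 2

static void build_ccm_nonce(uint8_t out[16], const uint8_t salt[4],
			    const uint8_t iv[8])
{
	const int iv_offset = 1;		/* 0 for the GCM ciphers */

	out[0] = TLS_AES_CCM_IV_B0_BYTE;	/* constant first byte */
	memcpy(out + iv_offset, salt, 4);	/* prot->salt_size */
	memcpy(out + iv_offset + 4, iv, 8);	/* prot->iv_size */
}

int main(void)
{
	uint8_t salt[4] = { 0xaa, 0xbb, 0xcc, 0xdd };
	uint8_t iv[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
	uint8_t buf[16] = { 0 };
	int i;

	build_ccm_nonce(buf, salt, iv);
	for (i = 0; i < 13; i++)
		printf("%02x", buf[i]);
	printf("\n");
	return 0;
}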
unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, struct unix_sock *u = unix_sk(sk); struct sk_buff *skb, *last; long timeo; + int skip; int err; - int peeked, skip; err = -EOPNOTSUPP; if (flags&MSG_OOB) @@ -2053,8 +2053,8 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, mutex_lock(&u->iolock); skip = sk_peek_offset(sk, flags); - skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip, - &err, &last); + skb = __skb_try_recv_datagram(sk, flags, NULL, &skip, &err, + &last); if (skb) break; diff --git a/net/wimax/stack.c b/net/wimax/stack.c index a6307813b6d5..4969de672886 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -419,26 +419,26 @@ static const struct nla_policy wimax_gnl_policy[WIMAX_GNL_ATTR_MAX + 1] = { static const struct genl_ops wimax_gnl_ops[] = { { .cmd = WIMAX_GNL_OP_MSG_FROM_USER, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = wimax_gnl_policy, .doit = wimax_gnl_doit_msg_from_user, }, { .cmd = WIMAX_GNL_OP_RESET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = wimax_gnl_policy, .doit = wimax_gnl_doit_reset, }, { .cmd = WIMAX_GNL_OP_RFKILL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = wimax_gnl_policy, .doit = wimax_gnl_doit_rfkill, }, { .cmd = WIMAX_GNL_OP_STATE_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, - .policy = wimax_gnl_policy, .doit = wimax_gnl_doit_state_get, }, }; @@ -582,6 +582,7 @@ struct genl_family wimax_gnl_family __ro_after_init = { .version = WIMAX_GNL_VERSION, .hdrsize = 0, .maxattr = WIMAX_GNL_ATTR_MAX, + .policy = wimax_gnl_policy, .module = THIS_MODULE, .ops = wimax_gnl_ops, .n_ops = ARRAY_SIZE(wimax_gnl_ops), diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 47e30a58566c..fffe4b371e23 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -331,6 +331,11 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { .len = NL80211_MAX_SUPP_RATES }, [NL80211_ATTR_STA_PLINK_ACTION] = NLA_POLICY_MAX(NLA_U8, NUM_NL80211_PLINK_ACTIONS - 1), + [NL80211_ATTR_STA_TX_POWER_SETTING] = + NLA_POLICY_RANGE(NLA_U8, + NL80211_TX_POWER_AUTOMATIC, + NL80211_TX_POWER_FIXED), + [NL80211_ATTR_STA_TX_POWER] = { .type = NLA_S16 }, [NL80211_ATTR_STA_VLAN] = { .type = NLA_U32 }, [NL80211_ATTR_MNTR_FLAGS] = { /* NLA_NESTED can't be empty */ }, [NL80211_ATTR_MESH_ID] = { .type = NLA_BINARY, @@ -553,6 +558,7 @@ static const struct nla_policy nl80211_key_policy[NL80211_KEY_MAX + 1] = { [NL80211_KEY_DEFAULT_MGMT] = { .type = NLA_FLAG }, [NL80211_KEY_TYPE] = NLA_POLICY_MAX(NLA_U32, NUM_NL80211_KEYTYPES - 1), [NL80211_KEY_DEFAULT_TYPES] = { .type = NLA_NESTED }, + [NL80211_KEY_MODE] = NLA_POLICY_RANGE(NLA_U8, 0, NL80211_KEY_SET_TX), }; /* policy for the key default flags */ @@ -618,11 +624,20 @@ nl80211_rekey_policy[NUM_NL80211_REKEY_DATA] = { }; static const struct nla_policy +nl80211_match_band_rssi_policy[NUM_NL80211_BANDS] = { + [NL80211_BAND_2GHZ] = { .type = NLA_S32 }, + [NL80211_BAND_5GHZ] = { .type = NLA_S32 }, + [NL80211_BAND_60GHZ] = { .type = NLA_S32 }, +}; + +static const struct nla_policy nl80211_match_policy[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1] = { [NL80211_SCHED_SCAN_MATCH_ATTR_SSID] = { .type = NLA_BINARY, .len = IEEE80211_MAX_SSID_LEN }, [NL80211_SCHED_SCAN_MATCH_ATTR_BSSID] = { .len = ETH_ALEN }, [NL80211_SCHED_SCAN_MATCH_ATTR_RSSI] = { .type = NLA_U32 }, + 
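The new nl80211 policy entries above rely on the NLA_POLICY_MAX()/NLA_POLICY_RANGE() helpers, which move range checking into the parser so the .doit handlers only ever see in-range values. A sketch on an invented policy (the attribute names are hypothetical; the macros are the in-tree ones):

enum {
	DEMO_PWR_ATTR_UNSPEC,
	DEMO_PWR_ATTR_MODE,	/* like NL80211_ATTR_STA_TX_POWER_SETTING */
	DEMO_PWR_ATTR_DBM,	/* like NL80211_ATTR_STA_TX_POWER */
	__DEMO_PWR_ATTR_MAX
};
#define DEMO_PWR_ATTR_MAX (__DEMO_PWR_ATTR_MAX - 1)

static const struct nla_policy demo_pwr_policy[DEMO_PWR_ATTR_MAX + 1] = {
	/* u8 constrained to [0, 2]: out-of-range input fails parsing
	 * with an extack error instead of reaching the handler */
	[DEMO_PWR_ATTR_MODE] = NLA_POLICY_RANGE(NLA_U8, 0, 2),
	/* signed 16-bit payload, no extra constraint */
	[DEMO_PWR_ATTR_DBM] = { .type = NLA_S16 },
};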
[NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI] = + NLA_POLICY_NESTED(nl80211_match_band_rssi_policy), }; static const struct nla_policy @@ -688,9 +703,11 @@ int nl80211_prepare_wdev_dump(struct netlink_callback *cb, int err; if (!cb->args[0]) { - err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, - genl_family_attrbuf(&nl80211_fam), - nl80211_fam.maxattr, nl80211_policy, NULL); + err = nlmsg_parse_deprecated(cb->nlh, + GENL_HDRLEN + nl80211_fam.hdrsize, + genl_family_attrbuf(&nl80211_fam), + nl80211_fam.maxattr, + nl80211_policy, NULL); if (err) return err; @@ -740,13 +757,13 @@ static int nl80211_msg_put_wmm_rules(struct sk_buff *msg, { int j; struct nlattr *nl_wmm_rules = - nla_nest_start(msg, NL80211_FREQUENCY_ATTR_WMM); + nla_nest_start_noflag(msg, NL80211_FREQUENCY_ATTR_WMM); if (!nl_wmm_rules) goto nla_put_failure; for (j = 0; j < IEEE80211_NUM_ACS; j++) { - struct nlattr *nl_wmm_rule = nla_nest_start(msg, j); + struct nlattr *nl_wmm_rule = nla_nest_start_noflag(msg, j); if (!nl_wmm_rule) goto nla_put_failure; @@ -875,7 +892,7 @@ static bool nl80211_put_txq_stats(struct sk_buff *msg, return false; \ } while (0) - txqattr = nla_nest_start(msg, attrtype); + txqattr = nla_nest_start_noflag(msg, attrtype); if (!txqattr) return false; @@ -910,8 +927,9 @@ static int nl80211_parse_key_new(struct genl_info *info, struct nlattr *key, struct key_parse *k) { struct nlattr *tb[NL80211_KEY_MAX + 1]; - int err = nla_parse_nested(tb, NL80211_KEY_MAX, key, - nl80211_key_policy, info->extack); + int err = nla_parse_nested_deprecated(tb, NL80211_KEY_MAX, key, + nl80211_key_policy, + info->extack); if (err) return err; @@ -947,10 +965,11 @@ static int nl80211_parse_key_new(struct genl_info *info, struct nlattr *key, if (tb[NL80211_KEY_DEFAULT_TYPES]) { struct nlattr *kdt[NUM_NL80211_KEY_DEFAULT_TYPES]; - err = nla_parse_nested(kdt, NUM_NL80211_KEY_DEFAULT_TYPES - 1, - tb[NL80211_KEY_DEFAULT_TYPES], - nl80211_key_default_policy, - info->extack); + err = nla_parse_nested_deprecated(kdt, + NUM_NL80211_KEY_DEFAULT_TYPES - 1, + tb[NL80211_KEY_DEFAULT_TYPES], + nl80211_key_default_policy, + info->extack); if (err) return err; @@ -958,6 +977,9 @@ static int nl80211_parse_key_new(struct genl_info *info, struct nlattr *key, k->def_multi = kdt[NL80211_KEY_DEFAULT_TYPE_MULTICAST]; } + if (tb[NL80211_KEY_MODE]) + k->p.mode = nla_get_u8(tb[NL80211_KEY_MODE]); + return 0; } @@ -994,11 +1016,11 @@ static int nl80211_parse_key_old(struct genl_info *info, struct key_parse *k) if (info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES]) { struct nlattr *kdt[NUM_NL80211_KEY_DEFAULT_TYPES]; - int err = nla_parse_nested(kdt, - NUM_NL80211_KEY_DEFAULT_TYPES - 1, - info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES], - nl80211_key_default_policy, - info->extack); + int err = nla_parse_nested_deprecated(kdt, + NUM_NL80211_KEY_DEFAULT_TYPES - 1, + info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES], + nl80211_key_default_policy, + info->extack); if (err) return err; @@ -1187,7 +1209,7 @@ static struct ieee80211_channel *nl80211_get_valid_chan(struct wiphy *wiphy, static int nl80211_put_iftypes(struct sk_buff *msg, u32 attr, u16 ifmodes) { - struct nlattr *nl_modes = nla_nest_start(msg, attr); + struct nlattr *nl_modes = nla_nest_start_noflag(msg, attr); int i; if (!nl_modes) @@ -1215,8 +1237,8 @@ static int nl80211_put_iface_combinations(struct wiphy *wiphy, struct nlattr *nl_combis; int i, j; - nl_combis = nla_nest_start(msg, - NL80211_ATTR_INTERFACE_COMBINATIONS); + nl_combis = nla_nest_start_noflag(msg, + NL80211_ATTR_INTERFACE_COMBINATIONS); if 
(!nl_combis) goto nla_put_failure; @@ -1226,18 +1248,19 @@ static int nl80211_put_iface_combinations(struct wiphy *wiphy, c = &wiphy->iface_combinations[i]; - nl_combi = nla_nest_start(msg, i + 1); + nl_combi = nla_nest_start_noflag(msg, i + 1); if (!nl_combi) goto nla_put_failure; - nl_limits = nla_nest_start(msg, NL80211_IFACE_COMB_LIMITS); + nl_limits = nla_nest_start_noflag(msg, + NL80211_IFACE_COMB_LIMITS); if (!nl_limits) goto nla_put_failure; for (j = 0; j < c->n_limits; j++) { struct nlattr *nl_limit; - nl_limit = nla_nest_start(msg, j + 1); + nl_limit = nla_nest_start_noflag(msg, j + 1); if (!nl_limit) goto nla_put_failure; if (nla_put_u32(msg, NL80211_IFACE_LIMIT_MAX, @@ -1290,7 +1313,8 @@ static int nl80211_send_wowlan_tcp_caps(struct cfg80211_registered_device *rdev, if (!tcp) return 0; - nl_tcp = nla_nest_start(msg, NL80211_WOWLAN_TRIG_TCP_CONNECTION); + nl_tcp = nla_nest_start_noflag(msg, + NL80211_WOWLAN_TRIG_TCP_CONNECTION); if (!nl_tcp) return -ENOBUFS; @@ -1330,7 +1354,8 @@ static int nl80211_send_wowlan(struct sk_buff *msg, if (!rdev->wiphy.wowlan) return 0; - nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED); + nl_wowlan = nla_nest_start_noflag(msg, + NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED); if (!nl_wowlan) return -ENOBUFS; @@ -1459,7 +1484,8 @@ static int nl80211_send_band_rateinfo(struct sk_buff *msg, if (sband->n_iftype_data) { struct nlattr *nl_iftype_data = - nla_nest_start(msg, NL80211_BAND_ATTR_IFTYPE_DATA); + nla_nest_start_noflag(msg, + NL80211_BAND_ATTR_IFTYPE_DATA); int err; if (!nl_iftype_data) @@ -1468,7 +1494,7 @@ static int nl80211_send_band_rateinfo(struct sk_buff *msg, for (i = 0; i < sband->n_iftype_data; i++) { struct nlattr *iftdata; - iftdata = nla_nest_start(msg, i + 1); + iftdata = nla_nest_start_noflag(msg, i + 1); if (!iftdata) return -ENOBUFS; @@ -1484,12 +1510,12 @@ static int nl80211_send_band_rateinfo(struct sk_buff *msg, } /* add bitrates */ - nl_rates = nla_nest_start(msg, NL80211_BAND_ATTR_RATES); + nl_rates = nla_nest_start_noflag(msg, NL80211_BAND_ATTR_RATES); if (!nl_rates) return -ENOBUFS; for (i = 0; i < sband->n_bitrates; i++) { - nl_rate = nla_nest_start(msg, i); + nl_rate = nla_nest_start_noflag(msg, i); if (!nl_rate) return -ENOBUFS; @@ -1522,12 +1548,12 @@ nl80211_send_mgmt_stypes(struct sk_buff *msg, if (!mgmt_stypes) return 0; - nl_ifs = nla_nest_start(msg, NL80211_ATTR_TX_FRAME_TYPES); + nl_ifs = nla_nest_start_noflag(msg, NL80211_ATTR_TX_FRAME_TYPES); if (!nl_ifs) return -ENOBUFS; for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { - nl_ftypes = nla_nest_start(msg, ift); + nl_ftypes = nla_nest_start_noflag(msg, ift); if (!nl_ftypes) return -ENOBUFS; i = 0; @@ -1545,12 +1571,12 @@ nl80211_send_mgmt_stypes(struct sk_buff *msg, nla_nest_end(msg, nl_ifs); - nl_ifs = nla_nest_start(msg, NL80211_ATTR_RX_FRAME_TYPES); + nl_ifs = nla_nest_start_noflag(msg, NL80211_ATTR_RX_FRAME_TYPES); if (!nl_ifs) return -ENOBUFS; for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { - nl_ftypes = nla_nest_start(msg, ift); + nl_ftypes = nla_nest_start_noflag(msg, ift); if (!nl_ftypes) return -ENOBUFS; i = 0; @@ -1668,7 +1694,7 @@ nl80211_send_pmsr_ftm_capa(const struct cfg80211_pmsr_capabilities *cap, if (!cap->ftm.supported) return 0; - ftm = nla_nest_start(msg, NL80211_PMSR_TYPE_FTM); + ftm = nla_nest_start_noflag(msg, NL80211_PMSR_TYPE_FTM); if (!ftm) return -ENOBUFS; @@ -1716,7 +1742,7 @@ static int nl80211_send_pmsr_capa(struct cfg80211_registered_device *rdev, * will genlmsg_cancel() if we fail */ - pmsr = nla_nest_start(msg, 
NL80211_ATTR_PEER_MEASUREMENTS); + pmsr = nla_nest_start_noflag(msg, NL80211_ATTR_PEER_MEASUREMENTS); if (!pmsr) return -ENOBUFS; @@ -1731,7 +1757,7 @@ static int nl80211_send_pmsr_capa(struct cfg80211_registered_device *rdev, nla_put_flag(msg, NL80211_PMSR_ATTR_RANDOMIZE_MAC_ADDR)) return -ENOBUFS; - caps = nla_nest_start(msg, NL80211_PMSR_ATTR_TYPE_CAPA); + caps = nla_nest_start_noflag(msg, NL80211_PMSR_ATTR_TYPE_CAPA); if (!caps) return -ENOBUFS; @@ -1892,7 +1918,8 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, break; /* fall through */ case 3: - nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS); + nl_bands = nla_nest_start_noflag(msg, + NL80211_ATTR_WIPHY_BANDS); if (!nl_bands) goto nla_put_failure; @@ -1905,7 +1932,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, if (!sband) continue; - nl_band = nla_nest_start(msg, band); + nl_band = nla_nest_start_noflag(msg, band); if (!nl_band) goto nla_put_failure; @@ -1919,15 +1946,16 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, /* fall through */ default: /* add frequencies */ - nl_freqs = nla_nest_start( - msg, NL80211_BAND_ATTR_FREQS); + nl_freqs = nla_nest_start_noflag(msg, + NL80211_BAND_ATTR_FREQS); if (!nl_freqs) goto nla_put_failure; for (i = state->chan_start - 1; i < sband->n_channels; i++) { - nl_freq = nla_nest_start(msg, i); + nl_freq = nla_nest_start_noflag(msg, + i); if (!nl_freq) goto nla_put_failure; @@ -1972,7 +2000,8 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, break; /* fall through */ case 4: - nl_cmds = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_COMMANDS); + nl_cmds = nla_nest_start_noflag(msg, + NL80211_ATTR_SUPPORTED_COMMANDS); if (!nl_cmds) goto nla_put_failure; @@ -2120,7 +2149,8 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, const struct nl80211_vendor_cmd_info *info; struct nlattr *nested; - nested = nla_nest_start(msg, NL80211_ATTR_VENDOR_DATA); + nested = nla_nest_start_noflag(msg, + NL80211_ATTR_VENDOR_DATA); if (!nested) goto nla_put_failure; @@ -2136,8 +2166,8 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, const struct nl80211_vendor_cmd_info *info; struct nlattr *nested; - nested = nla_nest_start(msg, - NL80211_ATTR_VENDOR_EVENTS); + nested = nla_nest_start_noflag(msg, + NL80211_ATTR_VENDOR_EVENTS); if (!nested) goto nla_put_failure; @@ -2174,7 +2204,8 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, struct nlattr *nested; u32 bss_select_support = rdev->wiphy.bss_select_support; - nested = nla_nest_start(msg, NL80211_ATTR_BSS_SELECT); + nested = nla_nest_start_noflag(msg, + NL80211_ATTR_BSS_SELECT); if (!nested) goto nla_put_failure; @@ -2196,8 +2227,8 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, rdev->wiphy.iftype_ext_capab) { struct nlattr *nested_ext_capab, *nested; - nested = nla_nest_start(msg, - NL80211_ATTR_IFTYPE_EXT_CAPA); + nested = nla_nest_start_noflag(msg, + NL80211_ATTR_IFTYPE_EXT_CAPA); if (!nested) goto nla_put_failure; @@ -2207,7 +2238,8 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, capab = &rdev->wiphy.iftype_ext_capab[i]; - nested_ext_capab = nla_nest_start(msg, i); + nested_ext_capab = nla_nest_start_noflag(msg, + i); if (!nested_ext_capab || nla_put_u32(msg, NL80211_ATTR_IFTYPE, capab->iftype) || @@ -2289,8 +2321,10 @@ static int nl80211_dump_wiphy_parse(struct sk_buff *skb, struct nl80211_dump_wiphy_state *state) { struct nlattr **tb = 
genl_family_attrbuf(&nl80211_fam); - int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, tb, - nl80211_fam.maxattr, nl80211_policy, NULL); + int ret = nlmsg_parse_deprecated(cb->nlh, + GENL_HDRLEN + nl80211_fam.hdrsize, + tb, nl80211_fam.maxattr, + nl80211_policy, NULL); /* ignore parse errors for backward compatibility */ if (ret) return 0; @@ -2733,10 +2767,11 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) nla_for_each_nested(nl_txq_params, info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS], rem_txq_params) { - result = nla_parse_nested(tb, NL80211_TXQ_ATTR_MAX, - nl_txq_params, - txq_params_policy, - info->extack); + result = nla_parse_nested_deprecated(tb, + NL80211_TXQ_ATTR_MAX, + nl_txq_params, + txq_params_policy, + info->extack); if (result) return result; result = parse_txq_params(tb, &txq_params); @@ -3193,8 +3228,7 @@ static int parse_monitor_flags(struct nlattr *nla, u32 *mntrflags) if (!nla) return -EINVAL; - if (nla_parse_nested(flags, NL80211_MNTR_FLAG_MAX, nla, - mntr_flags_policy, NULL)) + if (nla_parse_nested_deprecated(flags, NL80211_MNTR_FLAG_MAX, nla, mntr_flags_policy, NULL)) return -EINVAL; for (flag = 1; flag <= NL80211_MNTR_FLAG_MAX; flag++) @@ -3521,7 +3555,7 @@ static void get_key_callback(void *c, struct key_params *params) params->cipher))) goto nla_put_failure; - key = nla_nest_start(cookie->msg, NL80211_ATTR_KEY); + key = nla_nest_start_noflag(cookie->msg, NL80211_ATTR_KEY); if (!key) goto nla_put_failure; @@ -3634,8 +3668,11 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) if (key.idx < 0) return -EINVAL; - /* only support setting default key */ - if (!key.def && !key.defmgmt) + /* Only support setting default key and + * Extended Key ID action NL80211_KEY_SET_TX. 
+ */ + if (!key.def && !key.defmgmt && + !(key.p.mode == NL80211_KEY_SET_TX)) return -EINVAL; wdev_lock(dev->ieee80211_ptr); @@ -3659,7 +3696,7 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) #ifdef CONFIG_CFG80211_WEXT dev->ieee80211_ptr->wext.default_key = key.idx; #endif - } else { + } else if (key.defmgmt) { if (key.def_uni || !key.def_multi) { err = -EINVAL; goto out; @@ -3681,8 +3718,25 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) #ifdef CONFIG_CFG80211_WEXT dev->ieee80211_ptr->wext.default_mgmt_key = key.idx; #endif - } + } else if (key.p.mode == NL80211_KEY_SET_TX && + wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_EXT_KEY_ID)) { + u8 *mac_addr = NULL; + + if (info->attrs[NL80211_ATTR_MAC]) + mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); + + if (!mac_addr || key.idx < 0 || key.idx > 1) { + err = -EINVAL; + goto out; + } + err = rdev_add_key(rdev, dev, key.idx, + NL80211_KEYTYPE_PAIRWISE, + mac_addr, &key.p); + } else { + err = -EINVAL; + } out: wdev_unlock(dev->ieee80211_ptr); @@ -3843,8 +3897,7 @@ static struct cfg80211_acl_data *parse_acl_data(struct wiphy *wiphy, if (n_entries > wiphy->max_acl_mac_addrs) return ERR_PTR(-ENOTSUPP); - acl = kzalloc(sizeof(*acl) + (sizeof(struct mac_address) * n_entries), - GFP_KERNEL); + acl = kzalloc(struct_size(acl, mac_addrs, n_entries), GFP_KERNEL); if (!acl) return ERR_PTR(-ENOMEM); @@ -4054,8 +4107,10 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info, sband = rdev->wiphy.bands[band]; if (sband == NULL) return -EINVAL; - err = nla_parse_nested(tb, NL80211_TXRATE_MAX, tx_rates, - nl80211_txattr_policy, info->extack); + err = nla_parse_nested_deprecated(tb, NL80211_TXRATE_MAX, + tx_rates, + nl80211_txattr_policy, + info->extack); if (err) return err; if (tb[NL80211_TXRATE_LEGACY]) { @@ -4223,9 +4278,10 @@ static int nl80211_parse_beacon(struct cfg80211_registered_device *rdev, if (attrs[NL80211_ATTR_FTM_RESPONDER]) { struct nlattr *tb[NL80211_FTM_RESP_ATTR_MAX + 1]; - err = nla_parse_nested(tb, NL80211_FTM_RESP_ATTR_MAX, - attrs[NL80211_ATTR_FTM_RESPONDER], - NULL, NULL); + err = nla_parse_nested_deprecated(tb, + NL80211_FTM_RESP_ATTR_MAX, + attrs[NL80211_ATTR_FTM_RESPONDER], + NULL, NULL); if (err) return err; @@ -4633,8 +4689,7 @@ static int parse_station_flags(struct genl_info *info, if (!nla) return 0; - if (nla_parse_nested(flags, NL80211_STA_FLAG_MAX, nla, - sta_flags_policy, info->extack)) + if (nla_parse_nested_deprecated(flags, NL80211_STA_FLAG_MAX, nla, sta_flags_policy, info->extack)) return -EINVAL; /* @@ -4686,7 +4741,7 @@ bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, int attr) u16 bitrate_compat; enum nl80211_rate_info rate_flg; - rate = nla_nest_start(msg, attr); + rate = nla_nest_start_noflag(msg, attr); if (!rate) return false; @@ -4773,7 +4828,7 @@ static bool nl80211_put_signal(struct sk_buff *msg, u8 mask, s8 *signal, if (!mask) return true; - attr = nla_nest_start(msg, id); + attr = nla_nest_start_noflag(msg, id); if (!attr) return false; @@ -4808,7 +4863,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, nla_put_u32(msg, NL80211_ATTR_GENERATION, sinfo->generation)) goto nla_put_failure; - sinfoattr = nla_nest_start(msg, NL80211_ATTR_STA_INFO); + sinfoattr = nla_nest_start_noflag(msg, NL80211_ATTR_STA_INFO); if (!sinfoattr) goto nla_put_failure; @@ -4889,6 +4944,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, PUT_SINFO(TX_RETRIES, tx_retries, u32); 
PUT_SINFO(TX_FAILED, tx_failed, u32); PUT_SINFO(EXPECTED_THROUGHPUT, expected_throughput, u32); + PUT_SINFO(AIRTIME_LINK_METRIC, airtime_link_metric, u32); PUT_SINFO(BEACON_LOSS, beacon_loss_count, u32); PUT_SINFO(LOCAL_PM, local_pm, u32); PUT_SINFO(PEER_PM, peer_pm, u32); @@ -4896,7 +4952,8 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, PUT_SINFO(CONNECTED_TO_GATE, connected_to_gate, u8); if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_BSS_PARAM)) { - bss_param = nla_nest_start(msg, NL80211_STA_INFO_BSS_PARAM); + bss_param = nla_nest_start_noflag(msg, + NL80211_STA_INFO_BSS_PARAM); if (!bss_param) goto nla_put_failure; @@ -4939,7 +4996,8 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, struct nlattr *tidsattr; int tid; - tidsattr = nla_nest_start(msg, NL80211_STA_INFO_TID_STATS); + tidsattr = nla_nest_start_noflag(msg, + NL80211_STA_INFO_TID_STATS); if (!tidsattr) goto nla_put_failure; @@ -4952,7 +5010,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, if (!tidstats->filled) continue; - tidattr = nla_nest_start(msg, tid + 1); + tidattr = nla_nest_start_noflag(msg, tid + 1); if (!tidattr) goto nla_put_failure; @@ -5300,8 +5358,9 @@ static int nl80211_parse_sta_wme(struct genl_info *info, return 0; nla = info->attrs[NL80211_ATTR_STA_WME]; - err = nla_parse_nested(tb, NL80211_STA_WME_MAX, nla, - nl80211_sta_wme_policy, info->extack); + err = nla_parse_nested_deprecated(tb, NL80211_STA_WME_MAX, nla, + nl80211_sta_wme_policy, + info->extack); if (err) return err; @@ -5387,6 +5446,36 @@ static int nl80211_set_station_tdls(struct genl_info *info, return nl80211_parse_sta_wme(info, params); } +static int nl80211_parse_sta_txpower_setting(struct genl_info *info, + struct station_parameters *params) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + int idx; + + if (info->attrs[NL80211_ATTR_STA_TX_POWER_SETTING]) { + if (!rdev->ops->set_tx_power || + !wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_STA_TX_PWR)) + return -EOPNOTSUPP; + + idx = NL80211_ATTR_STA_TX_POWER_SETTING; + params->txpwr.type = nla_get_u8(info->attrs[idx]); + + if (params->txpwr.type == NL80211_TX_POWER_LIMITED) { + idx = NL80211_ATTR_STA_TX_POWER; + + if (info->attrs[idx]) + params->txpwr.power = + nla_get_s16(info->attrs[idx]); + else + return -EINVAL; + } + params->sta_modify_mask |= STATION_PARAM_APPLY_STA_TXPOWER; + } + + return 0; +} + static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -5480,6 +5569,10 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) return -EOPNOTSUPP; + err = nl80211_parse_sta_txpower_setting(info, ¶ms); + if (err) + return err; + /* Include parameters for TDLS peer (will check later) */ err = nl80211_set_station_tdls(info, ¶ms); if (err) @@ -5617,6 +5710,10 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) return -EOPNOTSUPP; + err = nl80211_parse_sta_txpower_setting(info, ¶ms); + if (err) + return err; + err = nl80211_parse_sta_channel_info(info, ¶ms); if (err) return err; @@ -5799,7 +5896,7 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 portid, u32 seq, nla_put_u32(msg, NL80211_ATTR_GENERATION, pinfo->generation)) goto nla_put_failure; - pinfoattr = nla_nest_start(msg, NL80211_ATTR_MPATH_INFO); + pinfoattr = nla_nest_start_noflag(msg, 
NL80211_ATTR_MPATH_INFO); if (!pinfoattr) goto nla_put_failure; if ((pinfo->filled & MPATH_INFO_FRAME_QLEN) && @@ -6250,7 +6347,7 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, NL80211_CMD_GET_MESH_CONFIG); if (!hdr) goto out; - pinfoattr = nla_nest_start(msg, NL80211_ATTR_MESH_CONFIG); + pinfoattr = nla_nest_start_noflag(msg, NL80211_ATTR_MESH_CONFIG); if (!pinfoattr) goto nla_put_failure; if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || @@ -6403,9 +6500,7 @@ do { \ if (!info->attrs[NL80211_ATTR_MESH_CONFIG]) return -EINVAL; - if (nla_parse_nested(tb, NL80211_MESHCONF_ATTR_MAX, - info->attrs[NL80211_ATTR_MESH_CONFIG], - nl80211_meshconf_params_policy, info->extack)) + if (nla_parse_nested_deprecated(tb, NL80211_MESHCONF_ATTR_MAX, info->attrs[NL80211_ATTR_MESH_CONFIG], nl80211_meshconf_params_policy, info->extack)) return -EINVAL; /* This makes sure that there aren't more than 32 mesh config @@ -6538,9 +6633,7 @@ static int nl80211_parse_mesh_setup(struct genl_info *info, if (!info->attrs[NL80211_ATTR_MESH_SETUP]) return -EINVAL; - if (nla_parse_nested(tb, NL80211_MESH_SETUP_ATTR_MAX, - info->attrs[NL80211_ATTR_MESH_SETUP], - nl80211_mesh_setup_params_policy, info->extack)) + if (nla_parse_nested_deprecated(tb, NL80211_MESH_SETUP_ATTR_MAX, info->attrs[NL80211_ATTR_MESH_SETUP], nl80211_mesh_setup_params_policy, info->extack)) return -EINVAL; if (tb[NL80211_MESH_SETUP_ENABLE_VENDOR_SYNC]) @@ -6629,7 +6722,7 @@ static int nl80211_put_regdom(const struct ieee80211_regdomain *regdom, nla_put_u8(msg, NL80211_ATTR_DFS_REGION, regdom->dfs_region))) goto nla_put_failure; - nl_reg_rules = nla_nest_start(msg, NL80211_ATTR_REG_RULES); + nl_reg_rules = nla_nest_start_noflag(msg, NL80211_ATTR_REG_RULES); if (!nl_reg_rules) goto nla_put_failure; @@ -6644,7 +6737,7 @@ static int nl80211_put_regdom(const struct ieee80211_regdomain *regdom, freq_range = ®_rule->freq_range; power_rule = ®_rule->power_rule; - nl_reg_rule = nla_nest_start(msg, i); + nl_reg_rule = nla_nest_start_noflag(msg, i); if (!nl_reg_rule) goto nla_put_failure; @@ -6882,7 +6975,7 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) struct nlattr *nl_reg_rule; char *alpha2; int rem_reg_rules, r; - u32 num_rules = 0, rule_idx = 0, size_of_regd; + u32 num_rules = 0, rule_idx = 0; enum nl80211_dfs_regions dfs_region = NL80211_DFS_UNSET; struct ieee80211_regdomain *rd; @@ -6907,10 +7000,7 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) if (!reg_is_valid_request(alpha2)) return -EINVAL; - size_of_regd = sizeof(struct ieee80211_regdomain) + - num_rules * sizeof(struct ieee80211_reg_rule); - - rd = kzalloc(size_of_regd, GFP_KERNEL); + rd = kzalloc(struct_size(rd, reg_rules, num_rules), GFP_KERNEL); if (!rd) return -ENOMEM; @@ -6927,9 +7017,9 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES], rem_reg_rules) { - r = nla_parse_nested(tb, NL80211_REG_RULE_ATTR_MAX, - nl_reg_rule, reg_rule_policy, - info->extack); + r = nla_parse_nested_deprecated(tb, NL80211_REG_RULE_ATTR_MAX, + nl_reg_rule, reg_rule_policy, + info->extack); if (r) goto bad_reg; r = parse_reg_rule(tb, &rd->reg_rules[rule_idx]); @@ -7000,8 +7090,9 @@ static int parse_bss_select(struct nlattr *nla, struct wiphy *wiphy, if (!nla_ok(nest, nla_len(nest))) return -EINVAL; - err = nla_parse_nested(attr, NL80211_BSS_SELECT_ATTR_MAX, nest, - nl80211_bss_select_policy, NULL); + err = nla_parse_nested_deprecated(attr, 
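Both parse_acl_data() and nl80211_set_reg() above swap open-coded sizeof arithmetic for struct_size(), which evaluates sizeof(*p) + n * sizeof(elem) with saturation on overflow, so an attacker-influenced count cannot wrap around into a short allocation. A sketch of the pattern on an invented flexible-array struct:

#include <linux/overflow.h>
#include <linux/slab.h>

struct demo_regd {
	unsigned int n_rules;
	struct {
		unsigned int lo, hi;
	} rules[];			/* flexible array member */
};

static struct demo_regd *demo_regd_alloc(unsigned int num_rules)
{
	struct demo_regd *rd;

	/* saturates to SIZE_MAX on overflow, so kzalloc() simply fails */
	rd = kzalloc(struct_size(rd, rules, num_rules), GFP_KERNEL);
	if (!rd)
		return NULL;
	rd->n_rules = num_rules;
	return rd;
}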
NL80211_BSS_SELECT_ATTR_MAX, + nest, nl80211_bss_select_policy, + NULL); if (err) return err; @@ -7494,8 +7585,10 @@ nl80211_parse_sched_scan_plans(struct wiphy *wiphy, int n_plans, if (WARN_ON(i >= n_plans)) return -EINVAL; - err = nla_parse_nested(plan, NL80211_SCHED_SCAN_PLAN_MAX, - attr, nl80211_plan_policy, NULL); + err = nla_parse_nested_deprecated(plan, + NL80211_SCHED_SCAN_PLAN_MAX, + attr, nl80211_plan_policy, + NULL); if (err) return err; @@ -7537,6 +7630,41 @@ nl80211_parse_sched_scan_plans(struct wiphy *wiphy, int n_plans, return 0; } +static int +nl80211_parse_sched_scan_per_band_rssi(struct wiphy *wiphy, + struct cfg80211_match_set *match_sets, + struct nlattr *tb_band_rssi, + s32 rssi_thold) +{ + struct nlattr *attr; + int i, tmp, ret = 0; + + if (!wiphy_ext_feature_isset(wiphy, + NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD)) { + if (tb_band_rssi) + ret = -EOPNOTSUPP; + else + for (i = 0; i < NUM_NL80211_BANDS; i++) + match_sets->per_band_rssi_thold[i] = + NL80211_SCAN_RSSI_THOLD_OFF; + return ret; + } + + for (i = 0; i < NUM_NL80211_BANDS; i++) + match_sets->per_band_rssi_thold[i] = rssi_thold; + + nla_for_each_nested(attr, tb_band_rssi, tmp) { + enum nl80211_band band = nla_type(attr); + + if (band < 0 || band >= NUM_NL80211_BANDS) + return -EINVAL; + + match_sets->per_band_rssi_thold[band] = nla_get_s32(attr); + } + + return 0; +} + static struct cfg80211_sched_scan_request * nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, struct nlattr **attrs, int max_match_sets) @@ -7581,10 +7709,11 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, tmp) { struct nlattr *rssi; - err = nla_parse_nested(tb, - NL80211_SCHED_SCAN_MATCH_ATTR_MAX, - attr, nl80211_match_policy, - NULL); + err = nla_parse_nested_deprecated(tb, + NL80211_SCHED_SCAN_MATCH_ATTR_MAX, + attr, + nl80211_match_policy, + NULL); if (err) return ERR_PTR(err); @@ -7768,51 +7897,64 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, tmp) { struct nlattr *ssid, *bssid, *rssi; - err = nla_parse_nested(tb, - NL80211_SCHED_SCAN_MATCH_ATTR_MAX, - attr, nl80211_match_policy, - NULL); + err = nla_parse_nested_deprecated(tb, + NL80211_SCHED_SCAN_MATCH_ATTR_MAX, + attr, + nl80211_match_policy, + NULL); if (err) goto out_free; ssid = tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID]; bssid = tb[NL80211_SCHED_SCAN_MATCH_ATTR_BSSID]; - if (ssid || bssid) { - if (WARN_ON(i >= n_match_sets)) { - /* this indicates a programming error, - * the loop above should have verified - * things properly - */ + + if (!ssid && !bssid) { + i++; + continue; + } + + if (WARN_ON(i >= n_match_sets)) { + /* this indicates a programming error, + * the loop above should have verified + * things properly + */ + err = -EINVAL; + goto out_free; + } + + if (ssid) { + if (nla_len(ssid) > IEEE80211_MAX_SSID_LEN) { err = -EINVAL; goto out_free; } - - if (ssid) { - if (nla_len(ssid) > IEEE80211_MAX_SSID_LEN) { - err = -EINVAL; - goto out_free; - } - memcpy(request->match_sets[i].ssid.ssid, - nla_data(ssid), nla_len(ssid)); - request->match_sets[i].ssid.ssid_len = - nla_len(ssid); - } - if (bssid) { - if (nla_len(bssid) != ETH_ALEN) { - err = -EINVAL; - goto out_free; - } - memcpy(request->match_sets[i].bssid, - nla_data(bssid), ETH_ALEN); + memcpy(request->match_sets[i].ssid.ssid, + nla_data(ssid), nla_len(ssid)); + request->match_sets[i].ssid.ssid_len = + nla_len(ssid); + } + if (bssid) { + if (nla_len(bssid) != ETH_ALEN) { + err = -EINVAL; + goto out_free; } + 
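/*
 * [Editor's note: illustrative sketch, not part of the patch.] In the
 * NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI nest parsed by the new function
 * above, the attribute *type* of each entry is the nl80211_band value and
 * the payload is the s32 RSSI threshold for that band. A builder for such
 * a nest (sender side, assumed shape) could look like:
 */
#include <net/cfg80211.h>
#include <net/netlink.h>

static int example_put_band_rssi(struct sk_buff *msg)
{
	struct nlattr *nest;

	nest = nla_nest_start_noflag(msg,
				     NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI);
	if (!nest)
		return -EMSGSIZE;

	/* nla_type() encodes the band, matching the parser's nla_type(attr) */
	if (nla_put_s32(msg, NL80211_BAND_2GHZ, -70) ||
	    nla_put_s32(msg, NL80211_BAND_5GHZ, -60)) {
		nla_nest_cancel(msg, nest);
		return -EMSGSIZE;
	}

	nla_nest_end(msg, nest);
	return 0;
}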
memcpy(request->match_sets[i].bssid, + nla_data(bssid), ETH_ALEN); + } - /* special attribute - old implementation w/a */ + /* special attribute - old implementation w/a */ + request->match_sets[i].rssi_thold = default_match_rssi; + rssi = tb[NL80211_SCHED_SCAN_MATCH_ATTR_RSSI]; + if (rssi) request->match_sets[i].rssi_thold = - default_match_rssi; - rssi = tb[NL80211_SCHED_SCAN_MATCH_ATTR_RSSI]; - if (rssi) - request->match_sets[i].rssi_thold = - nla_get_s32(rssi); - } + nla_get_s32(rssi); + + /* Parse per band RSSI attribute */ + err = nl80211_parse_sched_scan_per_band_rssi(wiphy, + &request->match_sets[i], + tb[NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI], + request->match_sets[i].rssi_thold); + if (err) + goto out_free; + i++; } @@ -8061,7 +8203,7 @@ static int nl80211_notify_radar_detection(struct sk_buff *skb, cfg80211_sched_dfs_chan_update(rdev); - memcpy(&rdev->radar_chandef, &chandef, sizeof(chandef)); + rdev->radar_chandef = chandef; /* Propagate this notification to other radios as well */ queue_work(cfg80211_wq, &rdev->propagate_radar_detect_wk); @@ -8143,9 +8285,9 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) if (err) return err; - err = nla_parse_nested(csa_attrs, NL80211_ATTR_MAX, - info->attrs[NL80211_ATTR_CSA_IES], - nl80211_policy, info->extack); + err = nla_parse_nested_deprecated(csa_attrs, NL80211_ATTR_MAX, + info->attrs[NL80211_ATTR_CSA_IES], + nl80211_policy, info->extack); if (err) return err; @@ -8269,7 +8411,7 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, NL80211_ATTR_PAD)) goto nla_put_failure; - bss = nla_nest_start(msg, NL80211_ATTR_BSS); + bss = nla_nest_start_noflag(msg, NL80211_ATTR_BSS); if (!bss) goto nla_put_failure; if ((!is_zero_ether_addr(res->bssid) && @@ -8446,7 +8588,7 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; - infoattr = nla_nest_start(msg, NL80211_ATTR_SURVEY_INFO); + infoattr = nla_nest_start_noflag(msg, NL80211_ATTR_SURVEY_INFO); if (!infoattr) goto nla_put_failure; @@ -9287,7 +9429,7 @@ __cfg80211_alloc_vendor_skb(struct cfg80211_registered_device *rdev, goto nla_put_failure; } - data = nla_nest_start(skb, attr); + data = nla_nest_start_noflag(skb, attr); if (!data) goto nla_put_failure; @@ -9420,9 +9562,10 @@ static int nl80211_testmode_dump(struct sk_buff *skb, } else { struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam); - err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, - attrbuf, nl80211_fam.maxattr, - nl80211_policy, NULL); + err = nlmsg_parse_deprecated(cb->nlh, + GENL_HDRLEN + nl80211_fam.hdrsize, + attrbuf, nl80211_fam.maxattr, + nl80211_policy, NULL); if (err) goto out_err; @@ -9461,7 +9604,7 @@ static int nl80211_testmode_dump(struct sk_buff *skb, break; } - tmdata = nla_nest_start(skb, NL80211_ATTR_TESTDATA); + tmdata = nla_nest_start_noflag(skb, NL80211_ATTR_TESTDATA); if (!tmdata) { genlmsg_cancel(skb, hdr); break; @@ -10546,8 +10689,9 @@ static int nl80211_set_cqm(struct sk_buff *skb, struct genl_info *info) if (!cqm) return -EINVAL; - err = nla_parse_nested(attrs, NL80211_ATTR_CQM_MAX, cqm, - nl80211_attr_cqm_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, NL80211_ATTR_CQM_MAX, cqm, + nl80211_attr_cqm_policy, + info->extack); if (err) return err; @@ -10739,12 +10883,12 @@ static int nl80211_send_wowlan_patterns(struct sk_buff *msg, if (!wowlan->n_patterns) return 0; - nl_pats = nla_nest_start(msg, 
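/*
 * [Editor's note: illustrative sketch, not part of the patch.] The
 * "rdev->radar_chandef = chandef" hunk above swaps a memcpy() for plain
 * struct assignment: the compiler emits the same copy, but the assignment
 * is type-checked and cannot be handed a wrong size argument. Schematically:
 */
#include <net/cfg80211.h>

static void example_copy_chandef(struct cfg80211_chan_def *dst,
				 const struct cfg80211_chan_def *src)
{
	*dst = *src;	/* preferred over memcpy(dst, src, sizeof(*dst)) */
}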
NL80211_WOWLAN_TRIG_PKT_PATTERN); + nl_pats = nla_nest_start_noflag(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN); if (!nl_pats) return -ENOBUFS; for (i = 0; i < wowlan->n_patterns; i++) { - nl_pat = nla_nest_start(msg, i + 1); + nl_pat = nla_nest_start_noflag(msg, i + 1); if (!nl_pat) return -ENOBUFS; pat_len = wowlan->patterns[i].pattern_len; @@ -10770,7 +10914,8 @@ static int nl80211_send_wowlan_tcp(struct sk_buff *msg, if (!tcp) return 0; - nl_tcp = nla_nest_start(msg, NL80211_WOWLAN_TRIG_TCP_CONNECTION); + nl_tcp = nla_nest_start_noflag(msg, + NL80211_WOWLAN_TRIG_TCP_CONNECTION); if (!nl_tcp) return -ENOBUFS; @@ -10814,7 +10959,7 @@ static int nl80211_send_wowlan_nd(struct sk_buff *msg, if (!req) return 0; - nd = nla_nest_start(msg, NL80211_WOWLAN_TRIG_NET_DETECT); + nd = nla_nest_start_noflag(msg, NL80211_WOWLAN_TRIG_NET_DETECT); if (!nd) return -ENOBUFS; @@ -10840,7 +10985,7 @@ static int nl80211_send_wowlan_nd(struct sk_buff *msg, return -ENOBUFS; } - freqs = nla_nest_start(msg, NL80211_ATTR_SCAN_FREQUENCIES); + freqs = nla_nest_start_noflag(msg, NL80211_ATTR_SCAN_FREQUENCIES); if (!freqs) return -ENOBUFS; @@ -10852,12 +10997,13 @@ static int nl80211_send_wowlan_nd(struct sk_buff *msg, nla_nest_end(msg, freqs); if (req->n_match_sets) { - matches = nla_nest_start(msg, NL80211_ATTR_SCHED_SCAN_MATCH); + matches = nla_nest_start_noflag(msg, + NL80211_ATTR_SCHED_SCAN_MATCH); if (!matches) return -ENOBUFS; for (i = 0; i < req->n_match_sets; i++) { - match = nla_nest_start(msg, i); + match = nla_nest_start_noflag(msg, i); if (!match) return -ENOBUFS; @@ -10870,12 +11016,12 @@ static int nl80211_send_wowlan_nd(struct sk_buff *msg, nla_nest_end(msg, matches); } - scan_plans = nla_nest_start(msg, NL80211_ATTR_SCHED_SCAN_PLANS); + scan_plans = nla_nest_start_noflag(msg, NL80211_ATTR_SCHED_SCAN_PLANS); if (!scan_plans) return -ENOBUFS; for (i = 0; i < req->n_scan_plans; i++) { - scan_plan = nla_nest_start(msg, i + 1); + scan_plan = nla_nest_start_noflag(msg, i + 1); if (!scan_plan) return -ENOBUFS; @@ -10924,7 +11070,8 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) if (rdev->wiphy.wowlan_config) { struct nlattr *nl_wowlan; - nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS); + nl_wowlan = nla_nest_start_noflag(msg, + NL80211_ATTR_WOWLAN_TRIGGERS); if (!nl_wowlan) goto nla_put_failure; @@ -10982,8 +11129,8 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, if (!rdev->wiphy.wowlan->tcp) return -EINVAL; - err = nla_parse_nested(tb, MAX_NL80211_WOWLAN_TCP, attr, - nl80211_wowlan_tcp_policy, NULL); + err = nla_parse_nested_deprecated(tb, MAX_NL80211_WOWLAN_TCP, attr, + nl80211_wowlan_tcp_policy, NULL); if (err) return err; @@ -11128,8 +11275,8 @@ static int nl80211_parse_wowlan_nd(struct cfg80211_registered_device *rdev, goto out; } - err = nla_parse_nested(tb, NL80211_ATTR_MAX, attr, nl80211_policy, - NULL); + err = nla_parse_nested_deprecated(tb, NL80211_ATTR_MAX, attr, + nl80211_policy, NULL); if (err) goto out; @@ -11164,9 +11311,9 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) goto set_wakeup; } - err = nla_parse_nested(tb, MAX_NL80211_WOWLAN_TRIG, - info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS], - nl80211_wowlan_policy, info->extack); + err = nla_parse_nested_deprecated(tb, MAX_NL80211_WOWLAN_TRIG, + info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS], + nl80211_wowlan_policy, info->extack); if (err) return err; @@ -11248,9 +11395,11 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) 
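/*
 * [Editor's note: illustrative sketch, not part of the patch.] The blanket
 * nla_parse_nested() -> nla_parse_nested_deprecated() conversions keep the
 * old, liberal validation for existing commands: the _deprecated variant
 * neither requires NLA_F_NESTED on the container nor applies the new
 * strict checks, so old userspace binaries keep working. New code would
 * use the strict form, e.g.:
 */
#include <net/netlink.h>

static int example_strict_parse(struct nlattr *tb[], int maxtype,
				const struct nlattr *nest,
				const struct nla_policy *policy,
				struct netlink_ext_ack *extack)
{
	/* rejects missing NLA_F_NESTED, unknown and malformed attributes */
	return nla_parse_nested(tb, maxtype, nest, policy, extack);
}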
rem) { u8 *mask_pat; - err = nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, - nl80211_packet_pattern_policy, - info->extack); + err = nla_parse_nested_deprecated(pat_tb, + MAX_NL80211_PKTPAT, + pat, + nl80211_packet_pattern_policy, + info->extack); if (err) goto error; @@ -11358,12 +11507,12 @@ static int nl80211_send_coalesce_rules(struct sk_buff *msg, if (!rdev->coalesce->n_rules) return 0; - nl_rules = nla_nest_start(msg, NL80211_ATTR_COALESCE_RULE); + nl_rules = nla_nest_start_noflag(msg, NL80211_ATTR_COALESCE_RULE); if (!nl_rules) return -ENOBUFS; for (i = 0; i < rdev->coalesce->n_rules; i++) { - nl_rule = nla_nest_start(msg, i + 1); + nl_rule = nla_nest_start_noflag(msg, i + 1); if (!nl_rule) return -ENOBUFS; @@ -11376,13 +11525,13 @@ static int nl80211_send_coalesce_rules(struct sk_buff *msg, rule->condition)) return -ENOBUFS; - nl_pats = nla_nest_start(msg, - NL80211_ATTR_COALESCE_RULE_PKT_PATTERN); + nl_pats = nla_nest_start_noflag(msg, + NL80211_ATTR_COALESCE_RULE_PKT_PATTERN); if (!nl_pats) return -ENOBUFS; for (j = 0; j < rule->n_patterns; j++) { - nl_pat = nla_nest_start(msg, j + 1); + nl_pat = nla_nest_start_noflag(msg, j + 1); if (!nl_pat) return -ENOBUFS; pat_len = rule->patterns[j].pattern_len; @@ -11463,8 +11612,8 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, int rem, pat_len, mask_len, pkt_offset, n_patterns = 0; struct nlattr *pat_tb[NUM_NL80211_PKTPAT]; - err = nla_parse_nested(tb, NL80211_ATTR_COALESCE_RULE_MAX, rule, - nl80211_coalesce_policy, NULL); + err = nla_parse_nested_deprecated(tb, NL80211_ATTR_COALESCE_RULE_MAX, + rule, nl80211_coalesce_policy, NULL); if (err) return err; @@ -11499,8 +11648,10 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, rem) { u8 *mask_pat; - err = nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, - nl80211_packet_pattern_policy, NULL); + err = nla_parse_nested_deprecated(pat_tb, MAX_NL80211_PKTPAT, + pat, + nl80211_packet_pattern_policy, + NULL); if (err) return err; @@ -11622,9 +11773,9 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_REKEY_DATA]) return -EINVAL; - err = nla_parse_nested(tb, MAX_NL80211_REKEY_DATA, - info->attrs[NL80211_ATTR_REKEY_DATA], - nl80211_rekey_policy, info->extack); + err = nla_parse_nested_deprecated(tb, MAX_NL80211_REKEY_DATA, + info->attrs[NL80211_ATTR_REKEY_DATA], + nl80211_rekey_policy, info->extack); if (err) return err; @@ -11936,9 +12087,10 @@ static int nl80211_nan_add_func(struct sk_buff *skb, if (!info->attrs[NL80211_ATTR_NAN_FUNC]) return -EINVAL; - err = nla_parse_nested(tb, NL80211_NAN_FUNC_ATTR_MAX, - info->attrs[NL80211_ATTR_NAN_FUNC], - nl80211_nan_func_policy, info->extack); + err = nla_parse_nested_deprecated(tb, NL80211_NAN_FUNC_ATTR_MAX, + info->attrs[NL80211_ATTR_NAN_FUNC], + nl80211_nan_func_policy, + info->extack); if (err) return err; @@ -12034,9 +12186,11 @@ static int nl80211_nan_add_func(struct sk_buff *skb, if (tb[NL80211_NAN_FUNC_SRF]) { struct nlattr *srf_tb[NUM_NL80211_NAN_SRF_ATTR]; - err = nla_parse_nested(srf_tb, NL80211_NAN_SRF_ATTR_MAX, - tb[NL80211_NAN_FUNC_SRF], - nl80211_nan_srf_policy, info->extack); + err = nla_parse_nested_deprecated(srf_tb, + NL80211_NAN_SRF_ATTR_MAX, + tb[NL80211_NAN_FUNC_SRF], + nl80211_nan_srf_policy, + info->extack); if (err) goto out; @@ -12134,7 +12288,7 @@ out: NL80211_ATTR_PAD)) goto nla_put_failure; - func_attr = nla_nest_start(msg, NL80211_ATTR_NAN_FUNC); + func_attr = nla_nest_start_noflag(msg, 
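/*
 * [Editor's note: illustrative sketch, not part of the patch; the mask
 * length computation is not visible in this hunk and is recalled from the
 * surrounding function.] The packet-pattern attributes parsed above carry
 * a pattern plus a bitmask with one bit per pattern byte, so the mask
 * length is derived as a round-up division:
 */
#include <linux/kernel.h>

static u32 example_mask_len(u32 pat_len)
{
	/* e.g. a 20-byte pattern needs DIV_ROUND_UP(20, 8) = 3 mask bytes */
	return DIV_ROUND_UP(pat_len, 8);
}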
NL80211_ATTR_NAN_FUNC); if (!func_attr) goto nla_put_failure; @@ -12251,11 +12405,12 @@ void cfg80211_nan_match(struct wireless_dev *wdev, nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, match->addr)) goto nla_put_failure; - match_attr = nla_nest_start(msg, NL80211_ATTR_NAN_MATCH); + match_attr = nla_nest_start_noflag(msg, NL80211_ATTR_NAN_MATCH); if (!match_attr) goto nla_put_failure; - local_func_attr = nla_nest_start(msg, NL80211_NAN_MATCH_FUNC_LOCAL); + local_func_attr = nla_nest_start_noflag(msg, + NL80211_NAN_MATCH_FUNC_LOCAL); if (!local_func_attr) goto nla_put_failure; @@ -12264,7 +12419,8 @@ void cfg80211_nan_match(struct wireless_dev *wdev, nla_nest_end(msg, local_func_attr); - peer_func_attr = nla_nest_start(msg, NL80211_NAN_MATCH_FUNC_PEER); + peer_func_attr = nla_nest_start_noflag(msg, + NL80211_NAN_MATCH_FUNC_PEER); if (!peer_func_attr) goto nla_put_failure; @@ -12330,7 +12486,7 @@ void cfg80211_nan_func_terminated(struct wireless_dev *wdev, NL80211_ATTR_PAD)) goto nla_put_failure; - func_attr = nla_nest_start(msg, NL80211_ATTR_NAN_FUNC); + func_attr = nla_nest_start_noflag(msg, NL80211_ATTR_NAN_FUNC); if (!func_attr) goto nla_put_failure; @@ -12567,8 +12723,10 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, return 0; } - err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, attrbuf, - nl80211_fam.maxattr, nl80211_policy, NULL); + err = nlmsg_parse_deprecated(cb->nlh, + GENL_HDRLEN + nl80211_fam.hdrsize, + attrbuf, nl80211_fam.maxattr, + nl80211_policy, NULL); if (err) return err; @@ -12679,7 +12837,8 @@ static int nl80211_vendor_cmd_dump(struct sk_buff *skb, break; } - vendor_data = nla_nest_start(skb, NL80211_ATTR_VENDOR_DATA); + vendor_data = nla_nest_start_noflag(skb, + NL80211_ATTR_VENDOR_DATA); if (!vendor_data) { genlmsg_cancel(skb, hdr); break; @@ -13223,7 +13382,8 @@ static int nl80211_get_ftm_responder_stats(struct sk_buff *skb, if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; - ftm_stats_attr = nla_nest_start(msg, NL80211_ATTR_FTM_RESPONDER_STATS); + ftm_stats_attr = nla_nest_start_noflag(msg, + NL80211_ATTR_FTM_RESPONDER_STATS); if (!ftm_stats_attr) goto nla_put_failure; @@ -13259,6 +13419,72 @@ nla_put_failure: return -ENOBUFS; } +static int nl80211_update_owe_info(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct cfg80211_update_owe_info owe_info; + struct net_device *dev = info->user_ptr[1]; + + if (!rdev->ops->update_owe_info) + return -EOPNOTSUPP; + + if (!info->attrs[NL80211_ATTR_STATUS_CODE] || + !info->attrs[NL80211_ATTR_MAC]) + return -EINVAL; + + memset(&owe_info, 0, sizeof(owe_info)); + owe_info.status = nla_get_u16(info->attrs[NL80211_ATTR_STATUS_CODE]); + nla_memcpy(owe_info.peer, info->attrs[NL80211_ATTR_MAC], ETH_ALEN); + + if (info->attrs[NL80211_ATTR_IE]) { + owe_info.ie = nla_data(info->attrs[NL80211_ATTR_IE]); + owe_info.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); + } + + return rdev_update_owe_info(rdev, dev, &owe_info); +} + +static int nl80211_probe_mesh_link(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct station_info sinfo = {}; + const u8 *buf; + size_t len; + u8 *dest; + int err; + + if (!rdev->ops->probe_mesh_link || !rdev->ops->get_station) + return -EOPNOTSUPP; + + if (!info->attrs[NL80211_ATTR_MAC] || + !info->attrs[NL80211_ATTR_FRAME]) { + 
GENL_SET_ERR_MSG(info, "Frame or MAC missing"); + return -EINVAL; + } + + if (wdev->iftype != NL80211_IFTYPE_MESH_POINT) + return -EOPNOTSUPP; + + dest = nla_data(info->attrs[NL80211_ATTR_MAC]); + buf = nla_data(info->attrs[NL80211_ATTR_FRAME]); + len = nla_len(info->attrs[NL80211_ATTR_FRAME]); + + if (len < sizeof(struct ethhdr)) + return -EINVAL; + + if (!ether_addr_equal(buf, dest) || is_multicast_ether_addr(buf) || + !ether_addr_equal(buf + ETH_ALEN, dev->dev_addr)) + return -EINVAL; + + err = rdev_get_station(rdev, dev, dest, &sinfo); + if (err) + return err; + + return rdev_probe_mesh_link(rdev, dev, dest, buf, len); +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -13365,66 +13591,66 @@ static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb, static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_wiphy, .dumpit = nl80211_dump_wiphy, .done = nl80211_dump_wiphy_done, - .policy = nl80211_policy, /* can be retrieved by unprivileged users */ .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_WIPHY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_wiphy, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_GET_INTERFACE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_interface, .dumpit = nl80211_dump_interface, - .policy = nl80211_policy, /* can be retrieved by unprivileged users */ .internal_flags = NL80211_FLAG_NEED_WDEV | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_INTERFACE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_interface, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_NEW_INTERFACE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_new_interface, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_DEL_INTERFACE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_interface, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_GET_KEY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_key, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_KEY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_key, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL | @@ -13432,8 +13658,8 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_NEW_KEY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_new_key, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL | @@ -13441,15 +13667,15 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_DEL_KEY, + .validate = GENL_DONT_VALIDATE_STRICT | 
GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_key, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_BEACON, - .policy = nl80211_policy, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .doit = nl80211_set_beacon, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13457,7 +13683,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_START_AP, - .policy = nl80211_policy, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .doit = nl80211_start_ap, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13465,7 +13691,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_STOP_AP, - .policy = nl80211_policy, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .doit = nl80211_stop_ap, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13473,172 +13699,172 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_GET_STATION, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_station, .dumpit = nl80211_dump_station, - .policy = nl80211_policy, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_STATION, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_station, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_NEW_STATION, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_new_station, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_DEL_STATION, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_station, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_GET_MPATH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_mpath, .dumpit = nl80211_dump_mpath, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_GET_MPP, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_mpp, .dumpit = nl80211_dump_mpp, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_MPATH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_mpath, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_NEW_MPATH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_new_mpath, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_DEL_MPATH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_mpath, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_BSS, + .validate = 
GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_bss, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_GET_REG, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_reg_do, .dumpit = nl80211_get_reg_dump, - .policy = nl80211_policy, .internal_flags = NL80211_FLAG_NEED_RTNL, /* can be retrieved by unprivileged users */ }, #ifdef CONFIG_CFG80211_CRDA_SUPPORT { .cmd = NL80211_CMD_SET_REG, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_reg, - .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_RTNL, }, #endif { .cmd = NL80211_CMD_REQ_SET_REG, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_req_set_reg, - .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = NL80211_CMD_RELOAD_REGDB, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_reload_regdb, - .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = NL80211_CMD_GET_MESH_CONFIG, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_mesh_config, - .policy = nl80211_policy, /* can be retrieved by unprivileged users */ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_MESH_CONFIG, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_update_mesh_config, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_TRIGGER_SCAN, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_trigger_scan, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_ABORT_SCAN, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_abort_scan, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_GET_SCAN, - .policy = nl80211_policy, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = nl80211_dump_scan, }, { .cmd = NL80211_CMD_START_SCHED_SCAN, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_start_sched_scan, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_STOP_SCHED_SCAN, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_stop_sched_scan, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_AUTHENTICATE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_authenticate, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL | @@ -13646,8 +13872,8 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_ASSOCIATE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_associate, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL | @@ -13655,32 +13881,32 @@ static const struct 
genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_DEAUTHENTICATE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_deauthenticate, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_DISASSOCIATE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_disassociate, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_JOIN_IBSS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_join_ibss, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_LEAVE_IBSS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_leave_ibss, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13688,9 +13914,9 @@ static const struct genl_ops nl80211_ops[] = { #ifdef CONFIG_NL80211_TESTMODE { .cmd = NL80211_CMD_TESTMODE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_testmode_do, .dumpit = nl80211_testmode_dump, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, @@ -13698,8 +13924,8 @@ static const struct genl_ops nl80211_ops[] = { #endif { .cmd = NL80211_CMD_CONNECT, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_connect, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL | @@ -13707,8 +13933,8 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_UPDATE_CONNECT_PARAMS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_update_connect_params, - .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL | @@ -13716,29 +13942,29 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_DISCONNECT, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_disconnect, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_WIPHY_NETNS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_wiphy_netns, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_GET_SURVEY, - .policy = nl80211_policy, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = nl80211_dump_survey, }, { .cmd = NL80211_CMD_SET_PMKSA, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_setdel_pmksa, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL | @@ -13746,136 +13972,136 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_DEL_PMKSA, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_setdel_pmksa, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_FLUSH_PMKSA, + 
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_flush_pmksa, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_REMAIN_ON_CHANNEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_remain_on_channel, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_cancel_remain_on_channel, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_TX_BITRATE_MASK, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_tx_bitrate_mask, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_REGISTER_FRAME, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_register_mgmt, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_FRAME, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tx_mgmt, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_FRAME_WAIT_CANCEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tx_mgmt_cancel_wait, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_POWER_SAVE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_power_save, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_GET_POWER_SAVE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_power_save, - .policy = nl80211_policy, /* can be retrieved by unprivileged users */ .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_CQM, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_cqm, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_CHANNEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_channel, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_WDS_PEER, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_wds_peer, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_JOIN_MESH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_join_mesh, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_LEAVE_MESH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = 
nl80211_leave_mesh, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_JOIN_OCB, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_join_ocb, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_LEAVE_OCB, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_leave_ocb, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13883,16 +14109,16 @@ static const struct genl_ops nl80211_ops[] = { #ifdef CONFIG_PM { .cmd = NL80211_CMD_GET_WOWLAN, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_wowlan, - .policy = nl80211_policy, /* can be retrieved by unprivileged users */ .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_WOWLAN, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_wowlan, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, @@ -13900,8 +14126,8 @@ static const struct genl_ops nl80211_ops[] = { #endif { .cmd = NL80211_CMD_SET_REKEY_OFFLOAD, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_rekey_data, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL | @@ -13909,189 +14135,189 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_TDLS_MGMT, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tdls_mgmt, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_TDLS_OPER, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tdls_oper, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_UNEXPECTED_FRAME, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_register_unexpected_frame, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_PROBE_CLIENT, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_probe_client, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_REGISTER_BEACONS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_register_beacons, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_NOACK_MAP, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_noack_map, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_START_P2P_DEVICE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_start_p2p_device, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV | 
NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_STOP_P2P_DEVICE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_stop_p2p_device, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_START_NAN, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_start_nan, - .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_STOP_NAN, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_stop_nan, - .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_ADD_NAN_FUNCTION, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_nan_add_func, - .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_DEL_NAN_FUNCTION, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_nan_del_func, - .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_CHANGE_NAN_CONFIG, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_nan_change_config, - .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_MCAST_RATE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_mcast_rate, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_MAC_ACL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_mac_acl, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_RADAR_DETECT, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_start_radar_detection, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_GET_PROTOCOL_FEATURES, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_protocol_features, - .policy = nl80211_policy, }, { .cmd = NL80211_CMD_UPDATE_FT_IES, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_update_ft_ies, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_CRIT_PROTOCOL_START, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_crit_protocol_start, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_CRIT_PROTOCOL_STOP, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_crit_protocol_stop, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_GET_COALESCE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_coalesce, - .policy = nl80211_policy, 
.internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_COALESCE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_coalesce, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_CHANNEL_SWITCH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_channel_switch, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_VENDOR, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_vendor_cmd, .dumpit = nl80211_vendor_cmd_dump, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL | @@ -14099,102 +14325,116 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_SET_QOS_MAP, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_qos_map, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_ADD_TX_TS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_add_tx_ts, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_DEL_TX_TS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_tx_ts, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_TDLS_CHANNEL_SWITCH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tdls_channel_switch, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_TDLS_CANCEL_CHANNEL_SWITCH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tdls_cancel_channel_switch, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_MULTICAST_TO_UNICAST, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_multicast_to_unicast, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_PMK, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_pmk, - .policy = nl80211_policy, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL | NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DEL_PMK, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_pmk, - .policy = nl80211_policy, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_EXTERNAL_AUTH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_external_auth, - .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_CONTROL_PORT_FRAME, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tx_control_port, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, 
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_GET_FTM_RESPONDER_STATS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_ftm_responder_stats, - .policy = nl80211_policy, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_PEER_MEASUREMENT_START, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_pmsr_start, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_NOTIFY_RADAR, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_notify_radar_detection, - .policy = nl80211_policy, + .flags = GENL_UNS_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_UPDATE_OWE_INFO, + .doit = nl80211_update_owe_info, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_PROBE_MESH_LINK, + .doit = nl80211_probe_mesh_link, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14206,6 +14446,7 @@ static struct genl_family nl80211_fam __ro_after_init = { .hdrsize = 0, /* no private header */ .version = 1, /* no particular meaning now */ .maxattr = NL80211_ATTR_MAX, + .policy = nl80211_policy, .netnsok = true, .pre_doit = nl80211_pre_doit, .post_doit = nl80211_post_doit, @@ -14269,7 +14510,7 @@ static int nl80211_add_scan_req(struct sk_buff *msg, if (WARN_ON(!req)) return 0; - nest = nla_nest_start(msg, NL80211_ATTR_SCAN_SSIDS); + nest = nla_nest_start_noflag(msg, NL80211_ATTR_SCAN_SSIDS); if (!nest) goto nla_put_failure; for (i = 0; i < req->n_ssids; i++) { @@ -14278,7 +14519,7 @@ static int nl80211_add_scan_req(struct sk_buff *msg, } nla_nest_end(msg, nest); - nest = nla_nest_start(msg, NL80211_ATTR_SCAN_FREQUENCIES); + nest = nla_nest_start_noflag(msg, NL80211_ATTR_SCAN_FREQUENCIES); if (!nest) goto nla_put_failure; for (i = 0; i < req->n_channels; i++) { @@ -14540,7 +14781,7 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev, if (uapsd_queues >= 0) { struct nlattr *nla_wmm = - nla_nest_start(msg, NL80211_ATTR_STA_WME); + nla_nest_start_noflag(msg, NL80211_ATTR_STA_WME); if (!nla_wmm) goto nla_put_failure; @@ -14981,7 +15222,7 @@ void nl80211_send_beacon_hint_event(struct wiphy *wiphy, goto nla_put_failure; /* Before */ - nl_freq = nla_nest_start(msg, NL80211_ATTR_FREQ_BEFORE); + nl_freq = nla_nest_start_noflag(msg, NL80211_ATTR_FREQ_BEFORE); if (!nl_freq) goto nla_put_failure; @@ -14990,7 +15231,7 @@ void nl80211_send_beacon_hint_event(struct wiphy *wiphy, nla_nest_end(msg, nl_freq); /* After */ - nl_freq = nla_nest_start(msg, NL80211_ATTR_FREQ_AFTER); + nl_freq = nla_nest_start_noflag(msg, NL80211_ATTR_FREQ_AFTER); if (!nl_freq) goto nla_put_failure; @@ -15424,7 +15665,7 @@ static struct sk_buff *cfg80211_prepare_cqm(struct net_device *dev, if (mac && nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac)) goto nla_put_failure; - cb[1] = nla_nest_start(msg, NL80211_ATTR_CQM); + cb[1] = nla_nest_start_noflag(msg, NL80211_ATTR_CQM); if (!cb[1]) goto nla_put_failure; @@ -15585,7 +15826,7 @@ static void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev, nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid)) goto nla_put_failure; - rekey_attr = nla_nest_start(msg, NL80211_ATTR_REKEY_DATA); + rekey_attr = 
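/*
 * [Editor's note: illustrative sketch, not part of the patch.] With the
 * per-op ".policy = nl80211_policy" lines deleted, the policy is attached
 * once to the genl_family (see the hunk above) and the generic netlink
 * core validates every message before any doit/dumpit runs. The minimal
 * shape of such a registration (fields reduced for illustration):
 */
#include <net/genetlink.h>

static struct genl_family example_fam __ro_after_init = {
	.name = "example",		/* hypothetical family name */
	.maxattr = NL80211_ATTR_MAX,
	.policy = nl80211_policy,	/* applied by the genl core */
	.ops = nl80211_ops,
	.n_ops = ARRAY_SIZE(nl80211_ops),
};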
nla_nest_start_noflag(msg, NL80211_ATTR_REKEY_DATA); if (!rekey_attr) goto nla_put_failure; @@ -15640,7 +15881,7 @@ nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev, nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex)) goto nla_put_failure; - attr = nla_nest_start(msg, NL80211_ATTR_PMKSA_CANDIDATE); + attr = nla_nest_start_noflag(msg, NL80211_ATTR_PMKSA_CANDIDATE); if (!attr) goto nla_put_failure; @@ -15727,6 +15968,11 @@ void cfg80211_ch_switch_notify(struct net_device *dev, wdev->chandef = *chandef; wdev->preset_chandef = *chandef; + + if (wdev->iftype == NL80211_IFTYPE_STATION && + !WARN_ON(!wdev->current_bss)) + wdev->current_bss->pub.channel = chandef->chan; + nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL, NL80211_CMD_CH_SWITCH_NOTIFY, 0); } @@ -15945,15 +16191,15 @@ static int cfg80211_net_detect_results(struct sk_buff *msg, struct nlattr *nl_results, *nl_match, *nl_freqs; int i, j; - nl_results = nla_nest_start( - msg, NL80211_WOWLAN_TRIG_NET_DETECT_RESULTS); + nl_results = nla_nest_start_noflag(msg, + NL80211_WOWLAN_TRIG_NET_DETECT_RESULTS); if (!nl_results) return -EMSGSIZE; for (i = 0; i < nd->n_matches; i++) { struct cfg80211_wowlan_nd_match *match = nd->matches[i]; - nl_match = nla_nest_start(msg, i); + nl_match = nla_nest_start_noflag(msg, i); if (!nl_match) break; @@ -15971,8 +16217,8 @@ static int cfg80211_net_detect_results(struct sk_buff *msg, } if (match->n_channels) { - nl_freqs = nla_nest_start( - msg, NL80211_ATTR_SCAN_FREQUENCIES); + nl_freqs = nla_nest_start_noflag(msg, + NL80211_ATTR_SCAN_FREQUENCIES); if (!nl_freqs) { nla_nest_cancel(msg, nl_match); goto out; @@ -16031,7 +16277,8 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, if (wakeup) { struct nlattr *reasons; - reasons = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS); + reasons = nla_nest_start_noflag(msg, + NL80211_ATTR_WOWLAN_TRIGGERS); if (!reasons) goto free_msg; @@ -16370,6 +16617,46 @@ int cfg80211_external_auth_request(struct net_device *dev, } EXPORT_SYMBOL(cfg80211_external_auth_request); +void cfg80211_update_owe_info_event(struct net_device *netdev, + struct cfg80211_update_owe_info *owe_info, + gfp_t gfp) +{ + struct wiphy *wiphy = netdev->ieee80211_ptr->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct sk_buff *msg; + void *hdr; + + trace_cfg80211_update_owe_info_event(wiphy, netdev, owe_info); + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_UPDATE_OWE_INFO); + if (!hdr) + goto nla_put_failure; + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, owe_info->peer)) + goto nla_put_failure; + + if (!owe_info->ie_len || + nla_put(msg, NL80211_ATTR_IE, owe_info->ie_len, owe_info->ie)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, + NL80211_MCGRP_MLME, gfp); + return; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_update_owe_info_event); + /* initialisation/exit functions */ int __init nl80211_init(void) diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c index 5e2ab01d325c..1b190475359a 100644 --- a/net/wireless/pmsr.c +++ b/net/wireless/pmsr.c @@ -25,7 +25,8 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev, } /* no validation needed - was already done via nested policy */ - 
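/*
 * [Editor's note: illustrative sketch, not part of the patch.] The
 * cfg80211_ch_switch_notify() hunk above also retargets the cached
 * current_bss channel for station interfaces, so post-CSA lookups no
 * longer see the pre-switch channel. A driver reports a completed switch
 * like this (new_chan is a placeholder for the post-CSA channel):
 */
#include <net/cfg80211.h>

static void example_report_csa(struct net_device *dev,
			       struct ieee80211_channel *new_chan)
{
	struct cfg80211_chan_def chandef;

	cfg80211_chandef_create(&chandef, new_chan, NL80211_CHAN_HT20);
	cfg80211_ch_switch_notify(dev, &chandef);
}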
nla_parse_nested(tb, NL80211_PMSR_FTM_REQ_ATTR_MAX, ftmreq, NULL, NULL); + nla_parse_nested_deprecated(tb, NL80211_PMSR_FTM_REQ_ATTR_MAX, ftmreq, + NULL, NULL); if (tb[NL80211_PMSR_FTM_REQ_ATTR_PREAMBLE]) preamble = nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_PREAMBLE]); @@ -139,7 +140,8 @@ static int pmsr_parse_peer(struct cfg80211_registered_device *rdev, int err, rem; /* no validation needed - was already done via nested policy */ - nla_parse_nested(tb, NL80211_PMSR_PEER_ATTR_MAX, peer, NULL, NULL); + nla_parse_nested_deprecated(tb, NL80211_PMSR_PEER_ATTR_MAX, peer, + NULL, NULL); if (!tb[NL80211_PMSR_PEER_ATTR_ADDR] || !tb[NL80211_PMSR_PEER_ATTR_CHAN] || @@ -154,9 +156,9 @@ static int pmsr_parse_peer(struct cfg80211_registered_device *rdev, /* reuse info->attrs */ memset(info->attrs, 0, sizeof(*info->attrs) * (NL80211_ATTR_MAX + 1)); /* need to validate here, we don't want to have validation recursion */ - err = nla_parse_nested(info->attrs, NL80211_ATTR_MAX, - tb[NL80211_PMSR_PEER_ATTR_CHAN], - nl80211_policy, info->extack); + err = nla_parse_nested_deprecated(info->attrs, NL80211_ATTR_MAX, + tb[NL80211_PMSR_PEER_ATTR_CHAN], + nl80211_policy, info->extack); if (err) return err; @@ -165,9 +167,9 @@ static int pmsr_parse_peer(struct cfg80211_registered_device *rdev, return err; /* no validation needed - was already done via nested policy */ - nla_parse_nested(req, NL80211_PMSR_REQ_ATTR_MAX, - tb[NL80211_PMSR_PEER_ATTR_REQ], - NULL, NULL); + nla_parse_nested_deprecated(req, NL80211_PMSR_REQ_ATTR_MAX, + tb[NL80211_PMSR_PEER_ATTR_REQ], NULL, + NULL); if (!req[NL80211_PMSR_REQ_ATTR_DATA]) { NL_SET_ERR_MSG_ATTR(info->extack, @@ -420,22 +422,22 @@ static int nl80211_pmsr_send_result(struct sk_buff *msg, { struct nlattr *pmsr, *peers, *peer, *resp, *data, *typedata; - pmsr = nla_nest_start(msg, NL80211_ATTR_PEER_MEASUREMENTS); + pmsr = nla_nest_start_noflag(msg, NL80211_ATTR_PEER_MEASUREMENTS); if (!pmsr) goto error; - peers = nla_nest_start(msg, NL80211_PMSR_ATTR_PEERS); + peers = nla_nest_start_noflag(msg, NL80211_PMSR_ATTR_PEERS); if (!peers) goto error; - peer = nla_nest_start(msg, 1); + peer = nla_nest_start_noflag(msg, 1); if (!peer) goto error; if (nla_put(msg, NL80211_PMSR_PEER_ATTR_ADDR, ETH_ALEN, res->addr)) goto error; - resp = nla_nest_start(msg, NL80211_PMSR_PEER_ATTR_RESP); + resp = nla_nest_start_noflag(msg, NL80211_PMSR_PEER_ATTR_RESP); if (!resp) goto error; @@ -452,11 +454,11 @@ static int nl80211_pmsr_send_result(struct sk_buff *msg, if (res->final && nla_put_flag(msg, NL80211_PMSR_RESP_ATTR_FINAL)) goto error; - data = nla_nest_start(msg, NL80211_PMSR_RESP_ATTR_DATA); + data = nla_nest_start_noflag(msg, NL80211_PMSR_RESP_ATTR_DATA); if (!data) goto error; - typedata = nla_nest_start(msg, res->type); + typedata = nla_nest_start_noflag(msg, res->type); if (!typedata) goto error; diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 5cb48d135fab..e853a4fe6f97 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -77,7 +77,8 @@ static inline int rdev_add_key(struct cfg80211_registered_device *rdev, struct key_params *params) { int ret; - trace_rdev_add_key(&rdev->wiphy, netdev, key_index, pairwise, mac_addr); + trace_rdev_add_key(&rdev->wiphy, netdev, key_index, pairwise, + mac_addr, params->mode); ret = rdev->ops->add_key(&rdev->wiphy, netdev, key_index, pairwise, mac_addr, params); trace_rdev_return_int(&rdev->wiphy, ret); @@ -1272,4 +1273,30 @@ rdev_abort_pmsr(struct cfg80211_registered_device *rdev, trace_rdev_return_void(&rdev->wiphy); } +static 
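/*
 * [Editor's note: illustrative sketch, not part of the patch.] The pmsr.c
 * calls above deliberately pass a NULL policy: as their comments say, the
 * attributes were already validated against the nested policy when the
 * outer message was parsed, so this second pass only splits them into the
 * tb[] array. Schematically:
 */
#include <net/cfg80211.h>
#include <net/netlink.h>

static void example_split_only(struct nlattr *tb[], const struct nlattr *nest)
{
	/* NULL policy and NULL extack: tokenisation only, no re-validation */
	nla_parse_nested_deprecated(tb, NL80211_PMSR_FTM_REQ_ATTR_MAX, nest,
				    NULL, NULL);
}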
inline int rdev_update_owe_info(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_update_owe_info *oweinfo) +{ + int ret = -EOPNOTSUPP; + + trace_rdev_update_owe_info(&rdev->wiphy, dev, oweinfo); + if (rdev->ops->update_owe_info) + ret = rdev->ops->update_owe_info(&rdev->wiphy, dev, oweinfo); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + +static inline int +rdev_probe_mesh_link(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *dest, + const void *buf, size_t len) +{ + int ret; + + trace_rdev_probe_mesh_link(&rdev->wiphy, dev, dest, buf, len); + ret = rdev->ops->probe_mesh_link(&rdev->wiphy, dev, buf, len); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/reg.c b/net/wireless/reg.c index a6fd5ce199da..4831ad745f91 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -427,14 +427,10 @@ static const struct ieee80211_regdomain * reg_copy_regd(const struct ieee80211_regdomain *src_regd) { struct ieee80211_regdomain *regd; - int size_of_regd; unsigned int i; - size_of_regd = - sizeof(struct ieee80211_regdomain) + - src_regd->n_reg_rules * sizeof(struct ieee80211_reg_rule); - - regd = kzalloc(size_of_regd, GFP_KERNEL); + regd = kzalloc(struct_size(regd, reg_rules, src_regd->n_reg_rules), + GFP_KERNEL); if (!regd) return ERR_PTR(-ENOMEM); @@ -948,12 +944,10 @@ static int regdb_query_country(const struct fwdb_header *db, unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2; struct fwdb_collection *coll = (void *)((u8 *)db + ptr); struct ieee80211_regdomain *regdom; - unsigned int size_of_regd, i; - - size_of_regd = sizeof(struct ieee80211_regdomain) + - coll->n_rules * sizeof(struct ieee80211_reg_rule); + unsigned int i; - regdom = kzalloc(size_of_regd, GFP_KERNEL); + regdom = kzalloc(struct_size(regdom, reg_rules, coll->n_rules), + GFP_KERNEL); if (!regdom) return -ENOMEM; @@ -1489,7 +1483,7 @@ static struct ieee80211_regdomain * regdom_intersect(const struct ieee80211_regdomain *rd1, const struct ieee80211_regdomain *rd2) { - int r, size_of_regd; + int r; unsigned int x, y; unsigned int num_rules = 0; const struct ieee80211_reg_rule *rule1, *rule2; @@ -1520,10 +1514,7 @@ regdom_intersect(const struct ieee80211_regdomain *rd1, if (!num_rules) return NULL; - size_of_regd = sizeof(struct ieee80211_regdomain) + - num_rules * sizeof(struct ieee80211_reg_rule); - - rd = kzalloc(size_of_regd, GFP_KERNEL); + rd = kzalloc(struct_size(rd, reg_rules, num_rules), GFP_KERNEL); if (!rd) return NULL; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 04d888628f29..c04f5451f89b 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -179,12 +179,63 @@ static bool __cfg80211_unlink_bss(struct cfg80211_registered_device *rdev, return true; } +bool cfg80211_is_element_inherited(const struct element *elem, + const struct element *non_inherit_elem) +{ + u8 id_len, ext_id_len, i, loop_len, id; + const u8 *list; + + if (elem->id == WLAN_EID_MULTIPLE_BSSID) + return false; + + if (!non_inherit_elem || non_inherit_elem->datalen < 2) + return true; + + /* + * non inheritance element format is: + * ext ID (56) | IDs list len | list | extension IDs list len | list + * Both lists are optional. Both lengths are mandatory. 
+ * This means valid length is: + * elem_len = 1 (extension ID) + 2 (list len fields) + list lengths + */ + id_len = non_inherit_elem->data[1]; + if (non_inherit_elem->datalen < 3 + id_len) + return true; + + ext_id_len = non_inherit_elem->data[2 + id_len]; + if (non_inherit_elem->datalen < 3 + id_len + ext_id_len) + return true; + + if (elem->id == WLAN_EID_EXTENSION) { + if (!ext_id_len) + return true; + loop_len = ext_id_len; + list = &non_inherit_elem->data[3 + id_len]; + id = elem->data[0]; + } else { + if (!id_len) + return true; + loop_len = id_len; + list = &non_inherit_elem->data[2]; + id = elem->id; + } + + for (i = 0; i < loop_len; i++) { + if (list[i] == id) + return false; + } + + return true; +} +EXPORT_SYMBOL(cfg80211_is_element_inherited); + static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen, const u8 *subelement, size_t subie_len, u8 *new_ie, gfp_t gfp) { u8 *pos, *tmp; const u8 *tmp_old, *tmp_new; + const struct element *non_inherit_elem; u8 *sub_copy; /* copy subelement as we need to change its content to @@ -203,6 +254,11 @@ static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen, pos += (tmp_new[1] + 2); } + /* get the non-inheritance list if it exists */ + non_inherit_elem = + cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, + sub_copy, subie_len); + /* go through IEs in ie (skip SSID) and subelement, * merge them into new_ie */ @@ -223,8 +279,11 @@ static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen, subie_len); if (!tmp) { + const struct element *old_elem = (void *)tmp_old; + /* ie in old ie but not in subelement */ - if (tmp_old[0] != WLAN_EID_MULTIPLE_BSSID) { + if (cfg80211_is_element_inherited(old_elem, + non_inherit_elem)) { memcpy(pos, tmp_old, tmp_old[1] + 2); pos += tmp_old[1] + 2; } @@ -268,8 +327,7 @@ static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen, tmp_new = sub_copy; while (tmp_new + tmp_new[1] + 2 - sub_copy <= subie_len) { if (!(tmp_new[0] == WLAN_EID_NON_TX_BSSID_CAP || - tmp_new[0] == WLAN_EID_SSID || - tmp_new[0] == WLAN_EID_MULTI_BSSID_IDX)) { + tmp_new[0] == WLAN_EID_SSID)) { memcpy(pos, tmp_new, tmp_new[1] + 2); pos += tmp_new[1] + 2; } @@ -1397,6 +1455,78 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, return &res->pub; } +static const struct element +*cfg80211_get_profile_continuation(const u8 *ie, size_t ielen, + const struct element *mbssid_elem, + const struct element *sub_elem) +{ + const u8 *mbssid_end = mbssid_elem->data + mbssid_elem->datalen; + const struct element *next_mbssid; + const struct element *next_sub; + + next_mbssid = cfg80211_find_elem(WLAN_EID_MULTIPLE_BSSID, + mbssid_end, + ielen - (mbssid_end - ie)); + + /* + * If it is not the last subelement in the current MBSSID IE or there + * isn't a next MBSSID IE - the profile is complete. + */ + if ((sub_elem->data + sub_elem->datalen < mbssid_end - 1) || + !next_mbssid) + return NULL; + + /* For any length error, just return NULL */ + + if (next_mbssid->datalen < 4) + return NULL; + + next_sub = (void *)&next_mbssid->data[1]; + + if (next_mbssid->data + next_mbssid->datalen < + next_sub->data + next_sub->datalen) + return NULL; + + if (next_sub->id != 0 || next_sub->datalen < 2) + return NULL; + + /* + * Check if the first element in the next subelement is the start + * of a new profile + */ + return next_sub->data[0] == WLAN_EID_NON_TX_BSSID_CAP ?
+ NULL : next_mbssid; +} + +size_t cfg80211_merge_profile(const u8 *ie, size_t ielen, + const struct element *mbssid_elem, + const struct element *sub_elem, + u8 *merged_ie, size_t max_copy_len) +{ + size_t copied_len = sub_elem->datalen; + const struct element *next_mbssid; + + if (sub_elem->datalen > max_copy_len) + return 0; + + memcpy(merged_ie, sub_elem->data, sub_elem->datalen); + + while ((next_mbssid = cfg80211_get_profile_continuation(ie, ielen, + mbssid_elem, + sub_elem))) { + const struct element *next_sub = (void *)&next_mbssid->data[1]; + + if (copied_len + next_sub->datalen > max_copy_len) + break; + memcpy(merged_ie + copied_len, next_sub->data, + next_sub->datalen); + copied_len += next_sub->datalen; + } + + return copied_len; +} +EXPORT_SYMBOL(cfg80211_merge_profile); + static void cfg80211_parse_mbssid_data(struct wiphy *wiphy, struct cfg80211_inform_bss *data, enum cfg80211_bss_frame_type ftype, @@ -1410,7 +1540,8 @@ static void cfg80211_parse_mbssid_data(struct wiphy *wiphy, const struct element *elem, *sub; size_t new_ie_len; u8 new_bssid[ETH_ALEN]; - u8 *new_ie; + u8 *new_ie, *profile; + u64 seen_indices = 0; u16 capability; struct cfg80211_bss *bss; @@ -1428,10 +1559,16 @@ static void cfg80211_parse_mbssid_data(struct wiphy *wiphy, if (!new_ie) return; + profile = kmalloc(ielen, gfp); + if (!profile) + goto out; + for_each_element_id(elem, WLAN_EID_MULTIPLE_BSSID, ie, ielen) { if (elem->datalen < 4) continue; for_each_element(sub, elem->data + 1, elem->datalen - 1) { + u8 profile_len; + if (sub->id != 0 || sub->datalen < 4) { /* not a valid BSS profile */ continue; @@ -1446,16 +1583,31 @@ static void cfg80211_parse_mbssid_data(struct wiphy *wiphy, continue; } + memset(profile, 0, ielen); + profile_len = cfg80211_merge_profile(ie, ielen, + elem, + sub, + profile, + ielen); + /* found a Nontransmitted BSSID Profile */ mbssid_index_ie = cfg80211_find_ie (WLAN_EID_MULTI_BSSID_IDX, - sub->data, sub->datalen); + profile, profile_len); if (!mbssid_index_ie || mbssid_index_ie[1] < 1 || - mbssid_index_ie[2] == 0) { + mbssid_index_ie[2] == 0 || + mbssid_index_ie[2] > 46) { /* No valid Multiple BSSID-Index element */ continue; } + if (seen_indices & BIT(mbssid_index_ie[2])) + /* We don't support legacy split of a profile */ + net_dbg_ratelimited("Partial info for BSSID index %d\n", + mbssid_index_ie[2]); + + seen_indices |= BIT(mbssid_index_ie[2]); + non_tx_data->bssid_index = mbssid_index_ie[2]; non_tx_data->max_bssid_indicator = elem->data[0]; @@ -1464,13 +1616,14 @@ static void cfg80211_parse_mbssid_data(struct wiphy *wiphy, non_tx_data->bssid_index, new_bssid); memset(new_ie, 0, IEEE80211_MAX_DATA_LEN); - new_ie_len = cfg80211_gen_new_ie(ie, ielen, sub->data, - sub->datalen, new_ie, + new_ie_len = cfg80211_gen_new_ie(ie, ielen, + profile, + profile_len, new_ie, gfp); if (!new_ie_len) continue; - capability = get_unaligned_le16(sub->data + 2); + capability = get_unaligned_le16(profile + 2); bss = cfg80211_inform_single_bss_data(wiphy, data, ftype, new_bssid, tsf, @@ -1486,7 +1639,9 @@ static void cfg80211_parse_mbssid_data(struct wiphy *wiphy, } } +out: kfree(new_ie); + kfree(profile); } struct cfg80211_bss * diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 44b2ce1bb13a..2abfff925aac 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -430,22 +430,43 @@ DECLARE_EVENT_CLASS(key_handle, BOOL_TO_STR(__entry->pairwise), MAC_PR_ARG(mac_addr)) ); -DEFINE_EVENT(key_handle, rdev_add_key, +DEFINE_EVENT(key_handle, rdev_get_key, TP_PROTO(struct wiphy *wiphy, 
struct net_device *netdev, u8 key_index, bool pairwise, const u8 *mac_addr), TP_ARGS(wiphy, netdev, key_index, pairwise, mac_addr) ); -DEFINE_EVENT(key_handle, rdev_get_key, +DEFINE_EVENT(key_handle, rdev_del_key, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 key_index, bool pairwise, const u8 *mac_addr), TP_ARGS(wiphy, netdev, key_index, pairwise, mac_addr) ); -DEFINE_EVENT(key_handle, rdev_del_key, +TRACE_EVENT(rdev_add_key, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 key_index, - bool pairwise, const u8 *mac_addr), - TP_ARGS(wiphy, netdev, key_index, pairwise, mac_addr) + bool pairwise, const u8 *mac_addr, u8 mode), + TP_ARGS(wiphy, netdev, key_index, pairwise, mac_addr, mode), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + MAC_ENTRY(mac_addr) + __field(u8, key_index) + __field(bool, pairwise) + __field(u8, mode) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + MAC_ASSIGN(mac_addr, mac_addr); + __entry->key_index = key_index; + __entry->pairwise = pairwise; + __entry->mode = mode; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", key_index: %u, " + "mode: %u, pairwise: %s, mac addr: " MAC_PR_FMT, + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->key_index, + __entry->mode, BOOL_TO_STR(__entry->pairwise), + MAC_PR_ARG(mac_addr)) ); TRACE_EVENT(rdev_set_default_key, @@ -3362,6 +3383,62 @@ TRACE_EVENT(cfg80211_pmsr_complete, WIPHY_PR_ARG, WDEV_PR_ARG, (unsigned long long)__entry->cookie) ); + +TRACE_EVENT(rdev_update_owe_info, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_update_owe_info *owe_info), + TP_ARGS(wiphy, netdev, owe_info), + TP_STRUCT__entry(WIPHY_ENTRY + NETDEV_ENTRY + MAC_ENTRY(peer) + __field(u16, status) + __dynamic_array(u8, ie, owe_info->ie_len)), + TP_fast_assign(WIPHY_ASSIGN; + NETDEV_ASSIGN; + MAC_ASSIGN(peer, owe_info->peer); + __entry->status = owe_info->status; + memcpy(__get_dynamic_array(ie), + owe_info->ie, owe_info->ie_len);), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: " MAC_PR_FMT + " status %d", WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer), + __entry->status) +); + +TRACE_EVENT(cfg80211_update_owe_info_event, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_update_owe_info *owe_info), + TP_ARGS(wiphy, netdev, owe_info), + TP_STRUCT__entry(WIPHY_ENTRY + NETDEV_ENTRY + MAC_ENTRY(peer) + __dynamic_array(u8, ie, owe_info->ie_len)), + TP_fast_assign(WIPHY_ASSIGN; + NETDEV_ASSIGN; + MAC_ASSIGN(peer, owe_info->peer); + memcpy(__get_dynamic_array(ie), owe_info->ie, + owe_info->ie_len);), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: " MAC_PR_FMT, + WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer)) +); + +TRACE_EVENT(rdev_probe_mesh_link, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + const u8 *dest, const u8 *buf, size_t len), + TP_ARGS(wiphy, netdev, dest, buf, len), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + MAC_ENTRY(dest) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + MAC_ASSIGN(dest, dest); + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT, + WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(dest)) +); + #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH diff --git a/net/wireless/util.c b/net/wireless/util.c index 75899b62bdc9..cf63b635afc0 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -237,14 +237,23 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, case WLAN_CIPHER_SUITE_CCMP_256: case WLAN_CIPHER_SUITE_GCMP: case 
WLAN_CIPHER_SUITE_GCMP_256: - /* Disallow pairwise keys with non-zero index unless it's WEP - * or a vendor specific cipher (because current deployments use - * pairwise WEP keys with non-zero indices and for vendor - * specific ciphers this should be validated in the driver or - * hardware level - but 802.11i clearly specifies to use zero) + /* IEEE802.11-2016 allows only index 0 and - when using Extended Key + * ID - index 1 for pairwise keys. + * @NL80211_KEY_NO_TX is only allowed for pairwise keys when + * the driver supports Extended Key ID. + * @NL80211_KEY_SET_TX can't be set when installing and + * validating a key. */ - if (pairwise && key_idx) + if (params->mode == NL80211_KEY_NO_TX) { + if (!wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_EXT_KEY_ID)) + return -EINVAL; + else if (!pairwise || key_idx < 0 || key_idx > 1) + return -EINVAL; + } else if ((pairwise && key_idx) || + params->mode == NL80211_KEY_SET_TX) { return -EINVAL; + } break; case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_BIP_CMAC_256: diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index d522787c7354..46e4d69db845 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -353,9 +353,6 @@ static int cfg80211_wext_siwretry(struct net_device *dev, changed |= WIPHY_PARAM_RETRY_SHORT; } - if (!changed) - return 0; - err = rdev_set_wiphy_params(rdev, changed); if (err) { wdev->wiphy->retry_short = oshort; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 20a511398389..0ea48a52ce79 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1398,18 +1398,6 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) break; } - case SIOCGSTAMP: - rc = -EINVAL; - if (sk) - rc = sock_get_timestamp(sk, - (struct timeval __user *)argp); - break; - case SIOCGSTAMPNS: - rc = -EINVAL; - if (sk) - rc = sock_get_timestampns(sk, - (struct timespec __user *)argp); - break; case SIOCGIFADDR: case SIOCSIFADDR: case SIOCGIFDSTADDR: @@ -1681,8 +1669,6 @@ static int compat_x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { void __user *argp = compat_ptr(arg); - struct sock *sk = sock->sk; - int rc = -ENOIOCTLCMD; switch(cmd) { @@ -1690,18 +1676,6 @@ static int compat_x25_ioctl(struct socket *sock, unsigned int cmd, case TIOCINQ: rc = x25_ioctl(sock, cmd, (unsigned long)argp); break; - case SIOCGSTAMP: - rc = -EINVAL; - if (sk) - rc = compat_sock_get_timestamp(sk, - (struct timeval __user*)argp); - break; - case SIOCGSTAMPNS: - rc = -EINVAL; - if (sk) - rc = compat_sock_get_timestampns(sk, - (struct timespec __user*)argp); - break; case SIOCGIFADDR: case SIOCSIFADDR: case SIOCGIFDSTADDR: @@ -1765,6 +1739,7 @@ static const struct proto_ops x25_proto_ops = { #ifdef CONFIG_COMPAT .compat_ioctl = compat_x25_ioctl, #endif + .gettstamp = sock_gettstamp, .listen = x25_listen, .shutdown = sock_no_shutdown, .setsockopt = x25_setsockopt, diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index 610c0bdc0c2b..88b9ae24658d 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -43,6 +43,48 @@ struct xsk_queue { u64 invalid_descs; }; +/* The structure of the shared state of the rings is the same as the + * ring buffer in kernel/events/ring_buffer.c. For the Rx and completion + * ring, the kernel is the producer and user space is the consumer. For + * the Tx and fill rings, the kernel is the consumer and user space is + * the producer.
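/*
 * Editor's sketch, not kernel code: a self-contained C11 version of the
 * single-producer/single-consumer pairing this xsk_queue.h comment
 * describes. Acquire/release atomics stand in for the smp_*() barriers;
 * all names are invented. The producer's release store of ->producer
 * pairs with the consumer's acquire load, and vice versa for ->consumer,
 * so no locks are needed with one thread on each side.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define SK_RING_SZ 256 /* power of two */

struct sk_ring {
	_Atomic uint32_t producer;
	_Atomic uint32_t consumer;
	uint64_t desc[SK_RING_SZ];
};

static bool sk_produce(struct sk_ring *r, uint64_t val)
{
	uint32_t prod = atomic_load_explicit(&r->producer, memory_order_relaxed);
	uint32_t cons = atomic_load_explicit(&r->consumer, memory_order_acquire); /* A */

	if (prod - cons == SK_RING_SZ)
		return false; /* full */
	r->desc[prod & (SK_RING_SZ - 1)] = val;
	/* B: publish the data before the new producer index */
	atomic_store_explicit(&r->producer, prod + 1, memory_order_release);
	return true;
}

static bool sk_consume(struct sk_ring *r, uint64_t *val)
{
	uint32_t cons = atomic_load_explicit(&r->consumer, memory_order_relaxed);
	uint32_t prod = atomic_load_explicit(&r->producer, memory_order_acquire); /* C */

	if (cons == prod)
		return false; /* empty */
	*val = r->desc[cons & (SK_RING_SZ - 1)];
	/* D: release the slot only after the data has been read */
	atomic_store_explicit(&r->consumer, cons + 1, memory_order_release);
	return true;
}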
+ * + * producer consumer + * + * if (LOAD ->consumer) { LOAD ->producer + * (A) smp_rmb() (C) + * STORE $data LOAD $data + * smp_wmb() (B) smp_mb() (D) + * STORE ->producer STORE ->consumer + * } + * + * (A) pairs with (D), and (B) pairs with (C). + * + * Starting with (B), it protects the data store from being reordered + * after the store of the producer pointer. If this barrier was missing, the consumer + * could observe the producer pointer being set and thus load the data + * before the producer has written the new data. The consumer would in + * this case load the old data. + * + * (C) protects the consumer from speculatively loading the data before + * the producer pointer actually has been read. If we do not have this + * barrier, some architectures could load old data, as speculative loads + * are not discarded and the CPU does not know there is a dependency + * between ->producer and data. + * + * (A) is a control dependency that separates the load of ->consumer + * from the stores of $data. In case ->consumer indicates there is no + * room in the buffer to store $data we do not store it. So no barrier is needed. + * + * (D) prevents the load of the data from being observed to happen after the + * store of the consumer pointer. If we did not have this memory + * barrier, the producer could observe the consumer pointer being set + * and overwrite the data with a new value before the consumer got the + * chance to read the old value. The consumer would thus miss reading + * the old entry and very likely read the new entry twice, once right + * now and again after circling through the ring. + */ + /* Common functions operating for both RXTX and umem queues */ static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q) @@ -106,6 +148,7 @@ static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr) static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr) { if (q->cons_tail == q->cons_head) { + smp_mb(); /* D, matches A */ WRITE_ONCE(q->ring->consumer, q->cons_tail); q->cons_head = q->cons_tail + xskq_nb_avail(q, RX_BATCH_SIZE); @@ -128,10 +171,11 @@ static inline int xskq_produce_addr(struct xsk_queue *q, u64 addr) if (xskq_nb_free(q, q->prod_tail, 1) == 0) return -ENOSPC; + /* A, matches D */ ring->desc[q->prod_tail++ & q->ring_mask] = addr; /* Order producer and data */ - smp_wmb(); + smp_wmb(); /* B, matches C */ WRITE_ONCE(q->ring->producer, q->prod_tail); return 0; @@ -144,6 +188,7 @@ static inline int xskq_produce_addr_lazy(struct xsk_queue *q, u64 addr) if (xskq_nb_free(q, q->prod_head, LAZY_UPDATE_THRESHOLD) == 0) return -ENOSPC; + /* A, matches D */ ring->desc[q->prod_head++ & q->ring_mask] = addr; return 0; } @@ -152,7 +197,7 @@ static inline void xskq_produce_flush_addr_n(struct xsk_queue *q, u32 nb_entries) { /* Order producer and data */ - smp_wmb(); + smp_wmb(); /* B, matches C */ q->prod_tail += nb_entries; WRITE_ONCE(q->ring->producer, q->prod_tail); @@ -163,6 +208,7 @@ static inline int xskq_reserve_addr(struct xsk_queue *q) { if (xskq_nb_free(q, q->prod_head, 1) == 0) return -ENOSPC; + /* A, matches D */ q->prod_head++; return 0; } @@ -204,11 +250,12 @@ static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q, struct xdp_desc *desc) { if (q->cons_tail == q->cons_head) { + smp_mb(); /* D, matches A */ WRITE_ONCE(q->ring->consumer, q->cons_tail); q->cons_head = q->cons_tail + xskq_nb_avail(q, RX_BATCH_SIZE); /* Order consumer and data */ - smp_rmb(); + smp_rmb(); /* C, matches B */ } return xskq_validate_desc(q, desc); @@ -228,6 +275,7 @@ static inline int
xskq_produce_batch_desc(struct xsk_queue *q, if (xskq_nb_free(q, q->prod_head, 1) == 0) return -ENOSPC; + /* A, matches D */ idx = (q->prod_head++) & q->ring_mask; ring->desc[idx].addr = addr; ring->desc[idx].len = len; @@ -238,7 +286,7 @@ static inline int xskq_produce_batch_desc(struct xsk_queue *q, static inline void xskq_produce_flush_desc(struct xsk_queue *q) { /* Order producer and data */ - smp_wmb(); + smp_wmb(); /* B, matches C */ q->prod_tail = q->prod_head, WRITE_ONCE(q->ring->producer, q->prod_tail); diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index 5d43aaa17027..1ec8071226b2 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -3,7 +3,7 @@ # config XFRM bool - depends on NET + depends on INET select GRO_CELLS select SKB_EXTENSIONS @@ -15,9 +15,9 @@ config XFRM_ALGO select XFRM select CRYPTO +if INET config XFRM_USER tristate "Transformation user configuration interface" - depends on INET select XFRM_ALGO ---help--- Support for Transformation(XFRM) user configuration interface @@ -56,7 +56,7 @@ config XFRM_MIGRATE config XFRM_STATISTICS bool "Transformation statistics" - depends on INET && XFRM && PROC_FS + depends on XFRM && PROC_FS ---help--- This statistics is not a SNMP/MIB specification but shows statistics about transformation error (or almost error) factor @@ -95,3 +95,5 @@ config NET_KEY_MIGRATE <draft-sugimoto-mip6-pfkey-migrate>. If unsure, say N. + +endif # INET diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index b8736f56e7f7..b24cd86a02c3 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -23,6 +23,60 @@ #include <linux/notifier.h> #ifdef CONFIG_XFRM_OFFLOAD +static void __xfrm_transport_prep(struct xfrm_state *x, struct sk_buff *skb, + unsigned int hsize) +{ + struct xfrm_offload *xo = xfrm_offload(skb); + + skb_reset_mac_len(skb); + pskb_pull(skb, skb->mac_len + hsize + x->props.header_len); + + if (xo->flags & XFRM_GSO_SEGMENT) { + skb_reset_transport_header(skb); + skb->transport_header -= x->props.header_len; + } +} + +static void __xfrm_mode_tunnel_prep(struct xfrm_state *x, struct sk_buff *skb, + unsigned int hsize) + +{ + struct xfrm_offload *xo = xfrm_offload(skb); + + if (xo->flags & XFRM_GSO_SEGMENT) + skb->transport_header = skb->network_header + hsize; + + skb_reset_mac_len(skb); + pskb_pull(skb, skb->mac_len + x->props.header_len); +} + +/* Adjust pointers into the packet when IPsec is done at layer2 */ +static void xfrm_outer_mode_prep(struct xfrm_state *x, struct sk_buff *skb) +{ + switch (x->outer_mode.encap) { + case XFRM_MODE_TUNNEL: + if (x->outer_mode.family == AF_INET) + return __xfrm_mode_tunnel_prep(x, skb, + sizeof(struct iphdr)); + if (x->outer_mode.family == AF_INET6) + return __xfrm_mode_tunnel_prep(x, skb, + sizeof(struct ipv6hdr)); + break; + case XFRM_MODE_TRANSPORT: + if (x->outer_mode.family == AF_INET) + return __xfrm_transport_prep(x, skb, + sizeof(struct iphdr)); + if (x->outer_mode.family == AF_INET6) + return __xfrm_transport_prep(x, skb, + sizeof(struct ipv6hdr)); + break; + case XFRM_MODE_ROUTEOPTIMIZATION: + case XFRM_MODE_IN_TRIGGER: + case XFRM_MODE_BEET: + break; + } +} + struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again) { int err; @@ -78,7 +132,8 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur } if (!skb->next) { - x->outer_mode->xmit(x, skb); + esp_features |= skb->dev->gso_partial_features; + xfrm_outer_mode_prep(x, skb); xo->flags |= XFRM_DEV_RESUME; @@ -101,12 +156,14 @@ struct sk_buff 
*validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur do { struct sk_buff *nskb = skb2->next; + + esp_features |= skb->dev->gso_partial_features; skb_mark_not_on_list(skb2); xo = xfrm_offload(skb2); xo->flags |= XFRM_DEV_RESUME; - x->outer_mode->xmit(x, skb2); + xfrm_outer_mode_prep(x, skb2); err = x->type_offload->xmit(x, skb2, esp_features); if (!err) { @@ -247,7 +304,7 @@ void xfrm_dev_resume(struct sk_buff *skb) unsigned long flags; rcu_read_lock(); - txq = netdev_pick_tx(dev, skb, NULL); + txq = netdev_core_pick_tx(dev, skb, NULL); HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_stopped(txq)) diff --git a/net/xfrm/xfrm_inout.h b/net/xfrm/xfrm_inout.h new file mode 100644 index 000000000000..c7b0318938e2 --- /dev/null +++ b/net/xfrm/xfrm_inout.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/ipv6.h> +#include <net/dsfield.h> +#include <net/xfrm.h> + +#ifndef XFRM_INOUT_H +#define XFRM_INOUT_H 1 + +static inline void xfrm6_beet_make_header(struct sk_buff *skb) +{ + struct ipv6hdr *iph = ipv6_hdr(skb); + + iph->version = 6; + + memcpy(iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl, + sizeof(iph->flow_lbl)); + iph->nexthdr = XFRM_MODE_SKB_CB(skb)->protocol; + + ipv6_change_dsfield(iph, 0, XFRM_MODE_SKB_CB(skb)->tos); + iph->hop_limit = XFRM_MODE_SKB_CB(skb)->ttl; +} + +static inline void xfrm4_beet_make_header(struct sk_buff *skb) +{ + struct iphdr *iph = ip_hdr(skb); + + iph->ihl = 5; + iph->version = 4; + + iph->protocol = XFRM_MODE_SKB_CB(skb)->protocol; + iph->tos = XFRM_MODE_SKB_CB(skb)->tos; + + iph->id = XFRM_MODE_SKB_CB(skb)->id; + iph->frag_off = XFRM_MODE_SKB_CB(skb)->frag_off; + iph->ttl = XFRM_MODE_SKB_CB(skb)->ttl; +} + +#endif diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index b3b613660d44..314973aaa414 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -21,6 +21,8 @@ #include <net/ip_tunnels.h> #include <net/ip6_tunnel.h> +#include "xfrm_inout.h" + struct xfrm_trans_tasklet { struct tasklet_struct tasklet; struct sk_buff_head queue; @@ -166,35 +168,299 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq) } EXPORT_SYMBOL(xfrm_parse_spi); -int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) +static int xfrm4_remove_beet_encap(struct xfrm_state *x, struct sk_buff *skb) +{ + struct iphdr *iph; + int optlen = 0; + int err = -EINVAL; + + if (unlikely(XFRM_MODE_SKB_CB(skb)->protocol == IPPROTO_BEETPH)) { + struct ip_beet_phdr *ph; + int phlen; + + if (!pskb_may_pull(skb, sizeof(*ph))) + goto out; + + ph = (struct ip_beet_phdr *)skb->data; + + phlen = sizeof(*ph) + ph->padlen; + optlen = ph->hdrlen * 8 + (IPV4_BEET_PHMAXLEN - phlen); + if (optlen < 0 || optlen & 3 || optlen > 250) + goto out; + + XFRM_MODE_SKB_CB(skb)->protocol = ph->nexthdr; + + if (!pskb_may_pull(skb, phlen)) + goto out; + __skb_pull(skb, phlen); + } + + skb_push(skb, sizeof(*iph)); + skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + + xfrm4_beet_make_header(skb); + + iph = ip_hdr(skb); + + iph->ihl += optlen / 4; + iph->tot_len = htons(skb->len); + iph->daddr = x->sel.daddr.a4; + iph->saddr = x->sel.saddr.a4; + iph->check = 0; + iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl); + err = 0; +out: + return err; +} + +static void ipip_ecn_decapsulate(struct sk_buff *skb) +{ + struct iphdr *inner_iph = ipip_hdr(skb); + + if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos)) + IP_ECN_set_ce(inner_iph); +} + +static int xfrm4_remove_tunnel_encap(struct xfrm_state *x, 
struct sk_buff *skb) { - struct xfrm_mode *inner_mode = x->inner_mode; + int err = -EINVAL; + + if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP) + goto out; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + goto out; + + err = skb_unclone(skb, GFP_ATOMIC); + if (err) + goto out; + + if (x->props.flags & XFRM_STATE_DECAP_DSCP) + ipv4_copy_dscp(XFRM_MODE_SKB_CB(skb)->tos, ipip_hdr(skb)); + if (!(x->props.flags & XFRM_STATE_NOECN)) + ipip_ecn_decapsulate(skb); + + skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + if (skb->mac_len) + eth_hdr(skb)->h_proto = skb->protocol; + + err = 0; + +out: + return err; +} + +static void ipip6_ecn_decapsulate(struct sk_buff *skb) +{ + struct ipv6hdr *inner_iph = ipipv6_hdr(skb); + + if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos)) + IP6_ECN_set_ce(skb, inner_iph); +} + +static int xfrm6_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb) +{ + int err = -EINVAL; + + if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6) + goto out; + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + goto out; + + err = skb_unclone(skb, GFP_ATOMIC); + if (err) + goto out; + + if (x->props.flags & XFRM_STATE_DECAP_DSCP) + ipv6_copy_dscp(ipv6_get_dsfield(ipv6_hdr(skb)), + ipipv6_hdr(skb)); + if (!(x->props.flags & XFRM_STATE_NOECN)) + ipip6_ecn_decapsulate(skb); + + skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + if (skb->mac_len) + eth_hdr(skb)->h_proto = skb->protocol; + + err = 0; + +out: + return err; +} + +static int xfrm6_remove_beet_encap(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ipv6hdr *ip6h; + int size = sizeof(struct ipv6hdr); int err; - err = x->outer_mode->afinfo->extract_input(x, skb); + err = skb_cow_head(skb, size + skb->mac_len); if (err) + goto out; + + __skb_push(skb, size); + skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + + xfrm6_beet_make_header(skb); + + ip6h = ipv6_hdr(skb); + ip6h->payload_len = htons(skb->len - size); + ip6h->daddr = x->sel.daddr.in6; + ip6h->saddr = x->sel.saddr.in6; + err = 0; +out: + return err; +} + +/* Remove encapsulation header. + * + * The IP header will be moved over the top of the encapsulation + * header. + * + * On entry, the transport header shall point to where the IP header + * should be and the network header shall be set to where the IP + * header currently is. skb->data shall point to the start of the + * payload. 
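/*
 * Editor's sketch of the refactoring pattern visible in this and the
 * following hunks (invented names): per-mode function pointers such as
 * x->inner_mode->input() are replaced by a switch over a small
 * (encap, family) pair, so the mode objects can become shared const
 * data and the indirect call disappears. Reduced to its shape:
 */
enum sk_encap { SK_ENC_TRANSPORT, SK_ENC_TUNNEL, SK_ENC_BEET };

struct sk_xmode {
	enum sk_encap encap;
	int family; /* 4 or 6 */
};

static int sk_v4_decap(void *pkt) { (void)pkt; return 0; } /* stand-ins */
static int sk_v6_decap(void *pkt) { (void)pkt; return 0; }

static int sk_decap(const struct sk_xmode *m, void *pkt)
{
	switch (m->encap) {
	case SK_ENC_TUNNEL:
	case SK_ENC_BEET:
		return m->family == 4 ? sk_v4_decap(pkt) : sk_v6_decap(pkt);
	default:
		return -1; /* transport mode handled separately in the real code */
	}
}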
+ */ +static int +xfrm_inner_mode_encap_remove(struct xfrm_state *x, + const struct xfrm_mode *inner_mode, + struct sk_buff *skb) +{ + switch (inner_mode->encap) { + case XFRM_MODE_BEET: + if (inner_mode->family == AF_INET) + return xfrm4_remove_beet_encap(x, skb); + if (inner_mode->family == AF_INET6) + return xfrm6_remove_beet_encap(x, skb); + break; + case XFRM_MODE_TUNNEL: + if (inner_mode->family == AF_INET) + return xfrm4_remove_tunnel_encap(x, skb); + if (inner_mode->family == AF_INET6) + return xfrm6_remove_tunnel_encap(x, skb); + break; + } + + WARN_ON_ONCE(1); + return -EOPNOTSUPP; +} + +static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) +{ + const struct xfrm_mode *inner_mode = &x->inner_mode; + const struct xfrm_state_afinfo *afinfo; + int err = -EAFNOSUPPORT; + + rcu_read_lock(); + afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family); + if (likely(afinfo)) + err = afinfo->extract_input(x, skb); + + if (err) { + rcu_read_unlock(); return err; + } if (x->sel.family == AF_UNSPEC) { inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); - if (inner_mode == NULL) + if (!inner_mode) { + rcu_read_unlock(); return -EAFNOSUPPORT; + } } - skb->protocol = inner_mode->afinfo->eth_proto; - return inner_mode->input2(x, skb); + afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family); + if (unlikely(!afinfo)) { + rcu_read_unlock(); + return -EAFNOSUPPORT; + } + + skb->protocol = afinfo->eth_proto; + rcu_read_unlock(); + return xfrm_inner_mode_encap_remove(x, inner_mode, skb); +} + +/* Remove encapsulation header. + * + * The IP header will be moved over the top of the encapsulation header. + * + * On entry, skb_transport_header() shall point to where the IP header + * should be and skb_network_header() shall be set to where the IP header + * currently is. skb->data shall point to the start of the payload. 
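/*
 * Flat-buffer sketch of the transport-mode decap just described
 * (editor's illustration with invented names; the kernel works on skb
 * offsets instead): the IP header is copied over the encapsulation
 * header so that it ends up directly in front of the payload.
 */
#include <string.h>

/* 'should_be' is where the IP header must land (the old encap header
 * position); 'currently_is' is where it sits now. Both point into the
 * same packet buffer, so the overlapping-safe memmove() is required. */
static unsigned char *sk_strip_encap(unsigned char *should_be,
				     unsigned char *currently_is,
				     size_t ihl)
{
	memmove(should_be, currently_is, ihl);
	return should_be;
}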
+ */ +static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) +{ + int ihl = skb->data - skb_transport_header(skb); + + if (skb->transport_header != skb->network_header) { + memmove(skb_transport_header(skb), + skb_network_header(skb), ihl); + skb->network_header = skb->transport_header; + } + ip_hdr(skb)->tot_len = htons(skb->len + ihl); + skb_reset_transport_header(skb); + return 0; +} + +static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IPV6) + int ihl = skb->data - skb_transport_header(skb); + + if (skb->transport_header != skb->network_header) { + memmove(skb_transport_header(skb), + skb_network_header(skb), ihl); + skb->network_header = skb->transport_header; + } + ipv6_hdr(skb)->payload_len = htons(skb->len + ihl - + sizeof(struct ipv6hdr)); + skb_reset_transport_header(skb); + return 0; +#else + WARN_ON_ONCE(1); + return -EAFNOSUPPORT; +#endif +} + +static int xfrm_inner_mode_input(struct xfrm_state *x, + const struct xfrm_mode *inner_mode, + struct sk_buff *skb) +{ + switch (inner_mode->encap) { + case XFRM_MODE_BEET: + case XFRM_MODE_TUNNEL: + return xfrm_prepare_input(x, skb); + case XFRM_MODE_TRANSPORT: + if (inner_mode->family == AF_INET) + return xfrm4_transport_input(x, skb); + if (inner_mode->family == AF_INET6) + return xfrm6_transport_input(x, skb); + break; + case XFRM_MODE_ROUTEOPTIMIZATION: + WARN_ON_ONCE(1); + break; + default: + WARN_ON_ONCE(1); + break; + } + + return -EOPNOTSUPP; } -EXPORT_SYMBOL(xfrm_prepare_input); int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) { + const struct xfrm_state_afinfo *afinfo; struct net *net = dev_net(skb->dev); + const struct xfrm_mode *inner_mode; int err; __be32 seq; __be32 seq_hi; struct xfrm_state *x = NULL; xfrm_address_t *daddr; - struct xfrm_mode *inner_mode; u32 mark = skb->mark; unsigned int family = AF_UNSPEC; int decaps = 0; @@ -216,7 +482,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto drop; } - family = x->outer_mode->afinfo->family; + family = x->outer_mode.family; /* An encap_type of -1 indicates async resumption. */ if (encap_type == -1) { @@ -400,7 +666,7 @@ resume: XFRM_MODE_SKB_CB(skb)->protocol = nexthdr; - inner_mode = x->inner_mode; + inner_mode = &x->inner_mode; if (x->sel.family == AF_UNSPEC) { inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); @@ -410,12 +676,12 @@ resume: } } - if (inner_mode->input(x, skb)) { + if (xfrm_inner_mode_input(x, inner_mode, skb)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); goto drop; } - if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) { + if (x->outer_mode.flags & XFRM_MODE_FLAG_TUNNEL) { decaps = 1; break; } @@ -425,7 +691,7 @@ resume: * transport mode so the outer address is identical. 
*/ daddr = &x->id.daddr; - family = x->outer_mode->afinfo->family; + family = x->outer_mode.family; err = xfrm_parse_spi(skb, nexthdr, &spi, &seq); if (err < 0) { @@ -453,7 +719,12 @@ resume: if (xo) xfrm_gro = xo->flags & XFRM_GRO; - err = x->inner_mode->afinfo->transport_finish(skb, xfrm_gro || async); + err = -EAFNOSUPPORT; + rcu_read_lock(); + afinfo = xfrm_state_afinfo_get_rcu(x->inner_mode.family); + if (likely(afinfo)) + err = afinfo->transport_finish(skb, xfrm_gro || async); + rcu_read_unlock(); if (xfrm_gro) { sp = skb_sec_path(skb); if (sp) diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 85fec98676d3..ad3a2555c517 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -255,8 +255,8 @@ static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet) static int xfrmi_rcv_cb(struct sk_buff *skb, int err) { + const struct xfrm_mode *inner_mode; struct pcpu_sw_netstats *tstats; - struct xfrm_mode *inner_mode; struct net_device *dev; struct xfrm_state *x; struct xfrm_if *xi; @@ -284,7 +284,7 @@ static int xfrmi_rcv_cb(struct sk_buff *skb, int err) xnet = !net_eq(xi->net, dev_net(skb->dev)); if (xnet) { - inner_mode = x->inner_mode; + inner_mode = &x->inner_mode; if (x->sel.family == AF_UNSPEC) { inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); @@ -296,7 +296,7 @@ static int xfrmi_rcv_cb(struct sk_buff *skb, int err) } if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, - inner_mode->afinfo->family)) + inner_mode->family)) return -EPERM; } diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 9333153bafda..a55510f9ff35 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -17,9 +17,13 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <net/dst.h> +#include <net/inet_ecn.h> #include <net/xfrm.h> +#include "xfrm_inout.h" + static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb); +static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb); static int xfrm_skb_check_space(struct sk_buff *skb) { @@ -50,6 +54,360 @@ static struct dst_entry *skb_dst_pop(struct sk_buff *skb) return child; } +/* Add encapsulation header. + * + * The IP header will be moved forward to make space for the encapsulation + * header. + */ +static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) +{ + struct iphdr *iph = ip_hdr(skb); + int ihl = iph->ihl * 4; + + skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + + skb_set_network_header(skb, -x->props.header_len); + skb->mac_header = skb->network_header + + offsetof(struct iphdr, protocol); + skb->transport_header = skb->network_header + ihl; + __skb_pull(skb, ihl); + memmove(skb_network_header(skb), iph, ihl); + return 0; +} + +/* Add encapsulation header. + * + * The IP header and mutable extension headers will be moved forward to make + * space for the encapsulation header. 
+ */ +static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct ipv6hdr *iph; + u8 *prevhdr; + int hdr_len; + + iph = ipv6_hdr(skb); + skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + + hdr_len = x->type->hdr_offset(x, skb, &prevhdr); + if (hdr_len < 0) + return hdr_len; + skb_set_mac_header(skb, + (prevhdr - x->props.header_len) - skb->data); + skb_set_network_header(skb, -x->props.header_len); + skb->transport_header = skb->network_header + hdr_len; + __skb_pull(skb, hdr_len); + memmove(ipv6_hdr(skb), iph, hdr_len); + return 0; +#else + WARN_ON_ONCE(1); + return -EAFNOSUPPORT; +#endif +} + +/* Add route optimization header space. + * + * The IP header and mutable extension headers will be moved forward to make + * space for the route optimization header. + */ +static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct ipv6hdr *iph; + u8 *prevhdr; + int hdr_len; + + iph = ipv6_hdr(skb); + + hdr_len = x->type->hdr_offset(x, skb, &prevhdr); + if (hdr_len < 0) + return hdr_len; + skb_set_mac_header(skb, + (prevhdr - x->props.header_len) - skb->data); + skb_set_network_header(skb, -x->props.header_len); + skb->transport_header = skb->network_header + hdr_len; + __skb_pull(skb, hdr_len); + memmove(ipv6_hdr(skb), iph, hdr_len); + + x->lastused = ktime_get_real_seconds(); + + return 0; +#else + WARN_ON_ONCE(1); + return -EAFNOSUPPORT; +#endif +} + +/* Add encapsulation header. + * + * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt. + */ +static int xfrm4_beet_encap_add(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ip_beet_phdr *ph; + struct iphdr *top_iph; + int hdrlen, optlen; + + hdrlen = 0; + optlen = XFRM_MODE_SKB_CB(skb)->optlen; + if (unlikely(optlen)) + hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4); + + skb_set_network_header(skb, -x->props.header_len - hdrlen + + (XFRM_MODE_SKB_CB(skb)->ihl - sizeof(*top_iph))); + if (x->sel.family != AF_INET6) + skb->network_header += IPV4_BEET_PHMAXLEN; + skb->mac_header = skb->network_header + + offsetof(struct iphdr, protocol); + skb->transport_header = skb->network_header + sizeof(*top_iph); + + xfrm4_beet_make_header(skb); + + ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdrlen); + + top_iph = ip_hdr(skb); + + if (unlikely(optlen)) { + if (WARN_ON(optlen < 0)) + return -EINVAL; + + ph->padlen = 4 - (optlen & 4); + ph->hdrlen = optlen / 8; + ph->nexthdr = top_iph->protocol; + if (ph->padlen) + memset(ph + 1, IPOPT_NOP, ph->padlen); + + top_iph->protocol = IPPROTO_BEETPH; + top_iph->ihl = sizeof(struct iphdr) / 4; + } + + top_iph->saddr = x->props.saddr.a4; + top_iph->daddr = x->id.daddr.a4; + + return 0; +} + +/* Add encapsulation header. + * + * The top IP header will be constructed per RFC 2401. 
+ */ +static int xfrm4_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct iphdr *top_iph; + int flags; + + skb_set_inner_network_header(skb, skb_network_offset(skb)); + skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + + skb_set_network_header(skb, -x->props.header_len); + skb->mac_header = skb->network_header + + offsetof(struct iphdr, protocol); + skb->transport_header = skb->network_header + sizeof(*top_iph); + top_iph = ip_hdr(skb); + + top_iph->ihl = 5; + top_iph->version = 4; + + top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family); + + /* DS disclosing depends on XFRM_SA_XFLAG_DONT_ENCAP_DSCP */ + if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP) + top_iph->tos = 0; + else + top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos; + top_iph->tos = INET_ECN_encapsulate(top_iph->tos, + XFRM_MODE_SKB_CB(skb)->tos); + + flags = x->props.flags; + if (flags & XFRM_STATE_NOECN) + IP_ECN_clear(top_iph); + + top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? + 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF)); + + top_iph->ttl = ip4_dst_hoplimit(xfrm_dst_child(dst)); + + top_iph->saddr = x->props.saddr.a4; + top_iph->daddr = x->id.daddr.a4; + ip_select_ident(dev_net(dst->dev), skb, NULL); + + return 0; +} + +#if IS_ENABLED(CONFIG_IPV6) +static int xfrm6_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct ipv6hdr *top_iph; + int dsfield; + + skb_set_inner_network_header(skb, skb_network_offset(skb)); + skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + + skb_set_network_header(skb, -x->props.header_len); + skb->mac_header = skb->network_header + + offsetof(struct ipv6hdr, nexthdr); + skb->transport_header = skb->network_header + sizeof(*top_iph); + top_iph = ipv6_hdr(skb); + + top_iph->version = 6; + + memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl, + sizeof(top_iph->flow_lbl)); + top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family); + + if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP) + dsfield = 0; + else + dsfield = XFRM_MODE_SKB_CB(skb)->tos; + dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos); + if (x->props.flags & XFRM_STATE_NOECN) + dsfield &= ~INET_ECN_MASK; + ipv6_change_dsfield(top_iph, 0, dsfield); + top_iph->hop_limit = ip6_dst_hoplimit(xfrm_dst_child(dst)); + top_iph->saddr = *(struct in6_addr *)&x->props.saddr; + top_iph->daddr = *(struct in6_addr *)&x->id.daddr; + return 0; +} + +static int xfrm6_beet_encap_add(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ipv6hdr *top_iph; + struct ip_beet_phdr *ph; + int optlen, hdr_len; + + hdr_len = 0; + optlen = XFRM_MODE_SKB_CB(skb)->optlen; + if (unlikely(optlen)) + hdr_len += IPV4_BEET_PHMAXLEN - (optlen & 4); + + skb_set_network_header(skb, -x->props.header_len - hdr_len); + if (x->sel.family != AF_INET6) + skb->network_header += IPV4_BEET_PHMAXLEN; + skb->mac_header = skb->network_header + + offsetof(struct ipv6hdr, nexthdr); + skb->transport_header = skb->network_header + sizeof(*top_iph); + ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdr_len); + + xfrm6_beet_make_header(skb); + + top_iph = ipv6_hdr(skb); + if (unlikely(optlen)) { + if (WARN_ON(optlen < 0)) + return -EINVAL; + + ph->padlen = 4 - (optlen & 4); + ph->hdrlen = optlen / 8; + ph->nexthdr = top_iph->nexthdr; + if (ph->padlen) + memset(ph + 1, IPOPT_NOP, ph->padlen); + + top_iph->nexthdr = IPPROTO_BEETPH; + } + + top_iph->saddr = *(struct in6_addr 
*)&x->props.saddr; + top_iph->daddr = *(struct in6_addr *)&x->id.daddr; + return 0; +} +#endif + +/* Add encapsulation header. + * + * On exit, the transport header will be set to the start of the + * encapsulation header to be filled in by x->type->output and the mac + * header will be set to the nextheader (protocol for IPv4) field of the + * extension header directly preceding the encapsulation header, or in + * its absence, that of the top IP header. + * The value of the network header will always point to the top IP header + * while skb->data will point to the payload. + */ +static int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb) +{ + int err; + + err = xfrm_inner_extract_output(x, skb); + if (err) + return err; + + IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE; + skb->protocol = htons(ETH_P_IP); + + switch (x->outer_mode.encap) { + case XFRM_MODE_BEET: + return xfrm4_beet_encap_add(x, skb); + case XFRM_MODE_TUNNEL: + return xfrm4_tunnel_encap_add(x, skb); + } + + WARN_ON_ONCE(1); + return -EOPNOTSUPP; +} + +static int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IPV6) + int err; + + err = xfrm_inner_extract_output(x, skb); + if (err) + return err; + + skb->ignore_df = 1; + skb->protocol = htons(ETH_P_IPV6); + + switch (x->outer_mode.encap) { + case XFRM_MODE_BEET: + return xfrm6_beet_encap_add(x, skb); + case XFRM_MODE_TUNNEL: + return xfrm6_tunnel_encap_add(x, skb); + default: + WARN_ON_ONCE(1); + return -EOPNOTSUPP; + } +#endif + WARN_ON_ONCE(1); + return -EAFNOSUPPORT; +} + +static int xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb) +{ + switch (x->outer_mode.encap) { + case XFRM_MODE_BEET: + case XFRM_MODE_TUNNEL: + if (x->outer_mode.family == AF_INET) + return xfrm4_prepare_output(x, skb); + if (x->outer_mode.family == AF_INET6) + return xfrm6_prepare_output(x, skb); + break; + case XFRM_MODE_TRANSPORT: + if (x->outer_mode.family == AF_INET) + return xfrm4_transport_output(x, skb); + if (x->outer_mode.family == AF_INET6) + return xfrm6_transport_output(x, skb); + break; + case XFRM_MODE_ROUTEOPTIMIZATION: + if (x->outer_mode.family == AF_INET6) + return xfrm6_ro_output(x, skb); + WARN_ON_ONCE(1); + break; + default: + WARN_ON_ONCE(1); + break; + } + + return -EOPNOTSUPP; +} + +#if IS_ENABLED(CONFIG_NET_PKTGEN) +int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb) +{ + return xfrm_outer_mode_output(x, skb); +} +EXPORT_SYMBOL_GPL(pktgen_xfrm_outer_mode_output); +#endif + static int xfrm_output_one(struct sk_buff *skb, int err) { struct dst_entry *dst = skb_dst(skb); @@ -68,7 +426,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err) skb->mark = xfrm_smark_get(skb->mark, x); - err = x->outer_mode->output(x, skb); + err = xfrm_outer_mode_output(x, skb); if (err) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR); goto error_nolock; @@ -131,7 +489,7 @@ resume: } skb_dst_set(skb, dst); x = dst->xfrm; - } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)); + } while (x && !(x->outer_mode.flags & XFRM_MODE_FLAG_TUNNEL)); return 0; @@ -258,20 +616,29 @@ out: } EXPORT_SYMBOL_GPL(xfrm_output); -int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) +static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) { - struct xfrm_mode *inner_mode; + const struct xfrm_state_afinfo *afinfo; + const struct xfrm_mode *inner_mode; + int err = -EAFNOSUPPORT; + if (x->sel.family == AF_UNSPEC) inner_mode = xfrm_ip2inner_mode(x, 
xfrm_af2proto(skb_dst(skb)->ops->family)); else - inner_mode = x->inner_mode; + inner_mode = &x->inner_mode; if (inner_mode == NULL) return -EAFNOSUPPORT; - return inner_mode->afinfo->extract_output(x, skb); + + rcu_read_lock(); + afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family); + if (likely(afinfo)) + err = afinfo->extract_output(x, skb); + rcu_read_unlock(); + + return err; } -EXPORT_SYMBOL_GPL(xfrm_inner_extract_output); void xfrm_local_error(struct sk_buff *skb, int mtu) { diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index a6b58df7a70f..410233c5681e 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -27,10 +27,14 @@ #include <linux/cpu.h> #include <linux/audit.h> #include <linux/rhashtable.h> +#include <linux/if_tunnel.h> #include <net/dst.h> #include <net/flow.h> #include <net/xfrm.h> #include <net/ip.h> +#if IS_ENABLED(CONFIG_IPV6_MIP6) +#include <net/mip6.h> +#endif #ifdef CONFIG_XFRM_STATISTICS #include <net/snmp.h> #endif @@ -2450,18 +2454,10 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl, static int xfrm_get_tos(const struct flowi *fl, int family) { - const struct xfrm_policy_afinfo *afinfo; - int tos; + if (family == AF_INET) + return IPTOS_RT_MASK & fl->u.ip4.flowi4_tos; - afinfo = xfrm_policy_get_afinfo(family); - if (!afinfo) - return 0; - - tos = afinfo->get_tos(fl); - - rcu_read_unlock(); - - return tos; + return 0; } static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) @@ -2499,21 +2495,14 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) return xdst; } -static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, - int nfheader_len) +static void xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, + int nfheader_len) { - const struct xfrm_policy_afinfo *afinfo = - xfrm_policy_get_afinfo(dst->ops->family); - int err; - - if (!afinfo) - return -EINVAL; - - err = afinfo->init_path(path, dst, nfheader_len); - - rcu_read_unlock(); - - return err; + if (dst->ops->family == AF_INET6) { + struct rt6_info *rt = (struct rt6_info *)dst; + path->path_cookie = rt6_get_cookie(rt); + path->u.rt6.rt6i_nfheader_len = nfheader_len; + } } static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, @@ -2545,10 +2534,11 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, const struct flowi *fl, struct dst_entry *dst) { + const struct xfrm_state_afinfo *afinfo; + const struct xfrm_mode *inner_mode; struct net *net = xp_net(policy); unsigned long now = jiffies; struct net_device *dev; - struct xfrm_mode *inner_mode; struct xfrm_dst *xdst_prev = NULL; struct xfrm_dst *xdst0 = NULL; int i = 0; @@ -2594,7 +2584,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, goto put_states; } } else - inner_mode = xfrm[i]->inner_mode; + inner_mode = &xfrm[i]->inner_mode; xdst->route = dst; dst_copy_metrics(dst1, dst); @@ -2622,7 +2612,14 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, dst1->lastuse = now; dst1->input = dst_discard; - dst1->output = inner_mode->afinfo->output; + + rcu_read_lock(); + afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family); + if (likely(afinfo)) + dst1->output = afinfo->output; + else + dst1->output = dst_discard_out; + rcu_read_unlock(); xdst_prev = xdst; @@ -3263,20 +3260,229 @@ xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int star return start; } +static void +decode_session4(struct sk_buff *skb, struct flowi 
*fl, bool reverse) +{ + const struct iphdr *iph = ip_hdr(skb); + u8 *xprth = skb_network_header(skb) + iph->ihl * 4; + struct flowi4 *fl4 = &fl->u.ip4; + int oif = 0; + + if (skb_dst(skb)) + oif = skb_dst(skb)->dev->ifindex; + + memset(fl4, 0, sizeof(struct flowi4)); + fl4->flowi4_mark = skb->mark; + fl4->flowi4_oif = reverse ? skb->skb_iif : oif; + + if (!ip_is_fragment(iph)) { + switch (iph->protocol) { + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + case IPPROTO_TCP: + case IPPROTO_SCTP: + case IPPROTO_DCCP: + if (xprth + 4 < skb->data || + pskb_may_pull(skb, xprth + 4 - skb->data)) { + __be16 *ports; + + xprth = skb_network_header(skb) + iph->ihl * 4; + ports = (__be16 *)xprth; + + fl4->fl4_sport = ports[!!reverse]; + fl4->fl4_dport = ports[!reverse]; + } + break; + case IPPROTO_ICMP: + if (xprth + 2 < skb->data || + pskb_may_pull(skb, xprth + 2 - skb->data)) { + u8 *icmp; + + xprth = skb_network_header(skb) + iph->ihl * 4; + icmp = xprth; + + fl4->fl4_icmp_type = icmp[0]; + fl4->fl4_icmp_code = icmp[1]; + } + break; + case IPPROTO_ESP: + if (xprth + 4 < skb->data || + pskb_may_pull(skb, xprth + 4 - skb->data)) { + __be32 *ehdr; + + xprth = skb_network_header(skb) + iph->ihl * 4; + ehdr = (__be32 *)xprth; + + fl4->fl4_ipsec_spi = ehdr[0]; + } + break; + case IPPROTO_AH: + if (xprth + 8 < skb->data || + pskb_may_pull(skb, xprth + 8 - skb->data)) { + __be32 *ah_hdr; + + xprth = skb_network_header(skb) + iph->ihl * 4; + ah_hdr = (__be32 *)xprth; + + fl4->fl4_ipsec_spi = ah_hdr[1]; + } + break; + case IPPROTO_COMP: + if (xprth + 4 < skb->data || + pskb_may_pull(skb, xprth + 4 - skb->data)) { + __be16 *ipcomp_hdr; + + xprth = skb_network_header(skb) + iph->ihl * 4; + ipcomp_hdr = (__be16 *)xprth; + + fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); + } + break; + case IPPROTO_GRE: + if (xprth + 12 < skb->data || + pskb_may_pull(skb, xprth + 12 - skb->data)) { + __be16 *greflags; + __be32 *gre_hdr; + + xprth = skb_network_header(skb) + iph->ihl * 4; + greflags = (__be16 *)xprth; + gre_hdr = (__be32 *)xprth; + + if (greflags[0] & GRE_KEY) { + if (greflags[0] & GRE_CSUM) + gre_hdr++; + fl4->fl4_gre_key = gre_hdr[1]; + } + } + break; + default: + fl4->fl4_ipsec_spi = 0; + break; + } + } + fl4->flowi4_proto = iph->protocol; + fl4->daddr = reverse ? iph->saddr : iph->daddr; + fl4->saddr = reverse ? iph->daddr : iph->saddr; + fl4->flowi4_tos = iph->tos; +} + +#if IS_ENABLED(CONFIG_IPV6) +static void +decode_session6(struct sk_buff *skb, struct flowi *fl, bool reverse) +{ + struct flowi6 *fl6 = &fl->u.ip6; + int onlyproto = 0; + const struct ipv6hdr *hdr = ipv6_hdr(skb); + u32 offset = sizeof(*hdr); + struct ipv6_opt_hdr *exthdr; + const unsigned char *nh = skb_network_header(skb); + u16 nhoff = IP6CB(skb)->nhoff; + int oif = 0; + u8 nexthdr; + + if (!nhoff) + nhoff = offsetof(struct ipv6hdr, nexthdr); + + nexthdr = nh[nhoff]; + + if (skb_dst(skb)) + oif = skb_dst(skb)->dev->ifindex; + + memset(fl6, 0, sizeof(struct flowi6)); + fl6->flowi6_mark = skb->mark; + fl6->flowi6_oif = reverse ? skb->skb_iif : oif; + + fl6->daddr = reverse ? hdr->saddr : hdr->daddr; + fl6->saddr = reverse ? 
hdr->daddr : hdr->saddr; + + while (nh + offset + sizeof(*exthdr) < skb->data || + pskb_may_pull(skb, nh + offset + sizeof(*exthdr) - skb->data)) { + nh = skb_network_header(skb); + exthdr = (struct ipv6_opt_hdr *)(nh + offset); + + switch (nexthdr) { + case NEXTHDR_FRAGMENT: + onlyproto = 1; + /* fall through */ + case NEXTHDR_ROUTING: + case NEXTHDR_HOP: + case NEXTHDR_DEST: + offset += ipv6_optlen(exthdr); + nexthdr = exthdr->nexthdr; + exthdr = (struct ipv6_opt_hdr *)(nh + offset); + break; + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + case IPPROTO_TCP: + case IPPROTO_SCTP: + case IPPROTO_DCCP: + if (!onlyproto && (nh + offset + 4 < skb->data || + pskb_may_pull(skb, nh + offset + 4 - skb->data))) { + __be16 *ports; + + nh = skb_network_header(skb); + ports = (__be16 *)(nh + offset); + fl6->fl6_sport = ports[!!reverse]; + fl6->fl6_dport = ports[!reverse]; + } + fl6->flowi6_proto = nexthdr; + return; + case IPPROTO_ICMPV6: + if (!onlyproto && (nh + offset + 2 < skb->data || + pskb_may_pull(skb, nh + offset + 2 - skb->data))) { + u8 *icmp; + + nh = skb_network_header(skb); + icmp = (u8 *)(nh + offset); + fl6->fl6_icmp_type = icmp[0]; + fl6->fl6_icmp_code = icmp[1]; + } + fl6->flowi6_proto = nexthdr; + return; +#if IS_ENABLED(CONFIG_IPV6_MIP6) + case IPPROTO_MH: + offset += ipv6_optlen(exthdr); + if (!onlyproto && (nh + offset + 3 < skb->data || + pskb_may_pull(skb, nh + offset + 3 - skb->data))) { + struct ip6_mh *mh; + + nh = skb_network_header(skb); + mh = (struct ip6_mh *)(nh + offset); + fl6->fl6_mh_type = mh->ip6mh_type; + } + fl6->flowi6_proto = nexthdr; + return; +#endif + /* XXX Why are there these headers? */ + case IPPROTO_AH: + case IPPROTO_ESP: + case IPPROTO_COMP: + default: + fl6->fl6_ipsec_spi = 0; + fl6->flowi6_proto = nexthdr; + return; + } + } +} +#endif + int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned int family, int reverse) { - const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); - int err; - - if (unlikely(afinfo == NULL)) + switch (family) { + case AF_INET: + decode_session4(skb, fl, reverse); + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + decode_session6(skb, fl, reverse); + break; +#endif + default: return -EAFNOSUPPORT; + } - afinfo->decode_session(skb, fl, reverse); - - err = security_xfrm_decode_session(skb, &fl->flowi_secid); - rcu_read_unlock(); - return err; + return security_xfrm_decode_session(skb, &fl->flowi_secid); } EXPORT_SYMBOL(__xfrm_decode_session); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index c62f712fdaf7..c5d81316330b 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -173,7 +173,7 @@ static DEFINE_SPINLOCK(xfrm_state_gc_lock); int __xfrm_state_delete(struct xfrm_state *x); int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol); -bool km_is_alive(const struct km_event *c); +static bool km_is_alive(const struct km_event *c); void km_state_expired(struct xfrm_state *x, int hard, u32 portid); static DEFINE_SPINLOCK(xfrm_type_lock); @@ -330,100 +330,67 @@ static void xfrm_put_type_offload(const struct xfrm_type_offload *type) module_put(type->owner); } -static DEFINE_SPINLOCK(xfrm_mode_lock); -int xfrm_register_mode(struct xfrm_mode *mode, int family) -{ - struct xfrm_state_afinfo *afinfo; - struct xfrm_mode **modemap; - int err; - - if (unlikely(mode->encap >= XFRM_MODE_MAX)) - return -EINVAL; - - afinfo = xfrm_state_get_afinfo(family); - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; - - err = -EEXIST; - modemap = 
afinfo->mode_map; - spin_lock_bh(&xfrm_mode_lock); - if (modemap[mode->encap]) - goto out; - - err = -ENOENT; - if (!try_module_get(afinfo->owner)) - goto out; - - mode->afinfo = afinfo; - modemap[mode->encap] = mode; - err = 0; - -out: - spin_unlock_bh(&xfrm_mode_lock); - rcu_read_unlock(); - return err; -} -EXPORT_SYMBOL(xfrm_register_mode); - -int xfrm_unregister_mode(struct xfrm_mode *mode, int family) -{ - struct xfrm_state_afinfo *afinfo; - struct xfrm_mode **modemap; - int err; - - if (unlikely(mode->encap >= XFRM_MODE_MAX)) - return -EINVAL; - - afinfo = xfrm_state_get_afinfo(family); - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; - - err = -ENOENT; - modemap = afinfo->mode_map; - spin_lock_bh(&xfrm_mode_lock); - if (likely(modemap[mode->encap] == mode)) { - modemap[mode->encap] = NULL; - module_put(mode->afinfo->owner); - err = 0; - } - - spin_unlock_bh(&xfrm_mode_lock); - rcu_read_unlock(); - return err; -} -EXPORT_SYMBOL(xfrm_unregister_mode); - -static struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family) -{ - struct xfrm_state_afinfo *afinfo; - struct xfrm_mode *mode; - int modload_attempted = 0; +static const struct xfrm_mode xfrm4_mode_map[XFRM_MODE_MAX] = { + [XFRM_MODE_BEET] = { + .encap = XFRM_MODE_BEET, + .flags = XFRM_MODE_FLAG_TUNNEL, + .family = AF_INET, + }, + [XFRM_MODE_TRANSPORT] = { + .encap = XFRM_MODE_TRANSPORT, + .family = AF_INET, + }, + [XFRM_MODE_TUNNEL] = { + .encap = XFRM_MODE_TUNNEL, + .flags = XFRM_MODE_FLAG_TUNNEL, + .family = AF_INET, + }, +}; + +static const struct xfrm_mode xfrm6_mode_map[XFRM_MODE_MAX] = { + [XFRM_MODE_BEET] = { + .encap = XFRM_MODE_BEET, + .flags = XFRM_MODE_FLAG_TUNNEL, + .family = AF_INET6, + }, + [XFRM_MODE_ROUTEOPTIMIZATION] = { + .encap = XFRM_MODE_ROUTEOPTIMIZATION, + .family = AF_INET6, + }, + [XFRM_MODE_TRANSPORT] = { + .encap = XFRM_MODE_TRANSPORT, + .family = AF_INET6, + }, + [XFRM_MODE_TUNNEL] = { + .encap = XFRM_MODE_TUNNEL, + .flags = XFRM_MODE_FLAG_TUNNEL, + .family = AF_INET6, + }, +}; + +static const struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family) +{ + const struct xfrm_mode *mode; if (unlikely(encap >= XFRM_MODE_MAX)) return NULL; -retry: - afinfo = xfrm_state_get_afinfo(family); - if (unlikely(afinfo == NULL)) - return NULL; - - mode = READ_ONCE(afinfo->mode_map[encap]); - if (unlikely(mode && !try_module_get(mode->owner))) - mode = NULL; - - rcu_read_unlock(); - if (!mode && !modload_attempted) { - request_module("xfrm-mode-%d-%d", family, encap); - modload_attempted = 1; - goto retry; + switch (family) { + case AF_INET: + mode = &xfrm4_mode_map[encap]; + if (mode->family == family) + return mode; + break; + case AF_INET6: + mode = &xfrm6_mode_map[encap]; + if (mode->family == family) + return mode; + break; + default: + break; } - return mode; -} - -static void xfrm_put_mode(struct xfrm_mode *mode) -{ - module_put(mode->owner); + return NULL; } void xfrm_state_free(struct xfrm_state *x) @@ -444,12 +411,6 @@ static void ___xfrm_state_destroy(struct xfrm_state *x) kfree(x->coaddr); kfree(x->replay_esn); kfree(x->preplay_esn); - if (x->inner_mode) - xfrm_put_mode(x->inner_mode); - if (x->inner_mode_iaf) - xfrm_put_mode(x->inner_mode_iaf); - if (x->outer_mode) - xfrm_put_mode(x->outer_mode); if (x->type_offload) xfrm_put_type_offload(x->type_offload); if (x->type) { @@ -591,8 +552,6 @@ struct xfrm_state *xfrm_state_alloc(struct net *net) x->lft.hard_packet_limit = XFRM_INF; x->replay_maxage = 0; x->replay_maxdiff = 0; - x->inner_mode = NULL; - x->inner_mode_iaf = NULL; 
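/*
 * Editor's sketch of the data-structure change above, with invented
 * names: the dynamically registered, refcounted mode objects become
 * const tables indexed by encap type, so a lookup is a bounds check
 * plus a family match, and no module reference counting is needed.
 */
enum { SK_MODE_TRANSPORT, SK_MODE_TUNNEL, SK_MODE_BEET, SK_MODE_MAX };

struct sk_mode {
	int family; /* 0 marks an unset slot */
	int encap;
};

static const struct sk_mode sk_v4_mode_map[SK_MODE_MAX] = {
	[SK_MODE_TRANSPORT] = { .family = 4, .encap = SK_MODE_TRANSPORT },
	[SK_MODE_TUNNEL]    = { .family = 4, .encap = SK_MODE_TUNNEL },
	[SK_MODE_BEET]      = { .family = 4, .encap = SK_MODE_BEET },
};

static const struct sk_mode *sk_get_mode(unsigned int encap, int family)
{
	const struct sk_mode *m;

	if (encap >= SK_MODE_MAX || family != 4)
		return 0;
	m = &sk_v4_mode_map[encap];
	return m->family == family ? m : 0; /* an unset slot fails the match */
}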
@@ -2072,7 +2031,7 @@ int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address
 }
 EXPORT_SYMBOL(km_report);
 
-bool km_is_alive(const struct km_event *c)
+static bool km_is_alive(const struct km_event *c)
 {
 	struct xfrm_mgr *km;
 	bool is_alive = false;
@@ -2088,7 +2047,6 @@ bool km_is_alive(const struct km_event *c)
 
 	return is_alive;
 }
-EXPORT_SYMBOL(km_is_alive);
 
 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
 {
@@ -2201,6 +2159,7 @@ struct xfrm_state_afinfo *xfrm_state_afinfo_get_rcu(unsigned int family)
 
 	return rcu_dereference(xfrm_state_afinfo[family]);
 }
+EXPORT_SYMBOL_GPL(xfrm_state_afinfo_get_rcu);
 
 struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)
 {
@@ -2248,8 +2207,9 @@ int xfrm_state_mtu(struct xfrm_state *x, int mtu)
 
 int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
 {
-	struct xfrm_state_afinfo *afinfo;
-	struct xfrm_mode *inner_mode;
+	const struct xfrm_state_afinfo *afinfo;
+	const struct xfrm_mode *inner_mode;
+	const struct xfrm_mode *outer_mode;
 	int family = x->props.family;
 	int err;
 
@@ -2275,25 +2235,22 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
 			goto error;
 
 		if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
-		    family != x->sel.family) {
-			xfrm_put_mode(inner_mode);
+		    family != x->sel.family)
 			goto error;
-		}
 
-		x->inner_mode = inner_mode;
+		x->inner_mode = *inner_mode;
 	} else {
-		struct xfrm_mode *inner_mode_iaf;
+		const struct xfrm_mode *inner_mode_iaf;
 		int iafamily = AF_INET;
 
 		inner_mode = xfrm_get_mode(x->props.mode, x->props.family);
 		if (inner_mode == NULL)
 			goto error;
 
-		if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) {
-			xfrm_put_mode(inner_mode);
+		if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL))
 			goto error;
-		}
-		x->inner_mode = inner_mode;
+
+		x->inner_mode = *inner_mode;
 
 		if (x->props.family == AF_INET)
 			iafamily = AF_INET6;
@@ -2301,9 +2258,7 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
 		inner_mode_iaf = xfrm_get_mode(x->props.mode, iafamily);
 		if (inner_mode_iaf) {
 			if (inner_mode_iaf->flags & XFRM_MODE_FLAG_TUNNEL)
-				x->inner_mode_iaf = inner_mode_iaf;
-			else
-				xfrm_put_mode(inner_mode_iaf);
+				x->inner_mode_iaf = *inner_mode_iaf;
 		}
 	}
 
@@ -2317,12 +2272,13 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
 	if (err)
 		goto error;
 
-	x->outer_mode = xfrm_get_mode(x->props.mode, family);
-	if (x->outer_mode == NULL) {
+	outer_mode = xfrm_get_mode(x->props.mode, family);
+	if (!outer_mode) {
 		err = -EPROTONOSUPPORT;
 		goto error;
 	}
 
+	x->outer_mode = *outer_mode;
 	if (init_replay) {
 		err = xfrm_init_replay(x);
 		if (err)
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 6916931b1de1..eb8d14389601 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1006,8 +1006,8 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
 		u8 proto = 0;
 		int err;
 
-		err = nlmsg_parse(cb->nlh, 0, attrs, XFRMA_MAX, xfrma_policy,
-				  cb->extack);
+		err = nlmsg_parse_deprecated(cb->nlh, 0, attrs, XFRMA_MAX,
+					     xfrma_policy, cb->extack);
 		if (err < 0)
 			return err;
 
@@ -2644,9 +2644,9 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 		}
 	}
 
-	err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs,
-			  link->nla_max ? : XFRMA_MAX,
-			  link->nla_pol ? : xfrma_policy, extack);
+	err = nlmsg_parse_deprecated(nlh, xfrm_msg_min[type], attrs,
+				     link->nla_max ? : XFRMA_MAX,
+				     link->nla_pol ? : xfrma_policy, extack);
 	if (err < 0)
 		return err;
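The xfrm_user.c hunks are part of the tree-wide strict netlink validation conversion: existing parsers are renamed to nlmsg_parse_deprecated(), which keeps the historical liberal validation for old commands, while plain nlmsg_parse() now implies strict checking for new ones. A kernel-style sketch of the renamed call as it is used above (the DEMO_* names and demo_policy are hypothetical, not from this diff):

#include <net/netlink.h>

enum {
	DEMO_ATTR_UNSPEC,
	DEMO_ATTR_VALUE,
	__DEMO_ATTR_MAX,
};
#define DEMO_ATTR_MAX (__DEMO_ATTR_MAX - 1)

static const struct nla_policy demo_policy[DEMO_ATTR_MAX + 1] = {
	[DEMO_ATTR_VALUE] = { .type = NLA_U32 },
};

static int demo_parse(const struct nlmsghdr *nlh,
		      struct netlink_ext_ack *extack)
{
	struct nlattr *attrs[DEMO_ATTR_MAX + 1];

	/* liberal parsing: unknown attributes and trailing data are
	 * ignored rather than rejected, preserving the old ABI behaviour */
	return nlmsg_parse_deprecated(nlh, 0, attrs, DEMO_ATTR_MAX,
				      demo_policy, extack);
}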