summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan_dev.c2
-rw-r--r--net/8021q/vlan_netlink.c6
-rw-r--r--net/9p/Kconfig4
-rw-r--r--net/9p/trans_usbg.c4
-rw-r--r--net/9p/trans_xen.c9
-rw-r--r--net/Kconfig3
-rw-r--r--net/Kconfig.debug15
-rw-r--r--net/Makefile1
-rw-r--r--net/appletalk/Makefile2
-rw-r--r--net/appletalk/dev.c46
-rw-r--r--net/batman-adv/bat_iv_ogm.c4
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c8
-rw-r--r--net/batman-adv/main.h2
-rw-r--r--net/batman-adv/translation-table.c96
-rw-r--r--net/bluetooth/hci_conn.c230
-rw-r--r--net/bluetooth/hci_core.c26
-rw-r--r--net/bluetooth/hci_event.c49
-rw-r--r--net/bluetooth/hci_sync.c9
-rw-r--r--net/bluetooth/hci_sysfs.c15
-rw-r--r--net/bluetooth/iso.c121
-rw-r--r--net/bluetooth/l2cap_sock.c1
-rw-r--r--net/bluetooth/mgmt.c98
-rw-r--r--net/bluetooth/rfcomm/sock.c20
-rw-r--r--net/bluetooth/sco.c99
-rw-r--r--net/bridge/br_device.c2
-rw-r--r--net/bridge/br_fdb.c45
-rw-r--r--net/bridge/br_netfilter_hooks.c15
-rw-r--r--net/bridge/br_netlink.c6
-rw-r--r--net/bridge/br_private.h4
-rw-r--r--net/bridge/netfilter/Kconfig8
-rw-r--r--net/bridge/netfilter/nft_meta_bridge.c2
-rw-r--r--net/caif/cfsrvl.c6
-rw-r--r--net/can/af_can.c1
-rw-r--r--net/can/gw.c29
-rw-r--r--net/can/raw.c2
-rw-r--r--net/ceph/crypto.c12
-rw-r--r--net/ceph/crypto.h1
-rw-r--r--net/ceph/osd_client.c34
-rw-r--r--net/ceph/pagelist.c38
-rw-r--r--net/ceph/pagevec.c52
-rw-r--r--net/core/Makefile2
-rw-r--r--net/core/bpf_sk_storage.c6
-rw-r--r--net/core/dev.c143
-rw-r--r--net/core/dev.h123
-rw-r--r--net/core/dev_ioctl.c6
-rw-r--r--net/core/fib_notifier.c2
-rw-r--r--net/core/fib_rules.c34
-rw-r--r--net/core/filter.c152
-rw-r--r--net/core/lwt_bpf.c11
-rw-r--r--net/core/neighbour.c360
-rw-r--r--net/core/net-sysfs.c4
-rw-r--r--net/core/net_namespace.c36
-rw-r--r--net/core/netdev-genl-gen.c23
-rw-r--r--net/core/netdev-genl-gen.h1
-rw-r--r--net/core/netdev-genl.c75
-rw-r--r--net/core/netpoll.c49
-rw-r--r--net/core/page_pool.c2
-rw-r--r--net/core/pktgen.c2
-rw-r--r--net/core/rtnetlink.c1035
-rw-r--r--net/core/rtnl_net_debug.c125
-rw-r--r--net/core/skb_fault_injection.c106
-rw-r--r--net/core/skbuff.c8
-rw-r--r--net/core/skmsg.c4
-rw-r--r--net/core/sock.c34
-rw-r--r--net/core/sysctl_net_core.c56
-rw-r--r--net/dcb/dcbnl.c8
-rw-r--r--net/devlink/dev.c18
-rw-r--r--net/devlink/devl_internal.h7
-rw-r--r--net/devlink/dpipe.c18
-rw-r--r--net/devlink/health.c25
-rw-r--r--net/devlink/rate.c8
-rw-r--r--net/devlink/region.c15
-rw-r--r--net/devlink/resource.c101
-rw-r--r--net/devlink/trap.c34
-rw-r--r--net/dsa/devlink.c23
-rw-r--r--net/dsa/dsa.c8
-rw-r--r--net/dsa/port.c40
-rw-r--r--net/dsa/user.c94
-rw-r--r--net/ethtool/cmis.h16
-rw-r--r--net/ethtool/cmis_cdb.c94
-rw-r--r--net/ethtool/cmis_fw_update.c108
-rw-r--r--net/ethtool/common.c90
-rw-r--r--net/ethtool/common.h1
-rw-r--r--net/ethtool/ioctl.c13
-rw-r--r--net/ethtool/rss.c2
-rw-r--r--net/handshake/request.c1
-rw-r--r--net/hsr/hsr_device.c89
-rw-r--r--net/hsr/hsr_forward.c19
-rw-r--r--net/hsr/hsr_netlink.c11
-rw-r--r--net/ieee802154/nl-mac.c15
-rw-r--r--net/ieee802154/nl802154.c26
-rw-r--r--net/ieee802154/socket.c12
-rw-r--r--net/ipv4/af_inet.c22
-rw-r--r--net/ipv4/arp.c2
-rw-r--r--net/ipv4/devinet.c281
-rw-r--r--net/ipv4/esp4_offload.c6
-rw-r--r--net/ipv4/fib_frontend.c44
-rw-r--r--net/ipv4/fib_notifier.c10
-rw-r--r--net/ipv4/fib_rules.c2
-rw-r--r--net/ipv4/fib_semantics.c88
-rw-r--r--net/ipv4/fib_trie.c8
-rw-r--r--net/ipv4/fou_nl.c4
-rw-r--r--net/ipv4/icmp.c21
-rw-r--r--net/ipv4/igmp.c26
-rw-r--r--net/ipv4/inet_connection_sock.c8
-rw-r--r--net/ipv4/inet_diag.c10
-rw-r--r--net/ipv4/inetpeer.c9
-rw-r--r--net/ipv4/ip_fragment.c11
-rw-r--r--net/ipv4/ip_input.c20
-rw-r--r--net/ipv4/ip_options.c3
-rw-r--r--net/ipv4/ip_output.c26
-rw-r--r--net/ipv4/ipmr.c96
-rw-r--r--net/ipv4/netfilter.c2
-rw-r--r--net/ipv4/netfilter/Kconfig16
-rw-r--r--net/ipv4/netfilter/ipt_rpfilter.c2
-rw-r--r--net/ipv4/netfilter/nf_dup_ipv4.c2
-rw-r--r--net/ipv4/netfilter/nft_fib_ipv4.c3
-rw-r--r--net/ipv4/nexthop.c44
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv4/route.c256
-rw-r--r--net/ipv4/tcp.c9
-rw-r--r--net/ipv4/tcp_ao.c42
-rw-r--r--net/ipv4/tcp_cong.c3
-rw-r--r--net/ipv4/tcp_input.c4
-rw-r--r--net/ipv4/tcp_ipv4.c17
-rw-r--r--net/ipv4/tcp_output.c18
-rw-r--r--net/ipv4/tcp_timer.c19
-rw-r--r--net/ipv4/udp.c249
-rw-r--r--net/ipv4/xfrm4_input.c2
-rw-r--r--net/ipv4/xfrm4_policy.c3
-rw-r--r--net/ipv4/xfrm4_protocol.c2
-rw-r--r--net/ipv6/addrconf.c112
-rw-r--r--net/ipv6/addrlabel.c28
-rw-r--r--net/ipv6/af_inet6.c22
-rw-r--r--net/ipv6/anycast.c5
-rw-r--r--net/ipv6/esp6_offload.c6
-rw-r--r--net/ipv6/fib6_notifier.c2
-rw-r--r--net/ipv6/fib6_rules.c2
-rw-r--r--net/ipv6/ila/ila_xlat.c15
-rw-r--r--net/ipv6/ioam6.c14
-rw-r--r--net/ipv6/ioam6_iptunnel.c6
-rw-r--r--net/ipv6/ip6_fib.c41
-rw-r--r--net/ipv6/ip6_output.c24
-rw-r--r--net/ipv6/ip6_tunnel.c4
-rw-r--r--net/ipv6/ip6mr.c79
-rw-r--r--net/ipv6/netfilter/Kconfig9
-rw-r--r--net/ipv6/raw.c2
-rw-r--r--net/ipv6/route.c74
-rw-r--r--net/ipv6/seg6_local.c14
-rw-r--r--net/ipv6/tcp_ipv6.c17
-rw-r--r--net/ipv6/udp.c117
-rw-r--r--net/iucv/af_iucv.c26
-rw-r--r--net/kcm/kcmsock.c10
-rw-r--r--net/key/af_key.c7
-rw-r--r--net/l2tp/l2tp_core.c22
-rw-r--r--net/llc/af_llc.c2
-rw-r--r--net/mac80211/agg-rx.c94
-rw-r--r--net/mac80211/agg-tx.c33
-rw-r--r--net/mac80211/cfg.c186
-rw-r--r--net/mac80211/chan.c65
-rw-r--r--net/mac80211/debugfs.c28
-rw-r--r--net/mac80211/debugfs_key.c9
-rw-r--r--net/mac80211/debugfs_netdev.c3
-rw-r--r--net/mac80211/debugfs_sta.c9
-rw-r--r--net/mac80211/driver-ops.c16
-rw-r--r--net/mac80211/driver-ops.h18
-rw-r--r--net/mac80211/eht.c21
-rw-r--r--net/mac80211/ht.c2
-rw-r--r--net/mac80211/ibss.c7
-rw-r--r--net/mac80211/ieee80211_i.h25
-rw-r--r--net/mac80211/iface.c52
-rw-r--r--net/mac80211/link.c54
-rw-r--r--net/mac80211/mesh.c2
-rw-r--r--net/mac80211/mesh_hwmp.c6
-rw-r--r--net/mac80211/mesh_pathtbl.c10
-rw-r--r--net/mac80211/mesh_plink.c7
-rw-r--r--net/mac80211/mesh_sync.c2
-rw-r--r--net/mac80211/mlme.c118
-rw-r--r--net/mac80211/ocb.c4
-rw-r--r--net/mac80211/rate.c35
-rw-r--r--net/mac80211/rate.h10
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c2
-rw-r--r--net/mac80211/rx.c75
-rw-r--r--net/mac80211/scan.c22
-rw-r--r--net/mac80211/spectmgmt.c9
-rw-r--r--net/mac80211/sta_info.h2
-rw-r--r--net/mac80211/status.c5
-rw-r--r--net/mac80211/tdls.c3
-rw-r--r--net/mac80211/tkip.c2
-rw-r--r--net/mac80211/trace.h34
-rw-r--r--net/mac80211/tx.c8
-rw-r--r--net/mac80211/util.c20
-rw-r--r--net/mac80211/vht.c29
-rw-r--r--net/mac80211/wpa.c3
-rw-r--r--net/mctp/device.c28
-rw-r--r--net/mpls/af_mpls.c7
-rw-r--r--net/mptcp/diag.c2
-rw-r--r--net/mptcp/mptcp_pm_gen.c2
-rw-r--r--net/mptcp/options.c4
-rw-r--r--net/mptcp/pm.c3
-rw-r--r--net/mptcp/pm_netlink.c47
-rw-r--r--net/mptcp/protocol.c15
-rw-r--r--net/mptcp/protocol.h6
-rw-r--r--net/mptcp/sched.c2
-rw-r--r--net/mptcp/subflow.c17
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c7
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c5
-rw-r--r--net/netfilter/nf_bpf_link.c2
-rw-r--r--net/netfilter/nf_conntrack_netlink.c2
-rw-r--r--net/netfilter/nf_nat_core.c6
-rw-r--r--net/netfilter/nf_tables_api.c517
-rw-r--r--net/netfilter/nfnetlink.c2
-rw-r--r--net/netfilter/nft_bitwise.c166
-rw-r--r--net/netfilter/nft_flow_offload.c8
-rw-r--r--net/netfilter/nft_set_bitmap.c10
-rw-r--r--net/netfilter/nft_set_hash.c3
-rw-r--r--net/netfilter/nft_tunnel.c5
-rw-r--r--net/netfilter/xt_IDLETIMER.c4
-rw-r--r--net/netlabel/netlabel_mgmt.c13
-rw-r--r--net/netlabel/netlabel_unlabeled.c2
-rw-r--r--net/netlabel/netlabel_user.c7
-rw-r--r--net/netlabel/netlabel_user.h2
-rw-r--r--net/netlink/af_netlink.c40
-rw-r--r--net/netlink/genetlink.c4
-rw-r--r--net/nfc/nci/core.c13
-rw-r--r--net/nfc/nci/ntf.c32
-rw-r--r--net/nfc/netlink.c5
-rw-r--r--net/openvswitch/datapath.c10
-rw-r--r--net/openvswitch/flow_netlink.c2
-rw-r--r--net/openvswitch/vport-internal_dev.c1
-rw-r--r--net/packet/af_packet.c27
-rw-r--r--net/phonet/pn_dev.c74
-rw-r--r--net/phonet/pn_netlink.c127
-rw-r--r--net/rds/ib_rdma.c4
-rw-r--r--net/rfkill/rfkill-gpio.c8
-rw-r--r--net/rxrpc/af_rxrpc.c7
-rw-r--r--net/rxrpc/conn_object.c4
-rw-r--r--net/rxrpc/local_object.c4
-rw-r--r--net/rxrpc/sendmsg.c1
-rw-r--r--net/sched/act_api.c102
-rw-r--r--net/sched/act_ct.c10
-rw-r--r--net/sched/act_ctinfo.c8
-rw-r--r--net/sched/act_gate.c11
-rw-r--r--net/sched/act_mpls.c18
-rw-r--r--net/sched/act_police.c6
-rw-r--r--net/sched/cls_api.c72
-rw-r--r--net/sched/sch_api.c20
-rw-r--r--net/sched/sch_cbs.c2
-rw-r--r--net/sched/sch_choke.c2
-rw-r--r--net/sched/sch_fq.c42
-rw-r--r--net/sched/sch_gred.c2
-rw-r--r--net/sched/sch_htb.c4
-rw-r--r--net/sched/sch_netem.c1
-rw-r--r--net/sched/sch_qfq.c5
-rw-r--r--net/sched/sch_red.c2
-rw-r--r--net/sched/sch_sfq.c39
-rw-r--r--net/sched/sch_taprio.c2
-rw-r--r--net/sctp/ipv6.c2
-rw-r--r--net/sctp/protocol.c16
-rw-r--r--net/shaper/Makefile8
-rw-r--r--net/shaper/shaper.c1438
-rw-r--r--net/shaper/shaper_nl_gen.c154
-rw-r--r--net/shaper/shaper_nl_gen.h44
-rw-r--r--net/smc/smc.h2
-rw-r--r--net/smc/smc_clc.h2
-rw-r--r--net/smc/smc_core.c2
-rw-r--r--net/smc/smc_core.h4
-rw-r--r--net/socket.c311
-rw-r--r--net/sunrpc/cache.c4
-rw-r--r--net/sunrpc/svcsock.c10
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma.c19
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c8
-rw-r--r--net/sunrpc/xprtsock.c18
-rw-r--r--net/vmw_vsock/af_vsock.c1
-rw-r--r--net/vmw_vsock/hyperv_transport.c1
-rw-r--r--net/wireless/Kconfig45
-rw-r--r--net/wireless/Makefile5
-rw-r--r--net/wireless/chan.c5
-rw-r--r--net/wireless/core.c66
-rw-r--r--net/wireless/core.h1
-rw-r--r--net/wireless/lib80211.c257
-rw-r--r--net/wireless/lib80211_crypt_ccmp.c448
-rw-r--r--net/wireless/lib80211_crypt_tkip.c738
-rw-r--r--net/wireless/lib80211_crypt_wep.c256
-rw-r--r--net/wireless/mlme.c6
-rw-r--r--net/wireless/nl80211.c161
-rw-r--r--net/wireless/radiotap.c2
-rw-r--r--net/wireless/rdev-ops.h5
-rw-r--r--net/wireless/reg.c2
-rw-r--r--net/wireless/scan.c12
-rw-r--r--net/wireless/trace.h10
-rw-r--r--net/wireless/util.c31
-rw-r--r--net/wireless/wext-compat.c13
-rw-r--r--net/wireless/wext-compat.h6
-rw-r--r--net/wireless/wext-core.c2
-rw-r--r--net/wireless/wext-spy.c232
-rw-r--r--net/xdp/xsk.c49
-rw-r--r--net/xdp/xsk_buff_pool.c54
-rw-r--r--net/xdp/xsk_queue.h2
-rw-r--r--net/xfrm/xfrm_compat.c6
-rw-r--r--net/xfrm/xfrm_input.c2
-rw-r--r--net/xfrm/xfrm_policy.c28
-rw-r--r--net/xfrm/xfrm_state.c171
-rw-r--r--net/xfrm/xfrm_user.c83
304 files changed, 8143 insertions, 5708 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 458040e8a0e0..91d134961357 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -725,7 +725,7 @@ static void vlan_dev_poll_controller(struct net_device *dev)
return;
}
-static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo)
+static int vlan_dev_netpoll_setup(struct net_device *dev)
{
struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
struct net_device *real_dev = vlan->real_dev;
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index cf5219df7903..134419667d59 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -161,10 +161,8 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
return -ENODEV;
}
- if (data[IFLA_VLAN_PROTOCOL])
- proto = nla_get_be16(data[IFLA_VLAN_PROTOCOL]);
- else
- proto = htons(ETH_P_8021Q);
+ proto = nla_get_be16_default(data[IFLA_VLAN_PROTOCOL],
+ htons(ETH_P_8021Q));
vlan->vlan_proto = proto;
vlan->vlan_id = nla_get_u16(data[IFLA_VLAN_ID]);
diff --git a/net/9p/Kconfig b/net/9p/Kconfig
index ee967fd25312..22f8c167845d 100644
--- a/net/9p/Kconfig
+++ b/net/9p/Kconfig
@@ -41,8 +41,8 @@ config NET_9P_XEN
two Xen domains.
config NET_9P_USBG
- bool "9P USB Gadget Transport"
- depends on USB_GADGET=y || USB_GADGET=NET_9P
+ tristate "9P USB Gadget Transport"
+ depends on USB_GADGET
select CONFIGFS_FS
select USB_LIBCOMPOSITE
help
diff --git a/net/9p/trans_usbg.c b/net/9p/trans_usbg.c
index 975b76839dca..6b694f117aef 100644
--- a/net/9p/trans_usbg.c
+++ b/net/9p/trans_usbg.c
@@ -909,9 +909,9 @@ static struct usb_function_instance *usb9pfs_alloc_instance(void)
usb9pfs_opts->buflen = DEFAULT_BUFLEN;
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
- if (IS_ERR(dev)) {
+ if (!dev) {
kfree(usb9pfs_opts);
- return ERR_CAST(dev);
+ return ERR_PTR(-ENOMEM);
}
usb9pfs_opts->dev = dev;
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index dfdbe1ca5338..b9ff69c7522a 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -286,7 +286,7 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv)
if (!priv->rings[i].intf)
break;
if (priv->rings[i].irq > 0)
- unbind_from_irqhandler(priv->rings[i].irq, priv->dev);
+ unbind_from_irqhandler(priv->rings[i].irq, ring);
if (priv->rings[i].data.in) {
for (j = 0;
j < (1 << priv->rings[i].intf->ring_order);
@@ -465,6 +465,7 @@ static int xen_9pfs_front_init(struct xenbus_device *dev)
goto error;
}
+ xenbus_switch_state(dev, XenbusStateInitialised);
return 0;
error_xenbus:
@@ -512,8 +513,10 @@ static void xen_9pfs_front_changed(struct xenbus_device *dev,
break;
case XenbusStateInitWait:
- if (!xen_9pfs_front_init(dev))
- xenbus_switch_state(dev, XenbusStateInitialised);
+ if (dev->state != XenbusStateInitialising)
+ break;
+
+ xen_9pfs_front_init(dev);
break;
case XenbusStateConnected:
diff --git a/net/Kconfig b/net/Kconfig
index a629f92dc86b..c3fca69a7c83 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -72,6 +72,9 @@ config NET_DEVMEM
depends on GENERIC_ALLOCATOR
depends on PAGE_POOL
+config NET_SHAPER
+ bool
+
menu "Networking options"
source "net/packet/Kconfig"
diff --git a/net/Kconfig.debug b/net/Kconfig.debug
index 5e3fffe707dd..277fab8c4d77 100644
--- a/net/Kconfig.debug
+++ b/net/Kconfig.debug
@@ -24,3 +24,18 @@ config DEBUG_NET
help
Enable extra sanity checks in networking.
This is mostly used by fuzzers, but is safe to select.
+
+config DEBUG_NET_SMALL_RTNL
+ bool "Add extra per-netns mutex inside RTNL"
+ depends on DEBUG_KERNEL && NET && LOCK_DEBUGGING_SUPPORT
+ select PROVE_LOCKING
+ default n
+ help
+ rtnl_lock() is being replaced with rtnl_net_lock() that
+ acquires the global RTNL and a small per-netns RTNL mutex.
+
+ During the conversion, rtnl_net_lock() just adds an extra
+ mutex in every RTNL scope and slows down the operations.
+
+ Once the conversion completes, rtnl_lock() will be removed
+ and rtnetlink will gain per-netns scalability.
diff --git a/net/Makefile b/net/Makefile
index 65bb8c72a35e..60ed5190eda8 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -79,3 +79,4 @@ obj-$(CONFIG_XDP_SOCKETS) += xdp/
obj-$(CONFIG_MPTCP) += mptcp/
obj-$(CONFIG_MCTP) += mctp/
obj-$(CONFIG_NET_HANDSHAKE) += handshake/
+obj-$(CONFIG_NET_SHAPER) += shaper/
diff --git a/net/appletalk/Makefile b/net/appletalk/Makefile
index 33164d972d37..152312a15180 100644
--- a/net/appletalk/Makefile
+++ b/net/appletalk/Makefile
@@ -5,6 +5,6 @@
obj-$(CONFIG_ATALK) += appletalk.o
-appletalk-y := aarp.o ddp.o dev.o
+appletalk-y := aarp.o ddp.o
appletalk-$(CONFIG_PROC_FS) += atalk_proc.o
appletalk-$(CONFIG_SYSCTL) += sysctl_net_atalk.o
diff --git a/net/appletalk/dev.c b/net/appletalk/dev.c
deleted file mode 100644
index 284c8e585533..000000000000
--- a/net/appletalk/dev.c
+++ /dev/null
@@ -1,46 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Moved here from drivers/net/net_init.c, which is:
- * Written 1993,1994,1995 by Donald Becker.
- */
-
-#include <linux/errno.h>
-#include <linux/module.h>
-#include <linux/netdevice.h>
-#include <linux/if_arp.h>
-#include <linux/if_ltalk.h>
-
-static void ltalk_setup(struct net_device *dev)
-{
- /* Fill in the fields of the device structure with localtalk-generic values. */
-
- dev->type = ARPHRD_LOCALTLK;
- dev->hard_header_len = LTALK_HLEN;
- dev->mtu = LTALK_MTU;
- dev->addr_len = LTALK_ALEN;
- dev->tx_queue_len = 10;
-
- dev->broadcast[0] = 0xFF;
-
- dev->flags = IFF_BROADCAST|IFF_MULTICAST|IFF_NOARP;
-}
-
-/**
- * alloc_ltalkdev - Allocates and sets up an localtalk device
- * @sizeof_priv: Size of additional driver-private structure to be allocated
- * for this localtalk device
- *
- * Fill in the fields of the device structure with localtalk-generic
- * values. Basically does everything except registering the device.
- *
- * Constructs a new net device, complete with a private data area of
- * size @sizeof_priv. A 32-byte (not bit) alignment is enforced for
- * this private data area.
- */
-
-struct net_device *alloc_ltalkdev(int sizeof_priv)
-{
- return alloc_netdev(sizeof_priv, "lt%d", NET_NAME_UNKNOWN,
- ltalk_setup);
-}
-EXPORT_SYMBOL(alloc_ltalkdev);
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 74b49c35ddc1..07ae5dd1f150 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -36,6 +36,7 @@
#include <linux/spinlock.h>
#include <linux/stddef.h>
#include <linux/string.h>
+#include <linux/string_choices.h>
#include <linux/types.h>
#include <linux/workqueue.h>
#include <net/genetlink.h>
@@ -371,8 +372,7 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet,
batadv_ogm_packet->orig,
ntohl(batadv_ogm_packet->seqno),
batadv_ogm_packet->tq, batadv_ogm_packet->ttl,
- ((batadv_ogm_packet->flags & BATADV_DIRECTLINK) ?
- "on" : "off"),
+ str_on_off(batadv_ogm_packet->flags & BATADV_DIRECTLINK),
hard_iface->net_dev->name,
hard_iface->net_dev->dev_addr);
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 5f46ca3d4bb8..449faf5a5487 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -33,6 +33,7 @@
#include <linux/sprintf.h>
#include <linux/stddef.h>
#include <linux/string.h>
+#include <linux/string_choices.h>
#include <linux/workqueue.h>
#include <net/arp.h>
#include <net/genetlink.h>
@@ -1946,16 +1947,15 @@ bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
claim = batadv_claim_hash_find(bat_priv, &search_claim);
if (!claim) {
+ bool local = batadv_is_my_client(bat_priv, ethhdr->h_source, vid);
+
/* possible optimization: race for a claim */
/* No claim exists yet, claim it for us!
*/
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"%s(): Unclaimed MAC %pM found. Claim it. Local: %s\n",
- __func__, ethhdr->h_source,
- batadv_is_my_client(bat_priv,
- ethhdr->h_source, vid) ?
- "yes" : "no");
+ __func__, ethhdr->h_source, str_yes_no(local));
batadv_handle_claim(bat_priv, primary_if,
primary_if->net_dev->dev_addr,
ethhdr->h_source, vid);
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 3d4c36ae2e1a..97ea71a052f8 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2024.2"
+#define BATADV_SOURCE_VERSION "2024.3"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 2243cec18ecc..b44c382226a1 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -28,6 +28,7 @@
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/netlink.h>
+#include <linux/overflow.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/skbuff.h>
@@ -209,20 +210,6 @@ batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr,
}
/**
- * batadv_tt_local_entry_free_rcu() - free the tt_local_entry
- * @rcu: rcu pointer of the tt_local_entry
- */
-static void batadv_tt_local_entry_free_rcu(struct rcu_head *rcu)
-{
- struct batadv_tt_local_entry *tt_local_entry;
-
- tt_local_entry = container_of(rcu, struct batadv_tt_local_entry,
- common.rcu);
-
- kmem_cache_free(batadv_tl_cache, tt_local_entry);
-}
-
-/**
* batadv_tt_local_entry_release() - release tt_local_entry from lists and queue
* for free after rcu grace period
* @ref: kref pointer of the nc_node
@@ -236,7 +223,7 @@ static void batadv_tt_local_entry_release(struct kref *ref)
batadv_softif_vlan_put(tt_local_entry->vlan);
- call_rcu(&tt_local_entry->common.rcu, batadv_tt_local_entry_free_rcu);
+ kfree_rcu(tt_local_entry, common.rcu);
}
/**
@@ -255,20 +242,6 @@ batadv_tt_local_entry_put(struct batadv_tt_local_entry *tt_local_entry)
}
/**
- * batadv_tt_global_entry_free_rcu() - free the tt_global_entry
- * @rcu: rcu pointer of the tt_global_entry
- */
-static void batadv_tt_global_entry_free_rcu(struct rcu_head *rcu)
-{
- struct batadv_tt_global_entry *tt_global_entry;
-
- tt_global_entry = container_of(rcu, struct batadv_tt_global_entry,
- common.rcu);
-
- kmem_cache_free(batadv_tg_cache, tt_global_entry);
-}
-
-/**
* batadv_tt_global_entry_release() - release tt_global_entry from lists and
* queue for free after rcu grace period
* @ref: kref pointer of the nc_node
@@ -282,7 +255,7 @@ void batadv_tt_global_entry_release(struct kref *ref)
batadv_tt_global_del_orig_list(tt_global_entry);
- call_rcu(&tt_global_entry->common.rcu, batadv_tt_global_entry_free_rcu);
+ kfree_rcu(tt_global_entry, common.rcu);
}
/**
@@ -408,19 +381,6 @@ static void batadv_tt_global_size_dec(struct batadv_orig_node *orig_node,
}
/**
- * batadv_tt_orig_list_entry_free_rcu() - free the orig_entry
- * @rcu: rcu pointer of the orig_entry
- */
-static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu)
-{
- struct batadv_tt_orig_list_entry *orig_entry;
-
- orig_entry = container_of(rcu, struct batadv_tt_orig_list_entry, rcu);
-
- kmem_cache_free(batadv_tt_orig_cache, orig_entry);
-}
-
-/**
* batadv_tt_orig_list_entry_release() - release tt orig entry from lists and
* queue for free after rcu grace period
* @ref: kref pointer of the tt orig entry
@@ -433,7 +393,7 @@ static void batadv_tt_orig_list_entry_release(struct kref *ref)
refcount);
batadv_orig_node_put(orig_entry->orig_node);
- call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu);
+ kfree_rcu(orig_entry, rcu);
}
/**
@@ -856,8 +816,7 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node,
num_entries += atomic_read(&vlan->tt.num_entries);
}
- change_offset = sizeof(**tt_data);
- change_offset += num_vlan * sizeof(*tt_vlan);
+ change_offset = struct_size(*tt_data, vlan_data, num_vlan);
/* if tt_len is negative, allocate the space needed by the full table */
if (*tt_len < 0)
@@ -876,7 +835,7 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node,
(*tt_data)->ttvn = atomic_read(&orig_node->last_ttvn);
(*tt_data)->num_vlan = htons(num_vlan);
- tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(*tt_data + 1);
+ tt_vlan = (*tt_data)->vlan_data;
hlist_for_each_entry(vlan, &orig_node->vlan_list, list) {
tt_vlan->vid = htons(vlan->vid);
tt_vlan->crc = htonl(vlan->tt.crc);
@@ -936,8 +895,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
total_entries += vlan_entries;
}
- change_offset = sizeof(**tt_data);
- change_offset += num_vlan * sizeof(*tt_vlan);
+ change_offset = struct_size(*tt_data, vlan_data, num_vlan);
/* if tt_len is negative, allocate the space needed by the full table */
if (*tt_len < 0)
@@ -956,7 +914,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
(*tt_data)->ttvn = atomic_read(&bat_priv->tt.vn);
(*tt_data)->num_vlan = htons(num_vlan);
- tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(*tt_data + 1);
+ tt_vlan = (*tt_data)->vlan_data;
hlist_for_each_entry(vlan, &bat_priv->softif_vlan_list, list) {
vlan_entries = atomic_read(&vlan->tt.num_entries);
if (vlan_entries < 1)
@@ -2916,7 +2874,6 @@ static bool batadv_send_tt_request(struct batadv_priv *bat_priv,
{
struct batadv_tvlv_tt_data *tvlv_tt_data = NULL;
struct batadv_tt_req_node *tt_req_node = NULL;
- struct batadv_tvlv_tt_vlan_data *tt_vlan_req;
struct batadv_hard_iface *primary_if;
bool ret = false;
int i, size;
@@ -2932,7 +2889,7 @@ static bool batadv_send_tt_request(struct batadv_priv *bat_priv,
if (!tt_req_node)
goto out;
- size = sizeof(*tvlv_tt_data) + sizeof(*tt_vlan_req) * num_vlan;
+ size = struct_size(tvlv_tt_data, vlan_data, num_vlan);
tvlv_tt_data = kzalloc(size, GFP_ATOMIC);
if (!tvlv_tt_data)
goto out;
@@ -2944,12 +2901,10 @@ static bool batadv_send_tt_request(struct batadv_priv *bat_priv,
/* send all the CRCs within the request. This is needed by intermediate
* nodes to ensure they have the correct table before replying
*/
- tt_vlan_req = (struct batadv_tvlv_tt_vlan_data *)(tvlv_tt_data + 1);
for (i = 0; i < num_vlan; i++) {
- tt_vlan_req->vid = tt_vlan->vid;
- tt_vlan_req->crc = tt_vlan->crc;
+ tvlv_tt_data->vlan_data[i].vid = tt_vlan->vid;
+ tvlv_tt_data->vlan_data[i].crc = tt_vlan->crc;
- tt_vlan_req++;
tt_vlan++;
}
@@ -3001,7 +2956,6 @@ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv,
struct batadv_orig_node *res_dst_orig_node = NULL;
struct batadv_tvlv_tt_change *tt_change;
struct batadv_tvlv_tt_data *tvlv_tt_data = NULL;
- struct batadv_tvlv_tt_vlan_data *tt_vlan;
bool ret = false, full_table;
u8 orig_ttvn, req_ttvn;
u16 tvlv_len;
@@ -3024,10 +2978,9 @@ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv,
orig_ttvn = (u8)atomic_read(&req_dst_orig_node->last_ttvn);
req_ttvn = tt_data->ttvn;
- tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(tt_data + 1);
/* this node doesn't have the requested data */
if (orig_ttvn != req_ttvn ||
- !batadv_tt_global_check_crc(req_dst_orig_node, tt_vlan,
+ !batadv_tt_global_check_crc(req_dst_orig_node, tt_data->vlan_data,
ntohs(tt_data->num_vlan)))
goto out;
@@ -3370,7 +3323,6 @@ static void batadv_handle_tt_response(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node = NULL;
struct batadv_tvlv_tt_change *tt_change;
u8 *tvlv_ptr = (u8 *)tt_data;
- u16 change_offset;
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Received TT_RESPONSE from %pM for ttvn %d t_size: %d [%c]\n",
@@ -3383,10 +3335,7 @@ static void batadv_handle_tt_response(struct batadv_priv *bat_priv,
spin_lock_bh(&orig_node->tt_lock);
- change_offset = sizeof(struct batadv_tvlv_tt_vlan_data);
- change_offset *= ntohs(tt_data->num_vlan);
- change_offset += sizeof(*tt_data);
- tvlv_ptr += change_offset;
+ tvlv_ptr += struct_size(tt_data, vlan_data, ntohs(tt_data->num_vlan));
tt_change = (struct batadv_tvlv_tt_change *)tvlv_ptr;
if (tt_data->flags & BATADV_TT_FULL_TABLE) {
@@ -3985,10 +3934,10 @@ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
u8 flags, void *tvlv_value,
u16 tvlv_value_len)
{
- struct batadv_tvlv_tt_vlan_data *tt_vlan;
struct batadv_tvlv_tt_change *tt_change;
struct batadv_tvlv_tt_data *tt_data;
u16 num_entries, num_vlan;
+ size_t flex_size;
if (tvlv_value_len < sizeof(*tt_data))
return;
@@ -3998,17 +3947,18 @@ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
num_vlan = ntohs(tt_data->num_vlan);
- if (tvlv_value_len < sizeof(*tt_vlan) * num_vlan)
+ flex_size = flex_array_size(tt_data, vlan_data, num_vlan);
+ if (tvlv_value_len < flex_size)
return;
- tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(tt_data + 1);
- tt_change = (struct batadv_tvlv_tt_change *)(tt_vlan + num_vlan);
- tvlv_value_len -= sizeof(*tt_vlan) * num_vlan;
+ tt_change = (struct batadv_tvlv_tt_change *)((void *)tt_data
+ + flex_size);
+ tvlv_value_len -= flex_size;
num_entries = batadv_tt_entries(tvlv_value_len);
- batadv_tt_update_orig(bat_priv, orig, tt_vlan, num_vlan, tt_change,
- num_entries, tt_data->ttvn);
+ batadv_tt_update_orig(bat_priv, orig, tt_data->vlan_data, num_vlan,
+ tt_change, num_entries, tt_data->ttvn);
}
/**
@@ -4039,8 +3989,8 @@ static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
tt_data = tvlv_value;
tvlv_value_len -= sizeof(*tt_data);
- tt_vlan_len = sizeof(struct batadv_tvlv_tt_vlan_data);
- tt_vlan_len *= ntohs(tt_data->num_vlan);
+ tt_vlan_len = flex_array_size(tt_data, vlan_data,
+ ntohs(tt_data->num_vlan));
if (tvlv_value_len < tt_vlan_len)
return NET_RX_SUCCESS;
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index c4c74b82ed21..d097e308a755 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -952,6 +952,7 @@ static struct hci_conn *__hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t
conn->tx_power = HCI_TX_POWER_INVALID;
conn->max_tx_power = HCI_TX_POWER_INVALID;
conn->sync_handle = HCI_SYNC_HANDLE_INVALID;
+ conn->sid = HCI_SID_INVALID;
set_bit(HCI_CONN_POWER_SAVE, &conn->flags);
conn->disc_timeout = HCI_DISCONN_TIMEOUT;
@@ -1127,9 +1128,9 @@ void hci_conn_del(struct hci_conn *conn)
hci_conn_unlink(conn);
- cancel_delayed_work_sync(&conn->disc_work);
- cancel_delayed_work_sync(&conn->auto_accept_work);
- cancel_delayed_work_sync(&conn->idle_work);
+ disable_delayed_work_sync(&conn->disc_work);
+ disable_delayed_work_sync(&conn->auto_accept_work);
+ disable_delayed_work_sync(&conn->idle_work);
if (conn->type == ACL_LINK) {
/* Unacked frames */
@@ -2062,105 +2063,217 @@ static int create_big_sync(struct hci_dev *hdev, void *data)
static void create_pa_complete(struct hci_dev *hdev, void *data, int err)
{
- struct hci_cp_le_pa_create_sync *cp = data;
-
bt_dev_dbg(hdev, "");
if (err)
bt_dev_err(hdev, "Unable to create PA: %d", err);
+}
- kfree(cp);
+static bool hci_conn_check_create_pa_sync(struct hci_conn *conn)
+{
+ if (conn->type != ISO_LINK || conn->sid == HCI_SID_INVALID)
+ return false;
+
+ return true;
}
static int create_pa_sync(struct hci_dev *hdev, void *data)
{
- struct hci_cp_le_pa_create_sync *cp = data;
- int err;
+ struct hci_cp_le_pa_create_sync cp = {0};
+ struct hci_conn *conn;
+ int err = 0;
- err = __hci_cmd_sync_status(hdev, HCI_OP_LE_PA_CREATE_SYNC,
- sizeof(*cp), cp, HCI_CMD_TIMEOUT);
- if (err) {
- hci_dev_clear_flag(hdev, HCI_PA_SYNC);
- return err;
+ hci_dev_lock(hdev);
+
+ rcu_read_lock();
+
+ /* The spec allows only one pending LE Periodic Advertising Create
+ * Sync command at a time. If the command is pending now, don't do
+ * anything. We check for pending connections after each PA Sync
+ * Established event.
+ *
+ * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E
+ * page 2493:
+ *
+ * If the Host issues this command when another HCI_LE_Periodic_
+ * Advertising_Create_Sync command is pending, the Controller shall
+ * return the error code Command Disallowed (0x0C).
+ */
+ list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
+ if (test_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags))
+ goto unlock;
}
- return hci_update_passive_scan_sync(hdev);
+ list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
+ if (hci_conn_check_create_pa_sync(conn)) {
+ struct bt_iso_qos *qos = &conn->iso_qos;
+
+ cp.options = qos->bcast.options;
+ cp.sid = conn->sid;
+ cp.addr_type = conn->dst_type;
+ bacpy(&cp.addr, &conn->dst);
+ cp.skip = cpu_to_le16(qos->bcast.skip);
+ cp.sync_timeout = cpu_to_le16(qos->bcast.sync_timeout);
+ cp.sync_cte_type = qos->bcast.sync_cte_type;
+
+ break;
+ }
+ }
+
+unlock:
+ rcu_read_unlock();
+
+ hci_dev_unlock(hdev);
+
+ if (bacmp(&cp.addr, BDADDR_ANY)) {
+ hci_dev_set_flag(hdev, HCI_PA_SYNC);
+ set_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags);
+
+ err = __hci_cmd_sync_status(hdev, HCI_OP_LE_PA_CREATE_SYNC,
+ sizeof(cp), &cp, HCI_CMD_TIMEOUT);
+ if (!err)
+ err = hci_update_passive_scan_sync(hdev);
+
+ if (err) {
+ hci_dev_clear_flag(hdev, HCI_PA_SYNC);
+ clear_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags);
+ }
+ }
+
+ return err;
+}
+
+int hci_pa_create_sync_pending(struct hci_dev *hdev)
+{
+ /* Queue start pa_create_sync and scan */
+ return hci_cmd_sync_queue(hdev, create_pa_sync,
+ NULL, create_pa_complete);
}
struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst,
__u8 dst_type, __u8 sid,
struct bt_iso_qos *qos)
{
- struct hci_cp_le_pa_create_sync *cp;
struct hci_conn *conn;
- int err;
-
- if (hci_dev_test_and_set_flag(hdev, HCI_PA_SYNC))
- return ERR_PTR(-EBUSY);
conn = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_SLAVE);
if (IS_ERR(conn))
return conn;
conn->iso_qos = *qos;
+ /* Keep the PA parameters on the connection itself: create_pa_sync
+ * builds the HCI command later from conn->sid, conn->dst_type,
+ * conn->dst and conn->iso_qos instead of a per-call allocation.
+ */
+ conn->dst_type = dst_type;
+ conn->sid = sid;
conn->state = BT_LISTEN;
hci_conn_hold(conn);
- cp = kzalloc(sizeof(*cp), GFP_KERNEL);
- if (!cp) {
- hci_dev_clear_flag(hdev, HCI_PA_SYNC);
- hci_conn_drop(conn);
- return ERR_PTR(-ENOMEM);
+ /* NOTE(review): the queueing result is not checked here, so conn is
+ * returned (still held) even if nothing could be queued - confirm
+ * that this is intended.
+ */
+ hci_pa_create_sync_pending(hdev);
+
+ return conn;
+}
+
+/* A connection qualifies for BIG Create Sync only when at least one BIS
+ * has been recorded on it, i.e. conn->num_bis is non-zero.
+ */
+static bool hci_conn_check_create_big_sync(struct hci_conn *conn)
+{
+	return conn->num_bis != 0;
+}
+
+/* Callback handed to the cmd_sync queue together with big_create_sync:
+ * emits a debug trace and reports a non-zero @err.
+ */
+static void big_create_sync_complete(struct hci_dev *hdev, void *data, int err)
+{
+	bt_dev_dbg(hdev, "");
+
+	if (!err)
+		return;
+
+	bt_dev_err(hdev, "Unable to create BIG sync: %d", err);
+}
+
+/* cmd_sync work that issues at most one HCI LE BIG Create Sync command.
+ *
+ * Walks hdev->conn_hash under RCU. If any connection already has
+ * HCI_CONN_CREATE_BIG_SYNC set, nothing is sent. Otherwise the first
+ * connection with a recorded BIS list is flagged and the command is built
+ * from the parameters stored on that connection.
+ *
+ * Returns 0 when there is nothing to send, else the hci_send_cmd() result.
+ */
+static int big_create_sync(struct hci_dev *hdev, void *data)
+{
+ DEFINE_FLEX(struct hci_cp_le_big_create_sync, pdu, bis, num_bis, 0x11);
+ struct hci_conn *conn;
+
+ rcu_read_lock();
+
+ pdu->num_bis = 0;
+
+ /* The spec allows only one pending LE BIG Create Sync command at
+ * a time. If the command is pending now, don't do anything. We
+ * check for pending connections after each BIG Sync Established
+ * event.
+ *
+ * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E
+ * page 2586:
+ *
+ * If the Host sends this command when the Controller is in the
+ * process of synchronizing to any BIG, i.e. the HCI_LE_BIG_Sync_
+ * Established event has not been generated, the Controller shall
+ * return the error code Command Disallowed (0x0C).
+ */
+ list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
+ if (test_bit(HCI_CONN_CREATE_BIG_SYNC, &conn->flags))
+ goto unlock;
}
- cp->options = qos->bcast.options;
- cp->sid = sid;
- cp->addr_type = dst_type;
- bacpy(&cp->addr, dst);
- cp->skip = cpu_to_le16(qos->bcast.skip);
- cp->sync_timeout = cpu_to_le16(qos->bcast.sync_timeout);
- cp->sync_cte_type = qos->bcast.sync_cte_type;
+ /* No command in flight: take the first connection that has a BIS
+ * list recorded, flag it as pending and copy its parameters into
+ * the on-stack command.
+ */
+ list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
+ if (hci_conn_check_create_big_sync(conn)) {
+ struct bt_iso_qos *qos = &conn->iso_qos;
- /* Queue start pa_create_sync and scan */
- err = hci_cmd_sync_queue(hdev, create_pa_sync, cp, create_pa_complete);
- if (err < 0) {
- hci_conn_drop(conn);
- kfree(cp);
- return ERR_PTR(err);
+ set_bit(HCI_CONN_CREATE_BIG_SYNC, &conn->flags);
+
+ pdu->handle = qos->bcast.big;
+ pdu->sync_handle = cpu_to_le16(conn->sync_handle);
+ pdu->encryption = qos->bcast.encryption;
+ memcpy(pdu->bcode, qos->bcast.bcode,
+ sizeof(pdu->bcode));
+ pdu->mse = qos->bcast.mse;
+ pdu->timeout = cpu_to_le16(qos->bcast.timeout);
+ pdu->num_bis = conn->num_bis;
+ memcpy(pdu->bis, conn->bis, conn->num_bis);
+
+ break;
+ }
}
- return conn;
+unlock:
+ rcu_read_unlock();
+
+ /* num_bis is still 0 if a command was already pending or if no
+ * connection had a BIS list; in both cases there is nothing to send.
+ */
+ if (!pdu->num_bis)
+ return 0;
+
+ return hci_send_cmd(hdev, HCI_OP_LE_BIG_CREATE_SYNC,
+ struct_size(pdu, bis, pdu->num_bis), pdu);
+}
+
+/* Queue big_create_sync through hci_cmd_sync_queue_once(), with
+ * big_create_sync_complete as its callback, and hand the queueing result
+ * back to the caller.
+ */
+int hci_le_big_create_sync_pending(struct hci_dev *hdev)
+{
+	int err;
+
+	/* Queue big_create_sync */
+	err = hci_cmd_sync_queue_once(hdev, big_create_sync, NULL,
+				      big_create_sync_complete);
+
+	return err;
}
int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon,
struct bt_iso_qos *qos,
__u16 sync_handle, __u8 num_bis, __u8 bis[])
{
- DEFINE_FLEX(struct hci_cp_le_big_create_sync, pdu, bis, num_bis, 0x11);
int err;
- if (num_bis < 0x01 || num_bis > pdu->num_bis)
+ if (num_bis < 0x01 || num_bis > ISO_MAX_NUM_BIS)
return -EINVAL;
err = qos_set_big(hdev, qos);
if (err)
return err;
- if (hcon)
- hcon->iso_qos.bcast.big = qos->bcast.big;
+ if (hcon) {
+ /* Update hcon QoS */
+ hcon->iso_qos = *qos;
- pdu->handle = qos->bcast.big;
- pdu->sync_handle = cpu_to_le16(sync_handle);
- pdu->encryption = qos->bcast.encryption;
- memcpy(pdu->bcode, qos->bcast.bcode, sizeof(pdu->bcode));
- pdu->mse = qos->bcast.mse;
- pdu->timeout = cpu_to_le16(qos->bcast.timeout);
- pdu->num_bis = num_bis;
- memcpy(pdu->bis, bis, num_bis);
+ /* Record the requested BIS list on the connection; big_create_sync
+ * reads hcon->num_bis and hcon->bis when it builds the command.
+ */
+ hcon->num_bis = num_bis;
+ memcpy(hcon->bis, bis, num_bis);
+ }
- return hci_send_cmd(hdev, HCI_OP_LE_BIG_CREATE_SYNC,
- struct_size(pdu, bis, num_bis), pdu);
+ /* NOTE(review): the command is no longer built here, and sync_handle
+ * is not referenced by this function any more - the queued work uses
+ * the sync_handle stored on the connection instead.
+ */
+ return hci_le_big_create_sync_pending(hdev);
}
static void create_big_complete(struct hci_dev *hdev, void *data, int err)
@@ -2224,13 +2337,9 @@ struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst,
conn->iso_qos.bcast.big);
if (parent && parent != conn) {
link = hci_conn_link(parent, conn);
- if (!link) {
- hci_conn_drop(conn);
- return ERR_PTR(-ENOLINK);
- }
-
- /* Link takes the refcount */
hci_conn_drop(conn);
+ if (!link)
+ return ERR_PTR(-ENOLINK);
}
return conn;
@@ -2320,15 +2429,12 @@ struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst,
}
link = hci_conn_link(le, cis);
+ hci_conn_drop(cis);
if (!link) {
hci_conn_drop(le);
- hci_conn_drop(cis);
return ERR_PTR(-ENOLINK);
}
- /* Link takes the refcount */
- hci_conn_drop(cis);
-
cis->state = BT_CONNECT;
hci_le_create_cis_pending(hdev);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 0ac354db8177..f9e19f9cb5a3 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -3771,18 +3771,22 @@ static void hci_tx_work(struct work_struct *work)
/* ACL data packet */
static void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb)
{
- struct hci_acl_hdr *hdr = (void *) skb->data;
+ struct hci_acl_hdr *hdr;
struct hci_conn *conn;
__u16 handle, flags;
- skb_pull(skb, HCI_ACL_HDR_SIZE);
+ hdr = skb_pull_data(skb, sizeof(*hdr));
+ if (!hdr) {
+ bt_dev_err(hdev, "ACL packet too small");
+ goto drop;
+ }
handle = __le16_to_cpu(hdr->handle);
flags = hci_flags(handle);
handle = hci_handle(handle);
- BT_DBG("%s len %d handle 0x%4.4x flags 0x%4.4x", hdev->name, skb->len,
- handle, flags);
+ bt_dev_dbg(hdev, "len %d handle 0x%4.4x flags 0x%4.4x", skb->len,
+ handle, flags);
hdev->stat.acl_rx++;
@@ -3801,24 +3805,29 @@ static void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb)
handle);
}
+drop:
kfree_skb(skb);
}
/* SCO data packet */
static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb)
{
- struct hci_sco_hdr *hdr = (void *) skb->data;
+ struct hci_sco_hdr *hdr;
struct hci_conn *conn;
__u16 handle, flags;
- skb_pull(skb, HCI_SCO_HDR_SIZE);
+ hdr = skb_pull_data(skb, sizeof(*hdr));
+ if (!hdr) {
+ bt_dev_err(hdev, "SCO packet too small");
+ goto drop;
+ }
handle = __le16_to_cpu(hdr->handle);
flags = hci_flags(handle);
handle = hci_handle(handle);
- BT_DBG("%s len %d handle 0x%4.4x flags 0x%4.4x", hdev->name, skb->len,
- handle, flags);
+ bt_dev_dbg(hdev, "len %d handle 0x%4.4x flags 0x%4.4x", skb->len,
+ handle, flags);
hdev->stat.sco_rx++;
@@ -3836,6 +3845,7 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb)
handle);
}
+drop:
kfree_skb(skb);
}
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 0bbad90ddd6f..1427d6e2f3c9 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -42,8 +42,6 @@
#define ZERO_KEY "\x00\x00\x00\x00\x00\x00\x00\x00" \
"\x00\x00\x00\x00\x00\x00\x00\x00"
-#define secs_to_jiffies(_secs) msecs_to_jiffies((_secs) * 1000)
-
/* Handle HCI Event packets */
static void *hci_ev_skb_pull(struct hci_dev *hdev, struct sk_buff *skb,
@@ -3626,6 +3624,13 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, void *data,
goto unlock;
}
+ /* We skip the WRITE_AUTH_PAYLOAD_TIMEOUT for ATS2851 based controllers
+ * to avoid unexpected SMP command errors when pairing.
+ */
+ if (test_bit(HCI_QUIRK_BROKEN_WRITE_AUTH_PAYLOAD_TIMEOUT,
+ &hdev->quirks))
+ goto notify;
+
/* Set the default Authenticated Payload Timeout after
* an LE Link is established. As per Core Spec v5.0, Vol 2, Part B
* Section 3.3, the HCI command WRITE_AUTH_PAYLOAD_TIMEOUT should be
@@ -6345,7 +6350,7 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data,
struct hci_ev_le_pa_sync_established *ev = data;
int mask = hdev->link_mode;
__u8 flags = 0;
- struct hci_conn *pa_sync;
+ struct hci_conn *pa_sync, *conn;
bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
@@ -6353,6 +6358,20 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data,
hci_dev_clear_flag(hdev, HCI_PA_SYNC);
+ conn = hci_conn_hash_lookup_sid(hdev, ev->sid, &ev->bdaddr,
+ ev->bdaddr_type);
+ if (!conn) {
+ bt_dev_err(hdev,
+ "Unable to find connection for dst %pMR sid 0x%2.2x",
+ &ev->bdaddr, ev->sid);
+ goto unlock;
+ }
+
+ clear_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags);
+
+ conn->sync_handle = le16_to_cpu(ev->handle);
+ conn->sid = HCI_SID_INVALID;
+
mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ISO_LINK, &flags);
if (!(mask & HCI_LM_ACCEPT)) {
hci_le_pa_term_sync(hdev, ev->handle);
@@ -6379,6 +6398,9 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data,
}
unlock:
+ /* Handle any other pending PA sync command */
+ hci_pa_create_sync_pending(hdev);
+
hci_dev_unlock(hdev);
}
@@ -6896,7 +6918,7 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
struct hci_evt_le_big_sync_estabilished *ev = data;
- struct hci_conn *bis;
+ struct hci_conn *bis, *conn;
int i;
bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
@@ -6907,6 +6929,20 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);
+ conn = hci_conn_hash_lookup_big_sync_pend(hdev, ev->handle,
+ ev->num_bis);
+ if (!conn) {
+ bt_dev_err(hdev,
+ "Unable to find connection for big 0x%2.2x",
+ ev->handle);
+ goto unlock;
+ }
+
+ clear_bit(HCI_CONN_CREATE_BIG_SYNC, &conn->flags);
+
+ /* The pending BIS list has been consumed: reset the counter and wipe
+ * the whole bis buffer. The size must come from the bis member, not
+ * from the num_bis counter (assumes conn->bis is an embedded array,
+ * as its memcpy() users suggest - TODO confirm against the header).
+ */
+ conn->num_bis = 0;
+ memset(conn->bis, 0, sizeof(conn->bis));
+
for (i = 0; i < ev->num_bis; i++) {
u16 handle = le16_to_cpu(ev->bis[i]);
__le32 interval;
@@ -6927,6 +6963,7 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
/* Mark PA sync as established */
set_bit(HCI_CONN_PA_SYNC, &bis->flags);
+ bis->sync_handle = conn->sync_handle;
bis->iso_qos.bcast.big = ev->handle;
memset(&interval, 0, sizeof(interval));
memcpy(&interval, ev->latency, sizeof(ev->latency));
@@ -6956,6 +6993,10 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
hci_connect_cfm(bis, ev->status);
}
+unlock:
+ /* Handle any other pending BIG sync command */
+ hci_le_big_create_sync_pending(hdev);
+
hci_dev_unlock(hdev);
}
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index c0203a2b5107..c86f4e42e69c 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -4842,6 +4842,13 @@ static const struct {
HCI_QUIRK_BROKEN(SET_RPA_TIMEOUT,
"HCI LE Set Random Private Address Timeout command is "
"advertised, but not supported."),
+ HCI_QUIRK_BROKEN(EXT_CREATE_CONN,
+ "HCI LE Extended Create Connection command is "
+ "advertised, but not supported."),
+ HCI_QUIRK_BROKEN(WRITE_AUTH_PAYLOAD_TIMEOUT,
+ "HCI WRITE AUTH PAYLOAD TIMEOUT command leads "
+ "to unexpected SMP errors when pairing "
+ "and will not be used."),
HCI_QUIRK_BROKEN(LE_CODED,
"HCI LE Coded PHY feature bit is set, "
"but its usage is not supported.")
@@ -6477,7 +6484,7 @@ static int hci_le_create_conn_sync(struct hci_dev *hdev, void *data)
&own_addr_type);
if (err)
goto done;
-
+ /* Send command LE Extended Create Connection if supported */
if (use_ext_conn(hdev)) {
err = hci_le_ext_create_conn_sync(hdev, conn, own_addr_type);
goto done;
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 367e32fe30eb..4b54dbbf0729 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -21,16 +21,6 @@ static const struct device_type bt_link = {
.release = bt_link_release,
};
-/*
- * The rfcomm tty device will possibly retain even when conn
- * is down, and sysfs doesn't support move zombie device,
- * so we should move the device before conn device is destroyed.
- */
-static int __match_tty(struct device *dev, void *data)
-{
- return !strncmp(dev_name(dev), "rfcomm", 6);
-}
-
void hci_conn_init_sysfs(struct hci_conn *conn)
{
struct hci_dev *hdev = conn->hdev;
@@ -73,10 +63,13 @@ void hci_conn_del_sysfs(struct hci_conn *conn)
return;
}
+ /* If there are devices using the connection as parent reset it to NULL
+ * before unregistering the device.
+ */
while (1) {
struct device *dev;
- dev = device_find_child(&conn->dev, NULL, __match_tty);
+ dev = device_find_any_child(&conn->dev);
if (!dev)
break;
device_move(dev, NULL, DPM_ORDER_DEV_LAST);
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index 7a83e400ac77..1b40fd2b2f02 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -35,6 +35,7 @@ struct iso_conn {
struct sk_buff *rx_skb;
__u32 rx_len;
__u16 tx_sn;
+ struct kref ref;
};
#define iso_conn_lock(c) spin_lock(&(c)->lock)
@@ -93,6 +94,49 @@ static struct sock *iso_get_sock(bdaddr_t *src, bdaddr_t *dst,
#define ISO_CONN_TIMEOUT (HZ * 40)
#define ISO_DISCONN_TIMEOUT (HZ * 2)
+/* kref release callback for struct iso_conn (see iso_conn_put()).
+ *
+ * Clears the back-pointers held by the socket and by the hci_conn, drops
+ * the hci_conn, disables the timeout work and finally frees the object.
+ */
+static void iso_conn_free(struct kref *ref)
+{
+ struct iso_conn *conn = container_of(ref, struct iso_conn, ref);
+
+ BT_DBG("conn %p", conn);
+
+ if (conn->sk)
+ iso_pi(conn->sk)->conn = NULL;
+
+ if (conn->hcon) {
+ conn->hcon->iso_data = NULL;
+ hci_conn_drop(conn->hcon);
+ }
+
+ /* Ensure no more work items will run since hci_conn has been dropped */
+ disable_delayed_work_sync(&conn->timeout_work);
+
+ kfree(conn);
+}
+
+/* Drop one reference on @conn; iso_conn_free() runs when the last
+ * reference goes away. A NULL @conn is ignored.
+ */
+static void iso_conn_put(struct iso_conn *conn)
+{
+	if (!conn)
+		return;
+
+	/* kref_read() yields an unsigned int, so print it with %u (this
+	 * also matches iso_conn_hold_unless_zero()).
+	 */
+	BT_DBG("conn %p refcnt %u", conn, kref_read(&conn->ref));
+
+	kref_put(&conn->ref, iso_conn_free);
+}
+
+/* Try to take a reference on @conn.
+ *
+ * Returns @conn when a reference was obtained, or NULL when @conn is NULL
+ * or kref_get_unless_zero() reports that the count already hit zero.
+ */
+static struct iso_conn *iso_conn_hold_unless_zero(struct iso_conn *conn)
+{
+	if (conn) {
+		BT_DBG("conn %p refcnt %u", conn, kref_read(&conn->ref));
+
+		if (kref_get_unless_zero(&conn->ref))
+			return conn;
+	}
+
+	return NULL;
+}
+
static struct sock *iso_sock_hold(struct iso_conn *conn)
{
if (!conn || !bt_sock_linked(&iso_sk_list, conn->sk))
@@ -109,9 +153,14 @@ static void iso_sock_timeout(struct work_struct *work)
timeout_work.work);
struct sock *sk;
+ conn = iso_conn_hold_unless_zero(conn);
+ if (!conn)
+ return;
+
iso_conn_lock(conn);
sk = iso_sock_hold(conn);
iso_conn_unlock(conn);
+ iso_conn_put(conn);
if (!sk)
return;
@@ -149,9 +198,14 @@ static struct iso_conn *iso_conn_add(struct hci_conn *hcon)
{
struct iso_conn *conn = hcon->iso_data;
+ conn = iso_conn_hold_unless_zero(conn);
if (conn) {
- if (!conn->hcon)
+ if (!conn->hcon) {
+ iso_conn_lock(conn);
conn->hcon = hcon;
+ iso_conn_unlock(conn);
+ }
+ iso_conn_put(conn);
return conn;
}
@@ -159,6 +213,7 @@ static struct iso_conn *iso_conn_add(struct hci_conn *hcon)
if (!conn)
return NULL;
+ kref_init(&conn->ref);
spin_lock_init(&conn->lock);
INIT_DELAYED_WORK(&conn->timeout_work, iso_sock_timeout);
@@ -178,17 +233,15 @@ static void iso_chan_del(struct sock *sk, int err)
struct sock *parent;
conn = iso_pi(sk)->conn;
+ iso_pi(sk)->conn = NULL;
BT_DBG("sk %p, conn %p, err %d", sk, conn, err);
if (conn) {
iso_conn_lock(conn);
conn->sk = NULL;
- iso_pi(sk)->conn = NULL;
iso_conn_unlock(conn);
-
- if (conn->hcon)
- hci_conn_drop(conn->hcon);
+ iso_conn_put(conn);
}
sk->sk_state = BT_CLOSED;
@@ -210,6 +263,7 @@ static void iso_conn_del(struct hci_conn *hcon, int err)
struct iso_conn *conn = hcon->iso_data;
struct sock *sk;
+ conn = iso_conn_hold_unless_zero(conn);
if (!conn)
return;
@@ -219,20 +273,18 @@ static void iso_conn_del(struct hci_conn *hcon, int err)
iso_conn_lock(conn);
sk = iso_sock_hold(conn);
iso_conn_unlock(conn);
+ iso_conn_put(conn);
- if (sk) {
- lock_sock(sk);
- iso_sock_clear_timer(sk);
- iso_chan_del(sk, err);
- release_sock(sk);
- sock_put(sk);
+ if (!sk) {
+ iso_conn_put(conn);
+ return;
}
- /* Ensure no more work items will run before freeing conn. */
- cancel_delayed_work_sync(&conn->timeout_work);
-
- hcon->iso_data = NULL;
- kfree(conn);
+ lock_sock(sk);
+ iso_sock_clear_timer(sk);
+ iso_chan_del(sk, err);
+ release_sock(sk);
+ sock_put(sk);
}
static int __iso_chan_add(struct iso_conn *conn, struct sock *sk,
@@ -652,6 +704,8 @@ static void iso_sock_destruct(struct sock *sk)
{
BT_DBG("sk %p", sk);
+ iso_conn_put(iso_pi(sk)->conn);
+
skb_queue_purge(&sk->sk_receive_queue);
skb_queue_purge(&sk->sk_write_queue);
}
@@ -711,6 +765,7 @@ static void iso_sock_disconn(struct sock *sk)
*/
if (bis_sk) {
hcon->state = BT_OPEN;
+ hcon->iso_data = NULL;
iso_pi(sk)->conn->hcon = NULL;
iso_sock_clear_timer(sk);
iso_chan_del(sk, bt_to_errno(hcon->abort_reason));
@@ -720,7 +775,6 @@ static void iso_sock_disconn(struct sock *sk)
}
sk->sk_state = BT_DISCONN;
- iso_sock_set_timer(sk, ISO_DISCONN_TIMEOUT);
iso_conn_lock(iso_pi(sk)->conn);
hci_conn_drop(iso_pi(sk)->conn->hcon);
iso_pi(sk)->conn->hcon = NULL;
@@ -1338,6 +1392,13 @@ static void iso_conn_big_sync(struct sock *sk)
if (!hdev)
return;
+ /* hci_le_big_create_sync requires hdev lock to be held, since
+ * it enqueues the HCI LE BIG Create Sync command via
+ * hci_cmd_sync_queue_once, which checks hdev flags that might
+ * change.
+ */
+ hci_dev_lock(hdev);
+
if (!test_and_set_bit(BT_SK_BIG_SYNC, &iso_pi(sk)->flags)) {
err = hci_le_big_create_sync(hdev, iso_pi(sk)->conn->hcon,
&iso_pi(sk)->qos,
@@ -1348,6 +1409,8 @@ static void iso_conn_big_sync(struct sock *sk)
bt_dev_err(hdev, "hci_le_big_create_sync: %d",
err);
}
+
+ hci_dev_unlock(hdev);
}
static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg,
@@ -1733,6 +1796,13 @@ static bool iso_match_big(struct sock *sk, void *data)
return ev->handle == iso_pi(sk)->qos.bcast.big;
}
+/* Match callback used with iso_get_sock(): true when the BIG handle in the
+ * socket QoS equals the BIG handle of the hci_conn passed through @data.
+ */
+static bool iso_match_big_hcon(struct sock *sk, void *data)
+{
+	struct hci_conn *conn = data;
+
+	return iso_pi(sk)->qos.bcast.big == conn->iso_qos.bcast.big;
+}
+
static bool iso_match_pa_sync_flag(struct sock *sk, void *data)
{
return test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags);
@@ -1756,8 +1826,16 @@ static void iso_conn_ready(struct iso_conn *conn)
if (!hcon)
return;
- if (test_bit(HCI_CONN_BIG_SYNC, &hcon->flags) ||
- test_bit(HCI_CONN_BIG_SYNC_FAILED, &hcon->flags)) {
+ if (test_bit(HCI_CONN_BIG_SYNC, &hcon->flags)) {
+ /* A BIS slave hcon is notified to the ISO layer
+ * after the Command Complete for the LE Setup
+ * ISO Data Path command is received. Get the
+ * parent socket that matches the hcon BIG handle.
+ */
+ parent = iso_get_sock(&hcon->src, &hcon->dst,
+ BT_LISTEN, iso_match_big_hcon,
+ hcon);
+ } else if (test_bit(HCI_CONN_BIG_SYNC_FAILED, &hcon->flags)) {
ev = hci_recv_event_data(hcon->hdev,
HCI_EVT_LE_BIG_SYNC_ESTABILISHED);
@@ -1824,7 +1902,6 @@ static void iso_conn_ready(struct iso_conn *conn)
if (!bacmp(&hcon->dst, BDADDR_ANY)) {
bacpy(&hcon->dst, &iso_pi(parent)->dst);
hcon->dst_type = iso_pi(parent)->dst_type;
- hcon->sync_handle = iso_pi(parent)->sync_handle;
}
if (ev3) {
@@ -1942,6 +2019,7 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
if (sk) {
int err;
+ struct hci_conn *hcon = iso_pi(sk)->conn->hcon;
iso_pi(sk)->qos.bcast.encryption = ev2->encryption;
@@ -1950,7 +2028,8 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
if (!test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags) &&
!test_and_set_bit(BT_SK_BIG_SYNC, &iso_pi(sk)->flags)) {
- err = hci_le_big_create_sync(hdev, NULL,
+ err = hci_le_big_create_sync(hdev,
+ hcon,
&iso_pi(sk)->qos,
iso_pi(sk)->sync_handle,
iso_pi(sk)->bc_num_bis,
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index ba437c6f6ee5..18e89e764f3b 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1886,6 +1886,7 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock,
chan = l2cap_chan_create();
if (!chan) {
sk_free(sk);
+ sock->sk = NULL;
return NULL;
}
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index a429661b676a..b31192d473d0 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -132,6 +132,7 @@ static const u16 mgmt_commands[] = {
MGMT_OP_MESH_READ_FEATURES,
MGMT_OP_MESH_SEND,
MGMT_OP_MESH_SEND_CANCEL,
+ MGMT_OP_HCI_CMD_SYNC,
};
static const u16 mgmt_events[] = {
@@ -1317,7 +1318,8 @@ static void mgmt_set_powered_complete(struct hci_dev *hdev, void *data, int err)
struct mgmt_mode *cp;
/* Make sure cmd still outstanding. */
- if (cmd != pending_find(MGMT_OP_SET_POWERED, hdev))
+ if (err == -ECANCELED ||
+ cmd != pending_find(MGMT_OP_SET_POWERED, hdev))
return;
cp = cmd->param;
@@ -1350,7 +1352,13 @@ static void mgmt_set_powered_complete(struct hci_dev *hdev, void *data, int err)
static int set_powered_sync(struct hci_dev *hdev, void *data)
{
struct mgmt_pending_cmd *cmd = data;
- struct mgmt_mode *cp = cmd->param;
+ struct mgmt_mode *cp;
+
+ /* Make sure cmd still outstanding. */
+ if (cmd != pending_find(MGMT_OP_SET_POWERED, hdev))
+ return -ECANCELED;
+
+ cp = cmd->param;
BT_DBG("%s", hdev->name);
@@ -1510,7 +1518,8 @@ static void mgmt_set_discoverable_complete(struct hci_dev *hdev, void *data,
bt_dev_dbg(hdev, "err %d", err);
/* Make sure cmd still outstanding. */
- if (cmd != pending_find(MGMT_OP_SET_DISCOVERABLE, hdev))
+ if (err == -ECANCELED ||
+ cmd != pending_find(MGMT_OP_SET_DISCOVERABLE, hdev))
return;
hci_dev_lock(hdev);
@@ -1684,7 +1693,8 @@ static void mgmt_set_connectable_complete(struct hci_dev *hdev, void *data,
bt_dev_dbg(hdev, "err %d", err);
/* Make sure cmd still outstanding. */
- if (cmd != pending_find(MGMT_OP_SET_CONNECTABLE, hdev))
+ if (err == -ECANCELED ||
+ cmd != pending_find(MGMT_OP_SET_CONNECTABLE, hdev))
return;
hci_dev_lock(hdev);
@@ -1916,7 +1926,7 @@ static void set_ssp_complete(struct hci_dev *hdev, void *data, int err)
bool changed;
/* Make sure cmd still outstanding. */
- if (cmd != pending_find(MGMT_OP_SET_SSP, hdev))
+ if (err == -ECANCELED || cmd != pending_find(MGMT_OP_SET_SSP, hdev))
return;
if (err) {
@@ -2515,6 +2525,64 @@ unlock:
return err;
}
+/* cmd_sync work behind MGMT_OP_HCI_CMD_SYNC.
+ *
+ * Sends the HCI command described by the pending mgmt request, then answers
+ * the requesting socket: a command status on failure, or a command complete
+ * carrying the returned skb payload on success. The pending command is
+ * freed in both cases and 0 is always returned.
+ */
+static int send_hci_cmd_sync(struct hci_dev *hdev, void *data)
+{
+	struct mgmt_pending_cmd *cmd = data;
+	struct mgmt_cp_hci_cmd_sync *cp = cmd->param;
+	unsigned long timeout = HCI_CMD_TIMEOUT;
+	struct sk_buff *skb;
+
+	/* A non-zero request timeout is scaled by 1000 before being
+	 * converted from milliseconds to jiffies.
+	 */
+	if (cp->timeout)
+		timeout = msecs_to_jiffies(cp->timeout * 1000);
+
+	skb = __hci_cmd_sync_ev(hdev, le16_to_cpu(cp->opcode),
+				le16_to_cpu(cp->params_len), cp->params,
+				cp->event, timeout);
+	if (IS_ERR(skb)) {
+		mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_HCI_CMD_SYNC,
+				mgmt_status(PTR_ERR(skb)));
+	} else {
+		mgmt_cmd_complete(cmd->sk, hdev->id, MGMT_OP_HCI_CMD_SYNC, 0,
+				  skb->data, skb->len);
+
+		kfree_skb(skb);
+	}
+
+	mgmt_pending_free(cmd);
+
+	return 0;
+}
+
+/* MGMT_OP_HCI_CMD_SYNC handler.
+ *
+ * Validates the request, wraps it in a pending mgmt command and queues
+ * send_hci_cmd_sync() to do the actual HCI exchange.
+ *
+ * Returns the mgmt_cmd_status() result when the request is rejected or
+ * cannot be queued, otherwise the (zero) result of queueing the work.
+ */
+static int mgmt_hci_cmd_sync(struct sock *sk, struct hci_dev *hdev,
+			     void *data, u16 len)
+{
+	struct mgmt_cp_hci_cmd_sync *cp = data;
+	struct mgmt_pending_cmd *cmd;
+	int err;
+
+	/* The fixed header must be present, and the parameter bytes it
+	 * announces must fit in what was actually received. Otherwise
+	 * send_hci_cmd_sync() would hand params_len bytes of cp->params
+	 * to the controller although fewer were supplied.
+	 * (Assumes params is the trailing variable-length member, so
+	 * sizeof(*cp) is the header size - TODO confirm against mgmt.h.)
+	 */
+	if (len < sizeof(*cp) ||
+	    le16_to_cpu(cp->params_len) > len - sizeof(*cp))
+		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_HCI_CMD_SYNC,
+				       MGMT_STATUS_INVALID_PARAMS);
+
+	hci_dev_lock(hdev);
+	cmd = mgmt_pending_new(sk, MGMT_OP_HCI_CMD_SYNC, hdev, data, len);
+	if (!cmd)
+		err = -ENOMEM;
+	else
+		err = hci_cmd_sync_queue(hdev, send_hci_cmd_sync, cmd, NULL);
+
+	if (err < 0) {
+		err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_HCI_CMD_SYNC,
+				      MGMT_STATUS_FAILED);
+
+		/* Queueing failed, so the work will never free cmd */
+		if (cmd)
+			mgmt_pending_free(cmd);
+	}
+
+	hci_dev_unlock(hdev);
+	return err;
+}
+
/* This is a helper function to test for pending mgmt commands that can
* cause CoD or EIR HCI commands. We can only allow one such pending
* mgmt command at a time since otherwise we cannot easily track what
@@ -3782,7 +3850,8 @@ static void set_name_complete(struct hci_dev *hdev, void *data, int err)
bt_dev_dbg(hdev, "err %d", err);
- if (cmd != pending_find(MGMT_OP_SET_LOCAL_NAME, hdev))
+ if (err == -ECANCELED ||
+ cmd != pending_find(MGMT_OP_SET_LOCAL_NAME, hdev))
return;
if (status) {
@@ -3957,7 +4026,8 @@ static void set_default_phy_complete(struct hci_dev *hdev, void *data, int err)
struct sk_buff *skb = cmd->skb;
u8 status = mgmt_status(err);
- if (cmd != pending_find(MGMT_OP_SET_PHY_CONFIGURATION, hdev))
+ if (err == -ECANCELED ||
+ cmd != pending_find(MGMT_OP_SET_PHY_CONFIGURATION, hdev))
return;
if (!status) {
@@ -5848,13 +5918,16 @@ static void start_discovery_complete(struct hci_dev *hdev, void *data, int err)
{
struct mgmt_pending_cmd *cmd = data;
+ bt_dev_dbg(hdev, "err %d", err);
+
+ if (err == -ECANCELED)
+ return;
+
if (cmd != pending_find(MGMT_OP_START_DISCOVERY, hdev) &&
cmd != pending_find(MGMT_OP_START_LIMITED_DISCOVERY, hdev) &&
cmd != pending_find(MGMT_OP_START_SERVICE_DISCOVERY, hdev))
return;
- bt_dev_dbg(hdev, "err %d", err);
-
mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err),
cmd->param, 1);
mgmt_pending_remove(cmd);
@@ -6087,7 +6160,8 @@ static void stop_discovery_complete(struct hci_dev *hdev, void *data, int err)
{
struct mgmt_pending_cmd *cmd = data;
- if (cmd != pending_find(MGMT_OP_STOP_DISCOVERY, hdev))
+ if (err == -ECANCELED ||
+ cmd != pending_find(MGMT_OP_STOP_DISCOVERY, hdev))
return;
bt_dev_dbg(hdev, "err %d", err);
@@ -8078,7 +8152,8 @@ static void read_local_oob_ext_data_complete(struct hci_dev *hdev, void *data,
u8 status = mgmt_status(err);
u16 eir_len;
- if (cmd != pending_find(MGMT_OP_READ_LOCAL_OOB_EXT_DATA, hdev))
+ if (err == -ECANCELED ||
+ cmd != pending_find(MGMT_OP_READ_LOCAL_OOB_EXT_DATA, hdev))
return;
if (!status) {
@@ -9371,6 +9446,7 @@ static const struct hci_mgmt_handler mgmt_handlers[] = {
{ mesh_send, MGMT_MESH_SEND_SIZE,
HCI_MGMT_VAR_LEN },
{ mesh_send_cancel, MGMT_MESH_SEND_CANCEL_SIZE },
+ { mgmt_hci_cmd_sync, MGMT_HCI_CMD_SYNC_SIZE, HCI_MGMT_VAR_LEN },
};
void mgmt_index_added(struct hci_dev *hdev)
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index f48250e3f2e1..40766f8119ed 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -274,13 +274,13 @@ static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock,
struct rfcomm_dlc *d;
struct sock *sk;
- sk = bt_sock_alloc(net, sock, &rfcomm_proto, proto, prio, kern);
- if (!sk)
+ d = rfcomm_dlc_alloc(prio);
+ if (!d)
return NULL;
- d = rfcomm_dlc_alloc(prio);
- if (!d) {
- sk_free(sk);
+ sk = bt_sock_alloc(net, sock, &rfcomm_proto, proto, prio, kern);
+ if (!sk) {
+ rfcomm_dlc_free(d);
return NULL;
}
@@ -729,7 +729,8 @@ static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __u
struct sock *l2cap_sk;
struct l2cap_conn *conn;
struct rfcomm_conninfo cinfo;
- int len, err = 0;
+ int err = 0;
+ size_t len;
u32 opt;
BT_DBG("sk %p", sk);
@@ -783,7 +784,7 @@ static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __u
cinfo.hci_handle = conn->hcon->handle;
memcpy(cinfo.dev_class, conn->hcon->dev_class, 3);
- len = min_t(unsigned int, len, sizeof(cinfo));
+ len = min(len, sizeof(cinfo));
if (copy_to_user(optval, (char *) &cinfo, len))
err = -EFAULT;
@@ -802,7 +803,8 @@ static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, c
{
struct sock *sk = sock->sk;
struct bt_security sec;
- int len, err = 0;
+ int err = 0;
+ size_t len;
BT_DBG("sk %p", sk);
@@ -827,7 +829,7 @@ static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, c
sec.level = rfcomm_pi(sk)->sec_level;
sec.key_size = 0;
- len = min_t(unsigned int, len, sizeof(sec));
+ len = min(len, sizeof(sec));
if (copy_to_user(optval, (char *) &sec, len))
err = -EFAULT;
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 1c7252a36866..78f7bca24487 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -51,6 +51,7 @@ struct sco_conn {
struct delayed_work timeout_work;
unsigned int mtu;
+ struct kref ref;
};
#define sco_conn_lock(c) spin_lock(&c->lock)
@@ -76,6 +77,49 @@ struct sco_pinfo {
#define SCO_CONN_TIMEOUT (HZ * 40)
#define SCO_DISCONN_TIMEOUT (HZ * 2)
+/* kref release callback for struct sco_conn (see sco_conn_put()).
+ *
+ * Clears the back-pointers held by the socket and by the hci_conn, drops
+ * the hci_conn, disables the timeout work and finally frees the object.
+ */
+static void sco_conn_free(struct kref *ref)
+{
+ struct sco_conn *conn = container_of(ref, struct sco_conn, ref);
+
+ BT_DBG("conn %p", conn);
+
+ if (conn->sk)
+ sco_pi(conn->sk)->conn = NULL;
+
+ if (conn->hcon) {
+ conn->hcon->sco_data = NULL;
+ hci_conn_drop(conn->hcon);
+ }
+
+ /* Ensure no more work items will run since hci_conn has been dropped */
+ disable_delayed_work_sync(&conn->timeout_work);
+
+ kfree(conn);
+}
+
+/* Drop one reference on @conn; sco_conn_free() runs when the last
+ * reference goes away. A NULL @conn is ignored.
+ */
+static void sco_conn_put(struct sco_conn *conn)
+{
+	if (!conn)
+		return;
+
+	/* kref_read() yields an unsigned int, so print it with %u (this
+	 * also matches sco_conn_hold_unless_zero()).
+	 */
+	BT_DBG("conn %p refcnt %u", conn, kref_read(&conn->ref));
+
+	kref_put(&conn->ref, sco_conn_free);
+}
+
+/* Try to take a reference on @conn.
+ *
+ * Returns @conn when a reference was obtained, or NULL when @conn is NULL
+ * or kref_get_unless_zero() reports that the count already hit zero.
+ */
+static struct sco_conn *sco_conn_hold_unless_zero(struct sco_conn *conn)
+{
+	if (conn) {
+		BT_DBG("conn %p refcnt %u", conn, kref_read(&conn->ref));
+
+		if (kref_get_unless_zero(&conn->ref))
+			return conn;
+	}
+
+	return NULL;
+}
+
static struct sock *sco_sock_hold(struct sco_conn *conn)
{
if (!conn || !bt_sock_linked(&sco_sk_list, conn->sk))
@@ -92,13 +136,19 @@ static void sco_sock_timeout(struct work_struct *work)
timeout_work.work);
struct sock *sk;
+ conn = sco_conn_hold_unless_zero(conn);
+ if (!conn)
+ return;
+
sco_conn_lock(conn);
if (!conn->hcon) {
sco_conn_unlock(conn);
+ sco_conn_put(conn);
return;
}
sk = sco_sock_hold(conn);
sco_conn_unlock(conn);
+ sco_conn_put(conn);
if (!sk)
return;
@@ -136,9 +186,13 @@ static struct sco_conn *sco_conn_add(struct hci_conn *hcon)
{
struct sco_conn *conn = hcon->sco_data;
+ conn = sco_conn_hold_unless_zero(conn);
if (conn) {
- if (!conn->hcon)
+ if (!conn->hcon) {
+ sco_conn_lock(conn);
conn->hcon = hcon;
+ sco_conn_unlock(conn);
+ }
return conn;
}
@@ -146,6 +200,7 @@ static struct sco_conn *sco_conn_add(struct hci_conn *hcon)
if (!conn)
return NULL;
+ kref_init(&conn->ref);
spin_lock_init(&conn->lock);
INIT_DELAYED_WORK(&conn->timeout_work, sco_sock_timeout);
@@ -170,17 +225,15 @@ static void sco_chan_del(struct sock *sk, int err)
struct sco_conn *conn;
conn = sco_pi(sk)->conn;
+ sco_pi(sk)->conn = NULL;
BT_DBG("sk %p, conn %p, err %d", sk, conn, err);
if (conn) {
sco_conn_lock(conn);
conn->sk = NULL;
- sco_pi(sk)->conn = NULL;
sco_conn_unlock(conn);
-
- if (conn->hcon)
- hci_conn_drop(conn->hcon);
+ sco_conn_put(conn);
}
sk->sk_state = BT_CLOSED;
@@ -195,29 +248,28 @@ static void sco_conn_del(struct hci_conn *hcon, int err)
struct sco_conn *conn = hcon->sco_data;
struct sock *sk;
+ conn = sco_conn_hold_unless_zero(conn);
if (!conn)
return;
BT_DBG("hcon %p conn %p, err %d", hcon, conn, err);
- /* Kill socket */
sco_conn_lock(conn);
sk = sco_sock_hold(conn);
sco_conn_unlock(conn);
+ sco_conn_put(conn);
- if (sk) {
- lock_sock(sk);
- sco_sock_clear_timer(sk);
- sco_chan_del(sk, err);
- release_sock(sk);
- sock_put(sk);
+ if (!sk) {
+ sco_conn_put(conn);
+ return;
}
- /* Ensure no more work items will run before freeing conn. */
- cancel_delayed_work_sync(&conn->timeout_work);
-
- hcon->sco_data = NULL;
- kfree(conn);
+ /* Kill socket */
+ lock_sock(sk);
+ sco_sock_clear_timer(sk);
+ sco_chan_del(sk, err);
+ release_sock(sk);
+ sock_put(sk);
}
static void __sco_chan_add(struct sco_conn *conn, struct sock *sk,
@@ -401,6 +453,8 @@ static void sco_sock_destruct(struct sock *sk)
{
BT_DBG("sk %p", sk);
+ sco_conn_put(sco_pi(sk)->conn);
+
skb_queue_purge(&sk->sk_receive_queue);
skb_queue_purge(&sk->sk_write_queue);
}
@@ -448,17 +502,6 @@ static void __sco_sock_close(struct sock *sk)
case BT_CONNECTED:
case BT_CONFIG:
- if (sco_pi(sk)->conn->hcon) {
- sk->sk_state = BT_DISCONN;
- sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT);
- sco_conn_lock(sco_pi(sk)->conn);
- hci_conn_drop(sco_pi(sk)->conn->hcon);
- sco_pi(sk)->conn->hcon = NULL;
- sco_conn_unlock(sco_pi(sk)->conn);
- } else
- sco_chan_del(sk, ECONNRESET);
- break;
-
case BT_CONNECT2:
case BT_CONNECT:
case BT_DISCONN:
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 26b79feb385d..0ab4613aa07a 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -328,7 +328,7 @@ int br_netpoll_enable(struct net_bridge_port *p)
return __br_netpoll_enable(p);
}
-static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni)
+static int br_netpoll_setup(struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
struct net_bridge_port *p;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 642b8ccaae8e..82bac2426631 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -73,13 +73,6 @@ static inline int has_expired(const struct net_bridge *br,
time_before_eq(fdb->updated + hold_time(br), jiffies);
}
-static void fdb_rcu_free(struct rcu_head *head)
-{
- struct net_bridge_fdb_entry *ent
- = container_of(head, struct net_bridge_fdb_entry, rcu);
- kmem_cache_free(br_fdb_cache, ent);
-}
-
static int fdb_to_nud(const struct net_bridge *br,
const struct net_bridge_fdb_entry *fdb)
{
@@ -329,7 +322,7 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f,
if (test_and_clear_bit(BR_FDB_DYNAMIC_LEARNED, &f->flags))
atomic_dec(&br->fdb_n_learned);
fdb_notify(br, f, RTM_DELNEIGH, swdev_notify);
- call_rcu(&f->rcu, fdb_rcu_free);
+ kfree_rcu(f, rcu);
}
/* Delete a local entry if no other port had the same address.
@@ -1159,7 +1152,7 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
struct net_bridge_port *p, const unsigned char *addr,
u16 nlh_flags, u16 vid, struct nlattr *nfea_tb[],
- struct netlink_ext_ack *extack)
+ bool *notified, struct netlink_ext_ack *extack)
{
int err = 0;
@@ -1190,6 +1183,8 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
spin_unlock_bh(&br->hash_lock);
}
+ if (!err)
+ *notified = true;
return err;
}
@@ -1202,7 +1197,7 @@ static const struct nla_policy br_nda_fdb_pol[NFEA_MAX + 1] = {
int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
const unsigned char *addr, u16 vid, u16 nlh_flags,
- struct netlink_ext_ack *extack)
+ bool *notified, struct netlink_ext_ack *extack)
{
struct nlattr *nfea_tb[NFEA_MAX + 1], *attr;
struct net_bridge_vlan_group *vg;
@@ -1265,10 +1260,10 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
/* VID was specified, so use it. */
err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid, nfea_tb,
- extack);
+ notified, extack);
} else {
err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0, nfea_tb,
- extack);
+ notified, extack);
if (err || !vg || !vg->num_vlans)
goto out;
@@ -1280,7 +1275,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
if (!br_vlan_should_use(v))
continue;
err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid,
- nfea_tb, extack);
+ nfea_tb, notified, extack);
if (err)
goto out;
}
@@ -1292,7 +1287,7 @@ out:
static int fdb_delete_by_addr_and_port(struct net_bridge *br,
const struct net_bridge_port *p,
- const u8 *addr, u16 vlan)
+ const u8 *addr, u16 vlan, bool *notified)
{
struct net_bridge_fdb_entry *fdb;
@@ -1301,18 +1296,19 @@ static int fdb_delete_by_addr_and_port(struct net_bridge *br,
return -ENOENT;
fdb_delete(br, fdb, true);
+ *notified = true;
return 0;
}
static int __br_fdb_delete(struct net_bridge *br,
const struct net_bridge_port *p,
- const unsigned char *addr, u16 vid)
+ const unsigned char *addr, u16 vid, bool *notified)
{
int err;
spin_lock_bh(&br->hash_lock);
- err = fdb_delete_by_addr_and_port(br, p, addr, vid);
+ err = fdb_delete_by_addr_and_port(br, p, addr, vid, notified);
spin_unlock_bh(&br->hash_lock);
return err;
@@ -1321,12 +1317,11 @@ static int __br_fdb_delete(struct net_bridge *br,
/* Remove neighbor entry with RTM_DELNEIGH */
int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
- const unsigned char *addr, u16 vid,
+ const unsigned char *addr, u16 vid, bool *notified,
struct netlink_ext_ack *extack)
{
struct net_bridge_vlan_group *vg;
struct net_bridge_port *p = NULL;
- struct net_bridge_vlan *v;
struct net_bridge *br;
int err;
@@ -1345,23 +1340,19 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
}
if (vid) {
- v = br_vlan_find(vg, vid);
- if (!v) {
- pr_info("bridge: RTM_DELNEIGH with unconfigured vlan %d on %s\n", vid, dev->name);
- return -EINVAL;
- }
-
- err = __br_fdb_delete(br, p, addr, vid);
+ err = __br_fdb_delete(br, p, addr, vid, notified);
} else {
+ struct net_bridge_vlan *v;
+
err = -ENOENT;
- err &= __br_fdb_delete(br, p, addr, 0);
+ err &= __br_fdb_delete(br, p, addr, 0, notified);
if (!vg || !vg->num_vlans)
return err;
list_for_each_entry(v, &vg->vlan_list, vlist) {
if (!br_vlan_should_use(v))
continue;
- err &= __br_fdb_delete(br, p, addr, v->vid);
+ err &= __br_fdb_delete(br, p, addr, v->vid, notified);
}
}
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 1d458e9da660..451e45b9a6a5 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -370,11 +370,11 @@ br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb,
*/
static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct net_device *dev = skb->dev, *br_indev;
- struct iphdr *iph = ip_hdr(skb);
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
+ struct net_device *dev = skb->dev, *br_indev;
+ const struct iphdr *iph = ip_hdr(skb);
+ enum skb_drop_reason reason;
struct rtable *rt;
- int err;
br_indev = nf_bridge_get_physindev(skb, net);
if (!br_indev) {
@@ -390,7 +390,9 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_
}
nf_bridge->in_prerouting = 0;
if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) {
- if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
+ reason = ip_route_input(skb, iph->daddr, iph->saddr,
+ ip4h_dscp(iph), dev);
+ if (reason) {
struct in_device *in_dev = __in_dev_get_rcu(dev);
/* If err equals -EHOSTUNREACH the error is due to a
@@ -400,11 +402,12 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_
* martian destinations: loopback destinations and destination
* 0.0.0.0. In both cases the packet will be dropped because the
* destination is the loopback device and not the bridge. */
- if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev))
+ if (reason != SKB_DROP_REASON_IP_INADDRERRORS || !in_dev ||
+ IN_DEV_FORWARD(in_dev))
goto free_skb;
rt = ip_route_output(net, iph->daddr, 0,
- iph->tos & INET_DSCP_MASK, 0,
+ ip4h_dscp(iph), 0,
RT_SCOPE_UNIVERSE);
if (!IS_ERR(rt)) {
/* - Bridged-and-DNAT'ed traffic doesn't
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 6b97ae47f855..3e0f47203f2a 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1924,7 +1924,9 @@ int __init br_netlink_init(void)
if (err)
goto out;
- rtnl_af_register(&br_af_ops);
+ err = rtnl_af_register(&br_af_ops);
+ if (err)
+ goto out_vlan;
err = rtnl_link_register(&br_link_ops);
if (err)
@@ -1934,6 +1936,8 @@ int __init br_netlink_init(void)
out_af:
rtnl_af_unregister(&br_af_ops);
+out_vlan:
+ br_vlan_rtnl_uninit();
out:
return err;
}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 041f6e571a20..9853cfbb9d14 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -853,12 +853,12 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev, const unsigned char *addr, u16 vid,
- struct netlink_ext_ack *extack);
+ bool *notified, struct netlink_ext_ack *extack);
int br_fdb_delete_bulk(struct nlmsghdr *nlh, struct net_device *dev,
struct netlink_ext_ack *extack);
int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev,
const unsigned char *addr, u16 vid, u16 nlh_flags,
- struct netlink_ext_ack *extack);
+ bool *notified, struct netlink_ext_ack *extack);
int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *dev, struct net_device *fdev, int *idx);
int br_fdb_get(struct sk_buff *skb, struct nlattr *tb[], struct net_device *dev,
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index 104c0125e32e..f16bbbbb9481 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -41,7 +41,13 @@ config NF_CONNTRACK_BRIDGE
# old sockopt interface and eval loop
config BRIDGE_NF_EBTABLES_LEGACY
- tristate
+ tristate "Legacy EBTABLES support"
+ depends on BRIDGE && NETFILTER_XTABLES
+ default n
+ help
+ Legacy ebtables packet/frame classifier.
+ This is not needed if you are using ebtables over nftables
+ (iptables-nft).
menuconfig BRIDGE_NF_EBTABLES
tristate "Ethernet Bridge tables (ebtables) support"
diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
index d12a221366d6..5adced1e7d0c 100644
--- a/net/bridge/netfilter/nft_meta_bridge.c
+++ b/net/bridge/netfilter/nft_meta_bridge.c
@@ -63,7 +63,7 @@ static void nft_meta_bridge_get_eval(const struct nft_expr *expr,
return nft_meta_get_eval(expr, regs, pkt);
}
- strncpy((char *)dest, br_dev ? br_dev->name : "", IFNAMSIZ);
+ strscpy_pad((char *)dest, br_dev ? br_dev->name : "", IFNAMSIZ);
return;
err:
regs->verdict.code = NFT_BREAK;
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index 9cef9496a707..171fa32ada85 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -183,12 +183,6 @@ bool cfsrvl_ready(struct cfsrvl *service, int *err)
return true;
}
-u8 cfsrvl_getphyid(struct cflayer *layer)
-{
- struct cfsrvl *servl = container_obj(layer);
- return servl->dev_info.id;
-}
-
bool cfsrvl_phyid_match(struct cflayer *layer, int phyid)
{
struct cfsrvl *servl = container_obj(layer);
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 707576eeeb58..01f3fbb3b67d 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -171,6 +171,7 @@ static int can_create(struct net *net, struct socket *sock, int protocol,
/* release sk on errors */
sock_orphan(sk);
sock_put(sk);
+ sock->sk = NULL;
}
errout:
diff --git a/net/can/gw.c b/net/can/gw.c
index 37528826935e..ef93293c1fae 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -1265,6 +1265,15 @@ static struct pernet_operations cangw_pernet_ops = {
.exit_batch = cangw_pernet_exit_batch,
};
+static const struct rtnl_msg_handler cgw_rtnl_msg_handlers[] __initconst_or_module = {
+ {.owner = THIS_MODULE, .protocol = PF_CAN, .msgtype = RTM_NEWROUTE,
+ .doit = cgw_create_job},
+ {.owner = THIS_MODULE, .protocol = PF_CAN, .msgtype = RTM_DELROUTE,
+ .doit = cgw_remove_job},
+ {.owner = THIS_MODULE, .protocol = PF_CAN, .msgtype = RTM_GETROUTE,
+ .dumpit = cgw_dump_jobs},
+};
+
static __init int cgw_module_init(void)
{
int ret;
@@ -1290,27 +1299,13 @@ static __init int cgw_module_init(void)
if (ret)
goto out_register_notifier;
- ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_GETROUTE,
- NULL, cgw_dump_jobs, 0);
- if (ret)
- goto out_rtnl_register1;
-
- ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_NEWROUTE,
- cgw_create_job, NULL, 0);
- if (ret)
- goto out_rtnl_register2;
- ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_DELROUTE,
- cgw_remove_job, NULL, 0);
+ ret = rtnl_register_many(cgw_rtnl_msg_handlers);
if (ret)
- goto out_rtnl_register3;
+ goto out_rtnl_register;
return 0;
-out_rtnl_register3:
- rtnl_unregister(PF_CAN, RTM_NEWROUTE);
-out_rtnl_register2:
- rtnl_unregister(PF_CAN, RTM_GETROUTE);
-out_rtnl_register1:
+out_rtnl_register:
unregister_netdevice_notifier(&notifier);
out_register_notifier:
kmem_cache_destroy(cgw_cache);
diff --git a/net/can/raw.c b/net/can/raw.c
index 00533f64d69d..255c0a8f39d6 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -966,7 +966,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
skb->mark = READ_ONCE(sk->sk_mark);
skb->tstamp = sockc.transmit_time;
- skb_setup_tx_timestamp(skb, sockc.tsflags);
+ skb_setup_tx_timestamp(skb, &sockc);
err = can_send(skb, ro->loopback);
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 051d22c0e4ad..01b2ce1e8fc0 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -74,18 +74,6 @@ int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
return set_secret(dst, src->key);
}
-int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end)
-{
- if (*p + sizeof(u16) + sizeof(key->created) +
- sizeof(u16) + key->len > end)
- return -ERANGE;
- ceph_encode_16(p, key->type);
- ceph_encode_copy(p, &key->created, sizeof(key->created));
- ceph_encode_16(p, key->len);
- ceph_encode_copy(p, key->key, key->len);
- return 0;
-}
-
int ceph_crypto_key_decode(struct ceph_crypto_key *key, void **p, void *end)
{
int ret;
diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h
index 13bd526349fa..23de29fc613c 100644
--- a/net/ceph/crypto.h
+++ b/net/ceph/crypto.h
@@ -21,7 +21,6 @@ struct ceph_crypto_key {
int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
const struct ceph_crypto_key *src);
-int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end);
int ceph_crypto_key_decode(struct ceph_crypto_key *key, void **p, void *end);
int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *in);
void ceph_crypto_key_destroy(struct ceph_crypto_key *key);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 9d078b37fe0b..9b1168eb77ab 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -4999,40 +4999,6 @@ out_put_lreq:
}
EXPORT_SYMBOL(ceph_osdc_notify);
-/*
- * Return the number of milliseconds since the watch was last
- * confirmed, or an error. If there is an error, the watch is no
- * longer valid, and should be destroyed with ceph_osdc_unwatch().
- */
-int ceph_osdc_watch_check(struct ceph_osd_client *osdc,
- struct ceph_osd_linger_request *lreq)
-{
- unsigned long stamp, age;
- int ret;
-
- down_read(&osdc->lock);
- mutex_lock(&lreq->lock);
- stamp = lreq->watch_valid_thru;
- if (!list_empty(&lreq->pending_lworks)) {
- struct linger_work *lwork =
- list_first_entry(&lreq->pending_lworks,
- struct linger_work,
- pending_item);
-
- if (time_before(lwork->queued_stamp, stamp))
- stamp = lwork->queued_stamp;
- }
- age = jiffies - stamp;
- dout("%s lreq %p linger_id %llu age %lu last_error %d\n", __func__,
- lreq, lreq->linger_id, age, lreq->last_error);
- /* we are truncating to msecs, so return a safe upper bound */
- ret = lreq->last_error ?: 1 + jiffies_to_msecs(age);
-
- mutex_unlock(&lreq->lock);
- up_read(&osdc->lock);
- return ret;
-}
-
static int decode_watcher(void **p, void *end, struct ceph_watch_item *item)
{
u8 struct_v;
diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c
index 74622b278d57..5a9c4be5f222 100644
--- a/net/ceph/pagelist.c
+++ b/net/ceph/pagelist.c
@@ -131,41 +131,3 @@ int ceph_pagelist_free_reserve(struct ceph_pagelist *pl)
return 0;
}
EXPORT_SYMBOL(ceph_pagelist_free_reserve);
-
-/* Create a truncation point. */
-void ceph_pagelist_set_cursor(struct ceph_pagelist *pl,
- struct ceph_pagelist_cursor *c)
-{
- c->pl = pl;
- c->page_lru = pl->head.prev;
- c->room = pl->room;
-}
-EXPORT_SYMBOL(ceph_pagelist_set_cursor);
-
-/* Truncate a pagelist to the given point. Move extra pages to reserve.
- * This won't sleep.
- * Returns: 0 on success,
- * -EINVAL if the pagelist doesn't match the trunc point pagelist
- */
-int ceph_pagelist_truncate(struct ceph_pagelist *pl,
- struct ceph_pagelist_cursor *c)
-{
- struct page *page;
-
- if (pl != c->pl)
- return -EINVAL;
- ceph_pagelist_unmap_tail(pl);
- while (pl->head.prev != c->page_lru) {
- page = list_entry(pl->head.prev, struct page, lru);
- /* move from pagelist to reserve */
- list_move_tail(&page->lru, &pl->free_list);
- ++pl->num_pages_free;
- }
- pl->room = c->room;
- if (!list_empty(&pl->head)) {
- page = list_entry(pl->head.prev, struct page, lru);
- pl->mapped_tail = kmap(page);
- }
- return 0;
-}
-EXPORT_SYMBOL(ceph_pagelist_truncate);
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 64305e7056a1..4509757d8b3b 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -55,58 +55,6 @@ struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags)
}
EXPORT_SYMBOL(ceph_alloc_page_vector);
-/*
- * copy user data into a page vector
- */
-int ceph_copy_user_to_page_vector(struct page **pages,
- const void __user *data,
- loff_t off, size_t len)
-{
- int i = 0;
- int po = off & ~PAGE_MASK;
- int left = len;
- int l, bad;
-
- while (left > 0) {
- l = min_t(int, PAGE_SIZE-po, left);
- bad = copy_from_user(page_address(pages[i]) + po, data, l);
- if (bad == l)
- return -EFAULT;
- data += l - bad;
- left -= l - bad;
- po += l - bad;
- if (po == PAGE_SIZE) {
- po = 0;
- i++;
- }
- }
- return len;
-}
-EXPORT_SYMBOL(ceph_copy_user_to_page_vector);
-
-void ceph_copy_to_page_vector(struct page **pages,
- const void *data,
- loff_t off, size_t len)
-{
- int i = 0;
- size_t po = off & ~PAGE_MASK;
- size_t left = len;
-
- while (left > 0) {
- size_t l = min_t(size_t, PAGE_SIZE-po, left);
-
- memcpy(page_address(pages[i]) + po, data, l);
- data += l;
- left -= l;
- po += l;
- if (po == PAGE_SIZE) {
- po = 0;
- i++;
- }
- }
-}
-EXPORT_SYMBOL(ceph_copy_to_page_vector);
-
void ceph_copy_from_page_vector(struct page **pages,
void *data,
loff_t off, size_t len)
diff --git a/net/core/Makefile b/net/core/Makefile
index c3ebbaf9c81e..d9326600e289 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -45,3 +45,5 @@ obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
obj-$(CONFIG_OF) += of_net.o
obj-$(CONFIG_NET_TEST) += net_test.o
obj-$(CONFIG_NET_DEVMEM) += devmem.o
+obj-$(CONFIG_DEBUG_NET_SMALL_RTNL) += rtnl_net_debug.o
+obj-$(CONFIG_FAIL_SKB_REALLOC) += skb_fault_injection.o
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index bc01b3aa6b0f..2f4ed83a75ae 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -106,7 +106,7 @@ static long bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
if (sock) {
sdata = bpf_local_storage_update(
sock->sk, (struct bpf_local_storage_map *)map, value,
- map_flags, GFP_ATOMIC);
+ map_flags, false, GFP_ATOMIC);
sockfd_put(sock);
return PTR_ERR_OR_ZERO(sdata);
}
@@ -137,7 +137,7 @@ bpf_sk_storage_clone_elem(struct sock *newsk,
{
struct bpf_local_storage_elem *copy_selem;
- copy_selem = bpf_selem_alloc(smap, newsk, NULL, true, GFP_ATOMIC);
+ copy_selem = bpf_selem_alloc(smap, newsk, NULL, true, false, GFP_ATOMIC);
if (!copy_selem)
return NULL;
@@ -243,7 +243,7 @@ BPF_CALL_5(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
refcount_inc_not_zero(&sk->sk_refcnt)) {
sdata = bpf_local_storage_update(
sk, (struct bpf_local_storage_map *)map, value,
- BPF_NOEXIST, gfp_flags);
+ BPF_NOEXIST, false, gfp_flags);
/* sk must be a fullsock (guaranteed by verifier),
* so sock_gen_put() is unnecessary.
*/
diff --git a/net/core/dev.c b/net/core/dev.c
index 8453e14d301b..13d00fc10f55 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2949,6 +2949,8 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
if (dev->num_tc)
netif_setup_tc(dev, txq);
+ net_shaper_set_real_num_tx_queues(dev, txq);
+
dev_qdisc_change_real_num_tx(dev, txq);
dev->real_num_tx_queues = txq;
@@ -6234,12 +6236,12 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
if (work_done) {
if (n->gro_bitmask)
- timeout = READ_ONCE(n->dev->gro_flush_timeout);
- n->defer_hard_irqs_count = READ_ONCE(n->dev->napi_defer_hard_irqs);
+ timeout = napi_get_gro_flush_timeout(n);
+ n->defer_hard_irqs_count = napi_get_defer_hard_irqs(n);
}
if (n->defer_hard_irqs_count > 0) {
n->defer_hard_irqs_count--;
- timeout = READ_ONCE(n->dev->gro_flush_timeout);
+ timeout = napi_get_gro_flush_timeout(n);
if (timeout)
ret = false;
}
@@ -6373,8 +6375,8 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
if (flags & NAPI_F_PREFER_BUSY_POLL) {
- napi->defer_hard_irqs_count = READ_ONCE(napi->dev->napi_defer_hard_irqs);
- timeout = READ_ONCE(napi->dev->gro_flush_timeout);
+ napi->defer_hard_irqs_count = napi_get_defer_hard_irqs(napi);
+ timeout = napi_get_gro_flush_timeout(napi);
if (napi->defer_hard_irqs_count && timeout) {
hrtimer_start(&napi->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED);
skip_schedule = true;
@@ -6505,8 +6507,62 @@ void napi_busy_loop(unsigned int napi_id,
}
EXPORT_SYMBOL(napi_busy_loop);
+void napi_suspend_irqs(unsigned int napi_id)
+{
+ struct napi_struct *napi;
+
+ rcu_read_lock();
+ napi = napi_by_id(napi_id);
+ if (napi) {
+ unsigned long timeout = napi_get_irq_suspend_timeout(napi);
+
+ if (timeout)
+ hrtimer_start(&napi->timer, ns_to_ktime(timeout),
+ HRTIMER_MODE_REL_PINNED);
+ }
+ rcu_read_unlock();
+}
+
+void napi_resume_irqs(unsigned int napi_id)
+{
+ struct napi_struct *napi;
+
+ rcu_read_lock();
+ napi = napi_by_id(napi_id);
+ if (napi) {
+ /* If irq_suspend_timeout is set to 0 between the call to
+ * napi_suspend_irqs and now, the original value still
+ * determines the safety timeout as intended and napi_watchdog
+ * will resume irq processing.
+ */
+ if (napi_get_irq_suspend_timeout(napi)) {
+ local_bh_disable();
+ napi_schedule(napi);
+ local_bh_enable();
+ }
+ }
+ rcu_read_unlock();
+}
+
#endif /* CONFIG_NET_RX_BUSY_POLL */
+static void __napi_hash_add_with_id(struct napi_struct *napi,
+ unsigned int napi_id)
+{
+ napi->napi_id = napi_id;
+ hlist_add_head_rcu(&napi->napi_hash_node,
+ &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
+}
+
+static void napi_hash_add_with_id(struct napi_struct *napi,
+ unsigned int napi_id)
+{
+ spin_lock(&napi_hash_lock);
+ WARN_ON_ONCE(napi_by_id(napi_id));
+ __napi_hash_add_with_id(napi, napi_id);
+ spin_unlock(&napi_hash_lock);
+}
+
static void napi_hash_add(struct napi_struct *napi)
{
if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state))
@@ -6519,10 +6575,8 @@ static void napi_hash_add(struct napi_struct *napi)
if (unlikely(++napi_gen_id < MIN_NAPI_ID))
napi_gen_id = MIN_NAPI_ID;
} while (napi_by_id(napi_gen_id));
- napi->napi_id = napi_gen_id;
- hlist_add_head_rcu(&napi->napi_hash_node,
- &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
+ __napi_hash_add_with_id(napi, napi_gen_id);
spin_unlock(&napi_hash_lock);
}
@@ -6645,6 +6699,30 @@ void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index,
}
EXPORT_SYMBOL(netif_queue_set_napi);
+static void napi_restore_config(struct napi_struct *n)
+{
+ n->defer_hard_irqs = n->config->defer_hard_irqs;
+ n->gro_flush_timeout = n->config->gro_flush_timeout;
+ n->irq_suspend_timeout = n->config->irq_suspend_timeout;
+ /* A NAPI ID might be stored in the config; if so, use it. If not, use
+ * napi_hash_add to generate one for us. It will be saved to the config
+ * in napi_disable.
+ */
+ if (n->config->napi_id)
+ napi_hash_add_with_id(n, n->config->napi_id);
+ else
+ napi_hash_add(n);
+}
+
+static void napi_save_config(struct napi_struct *n)
+{
+ n->config->defer_hard_irqs = n->defer_hard_irqs;
+ n->config->gro_flush_timeout = n->gro_flush_timeout;
+ n->config->irq_suspend_timeout = n->irq_suspend_timeout;
+ n->config->napi_id = n->napi_id;
+ napi_hash_del(n);
+}
+
void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight)
{
@@ -6672,7 +6750,13 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
set_bit(NAPI_STATE_SCHED, &napi->state);
set_bit(NAPI_STATE_NPSVC, &napi->state);
list_add_rcu(&napi->dev_list, &dev->napi_list);
- napi_hash_add(napi);
+
+ /* default settings from sysfs are applied to all NAPIs. any per-NAPI
+ * configuration will be loaded in napi_enable
+ */
+ napi_set_defer_hard_irqs(napi, READ_ONCE(dev->napi_defer_hard_irqs));
+ napi_set_gro_flush_timeout(napi, READ_ONCE(dev->gro_flush_timeout));
+
napi_get_frags_check(napi);
/* Create kthread for this napi if dev->threaded is set.
* Clear dev->threaded if kthread creation failed so that
@@ -6704,6 +6788,11 @@ void napi_disable(struct napi_struct *n)
hrtimer_cancel(&n->timer);
+ if (n->config)
+ napi_save_config(n);
+ else
+ napi_hash_del(n);
+
clear_bit(NAPI_STATE_DISABLE, &n->state);
}
EXPORT_SYMBOL(napi_disable);
@@ -6719,6 +6808,11 @@ void napi_enable(struct napi_struct *n)
{
unsigned long new, val = READ_ONCE(n->state);
+ if (n->config)
+ napi_restore_config(n);
+ else
+ napi_hash_add(n);
+
do {
BUG_ON(!test_bit(NAPI_STATE_SCHED, &val));
@@ -6748,7 +6842,11 @@ void __netif_napi_del(struct napi_struct *napi)
if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state))
return;
- napi_hash_del(napi);
+ if (napi->config) {
+ napi->index = -1;
+ napi->config = NULL;
+ }
+
list_del_rcu(&napi->dev_list);
napi_free_frags(napi);
@@ -11060,8 +11158,8 @@ void netdev_sw_irq_coalesce_default_on(struct net_device *dev)
WARN_ON(dev->reg_state == NETREG_REGISTERED);
if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
- dev->gro_flush_timeout = 20000;
- dev->napi_defer_hard_irqs = 1;
+ netdev_set_gro_flush_timeout(dev, 20000);
+ netdev_set_defer_hard_irqs(dev, 1);
}
}
EXPORT_SYMBOL_GPL(netdev_sw_irq_coalesce_default_on);
@@ -11085,6 +11183,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
unsigned int txqs, unsigned int rxqs)
{
struct net_device *dev;
+ size_t napi_config_sz;
+ unsigned int maxqs;
BUG_ON(strlen(name) >= sizeof(dev->name));
@@ -11098,6 +11198,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
return NULL;
}
+ maxqs = max(txqs, rxqs);
+
dev = kvzalloc(struct_size(dev, priv, sizeof_priv),
GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
if (!dev)
@@ -11151,6 +11253,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
hash_init(dev->qdisc_hash);
#endif
+ mutex_init(&dev->lock);
+
dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
setup(dev);
@@ -11172,6 +11276,11 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
if (!dev->ethtool)
goto free_all;
+ napi_config_sz = array_size(maxqs, sizeof(*dev->napi_config));
+ dev->napi_config = kvzalloc(napi_config_sz, GFP_KERNEL_ACCOUNT);
+ if (!dev->napi_config)
+ goto free_all;
+
strscpy(dev->name, name);
dev->name_assign_type = name_assign_type;
dev->group = INIT_NETDEV_GROUP;
@@ -11221,6 +11330,8 @@ void free_netdev(struct net_device *dev)
return;
}
+ mutex_destroy(&dev->lock);
+
kfree(dev->ethtool);
netif_free_tx_queues(dev);
netif_free_rx_queues(dev);
@@ -11233,6 +11344,8 @@ void free_netdev(struct net_device *dev)
list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
netif_napi_del(p);
+ kvfree(dev->napi_config);
+
ref_tracker_dir_exit(&dev->refcnt_tracker);
#ifdef CONFIG_PCPU_DEV_REFCNT
free_percpu(dev->pcpu_refcnt);
@@ -11430,6 +11543,8 @@ void unregister_netdevice_many_notify(struct list_head *head,
mutex_destroy(&dev->ethtool->rss_lock);
+ net_shaper_flush_netdev(dev);
+
if (skb)
rtmsg_ifinfo_send(skb, dev, GFP_KERNEL, portid, nlh);
@@ -11998,8 +12113,6 @@ static void __init net_dev_struct_check(void)
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ifindex);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, real_num_rx_queues);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, _rx);
- CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_flush_timeout);
- CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, napi_defer_hard_irqs);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_max_size);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_ipv4_max_size);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, rx_handler);
@@ -12011,7 +12124,7 @@ static void __init net_dev_struct_check(void)
#ifdef CONFIG_NET_XGRESS
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, tcx_ingress);
#endif
- CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 104);
+ CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 92);
}
/*
diff --git a/net/core/dev.h b/net/core/dev.h
index 5654325c5b71..d043dee25a68 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -35,6 +35,16 @@ void dev_addr_flush(struct net_device *dev);
int dev_addr_init(struct net_device *dev);
void dev_addr_check(struct net_device *dev);
+#if IS_ENABLED(CONFIG_NET_SHAPER)
+void net_shaper_flush_netdev(struct net_device *dev);
+void net_shaper_set_real_num_tx_queues(struct net_device *dev,
+ unsigned int txq);
+#else
+static inline void net_shaper_flush_netdev(struct net_device *dev) {}
+static inline void net_shaper_set_real_num_tx_queues(struct net_device *dev,
+ unsigned int txq) {}
+#endif
+
/* sysctls not referred to from outside net/core/ */
extern int netdev_unregister_timeout_secs;
extern int weight_p;
@@ -138,6 +148,119 @@ static inline void netif_set_gro_ipv4_max_size(struct net_device *dev,
WRITE_ONCE(dev->gro_ipv4_max_size, size);
}
+/**
+ * napi_get_defer_hard_irqs - get the NAPI's defer_hard_irqs
+ * @n: napi struct to get the defer_hard_irqs field from
+ *
+ * Return: the per-NAPI value of the defer_hard_irqs field.
+ */
+static inline u32 napi_get_defer_hard_irqs(const struct napi_struct *n)
+{
+ return READ_ONCE(n->defer_hard_irqs);
+}
+
+/**
+ * napi_set_defer_hard_irqs - set the defer_hard_irqs for a napi
+ * @n: napi_struct to set the defer_hard_irqs field
+ * @defer: the value the field should be set to
+ */
+static inline void napi_set_defer_hard_irqs(struct napi_struct *n, u32 defer)
+{
+ WRITE_ONCE(n->defer_hard_irqs, defer);
+}
+
+/**
+ * netdev_set_defer_hard_irqs - set defer_hard_irqs for all NAPIs of a netdev
+ * @netdev: the net_device for which all NAPIs will have defer_hard_irqs set
+ * @defer: the defer_hard_irqs value to set
+ */
+static inline void netdev_set_defer_hard_irqs(struct net_device *netdev,
+ u32 defer)
+{
+ unsigned int count = max(netdev->num_rx_queues,
+ netdev->num_tx_queues);
+ struct napi_struct *napi;
+ int i;
+
+ WRITE_ONCE(netdev->napi_defer_hard_irqs, defer);
+ list_for_each_entry(napi, &netdev->napi_list, dev_list)
+ napi_set_defer_hard_irqs(napi, defer);
+
+ for (i = 0; i < count; i++)
+ netdev->napi_config[i].defer_hard_irqs = defer;
+}
+
+/**
+ * napi_get_gro_flush_timeout - get the gro_flush_timeout
+ * @n: napi struct to get the gro_flush_timeout from
+ *
+ * Return: the per-NAPI value of the gro_flush_timeout field.
+ */
+static inline unsigned long
+napi_get_gro_flush_timeout(const struct napi_struct *n)
+{
+ return READ_ONCE(n->gro_flush_timeout);
+}
+
+/**
+ * napi_set_gro_flush_timeout - set the gro_flush_timeout for a napi
+ * @n: napi struct to set the gro_flush_timeout
+ * @timeout: timeout value to set
+ *
+ * napi_set_gro_flush_timeout sets the per-NAPI gro_flush_timeout
+ */
+static inline void napi_set_gro_flush_timeout(struct napi_struct *n,
+ unsigned long timeout)
+{
+ WRITE_ONCE(n->gro_flush_timeout, timeout);
+}
+
+/**
+ * netdev_set_gro_flush_timeout - set gro_flush_timeout of a netdev's NAPIs
+ * @netdev: the net_device for which all NAPIs will have gro_flush_timeout set
+ * @timeout: the timeout value to set
+ */
+static inline void netdev_set_gro_flush_timeout(struct net_device *netdev,
+ unsigned long timeout)
+{
+ unsigned int count = max(netdev->num_rx_queues,
+ netdev->num_tx_queues);
+ struct napi_struct *napi;
+ int i;
+
+ WRITE_ONCE(netdev->gro_flush_timeout, timeout);
+ list_for_each_entry(napi, &netdev->napi_list, dev_list)
+ napi_set_gro_flush_timeout(napi, timeout);
+
+ for (i = 0; i < count; i++)
+ netdev->napi_config[i].gro_flush_timeout = timeout;
+}
+
+/**
+ * napi_get_irq_suspend_timeout - get the irq_suspend_timeout
+ * @n: napi struct to get the irq_suspend_timeout from
+ *
+ * Return: the per-NAPI value of the irq_suspend_timeout field.
+ */
+static inline unsigned long
+napi_get_irq_suspend_timeout(const struct napi_struct *n)
+{
+ return READ_ONCE(n->irq_suspend_timeout);
+}
+
+/**
+ * napi_set_irq_suspend_timeout - set the irq_suspend_timeout for a napi
+ * @n: napi struct to set the irq_suspend_timeout
+ * @timeout: timeout value to set
+ *
+ * napi_set_irq_suspend_timeout sets the per-NAPI irq_suspend_timeout
+ */
+static inline void napi_set_irq_suspend_timeout(struct napi_struct *n,
+ unsigned long timeout)
+{
+ WRITE_ONCE(n->irq_suspend_timeout, timeout);
+}
+
int rps_cpumask_housekeeping(struct cpumask *mask);
#if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL)
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 473c437b6b53..46d43b950471 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -64,7 +64,7 @@ int dev_ifconf(struct net *net, struct ifconf __user *uifc)
}
/* Loop over the interfaces, and write an info block for each. */
- rtnl_lock();
+ rtnl_net_lock(net);
for_each_netdev(net, dev) {
if (!pos)
done = inet_gifconf(dev, NULL, 0, size);
@@ -72,12 +72,12 @@ int dev_ifconf(struct net *net, struct ifconf __user *uifc)
done = inet_gifconf(dev, pos + total,
len - total, size);
if (done < 0) {
- rtnl_unlock();
+ rtnl_net_unlock(net);
return -EFAULT;
}
total += done;
}
- rtnl_unlock();
+ rtnl_net_unlock(net);
return put_user(total, &uifc->ifc_len);
}
diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c
index fc96259807b6..5cdca49b1d7c 100644
--- a/net/core/fib_notifier.c
+++ b/net/core/fib_notifier.c
@@ -43,7 +43,6 @@ static unsigned int fib_seq_sum(struct net *net)
struct fib_notifier_ops *ops;
unsigned int fib_seq = 0;
- rtnl_lock();
rcu_read_lock();
list_for_each_entry_rcu(ops, &fn_net->fib_notifier_ops, list) {
if (!try_module_get(ops->owner))
@@ -52,7 +51,6 @@ static unsigned int fib_seq_sum(struct net *net)
module_put(ops->owner);
}
rcu_read_unlock();
- rtnl_unlock();
return fib_seq;
}
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 154a2681f55c..34185d138c95 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -101,7 +101,8 @@ static void notify_rule_change(int event, struct fib_rule *rule,
struct fib_rules_ops *ops, struct nlmsghdr *nlh,
u32 pid);
-static struct fib_rules_ops *lookup_rules_ops(struct net *net, int family)
+static struct fib_rules_ops *lookup_rules_ops(const struct net *net,
+ int family)
{
struct fib_rules_ops *ops;
@@ -370,7 +371,9 @@ static int call_fib_rule_notifiers(struct net *net,
.rule = rule,
};
- ops->fib_rules_seq++;
+ ASSERT_RTNL();
+ /* Paired with READ_ONCE() in fib_rules_seq_read() */
+ WRITE_ONCE(ops->fib_rules_seq, ops->fib_rules_seq + 1);
return call_fib_notifiers(net, event_type, &info.info);
}
@@ -397,17 +400,16 @@ int fib_rules_dump(struct net *net, struct notifier_block *nb, int family,
}
EXPORT_SYMBOL_GPL(fib_rules_dump);
-unsigned int fib_rules_seq_read(struct net *net, int family)
+unsigned int fib_rules_seq_read(const struct net *net, int family)
{
unsigned int fib_rules_seq;
struct fib_rules_ops *ops;
- ASSERT_RTNL();
-
ops = lookup_rules_ops(net, family);
if (!ops)
return 0;
- fib_rules_seq = ops->fib_rules_seq;
+ /* Paired with WRITE_ONCE() in call_fib_rule_notifiers() */
+ fib_rules_seq = READ_ONCE(ops->fib_rules_seq);
rules_ops_put(ops);
return fib_rules_seq;
@@ -556,8 +558,7 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
nlrule->pref = fib_default_rule_pref(ops);
}
- nlrule->proto = tb[FRA_PROTOCOL] ?
- nla_get_u8(tb[FRA_PROTOCOL]) : RTPROT_UNSPEC;
+ nlrule->proto = nla_get_u8_default(tb[FRA_PROTOCOL], RTPROT_UNSPEC);
if (tb[FRA_IIFNAME]) {
struct net_device *dev;
@@ -1289,13 +1290,18 @@ static struct pernet_operations fib_rules_net_ops = {
.exit = fib_rules_net_exit,
};
+static const struct rtnl_msg_handler fib_rules_rtnl_msg_handlers[] __initconst = {
+ {.msgtype = RTM_NEWRULE, .doit = fib_nl_newrule},
+ {.msgtype = RTM_DELRULE, .doit = fib_nl_delrule},
+ {.msgtype = RTM_GETRULE, .dumpit = fib_nl_dumprule,
+ .flags = RTNL_FLAG_DUMP_UNLOCKED},
+};
+
static int __init fib_rules_init(void)
{
int err;
- rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule,
- RTNL_FLAG_DUMP_UNLOCKED);
+
+ rtnl_register_many(fib_rules_rtnl_msg_handlers);
err = register_pernet_subsys(&fib_rules_net_ops);
if (err < 0)
@@ -1310,9 +1316,7 @@ static int __init fib_rules_init(void)
fail_unregister:
unregister_pernet_subsys(&fib_rules_net_ops);
fail:
- rtnl_unregister(PF_UNSPEC, RTM_NEWRULE);
- rtnl_unregister(PF_UNSPEC, RTM_DELRULE);
- rtnl_unregister(PF_UNSPEC, RTM_GETRULE);
+ rtnl_unregister_many(fib_rules_rtnl_msg_handlers);
return err;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index fb56567c551e..6625b3f563a4 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1654,18 +1654,6 @@ void sk_reuseport_prog_free(struct bpf_prog *prog)
bpf_prog_destroy(prog);
}
-struct bpf_scratchpad {
- union {
- __be32 diff[MAX_BPF_STACK / sizeof(__be32)];
- u8 buff[MAX_BPF_STACK];
- };
- local_lock_t bh_lock;
-};
-
-static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp) = {
- .bh_lock = INIT_LOCAL_LOCK(bh_lock),
-};
-
static inline int __bpf_try_make_writable(struct sk_buff *skb,
unsigned int write_len)
{
@@ -2022,11 +2010,6 @@ static const struct bpf_func_proto bpf_l4_csum_replace_proto = {
BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size,
__be32 *, to, u32, to_size, __wsum, seed)
{
- struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
- u32 diff_size = from_size + to_size;
- int i, j = 0;
- __wsum ret;
-
/* This is quite flexible, some examples:
*
* from_size == 0, to_size > 0, seed := csum --> pushing data
@@ -2035,19 +2018,19 @@ BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size,
*
* Even for diffing, from_size and to_size don't need to be equal.
*/
- if (unlikely(((from_size | to_size) & (sizeof(__be32) - 1)) ||
- diff_size > sizeof(sp->diff)))
- return -EINVAL;
- local_lock_nested_bh(&bpf_sp.bh_lock);
- for (i = 0; i < from_size / sizeof(__be32); i++, j++)
- sp->diff[j] = ~from[i];
- for (i = 0; i < to_size / sizeof(__be32); i++, j++)
- sp->diff[j] = to[i];
+ __wsum ret = seed;
- ret = csum_partial(sp->diff, diff_size, seed);
- local_unlock_nested_bh(&bpf_sp.bh_lock);
- return ret;
+ if (from_size && to_size)
+ ret = csum_sub(csum_partial(to, to_size, ret),
+ csum_partial(from, from_size, 0));
+ else if (to_size)
+ ret = csum_partial(to, to_size, ret);
+
+ else if (from_size)
+ ret = ~csum_partial(from, from_size, ~ret);
+
+ return csum_from32to16((__force unsigned int)ret);
}
static const struct bpf_func_proto bpf_csum_diff_proto = {
@@ -2372,7 +2355,7 @@ static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
struct flowi4 fl4 = {
.flowi4_flags = FLOWI_FLAG_ANYSRC,
.flowi4_mark = skb->mark,
- .flowi4_tos = ip4h->tos & INET_DSCP_MASK,
+ .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip4h)),
.flowi4_oif = dev->ifindex,
.flowi4_proto = ip4h->protocol,
.daddr = ip4h->daddr,
@@ -2621,18 +2604,16 @@ BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg *, msg, u32, bytes)
static void sk_msg_reset_curr(struct sk_msg *msg)
{
- u32 i = msg->sg.start;
- u32 len = 0;
-
- do {
- len += sk_msg_elem(msg, i)->length;
- sk_msg_iter_var_next(i);
- if (len >= msg->sg.size)
- break;
- } while (i != msg->sg.end);
+ if (!msg->sg.size) {
+ msg->sg.curr = msg->sg.start;
+ msg->sg.copybreak = 0;
+ } else {
+ u32 i = msg->sg.end;
- msg->sg.curr = i;
- msg->sg.copybreak = 0;
+ sk_msg_iter_var_prev(i);
+ msg->sg.curr = i;
+ msg->sg.copybreak = msg->sg.data[i].length;
+ }
}
static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
@@ -2795,7 +2776,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
sk_msg_iter_var_next(i);
} while (i != msg->sg.end);
- if (start >= offset + l)
+ if (start > offset + l)
return -EINVAL;
space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
@@ -2820,6 +2801,8 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
raw = page_address(page);
+ if (i == msg->sg.end)
+ sk_msg_iter_var_prev(i);
psge = sk_msg_elem(msg, i);
front = start - offset;
back = psge->length - front;
@@ -2836,7 +2819,13 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
}
put_page(sg_page(psge));
- } else if (start - offset) {
+ new = i;
+ goto place_new;
+ }
+
+ if (start - offset) {
+ if (i == msg->sg.end)
+ sk_msg_iter_var_prev(i);
psge = sk_msg_elem(msg, i);
rsge = sk_msg_elem_cpy(msg, i);
@@ -2847,39 +2836,44 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
sk_msg_iter_var_next(i);
sg_unmark_end(psge);
sg_unmark_end(&rsge);
- sk_msg_iter_next(msg, end);
}
/* Slot(s) to place newly allocated data */
+ sk_msg_iter_next(msg, end);
new = i;
+ sk_msg_iter_var_next(i);
+
+ if (i == msg->sg.end) {
+ if (!rsge.length)
+ goto place_new;
+ sk_msg_iter_next(msg, end);
+ goto place_new;
+ }
/* Shift one or two slots as needed */
- if (!copy) {
- sge = sk_msg_elem_cpy(msg, i);
+ sge = sk_msg_elem_cpy(msg, new);
+ sg_unmark_end(&sge);
+ nsge = sk_msg_elem_cpy(msg, i);
+ if (rsge.length) {
sk_msg_iter_var_next(i);
- sg_unmark_end(&sge);
+ nnsge = sk_msg_elem_cpy(msg, i);
sk_msg_iter_next(msg, end);
+ }
- nsge = sk_msg_elem_cpy(msg, i);
+ while (i != msg->sg.end) {
+ msg->sg.data[i] = sge;
+ sge = nsge;
+ sk_msg_iter_var_next(i);
if (rsge.length) {
- sk_msg_iter_var_next(i);
+ nsge = nnsge;
nnsge = sk_msg_elem_cpy(msg, i);
- }
-
- while (i != msg->sg.end) {
- msg->sg.data[i] = sge;
- sge = nsge;
- sk_msg_iter_var_next(i);
- if (rsge.length) {
- nsge = nnsge;
- nnsge = sk_msg_elem_cpy(msg, i);
- } else {
- nsge = sk_msg_elem_cpy(msg, i);
- }
+ } else {
+ nsge = sk_msg_elem_cpy(msg, i);
}
}
+place_new:
/* Place newly allocated data buffer */
sk_mem_charge(msg->sk, len);
msg->sg.size += len;
@@ -2908,8 +2902,10 @@ static const struct bpf_func_proto bpf_msg_push_data_proto = {
static void sk_msg_shift_left(struct sk_msg *msg, int i)
{
+ struct scatterlist *sge = sk_msg_elem(msg, i);
int prev;
+ put_page(sg_page(sge));
do {
prev = i;
sk_msg_iter_var_next(i);
@@ -2946,6 +2942,9 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
if (unlikely(flags))
return -EINVAL;
+ if (unlikely(len == 0))
+ return 0;
+
/* First find the starting scatterlist element */
i = msg->sg.start;
do {
@@ -2958,7 +2957,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
} while (i != msg->sg.end);
/* Bounds checks: start and pop must be inside message */
- if (start >= offset + l || last >= msg->sg.size)
+ if (start >= offset + l || last > msg->sg.size)
return -EINVAL;
space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
@@ -2987,12 +2986,12 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
*/
if (start != offset) {
struct scatterlist *nsge, *sge = sk_msg_elem(msg, i);
- int a = start;
+ int a = start - offset;
int b = sge->length - pop - a;
sk_msg_iter_var_next(i);
- if (pop < sge->length - a) {
+ if (b > 0) {
if (space) {
sge->length = a;
sk_msg_shift_right(msg, i);
@@ -3011,7 +3010,6 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
if (unlikely(!page))
return -ENOMEM;
- sge->length = a;
orig = sg_page(sge);
from = sg_virt(sge);
to = page_address(page);
@@ -3021,7 +3019,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
put_page(orig);
}
pop = 0;
- } else if (pop >= sge->length - a) {
+ } else {
pop -= (sge->length - a);
sge->length = a;
}
@@ -3055,7 +3053,6 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
pop -= sge->length;
sk_msg_shift_left(msg, i);
}
- sk_msg_iter_var_next(i);
}
sk_mem_uncharge(msg->sk, len - pop);
@@ -5140,6 +5137,17 @@ static u64 __bpf_get_netns_cookie(struct sock *sk)
return net->net_cookie;
}
+BPF_CALL_1(bpf_get_netns_cookie, struct sk_buff *, skb)
+{
+ return __bpf_get_netns_cookie(skb && skb->sk ? skb->sk : NULL); /* NULL skb, or skb with no socket attached, is passed on as a NULL sock */
+}
+
+static const struct bpf_func_proto bpf_get_netns_cookie_proto = {
+ .func = bpf_get_netns_cookie,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX_OR_NULL, /* a NULL ctx is accepted; the helper tests skb before dereferencing it */
+};
+
BPF_CALL_1(bpf_get_netns_cookie_sock, struct sock *, ctx)
{
return __bpf_get_netns_cookie(ctx);
@@ -6768,8 +6776,6 @@ __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
/* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk
* sock refcnt is decremented to prevent a request_sock leak.
*/
- if (!sk_fullsock(sk2))
- sk2 = NULL;
if (sk2 != sk) {
sock_gen_put(sk);
/* Ensure there is no need to bump sk2 refcnt */
@@ -6816,8 +6822,6 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
/* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk
* sock refcnt is decremented to prevent a request_sock leak.
*/
- if (!sk_fullsock(sk2))
- sk2 = NULL;
if (sk2 != sk) {
sock_gen_put(sk);
/* Ensure there is no need to bump sk2 refcnt */
@@ -7266,7 +7270,7 @@ BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk)
{
sk = sk_to_full_sk(sk);
- if (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE))
+ if (sk && sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE))
return (unsigned long)sk;
return (unsigned long)NULL;
@@ -8199,6 +8203,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_skb_under_cgroup_proto;
case BPF_FUNC_get_socket_cookie:
return &bpf_get_socket_cookie_proto;
+ case BPF_FUNC_get_netns_cookie:
+ return &bpf_get_netns_cookie_proto;
case BPF_FUNC_get_socket_uid:
return &bpf_get_socket_uid_proto;
case BPF_FUNC_fib_lookup:
@@ -10231,10 +10237,6 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
} \
} while (0)
-#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(S, NS, F, NF, TF) \
- SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( \
- S, NS, F, NF, BPF_FIELD_SIZEOF(NS, NF), 0, TF)
-
static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 1a14f915b7a4..ae74634310a3 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -10,6 +10,7 @@
#include <linux/bpf.h>
#include <net/lwtunnel.h>
#include <net/gre.h>
+#include <net/ip.h>
#include <net/ip6_route.h>
#include <net/ipv6_stubs.h>
#include <net/inet_dscp.h>
@@ -87,16 +88,18 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
static int bpf_lwt_input_reroute(struct sk_buff *skb)
{
+ enum skb_drop_reason reason;
int err = -EINVAL;
if (skb->protocol == htons(ETH_P_IP)) {
struct net_device *dev = skb_dst(skb)->dev;
- struct iphdr *iph = ip_hdr(skb);
+ const struct iphdr *iph = ip_hdr(skb);
dev_hold(dev);
skb_dst_drop(skb);
- err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
- iph->tos, dev);
+ reason = ip_route_input_noref(skb, iph->daddr, iph->saddr,
+ ip4h_dscp(iph), dev);
+ err = reason ? -EINVAL : 0;
dev_put(dev);
} else if (skb->protocol == htons(ETH_P_IPV6)) {
skb_dst_drop(skb);
@@ -206,7 +209,7 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
fl4.flowi4_oif = oif;
fl4.flowi4_mark = skb->mark;
fl4.flowi4_uid = sock_net_uid(net, sk);
- fl4.flowi4_tos = iph->tos & INET_DSCP_MASK;
+ fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph));
fl4.flowi4_flags = FLOWI_FLAG_ANYSRC;
fl4.flowi4_proto = iph->protocol;
fl4.daddr = iph->daddr;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 77b819cd995b..89656d180bc6 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -14,7 +14,6 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/slab.h>
-#include <linux/kmemleak.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -61,6 +60,25 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
static const struct seq_operations neigh_stat_seq_ops;
#endif
+static struct hlist_head *neigh_get_dev_table(struct net_device *dev, int family)
+{
+ int i;
+
+ switch (family) {
+ default:
+ DEBUG_NET_WARN_ON_ONCE(1);
+ fallthrough; /* unknown family falls back to the ARP table, to avoid panic by null-ptr-deref */
+ case AF_INET:
+ i = NEIGH_ARP_TABLE;
+ break;
+ case AF_INET6:
+ i = NEIGH_ND_TABLE;
+ break;
+ }
+
+ return &dev->neighbours[i]; /* head of @dev's neighbour list for the table selected by @family */
+}
+
/*
Neighbour hash table buckets are protected with rwlock tbl->lock.
@@ -205,18 +223,14 @@ static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
}
}
-static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
- struct neigh_table *tbl)
+bool neigh_remove_one(struct neighbour *n)
{
bool retval = false;
write_lock(&n->lock);
if (refcount_read(&n->refcnt) == 1) {
- struct neighbour *neigh;
-
- neigh = rcu_dereference_protected(n->next,
- lockdep_is_held(&tbl->lock));
- rcu_assign_pointer(*np, neigh);
+ hlist_del_rcu(&n->hash);
+ hlist_del_rcu(&n->dev_list);
neigh_mark_dead(n);
retval = true;
}
@@ -226,29 +240,6 @@ static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
return retval;
}
-bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
-{
- struct neigh_hash_table *nht;
- void *pkey = ndel->primary_key;
- u32 hash_val;
- struct neighbour *n;
- struct neighbour __rcu **np;
-
- nht = rcu_dereference_protected(tbl->nht,
- lockdep_is_held(&tbl->lock));
- hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
- hash_val = hash_val >> (32 - nht->hash_shift);
-
- np = &nht->hash_buckets[hash_val];
- while ((n = rcu_dereference_protected(*np,
- lockdep_is_held(&tbl->lock)))) {
- if (n == ndel)
- return neigh_del(n, np, tbl);
- np = &n->next;
- }
- return false;
-}
-
static int neigh_forced_gc(struct neigh_table *tbl)
{
int max_clean = atomic_read(&tbl->gc_entries) -
@@ -276,7 +267,7 @@ static int neigh_forced_gc(struct neigh_table *tbl)
remove = true;
write_unlock(&n->lock);
- if (remove && neigh_remove_one(n, tbl))
+ if (remove && neigh_remove_one(n))
shrunk++;
if (shrunk >= max_clean)
break;
@@ -380,54 +371,42 @@ static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net,
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
bool skip_perm)
{
- int i;
- struct neigh_hash_table *nht;
+ struct hlist_head *dev_head;
+ struct hlist_node *tmp;
+ struct neighbour *n;
- nht = rcu_dereference_protected(tbl->nht,
- lockdep_is_held(&tbl->lock));
+ dev_head = neigh_get_dev_table(dev, tbl->family);
- for (i = 0; i < (1 << nht->hash_shift); i++) {
- struct neighbour *n;
- struct neighbour __rcu **np = &nht->hash_buckets[i];
+ hlist_for_each_entry_safe(n, tmp, dev_head, dev_list) {
+ if (skip_perm && n->nud_state & NUD_PERMANENT)
+ continue;
- while ((n = rcu_dereference_protected(*np,
- lockdep_is_held(&tbl->lock))) != NULL) {
- if (dev && n->dev != dev) {
- np = &n->next;
- continue;
- }
- if (skip_perm && n->nud_state & NUD_PERMANENT) {
- np = &n->next;
- continue;
- }
- rcu_assign_pointer(*np,
- rcu_dereference_protected(n->next,
- lockdep_is_held(&tbl->lock)));
- write_lock(&n->lock);
- neigh_del_timer(n);
- neigh_mark_dead(n);
- if (refcount_read(&n->refcnt) != 1) {
- /* The most unpleasant situation.
- We must destroy neighbour entry,
- but someone still uses it.
-
- The destroy will be delayed until
- the last user releases us, but
- we must kill timers etc. and move
- it to safe state.
- */
- __skb_queue_purge(&n->arp_queue);
- n->arp_queue_len_bytes = 0;
- WRITE_ONCE(n->output, neigh_blackhole);
- if (n->nud_state & NUD_VALID)
- n->nud_state = NUD_NOARP;
- else
- n->nud_state = NUD_NONE;
- neigh_dbg(2, "neigh %p is stray\n", n);
- }
- write_unlock(&n->lock);
- neigh_cleanup_and_release(n);
+ hlist_del_rcu(&n->hash);
+ hlist_del_rcu(&n->dev_list);
+ write_lock(&n->lock);
+ neigh_del_timer(n);
+ neigh_mark_dead(n);
+ if (refcount_read(&n->refcnt) != 1) {
+ /* The most unpleasant situation.
+ * We must destroy neighbour entry,
+ * but someone still uses it.
+ *
+ * The destroy will be delayed until
+ * the last user releases us, but
+ * we must kill timers etc. and move
+ * it to safe state.
+ */
+ __skb_queue_purge(&n->arp_queue);
+ n->arp_queue_len_bytes = 0;
+ WRITE_ONCE(n->output, neigh_blackhole);
+ if (n->nud_state & NUD_VALID)
+ n->nud_state = NUD_NOARP;
+ else
+ n->nud_state = NUD_NONE;
+ neigh_dbg(2, "neigh %p is stray\n", n);
}
+ write_unlock(&n->lock);
+ neigh_cleanup_and_release(n);
}
}
@@ -530,27 +509,21 @@ static void neigh_get_hash_rnd(u32 *x)
static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
- size_t size = (1 << shift) * sizeof(struct neighbour *);
+ size_t size = (1 << shift) * sizeof(struct hlist_head);
+ struct hlist_head *hash_heads;
struct neigh_hash_table *ret;
- struct neighbour __rcu **buckets;
int i;
ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
if (!ret)
return NULL;
- if (size <= PAGE_SIZE) {
- buckets = kzalloc(size, GFP_ATOMIC);
- } else {
- buckets = (struct neighbour __rcu **)
- __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
- get_order(size));
- kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
- }
- if (!buckets) {
+
+ hash_heads = kvzalloc(size, GFP_ATOMIC);
+ if (!hash_heads) {
kfree(ret);
return NULL;
}
- ret->hash_buckets = buckets;
+ ret->hash_heads = hash_heads;
ret->hash_shift = shift;
for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
neigh_get_hash_rnd(&ret->hash_rnd[i]);
@@ -562,15 +535,8 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
struct neigh_hash_table *nht = container_of(head,
struct neigh_hash_table,
rcu);
- size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
- struct neighbour __rcu **buckets = nht->hash_buckets;
- if (size <= PAGE_SIZE) {
- kfree(buckets);
- } else {
- kmemleak_free(buckets);
- free_pages((unsigned long)buckets, get_order(size));
- }
+ kvfree(nht->hash_heads);
kfree(nht);
}
@@ -589,24 +555,17 @@ static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
return old_nht;
for (i = 0; i < (1 << old_nht->hash_shift); i++) {
- struct neighbour *n, *next;
+ struct hlist_node *tmp;
+ struct neighbour *n;
- for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
- lockdep_is_held(&tbl->lock));
- n != NULL;
- n = next) {
+ neigh_for_each_in_bucket_safe(n, tmp, &old_nht->hash_heads[i]) {
hash = tbl->hash(n->primary_key, n->dev,
new_nht->hash_rnd);
hash >>= (32 - new_nht->hash_shift);
- next = rcu_dereference_protected(n->next,
- lockdep_is_held(&tbl->lock));
- rcu_assign_pointer(n->next,
- rcu_dereference_protected(
- new_nht->hash_buckets[hash],
- lockdep_is_held(&tbl->lock)));
- rcu_assign_pointer(new_nht->hash_buckets[hash], n);
+ hlist_del_rcu(&n->hash);
+ hlist_add_head_rcu(&n->hash, &new_nht->hash_heads[hash]);
}
}
@@ -693,11 +652,7 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey,
goto out_tbl_unlock;
}
- for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
- lockdep_is_held(&tbl->lock));
- n1 != NULL;
- n1 = rcu_dereference_protected(n1->next,
- lockdep_is_held(&tbl->lock))) {
+ neigh_for_each_in_bucket(n1, &nht->hash_heads[hash_val]) {
if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
if (want_ref)
neigh_hold(n1);
@@ -713,10 +668,11 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey,
list_add_tail(&n->managed_list, &n->tbl->managed_list);
if (want_ref)
neigh_hold(n);
- rcu_assign_pointer(n->next,
- rcu_dereference_protected(nht->hash_buckets[hash_val],
- lockdep_is_held(&tbl->lock)));
- rcu_assign_pointer(nht->hash_buckets[hash_val], n);
+ hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]);
+
+ hlist_add_head_rcu(&n->dev_list,
+ neigh_get_dev_table(dev, tbl->family));
+
write_unlock_bh(&tbl->lock);
neigh_dbg(2, "neigh %p is created\n", n);
rc = n;
@@ -948,10 +904,10 @@ static void neigh_connect(struct neighbour *neigh)
static void neigh_periodic_work(struct work_struct *work)
{
struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
+ struct neigh_hash_table *nht;
+ struct hlist_node *tmp;
struct neighbour *n;
- struct neighbour __rcu **np;
unsigned int i;
- struct neigh_hash_table *nht;
NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
@@ -976,10 +932,7 @@ static void neigh_periodic_work(struct work_struct *work)
goto out;
for (i = 0 ; i < (1 << nht->hash_shift); i++) {
- np = &nht->hash_buckets[i];
-
- while ((n = rcu_dereference_protected(*np,
- lockdep_is_held(&tbl->lock))) != NULL) {
+ neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i]) {
unsigned int state;
write_lock(&n->lock);
@@ -988,7 +941,7 @@ static void neigh_periodic_work(struct work_struct *work)
if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
(n->flags & NTF_EXT_LEARNED)) {
write_unlock(&n->lock);
- goto next_elt;
+ continue;
}
if (time_before(n->used, n->confirmed) &&
@@ -999,18 +952,14 @@ static void neigh_periodic_work(struct work_struct *work)
(state == NUD_FAILED ||
!time_in_range_open(jiffies, n->used,
n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
- rcu_assign_pointer(*np,
- rcu_dereference_protected(n->next,
- lockdep_is_held(&tbl->lock)));
+ hlist_del_rcu(&n->hash);
+ hlist_del_rcu(&n->dev_list);
neigh_mark_dead(n);
write_unlock(&n->lock);
neigh_cleanup_and_release(n);
continue;
}
write_unlock(&n->lock);
-
-next_elt:
- np = &n->next;
}
/*
* It's fine to release lock here, even if hash table
@@ -1957,7 +1906,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
NETLINK_CB(skb).portid, extack);
write_lock_bh(&tbl->lock);
neigh_release(neigh);
- neigh_remove_one(neigh, tbl);
+ neigh_remove_one(neigh);
write_unlock_bh(&tbl->lock);
out:
@@ -2728,9 +2677,8 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
for (h = s_h; h < (1 << nht->hash_shift); h++) {
if (h > s_h)
s_idx = 0;
- for (n = rcu_dereference(nht->hash_buckets[h]), idx = 0;
- n != NULL;
- n = rcu_dereference(n->next)) {
+ idx = 0;
+ neigh_for_each_in_bucket_rcu(n, &nht->hash_heads[h]) {
if (idx < s_idx || !net_eq(dev_net(n->dev), net))
goto next;
if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
@@ -2876,6 +2824,7 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
if (err < 0 && cb->strict_check)
return err;
+ err = 0;
s_t = cb->args[0];
@@ -3097,9 +3046,7 @@ void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void
for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
struct neighbour *n;
- for (n = rcu_dereference(nht->hash_buckets[chain]);
- n != NULL;
- n = rcu_dereference(n->next))
+ neigh_for_each_in_bucket(n, &nht->hash_heads[chain])
cb(n, cookie);
}
read_unlock_bh(&tbl->lock);
@@ -3111,29 +3058,25 @@ EXPORT_SYMBOL(neigh_for_each);
void __neigh_for_each_release(struct neigh_table *tbl,
int (*cb)(struct neighbour *))
{
- int chain;
struct neigh_hash_table *nht;
+ int chain;
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
+ struct hlist_node *tmp;
struct neighbour *n;
- struct neighbour __rcu **np;
- np = &nht->hash_buckets[chain];
- while ((n = rcu_dereference_protected(*np,
- lockdep_is_held(&tbl->lock))) != NULL) {
+ neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[chain]) {
int release;
write_lock(&n->lock);
release = cb(n);
if (release) {
- rcu_assign_pointer(*np,
- rcu_dereference_protected(n->next,
- lockdep_is_held(&tbl->lock)));
+ hlist_del_rcu(&n->hash);
+ hlist_del_rcu(&n->dev_list);
neigh_mark_dead(n);
- } else
- np = &n->next;
+ }
write_unlock(&n->lock);
if (release)
neigh_cleanup_and_release(n);
@@ -3190,43 +3133,53 @@ EXPORT_SYMBOL(neigh_xmit);
#ifdef CONFIG_PROC_FS
-static struct neighbour *neigh_get_first(struct seq_file *seq)
+static struct neighbour *neigh_get_valid(struct seq_file *seq,
+ struct neighbour *n,
+ loff_t *pos)
{
struct neigh_seq_state *state = seq->private;
struct net *net = seq_file_net(seq);
+
+ if (!net_eq(dev_net(n->dev), net))
+ return NULL;
+
+ if (state->neigh_sub_iter) {
+ loff_t fakep = 0;
+ void *v;
+
+ v = state->neigh_sub_iter(state, n, pos ? pos : &fakep);
+ if (!v)
+ return NULL;
+ if (pos)
+ return v;
+ }
+
+ if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
+ return n;
+
+ if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
+ return n;
+
+ return NULL;
+}
+
+static struct neighbour *neigh_get_first(struct seq_file *seq)
+{
+ struct neigh_seq_state *state = seq->private;
struct neigh_hash_table *nht = state->nht;
- struct neighbour *n = NULL;
- int bucket;
+ struct neighbour *n, *tmp;
state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
- for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
- n = rcu_dereference(nht->hash_buckets[bucket]);
- while (n) {
- if (!net_eq(dev_net(n->dev), net))
- goto next;
- if (state->neigh_sub_iter) {
- loff_t fakep = 0;
- void *v;
-
- v = state->neigh_sub_iter(state, n, &fakep);
- if (!v)
- goto next;
- }
- if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
- break;
- if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
- break;
-next:
- n = rcu_dereference(n->next);
+ while (++state->bucket < (1 << nht->hash_shift)) {
+ neigh_for_each_in_bucket(n, &nht->hash_heads[state->bucket]) {
+ tmp = neigh_get_valid(seq, n, NULL);
+ if (tmp)
+ return tmp;
}
-
- if (n)
- break;
}
- state->bucket = bucket;
- return n;
+ return NULL;
}
static struct neighbour *neigh_get_next(struct seq_file *seq,
@@ -3234,46 +3187,28 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
loff_t *pos)
{
struct neigh_seq_state *state = seq->private;
- struct net *net = seq_file_net(seq);
- struct neigh_hash_table *nht = state->nht;
+ struct neighbour *tmp;
if (state->neigh_sub_iter) {
void *v = state->neigh_sub_iter(state, n, pos);
+
if (v)
return n;
}
- n = rcu_dereference(n->next);
-
- while (1) {
- while (n) {
- if (!net_eq(dev_net(n->dev), net))
- goto next;
- if (state->neigh_sub_iter) {
- void *v = state->neigh_sub_iter(state, n, pos);
- if (v)
- return n;
- goto next;
- }
- if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
- break;
- if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
- break;
-next:
- n = rcu_dereference(n->next);
+ hlist_for_each_entry_continue(n, hash) {
+ tmp = neigh_get_valid(seq, n, pos);
+ if (tmp) {
+ n = tmp;
+ goto out;
}
-
- if (n)
- break;
-
- if (++state->bucket >= (1 << nht->hash_shift))
- break;
-
- n = rcu_dereference(nht->hash_buckets[state->bucket]);
}
+ n = neigh_get_first(seq);
+out:
if (n && pos)
--(*pos);
+
return n;
}
@@ -3376,7 +3311,7 @@ void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl
struct neigh_seq_state *state = seq->private;
state->tbl = tbl;
- state->bucket = 0;
+ state->bucket = -1;
state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
rcu_read_lock();
@@ -3886,17 +3821,18 @@ EXPORT_SYMBOL(neigh_sysctl_unregister);
#endif /* CONFIG_SYSCTL */
+static const struct rtnl_msg_handler neigh_rtnl_msg_handlers[] __initconst = {
+ {.msgtype = RTM_NEWNEIGH, .doit = neigh_add},
+ {.msgtype = RTM_DELNEIGH, .doit = neigh_delete},
+ {.msgtype = RTM_GETNEIGH, .doit = neigh_get, .dumpit = neigh_dump_info,
+ .flags = RTNL_FLAG_DUMP_UNLOCKED},
+ {.msgtype = RTM_GETNEIGHTBL, .dumpit = neightbl_dump_info},
+ {.msgtype = RTM_SETNEIGHTBL, .doit = neightbl_set},
+};
+
static int __init neigh_init(void)
{
- rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info,
- RTNL_FLAG_DUMP_UNLOCKED);
-
- rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
- 0);
- rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);
-
+ rtnl_register_many(neigh_rtnl_msg_handlers);
return 0;
}
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 05cf5347f25e..2d9afc6e2161 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -409,7 +409,7 @@ NETDEVICE_SHOW_RW(tx_queue_len, fmt_dec);
static int change_gro_flush_timeout(struct net_device *dev, unsigned long val)
{
- WRITE_ONCE(dev->gro_flush_timeout, val);
+ netdev_set_gro_flush_timeout(dev, val);
return 0;
}
@@ -429,7 +429,7 @@ static int change_napi_defer_hard_irqs(struct net_device *dev, unsigned long val
if (val > S32_MAX)
return -ERANGE;
- WRITE_ONCE(dev->napi_defer_hard_irqs, val);
+ netdev_set_defer_hard_irqs(dev, (u32)val);
return 0;
}
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index e39479f1c9a4..ae34ac818cda 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -56,7 +56,6 @@ static bool init_net_initialized;
* outside.
*/
DECLARE_RWSEM(pernet_ops_rwsem);
-EXPORT_SYMBOL_GPL(pernet_ops_rwsem);
#define MIN_PERNET_OPS_ID \
((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))
@@ -317,6 +316,7 @@ static __net_init void preinit_net_sysctl(struct net *net)
*/
net->core.sysctl_optmem_max = 128 * 1024;
net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED;
+ net->core.sysctl_tstamp_allow_data = 1;
}
/* init code that must occur even if setup_net() is not called. */
@@ -334,6 +334,12 @@ static __net_init void preinit_net(struct net *net, struct user_namespace *user_
idr_init(&net->netns_ids);
spin_lock_init(&net->nsid_lock);
mutex_init(&net->ipv4.ra_mutex);
+
+#ifdef CONFIG_DEBUG_NET_SMALL_RTNL
+ mutex_init(&net->rtnl_mutex);
+ lock_set_cmp_fn(&net->rtnl_mutex, rtnl_net_lock_cmp_fn, NULL);
+#endif
+
preinit_net_sysctl(net);
}
@@ -694,20 +700,18 @@ EXPORT_SYMBOL_GPL(get_net_ns);
struct net *get_net_ns_by_fd(int fd)
{
- struct fd f = fdget(fd);
- struct net *net = ERR_PTR(-EINVAL);
+ CLASS(fd, f)(fd);
- if (!fd_file(f))
+ if (fd_empty(f))
return ERR_PTR(-EBADF);
if (proc_ns_file(fd_file(f))) {
struct ns_common *ns = get_proc_ns(file_inode(fd_file(f)));
if (ns->ops == &netns_operations)
- net = get_net(container_of(ns, struct net, ns));
+ return get_net(container_of(ns, struct net, ns));
}
- fdput(f);
- return net;
+ return ERR_PTR(-EINVAL);
}
EXPORT_SYMBOL_GPL(get_net_ns_by_fd);
#endif
@@ -1153,13 +1157,23 @@ static void __init netns_ipv4_struct_check(void)
CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
sysctl_tcp_early_demux);
CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
+ sysctl_tcp_l3mdev_accept);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
sysctl_tcp_reordering);
CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
sysctl_tcp_rmem);
- CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_rx, 18);
+ CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_rx, 22);
}
#endif
+static const struct rtnl_msg_handler net_ns_rtnl_msg_handlers[] __initconst = {
+ {.msgtype = RTM_NEWNSID, .doit = rtnl_net_newid,
+ .flags = RTNL_FLAG_DOIT_UNLOCKED},
+ {.msgtype = RTM_GETNSID, .doit = rtnl_net_getid,
+ .dumpit = rtnl_net_dumpid,
+ .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
+};
+
void __init net_ns_init(void)
{
struct net_generic *ng;
@@ -1197,11 +1211,7 @@ void __init net_ns_init(void)
if (register_pernet_subsys(&net_ns_ops))
panic("Could not register network namespace subsystems");
- rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL,
- RTNL_FLAG_DOIT_UNLOCKED);
- rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
- RTNL_FLAG_DOIT_UNLOCKED |
- RTNL_FLAG_DUMP_UNLOCKED);
+ rtnl_register_many(net_ns_rtnl_msg_handlers);
}
static void free_exit_list(struct pernet_operations *ops, struct list_head *net_exit_list)
diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c
index b28424ae06d5..a89cbd8d87c3 100644
--- a/net/core/netdev-genl-gen.c
+++ b/net/core/netdev-genl-gen.c
@@ -14,12 +14,16 @@
/* Integer value ranges */
static const struct netlink_range_validation netdev_a_page_pool_id_range = {
.min = 1ULL,
- .max = 4294967295ULL,
+ .max = U32_MAX,
};
static const struct netlink_range_validation netdev_a_page_pool_ifindex_range = {
.min = 1ULL,
- .max = 2147483647ULL,
+ .max = S32_MAX,
+};
+
+static const struct netlink_range_validation netdev_a_napi_defer_hard_irqs_range = {
+ .max = S32_MAX,
};
/* Common nested types */
@@ -87,6 +91,14 @@ static const struct nla_policy netdev_bind_rx_nl_policy[NETDEV_A_DMABUF_FD + 1]
[NETDEV_A_DMABUF_QUEUES] = NLA_POLICY_NESTED(netdev_queue_id_nl_policy),
};
+/* NETDEV_CMD_NAPI_SET - do */
+static const struct nla_policy netdev_napi_set_nl_policy[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT + 1] = {
+ [NETDEV_A_NAPI_ID] = { .type = NLA_U32, },
+ [NETDEV_A_NAPI_DEFER_HARD_IRQS] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_napi_defer_hard_irqs_range),
+ [NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT] = { .type = NLA_UINT, },
+ [NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT] = { .type = NLA_UINT, },
+};
+
/* Ops table for netdev */
static const struct genl_split_ops netdev_nl_ops[] = {
{
@@ -171,6 +183,13 @@ static const struct genl_split_ops netdev_nl_ops[] = {
.maxattr = NETDEV_A_DMABUF_FD,
.flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
},
+ {
+ .cmd = NETDEV_CMD_NAPI_SET,
+ .doit = netdev_nl_napi_set_doit,
+ .policy = netdev_napi_set_nl_policy,
+ .maxattr = NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
};
static const struct genl_multicast_group netdev_nl_mcgrps[] = {
diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h
index 8cda334fd042..e09dd7539ff2 100644
--- a/net/core/netdev-genl-gen.h
+++ b/net/core/netdev-genl-gen.h
@@ -33,6 +33,7 @@ int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
int netdev_nl_qstats_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb);
int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info);
+int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info);
enum {
NETDEV_NLGRP_MGMT,
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index 1cb954f2d39e..9527dd46e4dc 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -24,7 +24,7 @@ struct netdev_nl_dump_ctx {
static struct netdev_nl_dump_ctx *netdev_dump_ctx(struct netlink_callback *cb)
{
- NL_ASSERT_DUMP_CTX_FITS(struct netdev_nl_dump_ctx);
+ NL_ASSERT_CTX_FITS(struct netdev_nl_dump_ctx);
return (struct netdev_nl_dump_ctx *)cb->ctx;
}
@@ -161,6 +161,9 @@ static int
netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi,
const struct genl_info *info)
{
+ unsigned long irq_suspend_timeout;
+ unsigned long gro_flush_timeout;
+ u32 napi_defer_hard_irqs;
void *hdr;
pid_t pid;
@@ -189,6 +192,21 @@ netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi,
goto nla_put_failure;
}
+ napi_defer_hard_irqs = napi_get_defer_hard_irqs(napi);
+ if (nla_put_s32(rsp, NETDEV_A_NAPI_DEFER_HARD_IRQS,
+ napi_defer_hard_irqs))
+ goto nla_put_failure;
+
+ irq_suspend_timeout = napi_get_irq_suspend_timeout(napi);
+ if (nla_put_uint(rsp, NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT,
+ irq_suspend_timeout))
+ goto nla_put_failure;
+
+ gro_flush_timeout = napi_get_gro_flush_timeout(napi);
+ if (nla_put_uint(rsp, NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT,
+ gro_flush_timeout))
+ goto nla_put_failure;
+
genlmsg_end(rsp, hdr);
return 0;
@@ -215,6 +233,7 @@ int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
rtnl_lock();
+ rcu_read_lock();
napi = napi_by_id(napi_id);
if (napi) {
@@ -224,6 +243,7 @@ int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info)
err = -ENOENT;
}
+ rcu_read_unlock();
rtnl_unlock();
if (err)
@@ -292,6 +312,59 @@ int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
}
static int
+netdev_nl_napi_set_config(struct napi_struct *napi, struct genl_info *info)
+{
+ u64 irq_suspend_timeout = 0;
+ u64 gro_flush_timeout = 0;
+ u32 defer = 0;
+
+ if (info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]) {
+ defer = nla_get_u32(info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]);
+ napi_set_defer_hard_irqs(napi, defer);
+ }
+
+ if (info->attrs[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT]) {
+ irq_suspend_timeout = nla_get_uint(info->attrs[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT]);
+ napi_set_irq_suspend_timeout(napi, irq_suspend_timeout);
+ }
+
+ if (info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]) {
+ gro_flush_timeout = nla_get_uint(info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]);
+ napi_set_gro_flush_timeout(napi, gro_flush_timeout);
+ }
+
+ return 0;
+}
+
+int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct napi_struct *napi;
+ unsigned int napi_id;
+ int err;
+
+ if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_ID))
+ return -EINVAL;
+
+ napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]);
+
+ rtnl_lock();
+ rcu_read_lock();
+
+ napi = napi_by_id(napi_id);
+ if (napi) {
+ err = netdev_nl_napi_set_config(napi, info);
+ } else {
+ NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]);
+ err = -ENOENT;
+ }
+
+ rcu_read_unlock();
+ rtnl_unlock();
+
+ return err;
+}
+
+static int
netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
u32 q_idx, u32 q_type, const struct genl_info *info)
{
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index aa49b92e9194..2e459b9d88eb 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -45,9 +45,6 @@
#define MAX_UDP_CHUNK 1460
#define MAX_SKBS 32
-
-static struct sk_buff_head skb_pool;
-
#define USEC_PER_POLL 50
#define MAX_SKB_SIZE \
@@ -234,20 +231,23 @@ void netpoll_poll_enable(struct net_device *dev)
up(&ni->dev_lock);
}
-static void refill_skbs(void)
+static void refill_skbs(struct netpoll *np)
{
+ struct sk_buff_head *skb_pool;
struct sk_buff *skb;
unsigned long flags;
- spin_lock_irqsave(&skb_pool.lock, flags);
- while (skb_pool.qlen < MAX_SKBS) {
+ skb_pool = &np->skb_pool;
+
+ spin_lock_irqsave(&skb_pool->lock, flags);
+ while (skb_pool->qlen < MAX_SKBS) {
skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
if (!skb)
break;
- __skb_queue_tail(&skb_pool, skb);
+ __skb_queue_tail(skb_pool, skb);
}
- spin_unlock_irqrestore(&skb_pool.lock, flags);
+ spin_unlock_irqrestore(&skb_pool->lock, flags);
}
static void zap_completion_queue(void)
@@ -284,12 +284,12 @@ static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
struct sk_buff *skb;
zap_completion_queue();
- refill_skbs();
+ refill_skbs(np);
repeat:
skb = alloc_skb(len, GFP_ATOMIC);
if (!skb)
- skb = skb_dequeue(&skb_pool);
+ skb = skb_dequeue(&np->skb_pool);
if (!skb) {
if (++count < 10) {
@@ -531,6 +531,14 @@ static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr)
return -1;
}
+static void skb_pool_flush(struct netpoll *np)
+{
+ struct sk_buff_head *skb_pool;
+
+ skb_pool = &np->skb_pool;
+ skb_queue_purge_reason(skb_pool, SKB_CONSUMED);
+}
+
int netpoll_parse_options(struct netpoll *np, char *opt)
{
char *cur=opt, *delim;
@@ -626,7 +634,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
goto out;
}
- if (!ndev->npinfo) {
+ if (!rcu_access_pointer(ndev->npinfo)) {
npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
if (!npinfo) {
err = -ENOMEM;
@@ -641,7 +649,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
ops = ndev->netdev_ops;
if (ops->ndo_netpoll_setup) {
- err = ops->ndo_netpoll_setup(ndev, npinfo);
+ err = ops->ndo_netpoll_setup(ndev);
if (err)
goto free_npinfo;
}
@@ -673,6 +681,8 @@ int netpoll_setup(struct netpoll *np)
struct in_device *in_dev;
int err;
+ skb_queue_head_init(&np->skb_pool);
+
rtnl_lock();
if (np->dev_name[0]) {
struct net *net = current->nsproxy->net_ns;
@@ -773,14 +783,16 @@ put_noaddr:
}
/* fill up the skb queue */
- refill_skbs();
+ refill_skbs(np);
err = __netpoll_setup(np, ndev);
if (err)
- goto put;
+ goto flush;
rtnl_unlock();
return 0;
+flush:
+ skb_pool_flush(np);
put:
DEBUG_NET_WARN_ON_ONCE(np->dev);
if (ip_overwritten)
@@ -792,13 +804,6 @@ unlock:
}
EXPORT_SYMBOL(netpoll_setup);
-static int __init netpoll_init(void)
-{
- skb_queue_head_init(&skb_pool);
- return 0;
-}
-core_initcall(netpoll_init);
-
static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
{
struct netpoll_info *npinfo =
@@ -835,6 +840,8 @@ void __netpoll_cleanup(struct netpoll *np)
call_rcu(&npinfo->rcu, rcu_cleanup_netpoll_info);
} else
RCU_INIT_POINTER(np->dev->npinfo, NULL);
+
+ skb_pool_flush(np);
}
EXPORT_SYMBOL_GPL(__netpoll_cleanup);
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index a813d30d2135..f89cf93f6eb4 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -950,6 +950,7 @@ netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool,
if (netmem && *offset + size > max_size) {
netmem = page_pool_drain_frag(pool, netmem);
if (netmem) {
+ recycle_stat_inc(pool, cached);
alloc_stat_inc(pool, fast);
goto frag_reset;
}
@@ -974,7 +975,6 @@ frag_reset:
pool->frag_users++;
pool->frag_offset = *offset + size;
- alloc_stat_inc(pool, fast);
return netmem;
}
EXPORT_SYMBOL(page_pool_alloc_frag_netmem);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 34f68ef74b8f..7e23cacbe66e 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2285,7 +2285,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
s64 remaining;
struct hrtimer_sleeper t;
- hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ hrtimer_setup_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
hrtimer_set_expires(&t.timer, spin_until);
remaining = ktime_to_ns(hrtimer_expires_remaining(&t.timer));
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2ba5cd965d3f..58df76fe408a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -179,6 +179,166 @@ bool lockdep_rtnl_is_held(void)
EXPORT_SYMBOL(lockdep_rtnl_is_held);
#endif /* #ifdef CONFIG_PROVE_LOCKING */
+#ifdef CONFIG_DEBUG_NET_SMALL_RTNL
+void __rtnl_net_lock(struct net *net)
+{
+ ASSERT_RTNL();
+
+ mutex_lock(&net->rtnl_mutex);
+}
+EXPORT_SYMBOL(__rtnl_net_lock);
+
+void __rtnl_net_unlock(struct net *net)
+{
+ ASSERT_RTNL();
+
+ mutex_unlock(&net->rtnl_mutex);
+}
+EXPORT_SYMBOL(__rtnl_net_unlock);
+
+void rtnl_net_lock(struct net *net)
+{
+ rtnl_lock();
+ __rtnl_net_lock(net);
+}
+EXPORT_SYMBOL(rtnl_net_lock);
+
+void rtnl_net_unlock(struct net *net)
+{
+ __rtnl_net_unlock(net);
+ rtnl_unlock();
+}
+EXPORT_SYMBOL(rtnl_net_unlock);
+
+int rtnl_net_trylock(struct net *net)
+{
+ int ret = rtnl_trylock();
+
+ if (ret)
+ __rtnl_net_lock(net);
+
+ return ret;
+}
+EXPORT_SYMBOL(rtnl_net_trylock);
+
+static int rtnl_net_cmp_locks(const struct net *net_a, const struct net *net_b)
+{
+ if (net_eq(net_a, net_b))
+ return 0;
+
+ /* always init_net first */
+ if (net_eq(net_a, &init_net))
+ return -1;
+
+ if (net_eq(net_b, &init_net))
+ return 1;
+
+ /* otherwise lock in ascending order */
+ return net_a < net_b ? -1 : 1;
+}
+
+int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b)
+{
+ const struct net *net_a, *net_b;
+
+ net_a = container_of(a, struct net, rtnl_mutex.dep_map);
+ net_b = container_of(b, struct net, rtnl_mutex.dep_map);
+
+ return rtnl_net_cmp_locks(net_a, net_b);
+}
+
+bool rtnl_net_is_locked(struct net *net)
+{
+ return rtnl_is_locked() && mutex_is_locked(&net->rtnl_mutex);
+}
+EXPORT_SYMBOL(rtnl_net_is_locked);
+
+bool lockdep_rtnl_net_is_held(struct net *net)
+{
+ return lockdep_rtnl_is_held() && lockdep_is_held(&net->rtnl_mutex);
+}
+EXPORT_SYMBOL(lockdep_rtnl_net_is_held);
+#else
+static int rtnl_net_cmp_locks(const struct net *net_a, const struct net *net_b)
+{
+ /* No need to swap */
+ return -1;
+}
+#endif
+
+struct rtnl_nets {
+ /* ->newlink() needs to freeze 3 netns at most;
+ * 2 for the new device, 1 for its peer.
+ */
+ struct net *net[3];
+ unsigned char len;
+};
+
+static void rtnl_nets_init(struct rtnl_nets *rtnl_nets)
+{
+ memset(rtnl_nets, 0, sizeof(*rtnl_nets));
+}
+
+static void rtnl_nets_destroy(struct rtnl_nets *rtnl_nets)
+{
+ int i;
+
+ for (i = 0; i < rtnl_nets->len; i++) {
+ put_net(rtnl_nets->net[i]);
+ rtnl_nets->net[i] = NULL;
+ }
+
+ rtnl_nets->len = 0;
+}
+
+/**
+ * rtnl_nets_add - Add netns to be locked before ->newlink().
+ *
+ * @rtnl_nets: rtnl_nets pointer passed to ->get_peer_net().
+ * @net: netns pointer with an extra refcnt held.
+ *
+ * The extra refcnt is released in rtnl_nets_destroy().
+ */
+static void rtnl_nets_add(struct rtnl_nets *rtnl_nets, struct net *net)
+{
+ int i;
+
+ DEBUG_NET_WARN_ON_ONCE(rtnl_nets->len == ARRAY_SIZE(rtnl_nets->net));
+
+ for (i = 0; i < rtnl_nets->len; i++) {
+ switch (rtnl_net_cmp_locks(rtnl_nets->net[i], net)) {
+ case 0:
+ put_net(net);
+ return;
+ case 1:
+ swap(rtnl_nets->net[i], net);
+ }
+ }
+
+ rtnl_nets->net[i] = net;
+ rtnl_nets->len++;
+}
+
+static void rtnl_nets_lock(struct rtnl_nets *rtnl_nets)
+{
+ int i;
+
+ rtnl_lock();
+
+ for (i = 0; i < rtnl_nets->len; i++)
+ __rtnl_net_lock(rtnl_nets->net[i]);
+}
+
+static void rtnl_nets_unlock(struct rtnl_nets *rtnl_nets)
+{
+ int i;
+
+ for (i = 0; i < rtnl_nets->len; i++)
+ __rtnl_net_unlock(rtnl_nets->net[i]);
+
+ rtnl_unlock();
+}
+
static struct rtnl_link __rcu *__rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
static inline int rtm_msgindex(int msgtype)
@@ -269,64 +429,13 @@ unlock:
}
/**
- * rtnl_register_module - Register a rtnetlink message type
- *
- * @owner: module registering the hook (THIS_MODULE)
- * @protocol: Protocol family or PF_UNSPEC
- * @msgtype: rtnetlink message type
- * @doit: Function pointer called for each request message
- * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
- * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions
- *
- * Like rtnl_register, but for use by removable modules.
- */
-int rtnl_register_module(struct module *owner,
- int protocol, int msgtype,
- rtnl_doit_func doit, rtnl_dumpit_func dumpit,
- unsigned int flags)
-{
- return rtnl_register_internal(owner, protocol, msgtype,
- doit, dumpit, flags);
-}
-EXPORT_SYMBOL_GPL(rtnl_register_module);
-
-/**
- * rtnl_register - Register a rtnetlink message type
- * @protocol: Protocol family or PF_UNSPEC
- * @msgtype: rtnetlink message type
- * @doit: Function pointer called for each request message
- * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
- * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions
- *
- * Registers the specified function pointers (at least one of them has
- * to be non-NULL) to be called whenever a request message for the
- * specified protocol family and message type is received.
- *
- * The special protocol family PF_UNSPEC may be used to define fallback
- * function pointers for the case when no entry for the specific protocol
- * family exists.
- */
-void rtnl_register(int protocol, int msgtype,
- rtnl_doit_func doit, rtnl_dumpit_func dumpit,
- unsigned int flags)
-{
- int err;
-
- err = rtnl_register_internal(NULL, protocol, msgtype, doit, dumpit,
- flags);
- if (err)
- pr_err("Unable to register rtnetlink message handler, "
- "protocol = %d, message type = %d\n", protocol, msgtype);
-}
-
-/**
* rtnl_unregister - Unregister a rtnetlink message type
* @protocol: Protocol family or PF_UNSPEC
* @msgtype: rtnetlink message type
*
* Returns 0 on success or a negative error code.
*/
-int rtnl_unregister(int protocol, int msgtype)
+static int rtnl_unregister(int protocol, int msgtype)
{
struct rtnl_link __rcu **tab;
struct rtnl_link *link;
@@ -349,7 +458,6 @@ int rtnl_unregister(int protocol, int msgtype)
return 0;
}
-EXPORT_SYMBOL_GPL(rtnl_unregister);
/**
* rtnl_unregister_all - Unregister all rtnetlink message type of a protocol
@@ -384,6 +492,26 @@ void rtnl_unregister_all(int protocol)
}
EXPORT_SYMBOL_GPL(rtnl_unregister_all);
+/**
+ * __rtnl_register_many - Register rtnetlink message types
+ * @handlers: Array of struct rtnl_msg_handlers
+ * @n: The length of @handlers
+ *
+ * Registers the specified function pointers (at least one of them has
+ * to be non-NULL) to be called whenever a request message for the
+ * specified protocol family and message type is received.
+ *
+ * The special protocol family PF_UNSPEC may be used to define fallback
+ * function pointers for the case when no entry for the specific protocol
+ * family exists.
+ *
+ * When one element of @handlers fails to register,
+ * 1) built-in: panics.
+ * 2) modules : the previous successful registrations are unwinded
+ * and an error is returned.
+ *
+ * Use rtnl_register_many().
+ */
int __rtnl_register_many(const struct rtnl_msg_handler *handlers, int n)
{
const struct rtnl_msg_handler *handler;
@@ -394,6 +522,10 @@ int __rtnl_register_many(const struct rtnl_msg_handler *handlers, int n)
handler->msgtype, handler->doit,
handler->dumpit, handler->flags);
if (err) {
+ if (!handler->owner)
+ panic("Unable to register rtnetlink message "
+ "handlers, %pS\n", handlers);
+
__rtnl_unregister_many(handlers, i);
break;
}
@@ -413,46 +545,33 @@ void __rtnl_unregister_many(const struct rtnl_msg_handler *handlers, int n)
}
EXPORT_SYMBOL_GPL(__rtnl_unregister_many);
+static DEFINE_MUTEX(link_ops_mutex);
static LIST_HEAD(link_ops);
-static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind)
+static struct rtnl_link_ops *rtnl_link_ops_get(const char *kind, int *srcu_index)
{
- const struct rtnl_link_ops *ops;
+ struct rtnl_link_ops *ops;
+
+ rcu_read_lock();
- list_for_each_entry(ops, &link_ops, list) {
- if (!strcmp(ops->kind, kind))
- return ops;
+ list_for_each_entry_rcu(ops, &link_ops, list) {
+ if (!strcmp(ops->kind, kind)) {
+ *srcu_index = srcu_read_lock(&ops->srcu);
+ goto unlock;
+ }
}
- return NULL;
-}
-/**
- * __rtnl_link_register - Register rtnl_link_ops with rtnetlink.
- * @ops: struct rtnl_link_ops * to register
- *
- * The caller must hold the rtnl_mutex. This function should be used
- * by drivers that create devices during module initialization. It
- * must be called before registering the devices.
- *
- * Returns 0 on success or a negative error code.
- */
-int __rtnl_link_register(struct rtnl_link_ops *ops)
-{
- if (rtnl_link_ops_get(ops->kind))
- return -EEXIST;
+ ops = NULL;
+unlock:
+ rcu_read_unlock();
- /* The check for alloc/setup is here because if ops
- * does not have that filled up, it is not possible
- * to use the ops for creating device. So do not
- * fill up dellink as well. That disables rtnl_dellink.
- */
- if ((ops->alloc || ops->setup) && !ops->dellink)
- ops->dellink = unregister_netdevice_queue;
+ return ops;
+}
- list_add_tail(&ops->list, &link_ops);
- return 0;
+static void rtnl_link_ops_put(struct rtnl_link_ops *ops, int srcu_index)
+{
+ srcu_read_unlock(&ops->srcu, srcu_index);
}
-EXPORT_SYMBOL_GPL(__rtnl_link_register);
/**
* rtnl_link_register - Register rtnl_link_ops with rtnetlink.
@@ -462,6 +581,7 @@ EXPORT_SYMBOL_GPL(__rtnl_link_register);
*/
int rtnl_link_register(struct rtnl_link_ops *ops)
{
+ struct rtnl_link_ops *tmp;
int err;
/* Sanity-check max sizes to avoid stack buffer overflow. */
@@ -469,9 +589,31 @@ int rtnl_link_register(struct rtnl_link_ops *ops)
ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE))
return -EINVAL;
- rtnl_lock();
- err = __rtnl_link_register(ops);
- rtnl_unlock();
+ /* The check for alloc/setup is here because if ops
+ * does not have that filled up, it is not possible
+ * to use the ops for creating device. So do not
+ * fill up dellink as well. That disables rtnl_dellink.
+ */
+ if ((ops->alloc || ops->setup) && !ops->dellink)
+ ops->dellink = unregister_netdevice_queue;
+
+ err = init_srcu_struct(&ops->srcu);
+ if (err)
+ return err;
+
+ mutex_lock(&link_ops_mutex);
+
+ list_for_each_entry(tmp, &link_ops, list) {
+ if (!strcmp(ops->kind, tmp->kind)) {
+ err = -EEXIST;
+ goto unlock;
+ }
+ }
+
+ list_add_tail_rcu(&ops->list, &link_ops);
+unlock:
+ mutex_unlock(&link_ops_mutex);
+
return err;
}
EXPORT_SYMBOL_GPL(rtnl_link_register);
@@ -488,25 +630,6 @@ static void __rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops)
unregister_netdevice_many(&list_kill);
}
-/**
- * __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink.
- * @ops: struct rtnl_link_ops * to unregister
- *
- * The caller must hold the rtnl_mutex and guarantee net_namespace_list
- * integrity (hold pernet_ops_rwsem for writing to close the race
- * with setup_net() and cleanup_net()).
- */
-void __rtnl_link_unregister(struct rtnl_link_ops *ops)
-{
- struct net *net;
-
- for_each_net(net) {
- __rtnl_kill_links(net, ops);
- }
- list_del(&ops->list);
-}
-EXPORT_SYMBOL_GPL(__rtnl_link_unregister);
-
/* Return with the rtnl_lock held when there are no network
* devices unregistering in any network namespace.
*/
@@ -535,10 +658,22 @@ static void rtnl_lock_unregistering_all(void)
*/
void rtnl_link_unregister(struct rtnl_link_ops *ops)
{
+ struct net *net;
+
+ mutex_lock(&link_ops_mutex);
+ list_del_rcu(&ops->list);
+ mutex_unlock(&link_ops_mutex);
+
+ synchronize_srcu(&ops->srcu);
+ cleanup_srcu_struct(&ops->srcu);
+
/* Close the race with setup_net() and cleanup_net() */
down_write(&pernet_ops_rwsem);
rtnl_lock_unregistering_all();
- __rtnl_link_unregister(ops);
+
+ for_each_net(net)
+ __rtnl_kill_links(net, ops);
+
rtnl_unlock();
up_write(&pernet_ops_rwsem);
}
@@ -595,31 +730,51 @@ static size_t rtnl_link_get_size(const struct net_device *dev)
static LIST_HEAD(rtnl_af_ops);
-static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
+static struct rtnl_af_ops *rtnl_af_lookup(const int family, int *srcu_index)
{
- const struct rtnl_af_ops *ops;
+ struct rtnl_af_ops *ops;
ASSERT_RTNL();
- list_for_each_entry(ops, &rtnl_af_ops, list) {
- if (ops->family == family)
- return ops;
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(ops, &rtnl_af_ops, list) {
+ if (ops->family == family) {
+ *srcu_index = srcu_read_lock(&ops->srcu);
+ goto unlock;
+ }
}
- return NULL;
+ ops = NULL;
+unlock:
+ rcu_read_unlock();
+
+ return ops;
+}
+
+static void rtnl_af_put(struct rtnl_af_ops *ops, int srcu_index)
+{
+ srcu_read_unlock(&ops->srcu, srcu_index);
}
/**
* rtnl_af_register - Register rtnl_af_ops with rtnetlink.
* @ops: struct rtnl_af_ops * to register
*
- * Returns 0 on success or a negative error code.
+ * Return: 0 on success or a negative error code.
*/
-void rtnl_af_register(struct rtnl_af_ops *ops)
+int rtnl_af_register(struct rtnl_af_ops *ops)
{
+ int err = init_srcu_struct(&ops->srcu);
+
+ if (err)
+ return err;
+
rtnl_lock();
list_add_tail_rcu(&ops->list, &rtnl_af_ops);
rtnl_unlock();
+
+ return 0;
}
EXPORT_SYMBOL_GPL(rtnl_af_register);
@@ -634,6 +789,8 @@ void rtnl_af_unregister(struct rtnl_af_ops *ops)
rtnl_unlock();
synchronize_rcu();
+ synchronize_srcu(&ops->srcu);
+ cleanup_srcu_struct(&ops->srcu);
}
EXPORT_SYMBOL_GPL(rtnl_af_unregister);
@@ -1147,6 +1304,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(MAX_ADDR_LEN) /* IFLA_PERM_ADDRESS */
+ rtnl_devlink_port_size(dev)
+ rtnl_dpll_pin_size(dev)
+ + nla_total_size(8) /* IFLA_MAX_PACING_OFFLOAD_HORIZON */
+ 0;
}
@@ -1896,6 +2054,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
READ_ONCE(dev->tso_max_size)) ||
nla_put_u32(skb, IFLA_TSO_MAX_SEGS,
READ_ONCE(dev->tso_max_segs)) ||
+ nla_put_uint(skb, IFLA_MAX_PACING_OFFLOAD_HORIZON,
+ READ_ONCE(dev->max_pacing_offload_horizon)) ||
#ifdef CONFIG_RPS
nla_put_u32(skb, IFLA_NUM_RX_QUEUES,
READ_ONCE(dev->num_rx_queues)) ||
@@ -2004,6 +2164,7 @@ nla_put_failure:
}
static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
+ [IFLA_UNSPEC] = { .strict_start_type = IFLA_DPLL_PIN },
[IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 },
[IFLA_ADDRESS] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
[IFLA_BROADCAST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
@@ -2112,10 +2273,11 @@ static const struct nla_policy ifla_xdp_policy[IFLA_XDP_MAX + 1] = {
[IFLA_XDP_PROG_ID] = { .type = NLA_U32 },
};
-static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla)
+static struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla,
+ int *ops_srcu_index)
{
- const struct rtnl_link_ops *ops = NULL;
struct nlattr *linfo[IFLA_INFO_MAX + 1];
+ struct rtnl_link_ops *ops = NULL;
if (nla_parse_nested_deprecated(linfo, IFLA_INFO_MAX, nla, ifla_info_policy, NULL) < 0)
return NULL;
@@ -2124,7 +2286,7 @@ static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla
char kind[MODULE_NAME_LEN];
nla_strscpy(kind, linfo[IFLA_INFO_KIND], sizeof(kind));
- ops = rtnl_link_ops_get(kind);
+ ops = rtnl_link_ops_get(kind, ops_srcu_index);
}
return ops;
@@ -2244,8 +2406,8 @@ static int rtnl_valid_dump_ifinfo_req(const struct nlmsghdr *nlh,
static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
- const struct rtnl_link_ops *kind_ops = NULL;
struct netlink_ext_ack *extack = cb->extack;
+ struct rtnl_link_ops *kind_ops = NULL;
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
unsigned int flags = NLM_F_MULTI;
@@ -2256,6 +2418,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
struct net *tgt_net = net;
u32 ext_filter_mask = 0;
struct net_device *dev;
+ int ops_srcu_index;
int master_idx = 0;
int netnsid = -1;
int err, i;
@@ -2279,7 +2442,9 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
tgt_net = rtnl_get_net_ns_capable(skb->sk, netnsid);
if (IS_ERR(tgt_net)) {
NL_SET_ERR_MSG(extack, "Invalid target network namespace id");
- return PTR_ERR(tgt_net);
+ err = PTR_ERR(tgt_net);
+ netnsid = -1;
+ goto out;
}
break;
case IFLA_EXT_MASK:
@@ -2289,12 +2454,13 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
master_idx = nla_get_u32(tb[i]);
break;
case IFLA_LINKINFO:
- kind_ops = linkinfo_to_kind_ops(tb[i]);
+ kind_ops = linkinfo_to_kind_ops(tb[i], &ops_srcu_index);
break;
default:
if (cb->strict_check) {
NL_SET_ERR_MSG(extack, "Unsupported attribute in link dump request");
- return -EINVAL;
+ err = -EINVAL;
+ goto out;
}
}
}
@@ -2315,8 +2481,15 @@ walk_entries:
if (err < 0)
break;
}
+
+
cb->seq = tgt_net->dev_base_seq;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+
+out:
+
+ if (kind_ops)
+ rtnl_link_ops_put(kind_ops, ops_srcu_index);
if (netnsid >= 0)
put_net(tgt_net);
@@ -2345,9 +2518,10 @@ int rtnl_nla_parse_ifinfomsg(struct nlattr **tb, const struct nlattr *nla_peer,
}
EXPORT_SYMBOL(rtnl_nla_parse_ifinfomsg);
-struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
+static struct net *rtnl_link_get_net_ifla(struct nlattr *tb[])
{
- struct net *net;
+ struct net *net = NULL;
+
/* Examine the link attributes and figure out which
* network namespace we are talking about.
*/
@@ -2355,8 +2529,17 @@ struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID]));
else if (tb[IFLA_NET_NS_FD])
net = get_net_ns_by_fd(nla_get_u32(tb[IFLA_NET_NS_FD]));
- else
+
+ return net;
+}
+
+struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
+{
+ struct net *net = rtnl_link_get_net_ifla(tb);
+
+ if (!net)
net = get_net(src_net);
+
return net;
}
EXPORT_SYMBOL(rtnl_link_get_net);
@@ -2496,20 +2679,24 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[],
int rem, err;
nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
- const struct rtnl_af_ops *af_ops;
+ struct rtnl_af_ops *af_ops;
+ int af_ops_srcu_index;
- af_ops = rtnl_af_lookup(nla_type(af));
+ af_ops = rtnl_af_lookup(nla_type(af), &af_ops_srcu_index);
if (!af_ops)
return -EAFNOSUPPORT;
if (!af_ops->set_link_af)
- return -EOPNOTSUPP;
-
- if (af_ops->validate_link_af) {
+ err = -EOPNOTSUPP;
+ else if (af_ops->validate_link_af)
err = af_ops->validate_link_af(dev, af, extack);
- if (err < 0)
- return err;
- }
+ else
+ err = 0;
+
+ rtnl_af_put(af_ops, af_ops_srcu_index);
+
+ if (err < 0)
+ return err;
}
}
@@ -2800,8 +2987,8 @@ static int do_set_proto_down(struct net_device *dev,
#define DO_SETLINK_MODIFIED 0x01
/* notify flag means notify + modified. */
#define DO_SETLINK_NOTIFY 0x03
-static int do_setlink(const struct sk_buff *skb,
- struct net_device *dev, struct ifinfomsg *ifm,
+static int do_setlink(const struct sk_buff *skb, struct net_device *dev,
+ struct net *tgt_net, struct ifinfomsg *ifm,
struct netlink_ext_ack *extack,
struct nlattr **tb, int status)
{
@@ -2809,32 +2996,25 @@ static int do_setlink(const struct sk_buff *skb,
char ifname[IFNAMSIZ];
int err;
+ err = validate_linkmsg(dev, tb, extack);
+ if (err < 0)
+ goto errout;
+
if (tb[IFLA_IFNAME])
nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
else
ifname[0] = '\0';
- if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD] || tb[IFLA_TARGET_NETNSID]) {
+ if (!net_eq(tgt_net, dev_net(dev))) {
const char *pat = ifname[0] ? ifname : NULL;
- struct net *net;
int new_ifindex;
- net = rtnl_link_get_net_capable(skb, dev_net(dev),
- tb, CAP_NET_ADMIN);
- if (IS_ERR(net)) {
- err = PTR_ERR(net);
- goto errout;
- }
-
- if (tb[IFLA_NEW_IFINDEX])
- new_ifindex = nla_get_s32(tb[IFLA_NEW_IFINDEX]);
- else
- new_ifindex = 0;
+ new_ifindex = nla_get_s32_default(tb[IFLA_NEW_IFINDEX], 0);
- err = __dev_change_net_namespace(dev, net, pat, new_ifindex);
- put_net(net);
+ err = __dev_change_net_namespace(dev, tgt_net, pat, new_ifindex);
if (err)
goto errout;
+
status |= DO_SETLINK_MODIFIED;
}
@@ -3093,11 +3273,18 @@ static int do_setlink(const struct sk_buff *skb,
int rem;
nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
- const struct rtnl_af_ops *af_ops;
+ struct rtnl_af_ops *af_ops;
+ int af_ops_srcu_index;
- BUG_ON(!(af_ops = rtnl_af_lookup(nla_type(af))));
+ af_ops = rtnl_af_lookup(nla_type(af), &af_ops_srcu_index);
+ if (!af_ops) {
+ err = -EAFNOSUPPORT;
+ goto errout;
+ }
err = af_ops->set_link_af(dev, af, extack);
+ rtnl_af_put(af_ops, af_ops_srcu_index);
+
if (err < 0)
goto errout;
@@ -3194,11 +3381,13 @@ static struct net_device *rtnl_dev_get(struct net *net,
static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
+ struct ifinfomsg *ifm = nlmsg_data(nlh);
struct net *net = sock_net(skb->sk);
- struct ifinfomsg *ifm;
- struct net_device *dev;
- int err;
struct nlattr *tb[IFLA_MAX+1];
+ struct net_device *dev = NULL;
+ struct rtnl_nets rtnl_nets;
+ struct net *tgt_net;
+ int err;
err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX,
ifla_policy, extack);
@@ -3209,25 +3398,31 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
goto errout;
- err = -EINVAL;
- ifm = nlmsg_data(nlh);
+ tgt_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN);
+ if (IS_ERR(tgt_net)) {
+ err = PTR_ERR(tgt_net);
+ goto errout;
+ }
+
+ rtnl_nets_init(&rtnl_nets);
+ rtnl_nets_add(&rtnl_nets, get_net(net));
+ rtnl_nets_add(&rtnl_nets, tgt_net);
+
+ rtnl_nets_lock(&rtnl_nets);
+
if (ifm->ifi_index > 0)
dev = __dev_get_by_index(net, ifm->ifi_index);
else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
dev = rtnl_dev_get(net, tb);
else
- goto errout;
+ err = -EINVAL;
- if (dev == NULL) {
+ if (dev)
+ err = do_setlink(skb, dev, tgt_net, ifm, extack, tb, 0);
+ else if (!err)
err = -ENODEV;
- goto errout;
- }
- err = validate_linkmsg(dev, tb, extack);
- if (err < 0)
- goto errout;
-
- err = do_setlink(skb, dev, ifm, extack, tb, 0);
+ rtnl_nets_unlock(&rtnl_nets);
errout:
return err;
}
@@ -3287,14 +3482,14 @@ EXPORT_SYMBOL_GPL(rtnl_delete_link);
static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
+ struct ifinfomsg *ifm = nlmsg_data(nlh);
struct net *net = sock_net(skb->sk);
u32 portid = NETLINK_CB(skb).portid;
- struct net *tgt_net = net;
- struct net_device *dev = NULL;
- struct ifinfomsg *ifm;
struct nlattr *tb[IFLA_MAX+1];
- int err;
+ struct net_device *dev = NULL;
+ struct net *tgt_net = net;
int netnsid = -1;
+ int err;
err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX,
ifla_policy, extack);
@@ -3312,27 +3507,24 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
return PTR_ERR(tgt_net);
}
- err = -EINVAL;
- ifm = nlmsg_data(nlh);
+ rtnl_net_lock(tgt_net);
+
if (ifm->ifi_index > 0)
dev = __dev_get_by_index(tgt_net, ifm->ifi_index);
else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
dev = rtnl_dev_get(tgt_net, tb);
+
+ if (dev)
+ err = rtnl_delete_link(dev, portid, nlh);
+ else if (ifm->ifi_index > 0 || tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
+ err = -ENODEV;
else if (tb[IFLA_GROUP])
err = rtnl_group_dellink(tgt_net, nla_get_u32(tb[IFLA_GROUP]));
else
- goto out;
-
- if (!dev) {
- if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME] || ifm->ifi_index > 0)
- err = -ENODEV;
-
- goto out;
- }
+ err = -EINVAL;
- err = rtnl_delete_link(dev, portid, nlh);
+ rtnl_net_unlock(tgt_net);
-out:
if (netnsid >= 0)
put_net(tgt_net);
@@ -3459,21 +3651,90 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname,
}
EXPORT_SYMBOL(rtnl_create_link);
+struct rtnl_newlink_tbs { /* scratch attribute tables for one RTM_NEWLINK request */
+ struct nlattr *tb[IFLA_MAX + 1]; /* top-level IFLA_* attributes */
+ struct nlattr *linkinfo[IFLA_INFO_MAX + 1]; /* nested contents of IFLA_LINKINFO */
+ struct nlattr *attr[RTNL_MAX_TYPE + 1]; /* kind-specific IFLA_INFO_DATA, parsed with ops->policy */
+ struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1]; /* IFLA_INFO_SLAVE_DATA, parsed with the master's slave_policy */
+};
+
+static int rtnl_changelink(const struct sk_buff *skb, struct nlmsghdr *nlh,
+ const struct rtnl_link_ops *ops,
+ struct net_device *dev, struct net *tgt_net,
+ struct rtnl_newlink_tbs *tbs,
+ struct nlattr **data,
+ struct netlink_ext_ack *extack)
+{ /* RTM_NEWLINK on an existing dev: run the changelink hooks, then finish in do_setlink() */
+ struct nlattr ** const linkinfo = tbs->linkinfo;
+ struct nlattr ** const tb = tbs->tb;
+ int status = 0; /* DO_SETLINK_* flags accumulated here and handed to do_setlink() */
+ int err;
+
+ if (nlh->nlmsg_flags & NLM_F_EXCL) /* exclusive create requested, but the device already exists */
+ return -EEXIST;
+
+ if (nlh->nlmsg_flags & NLM_F_REPLACE) /* replacing an existing link is not supported */
+ return -EOPNOTSUPP;
+
+ if (linkinfo[IFLA_INFO_DATA]) {
+ if (!ops || ops != dev->rtnl_link_ops || !ops->changelink) /* kind must be the device's own and implement changelink */
+ return -EOPNOTSUPP;
+
+ err = ops->changelink(dev, tb, data, extack);
+ if (err < 0)
+ return err;
+
+ status |= DO_SETLINK_NOTIFY;
+ }
+
+ if (linkinfo[IFLA_INFO_SLAVE_DATA]) {
+ const struct rtnl_link_ops *m_ops = NULL;
+ struct nlattr **slave_data = NULL; /* stays NULL when the master declares no slave attributes */
+ struct net_device *master_dev;
+
+ master_dev = netdev_master_upper_dev_get(dev); /* slave data is interpreted by the master's link ops */
+ if (master_dev)
+ m_ops = master_dev->rtnl_link_ops;
+
+ if (!m_ops || !m_ops->slave_changelink)
+ return -EOPNOTSUPP;
+
+ if (m_ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE) /* parsed attributes must fit in tbs->slave_attr */
+ return -EINVAL;
+
+ if (m_ops->slave_maxtype) {
+ err = nla_parse_nested_deprecated(tbs->slave_attr,
+ m_ops->slave_maxtype,
+ linkinfo[IFLA_INFO_SLAVE_DATA],
+ m_ops->slave_policy, extack);
+ if (err < 0)
+ return err;
+
+ slave_data = tbs->slave_attr;
+ }
+
+ err = m_ops->slave_changelink(master_dev, dev, tb, slave_data, extack);
+ if (err < 0)
+ return err;
+
+ status |= DO_SETLINK_NOTIFY;
+ }
+
+ return do_setlink(skb, dev, tgt_net, nlmsg_data(nlh), extack, tb, status); /* generic attributes are applied last */
+}
+
static int rtnl_group_changelink(const struct sk_buff *skb,
- struct net *net, int group,
- struct ifinfomsg *ifm,
- struct netlink_ext_ack *extack,
- struct nlattr **tb)
+ struct net *net, struct net *tgt_net,
+ int group, struct ifinfomsg *ifm,
+ struct netlink_ext_ack *extack,
+ struct nlattr **tb)
{
struct net_device *dev, *aux;
int err;
for_each_netdev_safe(net, dev, aux) {
if (dev->group == group) {
- err = validate_linkmsg(dev, tb, extack);
- if (err < 0)
- return err;
- err = do_setlink(skb, dev, ifm, extack, tb, 0);
+ err = do_setlink(skb, dev, tgt_net, ifm, extack, tb, 0);
if (err < 0)
return err;
}
@@ -3484,6 +3745,7 @@ static int rtnl_group_changelink(const struct sk_buff *skb,
static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm,
const struct rtnl_link_ops *ops,
+ struct net *tgt_net, struct net *link_net,
const struct nlmsghdr *nlh,
struct nlattr **tb, struct nlattr **data,
struct netlink_ext_ack *extack)
@@ -3491,7 +3753,6 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm,
unsigned char name_assign_type = NET_NAME_USER;
struct net *net = sock_net(skb->sk);
u32 portid = NETLINK_CB(skb).portid;
- struct net *dest_net, *link_net;
struct net_device *dev;
char ifname[IFNAMSIZ];
int err;
@@ -3506,27 +3767,7 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm,
name_assign_type = NET_NAME_ENUM;
}
- dest_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN);
- if (IS_ERR(dest_net))
- return PTR_ERR(dest_net);
-
- if (tb[IFLA_LINK_NETNSID]) {
- int id = nla_get_s32(tb[IFLA_LINK_NETNSID]);
-
- link_net = get_net_ns_by_id(dest_net, id);
- if (!link_net) {
- NL_SET_ERR_MSG(extack, "Unknown network namespace id");
- err = -EINVAL;
- goto out;
- }
- err = -EPERM;
- if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN))
- goto out;
- } else {
- link_net = NULL;
- }
-
- dev = rtnl_create_link(link_net ? : dest_net, ifname,
+ dev = rtnl_create_link(link_net ? : tgt_net, ifname,
name_assign_type, ops, tb, extack);
if (IS_ERR(dev)) {
err = PTR_ERR(dev);
@@ -3548,7 +3789,7 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm,
if (err < 0)
goto out_unregister;
if (link_net) {
- err = dev_change_net_namespace(dev, dest_net, ifname);
+ err = dev_change_net_namespace(dev, tgt_net, ifname);
if (err < 0)
goto out_unregister;
}
@@ -3558,9 +3799,6 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm,
goto out_unregister;
}
out:
- if (link_net)
- put_net(link_net);
- put_net(dest_net);
return err;
out_unregister:
if (ops->newlink) {
@@ -3574,41 +3812,49 @@ out_unregister:
goto out;
}
-struct rtnl_newlink_tbs {
+static int rtnl_add_peer_net(struct rtnl_nets *rtnl_nets,
+ const struct rtnl_link_ops *ops,
+ struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+{
struct nlattr *tb[IFLA_MAX + 1];
- struct nlattr *attr[RTNL_MAX_TYPE + 1];
- struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1];
-};
+ struct net *net;
+ int err;
+
+ if (!data || !data[ops->peer_type])
+ return 0;
+
+ err = rtnl_nla_parse_ifinfomsg(tb, data[ops->peer_type], extack);
+ if (err < 0)
+ return err;
+
+ if (ops->validate) {
+ err = ops->validate(tb, NULL, extack);
+ if (err < 0)
+ return err;
+ }
+
+ net = rtnl_link_get_net_ifla(tb);
+ if (IS_ERR(net))
+ return PTR_ERR(net);
+ if (net)
+ rtnl_nets_add(rtnl_nets, net);
+
+ return 0;
+}
static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ const struct rtnl_link_ops *ops,
+ struct net *tgt_net, struct net *link_net,
struct rtnl_newlink_tbs *tbs,
+ struct nlattr **data,
struct netlink_ext_ack *extack)
{
- struct nlattr *linkinfo[IFLA_INFO_MAX + 1];
struct nlattr ** const tb = tbs->tb;
- const struct rtnl_link_ops *m_ops;
- struct net_device *master_dev;
struct net *net = sock_net(skb->sk);
- const struct rtnl_link_ops *ops;
- struct nlattr **slave_data;
- char kind[MODULE_NAME_LEN];
struct net_device *dev;
struct ifinfomsg *ifm;
- struct nlattr **data;
bool link_specified;
- int err;
-
-#ifdef CONFIG_MODULES
-replay:
-#endif
- err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX,
- ifla_policy, extack);
- if (err < 0)
- return err;
-
- err = rtnl_ensure_unique_netns(tb, extack, false);
- if (err < 0)
- return err;
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0) {
@@ -3625,151 +3871,148 @@ replay:
dev = NULL;
}
- master_dev = NULL;
- m_ops = NULL;
- if (dev) {
- master_dev = netdev_master_upper_dev_get(dev);
- if (master_dev)
- m_ops = master_dev->rtnl_link_ops;
+ if (dev)
+ return rtnl_changelink(skb, nlh, ops, dev, tgt_net, tbs, data, extack);
+
+ if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
+ /* No dev found and NLM_F_CREATE not set. Requested dev does not exist,
+ * or it's for a group
+ */
+ if (link_specified || !tb[IFLA_GROUP])
+ return -ENODEV;
+
+ return rtnl_group_changelink(skb, net, tgt_net,
+ nla_get_u32(tb[IFLA_GROUP]),
+ ifm, extack, tb);
}
+ if (tb[IFLA_MAP] || tb[IFLA_PROTINFO])
+ return -EOPNOTSUPP;
+
+ if (!ops) {
+ NL_SET_ERR_MSG(extack, "Unknown device type");
+ return -EOPNOTSUPP;
+ }
+
+ return rtnl_newlink_create(skb, ifm, ops, tgt_net, link_net, nlh, tb, data, extack);
+}
+
+static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr **tb, **linkinfo, **data = NULL;
+ struct net *tgt_net, *link_net = NULL;
+ struct rtnl_link_ops *ops = NULL;
+ struct rtnl_newlink_tbs *tbs;
+ struct rtnl_nets rtnl_nets;
+ int ops_srcu_index;
+ int ret;
+
+ tbs = kmalloc(sizeof(*tbs), GFP_KERNEL);
+ if (!tbs)
+ return -ENOMEM;
+
+ tb = tbs->tb;
+ ret = nlmsg_parse_deprecated(nlh, sizeof(struct ifinfomsg), tb,
+ IFLA_MAX, ifla_policy, extack);
+ if (ret < 0)
+ goto free;
+
+ ret = rtnl_ensure_unique_netns(tb, extack, false);
+ if (ret < 0)
+ goto free;
+
+ linkinfo = tbs->linkinfo;
if (tb[IFLA_LINKINFO]) {
- err = nla_parse_nested_deprecated(linkinfo, IFLA_INFO_MAX,
+ ret = nla_parse_nested_deprecated(linkinfo, IFLA_INFO_MAX,
tb[IFLA_LINKINFO],
ifla_info_policy, NULL);
- if (err < 0)
- return err;
- } else
- memset(linkinfo, 0, sizeof(linkinfo));
+ if (ret < 0)
+ goto free;
+ } else {
+ memset(linkinfo, 0, sizeof(tbs->linkinfo));
+ }
if (linkinfo[IFLA_INFO_KIND]) {
+ char kind[MODULE_NAME_LEN];
+
nla_strscpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind));
- ops = rtnl_link_ops_get(kind);
- } else {
- kind[0] = '\0';
- ops = NULL;
+ ops = rtnl_link_ops_get(kind, &ops_srcu_index);
+#ifdef CONFIG_MODULES
+ if (!ops) {
+ request_module("rtnl-link-%s", kind);
+ ops = rtnl_link_ops_get(kind, &ops_srcu_index);
+ }
+#endif
}
- data = NULL;
+ rtnl_nets_init(&rtnl_nets);
+
if (ops) {
- if (ops->maxtype > RTNL_MAX_TYPE)
- return -EINVAL;
+ if (ops->maxtype > RTNL_MAX_TYPE) {
+ ret = -EINVAL;
+ goto put_ops;
+ }
if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) {
- err = nla_parse_nested_deprecated(tbs->attr, ops->maxtype,
+ ret = nla_parse_nested_deprecated(tbs->attr, ops->maxtype,
linkinfo[IFLA_INFO_DATA],
ops->policy, extack);
- if (err < 0)
- return err;
+ if (ret < 0)
+ goto put_ops;
+
data = tbs->attr;
}
+
if (ops->validate) {
- err = ops->validate(tb, data, extack);
- if (err < 0)
- return err;
+ ret = ops->validate(tb, data, extack);
+ if (ret < 0)
+ goto put_ops;
}
- }
-
- slave_data = NULL;
- if (m_ops) {
- if (m_ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE)
- return -EINVAL;
- if (m_ops->slave_maxtype &&
- linkinfo[IFLA_INFO_SLAVE_DATA]) {
- err = nla_parse_nested_deprecated(tbs->slave_attr,
- m_ops->slave_maxtype,
- linkinfo[IFLA_INFO_SLAVE_DATA],
- m_ops->slave_policy,
- extack);
- if (err < 0)
- return err;
- slave_data = tbs->slave_attr;
+ if (ops->peer_type) {
+ ret = rtnl_add_peer_net(&rtnl_nets, ops, data, extack);
+ if (ret < 0)
+ goto put_ops;
}
}
- if (dev) {
- int status = 0;
+ tgt_net = rtnl_link_get_net_capable(skb, sock_net(skb->sk), tb, CAP_NET_ADMIN);
+ if (IS_ERR(tgt_net)) {
+ ret = PTR_ERR(tgt_net);
+ goto put_net;
+ }
- if (nlh->nlmsg_flags & NLM_F_EXCL)
- return -EEXIST;
- if (nlh->nlmsg_flags & NLM_F_REPLACE)
- return -EOPNOTSUPP;
+ rtnl_nets_add(&rtnl_nets, tgt_net);
- err = validate_linkmsg(dev, tb, extack);
- if (err < 0)
- return err;
-
- if (linkinfo[IFLA_INFO_DATA]) {
- if (!ops || ops != dev->rtnl_link_ops ||
- !ops->changelink)
- return -EOPNOTSUPP;
+ if (tb[IFLA_LINK_NETNSID]) {
+ int id = nla_get_s32(tb[IFLA_LINK_NETNSID]);
- err = ops->changelink(dev, tb, data, extack);
- if (err < 0)
- return err;
- status |= DO_SETLINK_NOTIFY;
+ link_net = get_net_ns_by_id(tgt_net, id);
+ if (!link_net) {
+ NL_SET_ERR_MSG(extack, "Unknown network namespace id");
+ ret = -EINVAL;
+ goto put_net;
}
- if (linkinfo[IFLA_INFO_SLAVE_DATA]) {
- if (!m_ops || !m_ops->slave_changelink)
- return -EOPNOTSUPP;
+ rtnl_nets_add(&rtnl_nets, link_net);
- err = m_ops->slave_changelink(master_dev, dev, tb,
- slave_data, extack);
- if (err < 0)
- return err;
- status |= DO_SETLINK_NOTIFY;
+ if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN)) {
+ ret = -EPERM;
+ goto put_net;
}
-
- return do_setlink(skb, dev, ifm, extack, tb, status);
- }
-
- if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
- /* No dev found and NLM_F_CREATE not set. Requested dev does not exist,
- * or it's for a group
- */
- if (link_specified)
- return -ENODEV;
- if (tb[IFLA_GROUP])
- return rtnl_group_changelink(skb, net,
- nla_get_u32(tb[IFLA_GROUP]),
- ifm, extack, tb);
- return -ENODEV;
}
- if (tb[IFLA_MAP] || tb[IFLA_PROTINFO])
- return -EOPNOTSUPP;
+ rtnl_nets_lock(&rtnl_nets);
+ ret = __rtnl_newlink(skb, nlh, ops, tgt_net, link_net, tbs, data, extack);
+ rtnl_nets_unlock(&rtnl_nets);
- if (!ops) {
-#ifdef CONFIG_MODULES
- if (kind[0]) {
- __rtnl_unlock();
- request_module("rtnl-link-%s", kind);
- rtnl_lock();
- ops = rtnl_link_ops_get(kind);
- if (ops)
- goto replay;
- }
-#endif
- NL_SET_ERR_MSG(extack, "Unknown device type");
- return -EOPNOTSUPP;
- }
-
- return rtnl_newlink_create(skb, ifm, ops, nlh, tb, data, extack);
-}
-
-static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
-{
- struct rtnl_newlink_tbs *tbs;
- int ret;
-
- tbs = kmalloc(sizeof(*tbs), GFP_KERNEL);
- if (!tbs)
- return -ENOMEM;
-
- ret = __rtnl_newlink(skb, nlh, tbs, extack);
+put_net:
+ rtnl_nets_destroy(&rtnl_nets);
+put_ops:
+ if (ops)
+ rtnl_link_ops_put(ops, ops_srcu_index);
+free:
kfree(tbs);
return ret;
}
@@ -4341,9 +4584,10 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
netif_is_bridge_port(dev)) {
struct net_device *br_dev = netdev_master_upper_dev_get(dev);
const struct net_device_ops *ops = br_dev->netdev_ops;
+ bool notified = false;
err = ops->ndo_fdb_add(ndm, tb, dev, addr, vid,
- nlh->nlmsg_flags, extack);
+ nlh->nlmsg_flags, &notified, extack);
if (err)
goto out;
else
@@ -4352,16 +4596,18 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
/* Embedded bridge, macvlan, and any other device support */
if ((ndm->ndm_flags & NTF_SELF)) {
+ bool notified = false;
+
if (dev->netdev_ops->ndo_fdb_add)
err = dev->netdev_ops->ndo_fdb_add(ndm, tb, dev, addr,
vid,
nlh->nlmsg_flags,
- extack);
+ &notified, extack);
else
err = ndo_dflt_fdb_add(ndm, tb, dev, addr, vid,
nlh->nlmsg_flags);
- if (!err) {
+ if (!err && !notified) {
rtnl_fdb_notify(dev, addr, vid, RTM_NEWNEIGH,
ndm->ndm_state);
ndm->ndm_flags &= ~NTF_SELF;
@@ -4461,11 +4707,13 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) &&
netif_is_bridge_port(dev)) {
struct net_device *br_dev = netdev_master_upper_dev_get(dev);
+ bool notified = false;
ops = br_dev->netdev_ops;
if (!del_bulk) {
if (ops->ndo_fdb_del)
- err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, extack);
+ err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid,
+ &notified, extack);
} else {
if (ops->ndo_fdb_del_bulk)
err = ops->ndo_fdb_del_bulk(nlh, dev, extack);
@@ -4479,10 +4727,13 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
/* Embedded bridge, macvlan, and any other device support */
if (ndm->ndm_flags & NTF_SELF) {
+ bool notified = false;
+
ops = dev->netdev_ops;
if (!del_bulk) {
if (ops->ndo_fdb_del)
- err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, extack);
+ err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid,
+ &notified, extack);
else
err = ndo_dflt_fdb_del(ndm, tb, dev, addr, vid);
} else {
@@ -4493,7 +4744,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
}
if (!err) {
- if (!del_bulk)
+ if (!del_bulk && !notified)
rtnl_fdb_notify(dev, addr, vid, RTM_DELNEIGH,
ndm->ndm_state);
ndm->ndm_flags &= ~NTF_SELF;
@@ -6198,7 +6449,7 @@ static int rtnl_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
int idx, s_idx;
int err;
- NL_ASSERT_DUMP_CTX_FITS(struct rtnl_mdb_dump_ctx);
+ NL_ASSERT_CTX_FITS(struct rtnl_mdb_dump_ctx);
if (cb->strict_check) {
err = rtnl_mdb_valid_dump_req(cb->nlh, cb->extack);
@@ -6765,6 +7016,41 @@ static struct pernet_operations rtnetlink_net_ops = {
.exit = rtnetlink_net_exit,
};
+static const struct rtnl_msg_handler rtnetlink_rtnl_msg_handlers[] __initconst = { /* passed to rtnl_register_many() by rtnetlink_init() */
+ {.msgtype = RTM_NEWLINK, .doit = rtnl_newlink,
+ .flags = RTNL_FLAG_DOIT_PERNET},
+ {.msgtype = RTM_DELLINK, .doit = rtnl_dellink,
+ .flags = RTNL_FLAG_DOIT_PERNET_WIP},
+ {.msgtype = RTM_GETLINK, .doit = rtnl_getlink,
+ .dumpit = rtnl_dump_ifinfo, .flags = RTNL_FLAG_DUMP_SPLIT_NLM_DONE},
+ {.msgtype = RTM_SETLINK, .doit = rtnl_setlink,
+ .flags = RTNL_FLAG_DOIT_PERNET_WIP},
+ {.msgtype = RTM_GETADDR, .dumpit = rtnl_dump_all},
+ {.msgtype = RTM_GETROUTE, .dumpit = rtnl_dump_all},
+ {.msgtype = RTM_GETNETCONF, .dumpit = rtnl_dump_all},
+ {.msgtype = RTM_GETSTATS, .doit = rtnl_stats_get,
+ .dumpit = rtnl_stats_dump},
+ {.msgtype = RTM_SETSTATS, .doit = rtnl_stats_set},
+ {.msgtype = RTM_NEWLINKPROP, .doit = rtnl_newlinkprop},
+ {.msgtype = RTM_DELLINKPROP, .doit = rtnl_dellinkprop},
+ {.protocol = PF_BRIDGE, .msgtype = RTM_GETLINK, /* entries above leave .protocol zero-initialised; PF_BRIDGE entries follow */
+ .dumpit = rtnl_bridge_getlink},
+ {.protocol = PF_BRIDGE, .msgtype = RTM_DELLINK,
+ .doit = rtnl_bridge_dellink},
+ {.protocol = PF_BRIDGE, .msgtype = RTM_SETLINK,
+ .doit = rtnl_bridge_setlink},
+ {.protocol = PF_BRIDGE, .msgtype = RTM_NEWNEIGH, .doit = rtnl_fdb_add},
+ {.protocol = PF_BRIDGE, .msgtype = RTM_DELNEIGH, .doit = rtnl_fdb_del,
+ .flags = RTNL_FLAG_BULK_DEL_SUPPORTED},
+ {.protocol = PF_BRIDGE, .msgtype = RTM_GETNEIGH, .doit = rtnl_fdb_get,
+ .dumpit = rtnl_fdb_dump},
+ {.protocol = PF_BRIDGE, .msgtype = RTM_NEWMDB, .doit = rtnl_mdb_add},
+ {.protocol = PF_BRIDGE, .msgtype = RTM_DELMDB, .doit = rtnl_mdb_del,
+ .flags = RTNL_FLAG_BULK_DEL_SUPPORTED},
+ {.protocol = PF_BRIDGE, .msgtype = RTM_GETMDB, .doit = rtnl_mdb_get,
+ .dumpit = rtnl_mdb_dump},
+};
+
void __init rtnetlink_init(void)
{
if (register_pernet_subsys(&rtnetlink_net_ops))
@@ -6772,34 +7058,5 @@ void __init rtnetlink_init(void)
register_netdevice_notifier(&rtnetlink_dev_notifier);
- rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink,
- rtnl_dump_ifinfo, RTNL_FLAG_DUMP_SPLIT_NLM_DONE);
- rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, 0);
-
- rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, 0);
- rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, 0);
- rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, 0);
-
- rtnl_register(PF_UNSPEC, RTM_NEWLINKPROP, rtnl_newlinkprop, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_DELLINKPROP, rtnl_dellinkprop, NULL, 0);
-
- rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, 0);
- rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL,
- RTNL_FLAG_BULK_DEL_SUPPORTED);
- rtnl_register(PF_BRIDGE, RTM_GETNEIGH, rtnl_fdb_get, rtnl_fdb_dump, 0);
-
- rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, 0);
- rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, 0);
- rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, 0);
-
- rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get, rtnl_stats_dump,
- 0);
- rtnl_register(PF_UNSPEC, RTM_SETSTATS, rtnl_stats_set, NULL, 0);
-
- rtnl_register(PF_BRIDGE, RTM_GETMDB, rtnl_mdb_get, rtnl_mdb_dump, 0);
- rtnl_register(PF_BRIDGE, RTM_NEWMDB, rtnl_mdb_add, NULL, 0);
- rtnl_register(PF_BRIDGE, RTM_DELMDB, rtnl_mdb_del, NULL,
- RTNL_FLAG_BULK_DEL_SUPPORTED);
+ rtnl_register_many(rtnetlink_rtnl_msg_handlers);
}
diff --git a/net/core/rtnl_net_debug.c b/net/core/rtnl_net_debug.c
new file mode 100644
index 000000000000..f406045cbd0e
--- /dev/null
+++ b/net/core/rtnl_net_debug.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/notifier.h>
+#include <linux/rtnetlink.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
+static int rtnl_net_debug_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{ /* netdevice notifier callback: asserts RTNL is held for every listed event */
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct net *net = dev_net(dev);
+ enum netdev_cmd cmd = event;
+
+ /* Keep enum and don't add default to trigger -Werror=switch */
+ switch (cmd) {
+ case NETDEV_UP:
+ case NETDEV_DOWN:
+ case NETDEV_REBOOT:
+ case NETDEV_CHANGE:
+ case NETDEV_REGISTER:
+ case NETDEV_UNREGISTER:
+ case NETDEV_CHANGEMTU:
+ case NETDEV_CHANGEADDR:
+ case NETDEV_PRE_CHANGEADDR:
+ case NETDEV_GOING_DOWN:
+ case NETDEV_CHANGENAME:
+ case NETDEV_FEAT_CHANGE:
+ case NETDEV_BONDING_FAILOVER:
+ case NETDEV_PRE_UP:
+ case NETDEV_PRE_TYPE_CHANGE:
+ case NETDEV_POST_TYPE_CHANGE:
+ case NETDEV_POST_INIT:
+ case NETDEV_PRE_UNINIT:
+ case NETDEV_RELEASE:
+ case NETDEV_NOTIFY_PEERS:
+ case NETDEV_JOIN:
+ case NETDEV_CHANGEUPPER:
+ case NETDEV_RESEND_IGMP:
+ case NETDEV_PRECHANGEMTU:
+ case NETDEV_CHANGEINFODATA:
+ case NETDEV_BONDING_INFO:
+ case NETDEV_PRECHANGEUPPER:
+ case NETDEV_CHANGELOWERSTATE:
+ case NETDEV_UDP_TUNNEL_PUSH_INFO:
+ case NETDEV_UDP_TUNNEL_DROP_INFO:
+ case NETDEV_CHANGE_TX_QUEUE_LEN:
+ case NETDEV_CVLAN_FILTER_PUSH_INFO:
+ case NETDEV_CVLAN_FILTER_DROP_INFO:
+ case NETDEV_SVLAN_FILTER_PUSH_INFO:
+ case NETDEV_SVLAN_FILTER_DROP_INFO:
+ case NETDEV_OFFLOAD_XSTATS_ENABLE:
+ case NETDEV_OFFLOAD_XSTATS_DISABLE:
+ case NETDEV_OFFLOAD_XSTATS_REPORT_USED:
+ case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA:
+ case NETDEV_XDP_FEAT_CHANGE:
+ ASSERT_RTNL(); /* all events listed so far only get the global RTNL check */
+ break;
+
+ /* Once an event fully supports RTNL_NET, move it here
+ * and remove "if (0)" below.
+ *
+ * case NETDEV_XXX:
+ * ASSERT_RTNL_NET(net);
+ * break;
+ */
+ }
+
+ /* Just to avoid unused-variable error for dev and net. */
+ if (0)
+ ASSERT_RTNL_NET(net);
+
+ return NOTIFY_DONE; /* the callback never consumes or vetoes the event */
+}
+
+static int rtnl_net_debug_net_id; /* net_generic() slot id, referenced by rtnl_net_debug_net_ops.id */
+
+static int __net_init rtnl_net_debug_net_init(struct net *net)
+{ /* per-netns init: register this namespace's own notifier block */
+ struct notifier_block *nb;
+
+ nb = net_generic(net, rtnl_net_debug_net_id); /* per-netns storage; rtnl_net_debug_net_ops.size makes it one notifier_block */
+ nb->notifier_call = rtnl_net_debug_event;
+
+ return register_netdevice_notifier_net(net, nb);
+}
+
+static void __net_exit rtnl_net_debug_net_exit(struct net *net)
+{ /* per-netns exit: undo the registration done in rtnl_net_debug_net_init() */
+ struct notifier_block *nb;
+
+ nb = net_generic(net, rtnl_net_debug_net_id); /* same per-netns block that init registered */
+ unregister_netdevice_notifier_net(net, nb);
+}
+
+static struct pernet_operations rtnl_net_debug_net_ops __net_initdata = { /* per-netns hooks for the debug notifier */
+ .init = rtnl_net_debug_net_init,
+ .exit = rtnl_net_debug_net_exit,
+ .id = &rtnl_net_debug_net_id, /* id later passed to net_generic() by init/exit */
+ .size = sizeof(struct notifier_block), /* per-netns private area holds exactly one notifier_block */
+};
+
+static struct notifier_block rtnl_net_debug_block = { /* global notifier, registered by rtnl_net_debug_init() */
+ .notifier_call = rtnl_net_debug_event, /* same callback as the per-netns blocks */
+};
+
+static int __init rtnl_net_debug_init(void)
+{ /* boot-time setup: per-netns notifiers first, then the global notifier; returns 0 or a negative errno */
+ int ret;
+
+ ret = register_pernet_device(&rtnl_net_debug_net_ops);
+ if (ret)
+ return ret;
+
+ ret = register_netdevice_notifier(&rtnl_net_debug_block);
+ if (ret) /* roll back with the counterpart of register_pernet_device(), not the subsys variant */
+ unregister_pernet_device(&rtnl_net_debug_net_ops);
+
+ return ret;
+}
+
+subsys_initcall(rtnl_net_debug_init);
diff --git a/net/core/skb_fault_injection.c b/net/core/skb_fault_injection.c
new file mode 100644
index 000000000000..4235db6bdfad
--- /dev/null
+++ b/net/core/skb_fault_injection.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/debugfs.h>
+#include <linux/fault-inject.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+
+static struct { /* state of the skb reallocation fault injector */
+ struct fault_attr attr; /* fault-injection attributes, also exposed through debugfs */
+ char devname[IFNAMSIZ]; /* device name filter written through the debugfs "devname" file */
+ bool filtered; /* true while devname holds a non-empty filter */
+} skb_realloc = {
+ .attr = FAULT_ATTR_INITIALIZER,
+ .filtered = false,
+};
+
+static bool should_fail_net_realloc_skb(struct sk_buff *skb)
+{ /* returns true when a reallocation should be injected for this skb */
+ struct net_device *net = skb->dev; /* may be NULL: not every skb has a device attached */
+
+ if (skb_realloc.filtered &&
+ (!net || strncmp(net->name, skb_realloc.devname, IFNAMSIZ)))
+ /* device name filter set, but no device or names do not match */
+ return false;
+
+ if (!should_fail(&skb_realloc.attr, 1)) /* defer to the generic fault-injection decision */
+ return false;
+
+ return true;
+}
+ALLOW_ERROR_INJECTION(should_fail_net_realloc_skb, TRUE);
+
+void skb_might_realloc(struct sk_buff *skb)
+{ /* when the injector fires, call pskb_expand_head() on the skb with no extra head/tail room */
+ if (!should_fail_net_realloc_skb(skb))
+ return;
+
+ pskb_expand_head(skb, 0, 0, GFP_ATOMIC); /* return value is not checked here */
+}
+EXPORT_SYMBOL(skb_might_realloc);
+
+static int __init fail_skb_realloc_setup(char *str)
+{ /* handler for the "fail_skb_realloc=" boot parameter */
+ return setup_fault_attr(&skb_realloc.attr, str);
+}
+__setup("fail_skb_realloc=", fail_skb_realloc_setup);
+
+static void reset_settings(void)
+{ /* drop the device-name filter: clear the flag and zero the stored name */
+ skb_realloc.filtered = false;
+ memset(&skb_realloc.devname, 0, IFNAMSIZ);
+}
+
+static ssize_t devname_write(struct file *file, const char __user *buffer,
+ size_t count, loff_t *ppos)
+{ /* debugfs write handler: replace the device-name filter with the user's string */
+ ssize_t ret;
+
+ reset_settings(); /* the old filter is cleared even if the copy below fails */
+ ret = simple_write_to_buffer(&skb_realloc.devname, IFNAMSIZ,
+ ppos, buffer, count);
+ if (ret < 0)
+ return ret;
+
+ skb_realloc.devname[IFNAMSIZ - 1] = '\0'; /* force termination when the input filled the whole buffer */
+ /* Remove a possible \n at the end of devname */
+ strim(skb_realloc.devname);
+
+ if (strnlen(skb_realloc.devname, IFNAMSIZ)) /* an empty name leaves filtering off */
+ skb_realloc.filtered = true;
+
+ return count; /* NOTE(review): reports the full count even if fewer bytes were stored - presumably so over-long input is dropped rather than retried; confirm */
+}
+
+static ssize_t devname_read(struct file *file,
+ char __user *buffer,
+ size_t size, loff_t *ppos)
+{ /* debugfs read handler: return the current filter name, or nothing when no filter is set */
+ if (!skb_realloc.filtered)
+ return 0;
+
+ return simple_read_from_buffer(buffer, size, ppos, &skb_realloc.devname,
+ strlen(skb_realloc.devname)); /* length excludes the terminating NUL */
+}
+
+static const struct file_operations devname_ops = { /* file operations of the debugfs "devname" file */
+ .write = devname_write,
+ .read = devname_read,
+};
+
+static int __init fail_skb_realloc_debugfs(void)
+{ /* create the "fail_skb_realloc" debugfs directory and its "devname" file */
+ umode_t mode = S_IFREG | 0600; /* regular file, owner read/write only */
+ struct dentry *dir;
+
+ dir = fault_create_debugfs_attr("fail_skb_realloc", NULL,
+ &skb_realloc.attr);
+ if (IS_ERR(dir))
+ return PTR_ERR(dir);
+
+ debugfs_create_file("devname", mode, dir, NULL, &devname_ops); /* result is not checked */
+
+ return 0;
+}
+
+late_initcall(fail_skb_realloc_debugfs);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 74149dc4ee31..6841e61a6bd0 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -753,14 +753,14 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
if (in_hardirq() || irqs_disabled()) {
nc = this_cpu_ptr(&netdev_alloc_cache);
data = page_frag_alloc(nc, len, gfp_mask);
- pfmemalloc = nc->pfmemalloc;
+ pfmemalloc = page_frag_cache_is_pfmemalloc(nc);
} else {
local_bh_disable();
local_lock_nested_bh(&napi_alloc_cache.bh_lock);
nc = this_cpu_ptr(&napi_alloc_cache.page);
data = page_frag_alloc(nc, len, gfp_mask);
- pfmemalloc = nc->pfmemalloc;
+ pfmemalloc = page_frag_cache_is_pfmemalloc(nc);
local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
local_bh_enable();
@@ -850,7 +850,7 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
len = SKB_HEAD_ALIGN(len);
data = page_frag_alloc(&nc->page, len, gfp_mask);
- pfmemalloc = nc->page.pfmemalloc;
+ pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
}
local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
@@ -5506,7 +5506,7 @@ static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly)
{
bool ret;
- if (likely(READ_ONCE(sysctl_tstamp_allow_data) || tsonly))
+ if (likely(tsonly || READ_ONCE(sock_net(sk)->core.sysctl_tstamp_allow_data)))
return true;
read_lock_bh(&sk->sk_callback_lock);
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index b1dcbd3be89e..e90fbab703b2 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -1117,9 +1117,9 @@ static void sk_psock_strp_data_ready(struct sock *sk)
if (tls_sw_has_ctx_rx(sk)) {
psock->saved_data_ready(sk);
} else {
- write_lock_bh(&sk->sk_callback_lock);
+ read_lock_bh(&sk->sk_callback_lock);
strp_data_ready(&psock->strp);
- write_unlock_bh(&sk->sk_callback_lock);
+ read_unlock_bh(&sk->sk_callback_lock);
}
}
rcu_read_unlock();
diff --git a/net/core/sock.c b/net/core/sock.c
index da50df485090..74729d20cd00 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -286,8 +286,6 @@ EXPORT_SYMBOL(sysctl_rmem_max);
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
-int sysctl_tstamp_allow_data __read_mostly = 1;
-
DEFINE_STATIC_KEY_FALSE(memalloc_socks_key);
EXPORT_SYMBOL_GPL(memalloc_socks_key);
@@ -822,14 +820,11 @@ EXPORT_SYMBOL(sock_set_sndtimeo);
static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
{
+ sock_valbool_flag(sk, SOCK_RCVTSTAMP, val);
+ sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, val && ns);
if (val) {
sock_valbool_flag(sk, SOCK_TSTAMP_NEW, new);
- sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, ns);
- sock_set_flag(sk, SOCK_RCVTSTAMP);
sock_enable_timestamp(sk, SOCK_TIMESTAMP);
- } else {
- sock_reset_flag(sk, SOCK_RCVTSTAMP);
- sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
}
}
@@ -2600,14 +2595,11 @@ void __sock_wfree(struct sk_buff *skb)
void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
{
skb_orphan(skb);
- skb->sk = sk;
#ifdef CONFIG_INET
- if (unlikely(!sk_fullsock(sk))) {
- skb->destructor = sock_edemux;
- sock_hold(sk);
- return;
- }
+ if (unlikely(!sk_fullsock(sk)))
+ return skb_set_owner_edemux(skb, sk);
#endif
+ skb->sk = sk;
skb->destructor = sock_wfree;
skb_set_hash_from_sk(skb, sk);
/*
@@ -2905,6 +2897,8 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
{
u32 tsflags;
+ BUILD_BUG_ON(SOF_TIMESTAMPING_LAST == (1 << 31));
+
switch (cmsg->cmsg_type) {
case SO_MARK:
if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
@@ -2933,6 +2927,17 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
return -EINVAL;
sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg));
break;
+ case SCM_TS_OPT_ID:
+ if (sk_is_tcp(sk))
+ return -EINVAL;
+ tsflags = READ_ONCE(sk->sk_tsflags);
+ if (!(tsflags & SOF_TIMESTAMPING_OPT_ID))
+ return -EINVAL;
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
+ return -EINVAL;
+ sockc->ts_opt_id = *(u32 *)CMSG_DATA(cmsg);
+ sockc->tsflags |= SOCKCM_FLAG_TS_OPT_ID;
+ break;
/* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
case SCM_RIGHTS:
case SCM_CREDENTIALS:
@@ -3825,9 +3830,6 @@ void sk_common_release(struct sock *sk)
sk->sk_prot->unhash(sk);
- if (sk->sk_socket)
- sk->sk_socket->sk = NULL;
-
/*
* In this point socket cannot receive new packets, but it is possible
* that some packets are in flight because some CPU runs receiver and
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 86a2476678c4..cb8d32e5c14e 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -51,29 +51,45 @@ int sysctl_devconf_inherit_init_net __read_mostly;
EXPORT_SYMBOL(sysctl_devconf_inherit_init_net);
#if IS_ENABLED(CONFIG_NET_FLOW_LIMIT) || IS_ENABLED(CONFIG_RPS)
-static void dump_cpumask(void *buffer, size_t *lenp, loff_t *ppos,
- struct cpumask *mask)
+static int dump_cpumask(void *buffer, size_t *lenp, loff_t *ppos,
+ struct cpumask *mask)
{
- char kbuf[128];
+ char *kbuf;
int len;
if (*ppos || !*lenp) {
*lenp = 0;
- return;
+ return 0;
+ }
+
+ /* CPUs are displayed as a hex bitmap + a comma between each groups of 8
+ * nibbles (except the last one which has a newline instead).
+ * Guesstimate the buffer size at the group granularity level.
+ */
+ len = min(DIV_ROUND_UP(nr_cpumask_bits, 32) * (8 + 1), *lenp);
+ kbuf = kmalloc(len, GFP_KERNEL);
+ if (!kbuf) {
+ *lenp = 0;
+ return -ENOMEM;
}
- len = min(sizeof(kbuf) - 1, *lenp);
len = scnprintf(kbuf, len, "%*pb", cpumask_pr_args(mask));
if (!len) {
*lenp = 0;
- return;
+ goto free_buf;
}
- if (len < *lenp)
- kbuf[len++] = '\n';
+ /* scnprintf writes a trailing null char not counted in the returned
+ * length, override it with a newline.
+ */
+ kbuf[len++] = '\n';
memcpy(buffer, kbuf, len);
*lenp = len;
*ppos += len;
+
+free_buf:
+ kfree(kbuf);
+ return 0;
}
#endif
@@ -117,8 +133,8 @@ static int rps_default_mask_sysctl(const struct ctl_table *table, int write,
if (err)
goto done;
} else {
- dump_cpumask(buffer, lenp, ppos,
- net->core.rps_default_mask ? : cpu_none_mask);
+ err = dump_cpumask(buffer, lenp, ppos,
+ net->core.rps_default_mask ? : cpu_none_mask);
}
done:
@@ -247,7 +263,7 @@ write_unlock:
}
rcu_read_unlock();
- dump_cpumask(buffer, lenp, ppos, mask);
+ ret = dump_cpumask(buffer, lenp, ppos, mask);
}
done:
@@ -491,15 +507,6 @@ static struct ctl_table net_core_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
- .procname = "tstamp_allow_data",
- .data = &sysctl_tstamp_allow_data,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE
- },
#ifdef CONFIG_RPS
{
.procname = "rps_sock_flow_entries",
@@ -665,6 +672,15 @@ static struct ctl_table netns_core_table[] = {
.extra2 = SYSCTL_ONE,
.proc_handler = proc_dou8vec_minmax,
},
+ {
+ .procname = "tstamp_allow_data",
+ .data = &init_net.core.sysctl_tstamp_allow_data,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE
+ },
/* sysctl_core_net_init() will set the values after this
* to readonly in network namespaces
*/
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 2e6b8c8fd2de..03eb1d941fca 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -2408,6 +2408,11 @@ static struct notifier_block dcbnl_nb __read_mostly = {
.notifier_call = dcbnl_netdevice_event,
};
+static const struct rtnl_msg_handler dcbnl_rtnl_msg_handlers[] __initconst = {
+ {.msgtype = RTM_GETDCB, .doit = dcb_doit},
+ {.msgtype = RTM_SETDCB, .doit = dcb_doit},
+};
+
static int __init dcbnl_init(void)
{
int err;
@@ -2416,8 +2421,7 @@ static int __init dcbnl_init(void)
if (err)
return err;
- rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, 0);
+ rtnl_register_many(dcbnl_rtnl_msg_handlers);
return 0;
}
diff --git a/net/devlink/dev.c b/net/devlink/dev.c
index 13c73f50da3d..d6e3db300acb 100644
--- a/net/devlink/dev.c
+++ b/net/devlink/dev.c
@@ -531,10 +531,8 @@ int devlink_nl_reload_doit(struct sk_buff *skb, struct genl_info *info)
return err;
}
- if (info->attrs[DEVLINK_ATTR_RELOAD_ACTION])
- action = nla_get_u8(info->attrs[DEVLINK_ATTR_RELOAD_ACTION]);
- else
- action = DEVLINK_RELOAD_ACTION_DRIVER_REINIT;
+ action = nla_get_u8_default(info->attrs[DEVLINK_ATTR_RELOAD_ACTION],
+ DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
if (!devlink_reload_action_is_supported(devlink, action)) {
NL_SET_ERR_MSG(info->extack, "Requested reload action is not supported by the driver");
@@ -971,14 +969,14 @@ static int devlink_nl_flash_update_fill(struct sk_buff *msg,
nla_put_string(msg, DEVLINK_ATTR_FLASH_UPDATE_COMPONENT,
params->component))
goto nla_put_failure;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_DONE,
- params->done, DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_DONE,
+ params->done))
goto nla_put_failure;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TOTAL,
- params->total, DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TOTAL,
+ params->total))
goto nla_put_failure;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT,
- params->timeout, DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT,
+ params->timeout))
goto nla_put_failure;
out:
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index c7a8e13f917c..14eaad9cfe35 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -166,7 +166,7 @@ int devlink_nl_dumpit(struct sk_buff *msg, struct netlink_callback *cb,
static inline struct devlink_nl_dump_state *
devlink_dump_state(struct netlink_callback *cb)
{
- NL_ASSERT_DUMP_CTX_FITS(struct devlink_nl_dump_state);
+ NL_ASSERT_CTX_FITS(struct devlink_nl_dump_state);
return (struct devlink_nl_dump_state *)cb->ctx;
}
@@ -181,6 +181,11 @@ devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink)
return 0;
}
+static inline int devlink_nl_put_u64(struct sk_buff *msg, int attrtype, u64 val)
+{
+ return nla_put_u64_64bit(msg, attrtype, val, DEVLINK_ATTR_PAD);
+}
+
int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net,
struct devlink *devlink, int attrtype);
int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info);
diff --git a/net/devlink/dpipe.c b/net/devlink/dpipe.c
index 55009b377447..e55701b007f0 100644
--- a/net/devlink/dpipe.c
+++ b/net/devlink/dpipe.c
@@ -165,18 +165,17 @@ static int devlink_dpipe_table_put(struct sk_buff *skb,
return -EMSGSIZE;
if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_TABLE_NAME, table->name) ||
- nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_SIZE, table_size,
- DEVLINK_ATTR_PAD))
+ devlink_nl_put_u64(skb, DEVLINK_ATTR_DPIPE_TABLE_SIZE, table_size))
goto nla_put_failure;
if (nla_put_u8(skb, DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED,
table->counters_enabled))
goto nla_put_failure;
if (table->resource_valid) {
- if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID,
- table->resource_id, DEVLINK_ATTR_PAD) ||
- nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS,
- table->resource_units, DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID,
+ table->resource_id) ||
+ devlink_nl_put_u64(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS,
+ table->resource_units))
goto nla_put_failure;
}
if (devlink_dpipe_matches_put(table, skb))
@@ -403,12 +402,11 @@ static int devlink_dpipe_entry_put(struct sk_buff *skb,
if (!entry_attr)
return -EMSGSIZE;
- if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_INDEX, entry->index,
- DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(skb, DEVLINK_ATTR_DPIPE_ENTRY_INDEX, entry->index))
goto nla_put_failure;
if (entry->counter_valid)
- if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_COUNTER,
- entry->counter, DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(skb, DEVLINK_ATTR_DPIPE_ENTRY_COUNTER,
+ entry->counter))
goto nla_put_failure;
matches_attr = nla_nest_start_noflag(skb,
diff --git a/net/devlink/health.c b/net/devlink/health.c
index acb8c0e174bb..b8d3084e6fe0 100644
--- a/net/devlink/health.c
+++ b/net/devlink/health.c
@@ -287,29 +287,27 @@ devlink_nl_health_reporter_fill(struct sk_buff *msg,
if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE,
reporter->health_state))
goto reporter_nest_cancel;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR_COUNT,
- reporter->error_count, DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR_COUNT,
+ reporter->error_count))
goto reporter_nest_cancel;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT,
- reporter->recovery_count, DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT,
+ reporter->recovery_count))
goto reporter_nest_cancel;
if (reporter->ops->recover &&
- nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,
- reporter->graceful_period,
- DEVLINK_ATTR_PAD))
+ devlink_nl_put_u64(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,
+ reporter->graceful_period))
goto reporter_nest_cancel;
if (reporter->ops->recover &&
nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER,
reporter->auto_recover))
goto reporter_nest_cancel;
if (reporter->dump_fmsg &&
- nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,
- jiffies_to_msecs(reporter->dump_ts),
- DEVLINK_ATTR_PAD))
+ devlink_nl_put_u64(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,
+ jiffies_to_msecs(reporter->dump_ts)))
goto reporter_nest_cancel;
if (reporter->dump_fmsg &&
- nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS,
- reporter->dump_real_ts, DEVLINK_ATTR_PAD))
+ devlink_nl_put_u64(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS,
+ reporter->dump_real_ts))
goto reporter_nest_cancel;
if (reporter->ops->dump &&
nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP,
@@ -963,8 +961,7 @@ devlink_fmsg_item_fill_data(struct devlink_fmsg_item *msg, struct sk_buff *skb)
case NLA_U32:
return nla_put_u32(skb, attrtype, *(u32 *)msg->value);
case NLA_U64:
- return nla_put_u64_64bit(skb, attrtype, *(u64 *)msg->value,
- DEVLINK_ATTR_PAD);
+ return devlink_nl_put_u64(skb, attrtype, *(u64 *)msg->value);
case NLA_NUL_STRING:
return nla_put_string(skb, attrtype, (char *)&msg->value);
case NLA_BINARY:
diff --git a/net/devlink/rate.c b/net/devlink/rate.c
index 7139e67e93ae..8828ffaf6cbc 100644
--- a/net/devlink/rate.c
+++ b/net/devlink/rate.c
@@ -108,12 +108,12 @@ static int devlink_nl_rate_fill(struct sk_buff *msg,
goto nla_put_failure;
}
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_SHARE,
- devlink_rate->tx_share, DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(msg, DEVLINK_ATTR_RATE_TX_SHARE,
+ devlink_rate->tx_share))
goto nla_put_failure;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_MAX,
- devlink_rate->tx_max, DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(msg, DEVLINK_ATTR_RATE_TX_MAX,
+ devlink_rate->tx_max))
goto nla_put_failure;
if (nla_put_u32(msg, DEVLINK_ATTR_RATE_TX_PRIORITY,
diff --git a/net/devlink/region.c b/net/devlink/region.c
index 7319127c5913..63fb297f6d67 100644
--- a/net/devlink/region.c
+++ b/net/devlink/region.c
@@ -77,7 +77,7 @@ static int devlink_nl_region_snapshot_id_put(struct sk_buff *msg,
snap_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_REGION_SNAPSHOT);
if (!snap_attr)
- return -EINVAL;
+ return -EMSGSIZE;
err = nla_put_u32(msg, DEVLINK_ATTR_REGION_SNAPSHOT_ID, snapshot->id);
if (err)
@@ -102,7 +102,7 @@ static int devlink_nl_region_snapshots_id_put(struct sk_buff *msg,
snapshots_attr = nla_nest_start_noflag(msg,
DEVLINK_ATTR_REGION_SNAPSHOTS);
if (!snapshots_attr)
- return -EINVAL;
+ return -EMSGSIZE;
list_for_each_entry(snapshot, &region->snapshot_list, list) {
err = devlink_nl_region_snapshot_id_put(msg, devlink, snapshot);
@@ -145,9 +145,7 @@ static int devlink_nl_region_fill(struct sk_buff *msg, struct devlink *devlink,
if (err)
goto nla_put_failure;
- err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_SIZE,
- region->size,
- DEVLINK_ATTR_PAD);
+ err = devlink_nl_put_u64(msg, DEVLINK_ATTR_REGION_SIZE, region->size);
if (err)
goto nla_put_failure;
@@ -210,8 +208,8 @@ devlink_nl_region_notify_build(struct devlink_region *region,
if (err)
goto out_cancel_msg;
} else {
- err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_SIZE,
- region->size, DEVLINK_ATTR_PAD);
+ err = devlink_nl_put_u64(msg, DEVLINK_ATTR_REGION_SIZE,
+ region->size);
if (err)
goto out_cancel_msg;
}
@@ -773,8 +771,7 @@ static int devlink_nl_cmd_region_read_chunk_fill(struct sk_buff *msg,
if (err)
goto nla_put_failure;
- err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_CHUNK_ADDR, addr,
- DEVLINK_ATTR_PAD);
+ err = devlink_nl_put_u64(msg, DEVLINK_ATTR_REGION_CHUNK_ADDR, addr);
if (err)
goto nla_put_failure;
diff --git a/net/devlink/resource.c b/net/devlink/resource.c
index 594c8aeb3bfa..2d6324f3d91f 100644
--- a/net/devlink/resource.c
+++ b/net/devlink/resource.c
@@ -141,12 +141,12 @@ devlink_resource_size_params_put(struct devlink_resource *resource,
struct devlink_resource_size_params *size_params;
size_params = &resource->size_params;
- if (nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_GRAN,
- size_params->size_granularity, DEVLINK_ATTR_PAD) ||
- nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MAX,
- size_params->size_max, DEVLINK_ATTR_PAD) ||
- nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MIN,
- size_params->size_min, DEVLINK_ATTR_PAD) ||
+ if (devlink_nl_put_u64(skb, DEVLINK_ATTR_RESOURCE_SIZE_GRAN,
+ size_params->size_granularity) ||
+ devlink_nl_put_u64(skb, DEVLINK_ATTR_RESOURCE_SIZE_MAX,
+ size_params->size_max) ||
+ devlink_nl_put_u64(skb, DEVLINK_ATTR_RESOURCE_SIZE_MIN,
+ size_params->size_min) ||
nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_UNIT, size_params->unit))
return -EMSGSIZE;
return 0;
@@ -157,9 +157,8 @@ static int devlink_resource_occ_put(struct devlink_resource *resource,
{
if (!resource->occ_get)
return 0;
- return nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_OCC,
- resource->occ_get(resource->occ_get_priv),
- DEVLINK_ATTR_PAD);
+ return devlink_nl_put_u64(skb, DEVLINK_ATTR_RESOURCE_OCC,
+ resource->occ_get(resource->occ_get_priv));
}
static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb,
@@ -174,14 +173,12 @@ static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb,
return -EMSGSIZE;
if (nla_put_string(skb, DEVLINK_ATTR_RESOURCE_NAME, resource->name) ||
- nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE, resource->size,
- DEVLINK_ATTR_PAD) ||
- nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_ID, resource->id,
- DEVLINK_ATTR_PAD))
+ devlink_nl_put_u64(skb, DEVLINK_ATTR_RESOURCE_SIZE, resource->size) ||
+ devlink_nl_put_u64(skb, DEVLINK_ATTR_RESOURCE_ID, resource->id))
goto nla_put_failure;
if (resource->size != resource->size_new &&
- nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_NEW,
- resource->size_new, DEVLINK_ATTR_PAD))
+ devlink_nl_put_u64(skb, DEVLINK_ATTR_RESOURCE_SIZE_NEW,
+ resource->size_new))
goto nla_put_failure;
if (devlink_resource_occ_put(resource, skb))
goto nla_put_failure;
@@ -348,7 +345,7 @@ int devl_resource_register(struct devlink *devlink,
resource = devlink_resource_find(devlink, NULL, resource_id);
if (resource)
- return -EINVAL;
+ return -EEXIST;
resource = kzalloc(sizeof(*resource), GFP_KERNEL);
if (!resource)
@@ -384,39 +381,6 @@ int devl_resource_register(struct devlink *devlink,
}
EXPORT_SYMBOL_GPL(devl_resource_register);
-/**
- * devlink_resource_register - devlink resource register
- *
- * @devlink: devlink
- * @resource_name: resource's name
- * @resource_size: resource's size
- * @resource_id: resource's id
- * @parent_resource_id: resource's parent id
- * @size_params: size parameters
- *
- * Generic resources should reuse the same names across drivers.
- * Please see the generic resources list at:
- * Documentation/networking/devlink/devlink-resource.rst
- *
- * Context: Takes and release devlink->lock <mutex>.
- */
-int devlink_resource_register(struct devlink *devlink,
- const char *resource_name,
- u64 resource_size,
- u64 resource_id,
- u64 parent_resource_id,
- const struct devlink_resource_size_params *size_params)
-{
- int err;
-
- devl_lock(devlink);
- err = devl_resource_register(devlink, resource_name, resource_size,
- resource_id, parent_resource_id, size_params);
- devl_unlock(devlink);
- return err;
-}
-EXPORT_SYMBOL_GPL(devlink_resource_register);
-
static void devlink_resource_unregister(struct devlink *devlink,
struct devlink_resource *resource)
{
@@ -517,28 +481,6 @@ void devl_resource_occ_get_register(struct devlink *devlink,
EXPORT_SYMBOL_GPL(devl_resource_occ_get_register);
/**
- * devlink_resource_occ_get_register - register occupancy getter
- *
- * @devlink: devlink
- * @resource_id: resource id
- * @occ_get: occupancy getter callback
- * @occ_get_priv: occupancy getter callback priv
- *
- * Context: Takes and release devlink->lock <mutex>.
- */
-void devlink_resource_occ_get_register(struct devlink *devlink,
- u64 resource_id,
- devlink_resource_occ_get_t *occ_get,
- void *occ_get_priv)
-{
- devl_lock(devlink);
- devl_resource_occ_get_register(devlink, resource_id,
- occ_get, occ_get_priv);
- devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_resource_occ_get_register);
-
-/**
* devl_resource_occ_get_unregister - unregister occupancy getter
*
* @devlink: devlink
@@ -560,20 +502,3 @@ void devl_resource_occ_get_unregister(struct devlink *devlink,
resource->occ_get_priv = NULL;
}
EXPORT_SYMBOL_GPL(devl_resource_occ_get_unregister);
-
-/**
- * devlink_resource_occ_get_unregister - unregister occupancy getter
- *
- * @devlink: devlink
- * @resource_id: resource id
- *
- * Context: Takes and release devlink->lock <mutex>.
- */
-void devlink_resource_occ_get_unregister(struct devlink *devlink,
- u64 resource_id)
-{
- devl_lock(devlink);
- devl_resource_occ_get_unregister(devlink, resource_id);
- devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_resource_occ_get_unregister);
diff --git a/net/devlink/trap.c b/net/devlink/trap.c
index 5d18c7424df1..f36087f90db5 100644
--- a/net/devlink/trap.c
+++ b/net/devlink/trap.c
@@ -189,14 +189,12 @@ devlink_trap_group_stats_put(struct sk_buff *msg,
if (!attr)
return -EMSGSIZE;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
- u64_stats_read(&stats.rx_packets),
- DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
+ u64_stats_read(&stats.rx_packets)))
goto nla_put_failure;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES,
- u64_stats_read(&stats.rx_bytes),
- DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(msg, DEVLINK_ATTR_STATS_RX_BYTES,
+ u64_stats_read(&stats.rx_bytes)))
goto nla_put_failure;
nla_nest_end(msg, attr);
@@ -231,18 +229,15 @@ static int devlink_trap_stats_put(struct sk_buff *msg, struct devlink *devlink,
return -EMSGSIZE;
if (devlink->ops->trap_drop_counter_get &&
- nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_DROPPED, drops,
- DEVLINK_ATTR_PAD))
+ devlink_nl_put_u64(msg, DEVLINK_ATTR_STATS_RX_DROPPED, drops))
goto nla_put_failure;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
- u64_stats_read(&stats.rx_packets),
- DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
+ u64_stats_read(&stats.rx_packets)))
goto nla_put_failure;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES,
- u64_stats_read(&stats.rx_bytes),
- DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(msg, DEVLINK_ATTR_STATS_RX_BYTES,
+ u64_stats_read(&stats.rx_bytes)))
goto nla_put_failure;
nla_nest_end(msg, attr);
@@ -750,8 +745,7 @@ devlink_trap_policer_stats_put(struct sk_buff *msg, struct devlink *devlink,
if (!attr)
return -EMSGSIZE;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_DROPPED, drops,
- DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(msg, DEVLINK_ATTR_STATS_RX_DROPPED, drops))
goto nla_put_failure;
nla_nest_end(msg, attr);
@@ -783,12 +777,12 @@ devlink_nl_trap_policer_fill(struct sk_buff *msg, struct devlink *devlink,
policer_item->policer->id))
goto nla_put_failure;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_TRAP_POLICER_RATE,
- policer_item->rate, DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(msg, DEVLINK_ATTR_TRAP_POLICER_RATE,
+ policer_item->rate))
goto nla_put_failure;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_TRAP_POLICER_BURST,
- policer_item->burst, DEVLINK_ATTR_PAD))
+ if (devlink_nl_put_u64(msg, DEVLINK_ATTR_TRAP_POLICER_BURST,
+ policer_item->burst))
goto nla_put_failure;
err = devlink_trap_policer_stats_put(msg, devlink,
diff --git a/net/dsa/devlink.c b/net/dsa/devlink.c
index 0aac887d0098..f41f9fc2194e 100644
--- a/net/dsa/devlink.c
+++ b/net/dsa/devlink.c
@@ -229,10 +229,15 @@ int dsa_devlink_resource_register(struct dsa_switch *ds,
u64 parent_resource_id,
const struct devlink_resource_size_params *size_params)
{
- return devlink_resource_register(ds->devlink, resource_name,
- resource_size, resource_id,
- parent_resource_id,
- size_params);
+ int ret;
+
+ devl_lock(ds->devlink);
+ ret = devl_resource_register(ds->devlink, resource_name, resource_size,
+ resource_id, parent_resource_id,
+ size_params);
+ devl_unlock(ds->devlink);
+
+ return ret;
}
EXPORT_SYMBOL_GPL(dsa_devlink_resource_register);
@@ -247,15 +252,19 @@ void dsa_devlink_resource_occ_get_register(struct dsa_switch *ds,
devlink_resource_occ_get_t *occ_get,
void *occ_get_priv)
{
- return devlink_resource_occ_get_register(ds->devlink, resource_id,
- occ_get, occ_get_priv);
+ devl_lock(ds->devlink);
+ devl_resource_occ_get_register(ds->devlink, resource_id, occ_get,
+ occ_get_priv);
+ devl_unlock(ds->devlink);
}
EXPORT_SYMBOL_GPL(dsa_devlink_resource_occ_get_register);
void dsa_devlink_resource_occ_get_unregister(struct dsa_switch *ds,
u64 resource_id)
{
- devlink_resource_occ_get_unregister(ds->devlink, resource_id);
+ devl_lock(ds->devlink);
+ devl_resource_occ_get_unregister(ds->devlink, resource_id);
+ devl_unlock(ds->devlink);
}
EXPORT_SYMBOL_GPL(dsa_devlink_resource_occ_get_unregister);
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 1664547deffd..5a7c0e565a89 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -1505,14 +1505,6 @@ static int dsa_switch_probe(struct dsa_switch *ds)
if (!ds->num_ports)
return -EINVAL;
- if (ds->phylink_mac_ops) {
- if (ds->ops->phylink_mac_select_pcs ||
- ds->ops->phylink_mac_config ||
- ds->ops->phylink_mac_link_down ||
- ds->ops->phylink_mac_link_up)
- return -EINVAL;
- }
-
if (np) {
err = dsa_switch_parse_of(ds, np);
if (err)
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 25258b33e59e..ee0aaec4c8e0 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -1575,44 +1575,16 @@ void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp,
cpu_dp->tag_ops = tag_ops;
}
-static struct phylink_pcs *
-dsa_port_phylink_mac_select_pcs(struct phylink_config *config,
- phy_interface_t interface)
-{
- struct dsa_port *dp = dsa_phylink_to_port(config);
- struct phylink_pcs *pcs = ERR_PTR(-EOPNOTSUPP);
- struct dsa_switch *ds = dp->ds;
-
- if (ds->ops->phylink_mac_select_pcs)
- pcs = ds->ops->phylink_mac_select_pcs(ds, dp->index, interface);
-
- return pcs;
-}
-
static void dsa_port_phylink_mac_config(struct phylink_config *config,
unsigned int mode,
const struct phylink_link_state *state)
{
- struct dsa_port *dp = dsa_phylink_to_port(config);
- struct dsa_switch *ds = dp->ds;
-
- if (!ds->ops->phylink_mac_config)
- return;
-
- ds->ops->phylink_mac_config(ds, dp->index, mode, state);
}
static void dsa_port_phylink_mac_link_down(struct phylink_config *config,
unsigned int mode,
phy_interface_t interface)
{
- struct dsa_port *dp = dsa_phylink_to_port(config);
- struct dsa_switch *ds = dp->ds;
-
- if (!ds->ops->phylink_mac_link_down)
- return;
-
- ds->ops->phylink_mac_link_down(ds, dp->index, mode, interface);
}
static void dsa_port_phylink_mac_link_up(struct phylink_config *config,
@@ -1622,18 +1594,9 @@ static void dsa_port_phylink_mac_link_up(struct phylink_config *config,
int speed, int duplex,
bool tx_pause, bool rx_pause)
{
- struct dsa_port *dp = dsa_phylink_to_port(config);
- struct dsa_switch *ds = dp->ds;
-
- if (!ds->ops->phylink_mac_link_up)
- return;
-
- ds->ops->phylink_mac_link_up(ds, dp->index, mode, interface, phydev,
- speed, duplex, tx_pause, rx_pause);
}
static const struct phylink_mac_ops dsa_port_phylink_mac_ops = {
- .mac_select_pcs = dsa_port_phylink_mac_select_pcs,
.mac_config = dsa_port_phylink_mac_config,
.mac_link_down = dsa_port_phylink_mac_link_down,
.mac_link_up = dsa_port_phylink_mac_link_up,
@@ -1871,9 +1834,6 @@ static void dsa_shared_port_link_down(struct dsa_port *dp)
if (ds->phylink_mac_ops && ds->phylink_mac_ops->mac_link_down)
ds->phylink_mac_ops->mac_link_down(&dp->pl_config, MLO_AN_FIXED,
PHY_INTERFACE_MODE_NA);
- else if (ds->ops->phylink_mac_link_down)
- ds->ops->phylink_mac_link_down(ds, dp->index, MLO_AN_FIXED,
- PHY_INTERFACE_MODE_NA);
}
int dsa_shared_port_link_register_of(struct dsa_port *dp)
diff --git a/net/dsa/user.c b/net/dsa/user.c
index 64f660d2334b..06c30a9e29ff 100644
--- a/net/dsa/user.c
+++ b/net/dsa/user.c
@@ -1042,15 +1042,12 @@ static void dsa_user_get_strings(struct net_device *dev,
struct dsa_switch *ds = dp->ds;
if (stringset == ETH_SS_STATS) {
- int len = ETH_GSTRING_LEN;
-
- strscpy_pad(data, "tx_packets", len);
- strscpy_pad(data + len, "tx_bytes", len);
- strscpy_pad(data + 2 * len, "rx_packets", len);
- strscpy_pad(data + 3 * len, "rx_bytes", len);
+ ethtool_puts(&data, "tx_packets");
+ ethtool_puts(&data, "tx_bytes");
+ ethtool_puts(&data, "rx_packets");
+ ethtool_puts(&data, "rx_bytes");
if (ds->ops->get_strings)
- ds->ops->get_strings(ds, dp->index, stringset,
- data + 4 * len);
+ ds->ops->get_strings(ds, dp->index, stringset, data);
} else if (stringset == ETH_SS_TEST) {
net_selftest_get_strings(data);
}
@@ -1308,8 +1305,7 @@ static int dsa_user_set_pauseparam(struct net_device *dev,
}
#ifdef CONFIG_NET_POLL_CONTROLLER
-static int dsa_user_netpoll_setup(struct net_device *dev,
- struct netpoll_info *ni)
+static int dsa_user_netpoll_setup(struct net_device *dev)
{
struct net_device *conduit = dsa_user_to_conduit(dev);
struct dsa_user_priv *p = netdev_priv(dev);
@@ -1365,7 +1361,7 @@ dsa_user_mall_tc_entry_find(struct net_device *dev, unsigned long cookie)
static int
dsa_user_add_cls_matchall_mirred(struct net_device *dev,
struct tc_cls_matchall_offload *cls,
- bool ingress)
+ bool ingress, bool ingress_target)
{
struct netlink_ext_ack *extack = cls->common.extack;
struct dsa_port *dp = dsa_user_to_port(dev);
@@ -1377,11 +1373,19 @@ dsa_user_add_cls_matchall_mirred(struct net_device *dev,
struct dsa_port *to_dp;
int err;
- if (!ds->ops->port_mirror_add)
+ if (cls->common.protocol != htons(ETH_P_ALL)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Can only offload \"protocol all\" matchall filter");
+ return -EOPNOTSUPP;
+ }
+
+ if (!ds->ops->port_mirror_add) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Switch does not support mirroring operation");
return -EOPNOTSUPP;
+ }
- if (!flow_action_basic_hw_stats_check(&cls->rule->action,
- cls->common.extack))
+ if (!flow_action_basic_hw_stats_check(&cls->rule->action, extack))
return -EOPNOTSUPP;
act = &cls->rule->action.entries[0];
@@ -1389,10 +1393,30 @@ dsa_user_add_cls_matchall_mirred(struct net_device *dev,
if (!act->dev)
return -EINVAL;
- if (!dsa_user_dev_check(act->dev))
- return -EOPNOTSUPP;
-
- to_dp = dsa_user_to_port(act->dev);
+ if (dsa_user_dev_check(act->dev)) {
+ if (ingress_target) {
+ /* We can only fulfill this using software assist */
+ if (cls->common.skip_sw) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Can only mirred to ingress of DSA user port if filter also runs in software");
+ return -EOPNOTSUPP;
+ }
+ to_dp = dp->cpu_dp;
+ } else {
+ to_dp = dsa_user_to_port(act->dev);
+ }
+ } else {
+ /* Handle mirroring to foreign target ports as a mirror towards
+ * the CPU. The software tc rule will take the packets from
+ * there.
+ */
+ if (cls->common.skip_sw) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Can only mirred to CPU if filter also runs in software");
+ return -EOPNOTSUPP;
+ }
+ to_dp = dp->cpu_dp;
+ }
if (dp->ds != to_dp->ds) {
NL_SET_ERR_MSG_MOD(extack,
@@ -1447,8 +1471,7 @@ dsa_user_add_cls_matchall_police(struct net_device *dev,
return -EOPNOTSUPP;
}
- if (!flow_action_basic_hw_stats_check(&cls->rule->action,
- cls->common.extack))
+ if (!flow_action_basic_hw_stats_check(&cls->rule->action, extack))
return -EOPNOTSUPP;
list_for_each_entry(mall_tc_entry, &p->mall_tc_list, list) {
@@ -1486,17 +1509,30 @@ static int dsa_user_add_cls_matchall(struct net_device *dev,
struct tc_cls_matchall_offload *cls,
bool ingress)
{
- int err = -EOPNOTSUPP;
+ const struct flow_action *action = &cls->rule->action;
+ struct netlink_ext_ack *extack = cls->common.extack;
- if (cls->common.protocol == htons(ETH_P_ALL) &&
- flow_offload_has_one_action(&cls->rule->action) &&
- cls->rule->action.entries[0].id == FLOW_ACTION_MIRRED)
- err = dsa_user_add_cls_matchall_mirred(dev, cls, ingress);
- else if (flow_offload_has_one_action(&cls->rule->action) &&
- cls->rule->action.entries[0].id == FLOW_ACTION_POLICE)
- err = dsa_user_add_cls_matchall_police(dev, cls, ingress);
+ if (!flow_offload_has_one_action(action)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Cannot offload matchall filter with more than one action");
+ return -EOPNOTSUPP;
+ }
- return err;
+ switch (action->entries[0].id) {
+ case FLOW_ACTION_MIRRED:
+ return dsa_user_add_cls_matchall_mirred(dev, cls, ingress,
+ false);
+ case FLOW_ACTION_MIRRED_INGRESS:
+ return dsa_user_add_cls_matchall_mirred(dev, cls, ingress,
+ true);
+ case FLOW_ACTION_POLICE:
+ return dsa_user_add_cls_matchall_police(dev, cls, ingress);
+ default:
+ NL_SET_ERR_MSG_MOD(extack, "Unknown action");
+ break;
+ }
+
+ return -EOPNOTSUPP;
}
static void dsa_user_del_cls_matchall(struct net_device *dev,
diff --git a/net/ethtool/cmis.h b/net/ethtool/cmis.h
index 3e7c293af78c..1e790413db0e 100644
--- a/net/ethtool/cmis.h
+++ b/net/ethtool/cmis.h
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0-only */
#define ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH 120
+#define ETHTOOL_CMIS_CDB_EPL_MAX_PL_LENGTH 2048
#define ETHTOOL_CMIS_CDB_CMD_PAGE 0x9F
#define ETHTOOL_CMIS_CDB_PAGE_I2C_ADDR 0x50
@@ -23,6 +24,7 @@ enum ethtool_cmis_cdb_cmd_id {
ETHTOOL_CMIS_CDB_CMD_FW_MANAGMENT_FEATURES = 0x0041,
ETHTOOL_CMIS_CDB_CMD_START_FW_DOWNLOAD = 0x0101,
ETHTOOL_CMIS_CDB_CMD_WRITE_FW_BLOCK_LPL = 0x0103,
+ ETHTOOL_CMIS_CDB_CMD_WRITE_FW_BLOCK_EPL = 0x0104,
ETHTOOL_CMIS_CDB_CMD_COMPLETE_FW_DOWNLOAD = 0x0107,
ETHTOOL_CMIS_CDB_CMD_RUN_FW_IMAGE = 0x0109,
ETHTOOL_CMIS_CDB_CMD_COMMIT_FW_IMAGE = 0x010A,
@@ -38,6 +40,7 @@ enum ethtool_cmis_cdb_cmd_id {
* @resv1: Added to match the CMIS standard request continuity.
* @resv2: Added to match the CMIS standard request continuity.
* @payload: Payload for the CDB commands.
+ * @epl: Extended payload for the CDB commands.
*/
struct ethtool_cmis_cdb_request {
__be16 id;
@@ -49,6 +52,7 @@ struct ethtool_cmis_cdb_request {
u8 resv2;
u8 payload[ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH];
);
+ u8 *epl; /* Everything above this field checksummed. */
};
#define CDB_F_COMPLETION_VALID BIT(0)
@@ -96,13 +100,15 @@ struct ethtool_cmis_cdb_rpl {
u8 payload[ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH];
};
-u32 ethtool_cmis_get_max_payload_size(u8 num_of_byte_octs);
+u32 ethtool_cmis_get_max_lpl_size(u8 num_of_byte_octs);
+u32 ethtool_cmis_get_max_epl_size(u8 num_of_byte_octs);
void ethtool_cmis_cdb_compose_args(struct ethtool_cmis_cdb_cmd_args *args,
- enum ethtool_cmis_cdb_cmd_id cmd, u8 *pl,
- u8 lpl_len, u16 max_duration,
- u8 read_write_len_ext, u16 msleep_pre_rpl,
- u8 rpl_exp_len, u8 flags);
+ enum ethtool_cmis_cdb_cmd_id cmd, u8 *lpl,
+ u8 lpl_len, u8 *epl, u16 epl_len,
+ u16 max_duration, u8 read_write_len_ext,
+ u16 msleep_pre_rpl, u8 rpl_exp_len,
+ u8 flags);
void ethtool_cmis_cdb_check_completion_flag(u8 cmis_rev, u8 *flags);
diff --git a/net/ethtool/cmis_cdb.c b/net/ethtool/cmis_cdb.c
index 4d5581147952..d159dc121bde 100644
--- a/net/ethtool/cmis_cdb.c
+++ b/net/ethtool/cmis_cdb.c
@@ -11,25 +11,41 @@
* min(i, 15) byte octets where i specifies the allowable additional number of
* byte octets in a READ or a WRITE.
*/
-u32 ethtool_cmis_get_max_payload_size(u8 num_of_byte_octs)
+u32 ethtool_cmis_get_max_lpl_size(u8 num_of_byte_octs)
{
return 8 * (1 + min_t(u8, num_of_byte_octs, 15));
}
+/* For accessing the EPL field on page 9Fh, the allowable length extension is
+ * min(i, 255) byte octets where i specifies the allowable additional number of
+ * byte octets in a READ or a WRITE.
+ */
+u32 ethtool_cmis_get_max_epl_size(u8 num_of_byte_octs)
+{
+ return 8 * (1 + min_t(u8, num_of_byte_octs, 255));
+}
+
void ethtool_cmis_cdb_compose_args(struct ethtool_cmis_cdb_cmd_args *args,
- enum ethtool_cmis_cdb_cmd_id cmd, u8 *pl,
- u8 lpl_len, u16 max_duration,
- u8 read_write_len_ext, u16 msleep_pre_rpl,
- u8 rpl_exp_len, u8 flags)
+ enum ethtool_cmis_cdb_cmd_id cmd, u8 *lpl,
+ u8 lpl_len, u8 *epl, u16 epl_len,
+ u16 max_duration, u8 read_write_len_ext,
+ u16 msleep_pre_rpl, u8 rpl_exp_len, u8 flags)
{
args->req.id = cpu_to_be16(cmd);
args->req.lpl_len = lpl_len;
- if (pl)
- memcpy(args->req.payload, pl, args->req.lpl_len);
+ if (lpl) {
+ memcpy(args->req.payload, lpl, args->req.lpl_len);
+ args->read_write_len_ext =
+ ethtool_cmis_get_max_lpl_size(read_write_len_ext);
+ }
+ if (epl) {
+ args->req.epl_len = cpu_to_be16(epl_len);
+ args->req.epl = epl;
+ args->read_write_len_ext =
+ ethtool_cmis_get_max_epl_size(read_write_len_ext);
+ }
args->max_duration = max_duration;
- args->read_write_len_ext =
- ethtool_cmis_get_max_payload_size(read_write_len_ext);
args->msleep_pre_rpl = msleep_pre_rpl;
args->rpl_exp_len = rpl_exp_len;
args->flags = flags;
@@ -183,7 +199,7 @@ cmis_cdb_validate_password(struct ethtool_cmis_cdb *cdb,
}
ethtool_cmis_cdb_compose_args(&args, ETHTOOL_CMIS_CDB_CMD_QUERY_STATUS,
- (u8 *)&qs_pl, sizeof(qs_pl), 0,
+ (u8 *)&qs_pl, sizeof(qs_pl), NULL, 0, 0,
cdb->read_write_len_ext, 1000,
sizeof(*rpl),
CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID);
@@ -245,8 +261,9 @@ static int cmis_cdb_module_features_get(struct ethtool_cmis_cdb *cdb,
ethtool_cmis_cdb_check_completion_flag(cdb->cmis_rev, &flags);
ethtool_cmis_cdb_compose_args(&args,
ETHTOOL_CMIS_CDB_CMD_MODULE_FEATURES,
- NULL, 0, 0, cdb->read_write_len_ext,
- 1000, sizeof(*rpl), flags);
+ NULL, 0, NULL, 0, 0,
+ cdb->read_write_len_ext, 1000,
+ sizeof(*rpl), flags);
err = ethtool_cmis_cdb_execute_cmd(dev, &args);
if (err < 0) {
@@ -546,6 +563,49 @@ __ethtool_cmis_cdb_execute_cmd(struct net_device *dev,
return err;
}
+/* EPL data goes to pages CMIS_CDB_EPL_PAGE_START..CMIS_CDB_EPL_PAGE_END, at
+ * byte offsets CMIS_CDB_EPL_FW_BLOCK_OFFSET_START..
+ * CMIS_CDB_EPL_FW_BLOCK_OFFSET_END of each page: 16 pages of 128 bytes.
+ */
+#define CMIS_CDB_EPL_PAGE_START 0xA0
+#define CMIS_CDB_EPL_PAGE_END 0xAF
+#define CMIS_CDB_EPL_FW_BLOCK_OFFSET_START 128
+#define CMIS_CDB_EPL_FW_BLOCK_OFFSET_END 255
+
+/* Transfer the extended payload args->req.epl (args->req.epl_len bytes, the
+ * length being stored big-endian) across the EPL pages.
+ *
+ * The buffer is split into chunks and each chunk is handed to
+ * __ethtool_cmis_cdb_execute_cmd() together with its page and offset. A chunk
+ * is never larger than args->read_write_len_ext, never runs past the last
+ * offset of the current page, and never exceeds the bytes still pending.
+ *
+ * Returns 0 once the loops finish, or the first negative error reported by
+ * __ethtool_cmis_cdb_execute_cmd().
+ *
+ * NOTE(review): if epl_len were larger than the 16 * 128 bytes the pages can
+ * hold, the page loop would end with data left over and 0 would still be
+ * returned. A read_write_len_ext of 0 would make bytes_to_write 0 and the
+ * inner loop would not advance - presumably callers guarantee neither case;
+ * confirm against ethtool_cmis_cdb_compose_args() users.
+ */
+static int
+ethtool_cmis_cdb_execute_epl_cmd(struct net_device *dev,
+ struct ethtool_cmis_cdb_cmd_args *args,
+ struct ethtool_module_eeprom *page_data)
+{
+ u16 epl_len = be16_to_cpu(args->req.epl_len);
+ u32 bytes_written = 0;
+ u8 page;
+ int err;
+
+ for (page = CMIS_CDB_EPL_PAGE_START;
+ page <= CMIS_CDB_EPL_PAGE_END && bytes_written < epl_len; page++) {
+ /* Every page is filled starting from its first EPL offset. */
+ u16 offset = CMIS_CDB_EPL_FW_BLOCK_OFFSET_START;
+
+ while (offset <= CMIS_CDB_EPL_FW_BLOCK_OFFSET_END &&
+ bytes_written < epl_len) {
+ u32 bytes_left = epl_len - bytes_written;
+ u16 space_left, bytes_to_write;
+
+ /* Room remaining in this page, current offset included. */
+ space_left = CMIS_CDB_EPL_FW_BLOCK_OFFSET_END - offset + 1;
+ bytes_to_write = min_t(u16, bytes_left,
+ min_t(u16, space_left,
+ args->read_write_len_ext));
+
+ err = __ethtool_cmis_cdb_execute_cmd(dev, page_data,
+ page, offset,
+ bytes_to_write,
+ args->req.epl + bytes_written);
+ if (err < 0)
+ return err;
+
+ offset += bytes_to_write;
+ bytes_written += bytes_to_write;
+ }
+ }
+ return 0;
+}
+
static u8 cmis_cdb_calc_checksum(const void *data, size_t size)
{
const u8 *bytes = (const u8 *)data;
@@ -567,7 +627,9 @@ int ethtool_cmis_cdb_execute_cmd(struct net_device *dev,
int err;
args->req.chk_code =
- cmis_cdb_calc_checksum(&args->req, sizeof(args->req));
+ cmis_cdb_calc_checksum(&args->req,
+ offsetof(struct ethtool_cmis_cdb_request,
+ epl));
if (args->req.lpl_len > args->read_write_len_ext) {
args->err_msg = "LPL length is longer than CDB read write length extension allows";
@@ -589,6 +651,12 @@ int ethtool_cmis_cdb_execute_cmd(struct net_device *dev,
if (err < 0)
return err;
+ if (args->req.epl_len) {
+ err = ethtool_cmis_cdb_execute_epl_cmd(dev, args, &page_data);
+ if (err < 0)
+ return err;
+ }
+
offset = CMIS_CDB_CMD_ID_OFFSET +
offsetof(struct ethtool_cmis_cdb_request, id);
err = __ethtool_cmis_cdb_execute_cmd(dev, &page_data,
diff --git a/net/ethtool/cmis_fw_update.c b/net/ethtool/cmis_fw_update.c
index 655ff5224ffa..48aef6220f00 100644
--- a/net/ethtool/cmis_fw_update.c
+++ b/net/ethtool/cmis_fw_update.c
@@ -9,6 +9,7 @@
struct cmis_fw_update_fw_mng_features {
u8 start_cmd_payload_size;
+ u8 write_mechanism;
u16 max_duration_start;
u16 max_duration_write;
u16 max_duration_complete;
@@ -36,7 +37,9 @@ struct cmis_cdb_fw_mng_features_rpl {
};
enum cmis_cdb_fw_write_mechanism {
+ CMIS_CDB_FW_WRITE_MECHANISM_NONE = 0x00,
CMIS_CDB_FW_WRITE_MECHANISM_LPL = 0x01,
+ CMIS_CDB_FW_WRITE_MECHANISM_EPL = 0x10,
CMIS_CDB_FW_WRITE_MECHANISM_BOTH = 0x11,
};
@@ -54,7 +57,8 @@ cmis_fw_update_fw_mng_features_get(struct ethtool_cmis_cdb *cdb,
ethtool_cmis_cdb_check_completion_flag(cdb->cmis_rev, &flags);
ethtool_cmis_cdb_compose_args(&args,
ETHTOOL_CMIS_CDB_CMD_FW_MANAGMENT_FEATURES,
- NULL, 0, cdb->max_completion_time,
+ NULL, 0, NULL, 0,
+ cdb->max_completion_time,
cdb->read_write_len_ext, 1000,
sizeof(*rpl), flags);
@@ -67,10 +71,9 @@ cmis_fw_update_fw_mng_features_get(struct ethtool_cmis_cdb *cdb,
}
rpl = (struct cmis_cdb_fw_mng_features_rpl *)args.req.payload;
- if (!(rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_LPL ||
- rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_BOTH)) {
+ if (rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_NONE) {
ethnl_module_fw_flash_ntf_err(dev, ntf_params,
- "Write LPL is not supported",
+ "CDB write mechanism is not supported",
NULL);
return -EOPNOTSUPP;
}
@@ -82,6 +85,10 @@ cmis_fw_update_fw_mng_features_get(struct ethtool_cmis_cdb *cdb,
*/
cdb->read_write_len_ext = rpl->read_write_len_ext;
fw_mng->start_cmd_payload_size = rpl->start_cmd_payload_size;
+ fw_mng->write_mechanism =
+ rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_LPL ?
+ CMIS_CDB_FW_WRITE_MECHANISM_LPL :
+ CMIS_CDB_FW_WRITE_MECHANISM_EPL;
fw_mng->max_duration_start = be16_to_cpu(rpl->max_duration_start);
fw_mng->max_duration_write = be16_to_cpu(rpl->max_duration_write);
fw_mng->max_duration_complete = be16_to_cpu(rpl->max_duration_complete);
@@ -122,7 +129,7 @@ cmis_fw_update_start_download(struct ethtool_cmis_cdb *cdb,
ethtool_cmis_cdb_compose_args(&args,
ETHTOOL_CMIS_CDB_CMD_START_FW_DOWNLOAD,
- (u8 *)&pl, lpl_len,
+ (u8 *)&pl, lpl_len, NULL, 0,
fw_mng->max_duration_start,
cdb->read_write_len_ext, 1000, 0,
CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID);
@@ -148,9 +155,9 @@ struct cmis_cdb_write_fw_block_lpl_pl {
};
static int
-cmis_fw_update_write_image(struct ethtool_cmis_cdb *cdb,
- struct ethtool_cmis_fw_update_params *fw_update,
- struct cmis_fw_update_fw_mng_features *fw_mng)
+cmis_fw_update_write_image_lpl(struct ethtool_cmis_cdb *cdb,
+ struct ethtool_cmis_fw_update_params *fw_update,
+ struct cmis_fw_update_fw_mng_features *fw_mng)
{
u8 start = fw_mng->start_cmd_payload_size;
u32 offset, max_block_size, max_lpl_len;
@@ -158,7 +165,7 @@ cmis_fw_update_write_image(struct ethtool_cmis_cdb *cdb,
int err;
max_lpl_len = min_t(u32,
- ethtool_cmis_get_max_payload_size(cdb->read_write_len_ext),
+ ethtool_cmis_get_max_lpl_size(cdb->read_write_len_ext),
ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH);
max_block_size =
max_lpl_len - sizeof_field(struct cmis_cdb_write_fw_block_lpl_pl,
@@ -183,7 +190,7 @@ cmis_fw_update_write_image(struct ethtool_cmis_cdb *cdb,
ethtool_cmis_cdb_compose_args(&args,
ETHTOOL_CMIS_CDB_CMD_WRITE_FW_BLOCK_LPL,
- (u8 *)&pl, lpl_len,
+ (u8 *)&pl, lpl_len, NULL, 0,
fw_mng->max_duration_write,
cdb->read_write_len_ext, 1, 0,
CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID);
@@ -201,6 +208,67 @@ cmis_fw_update_write_image(struct ethtool_cmis_cdb *cdb,
return 0;
}
+/* Extended payload (EPL) of a Write FW Block EPL command: a block of firmware
+ * image bytes, at most ETHTOOL_CMIS_CDB_EPL_MAX_PL_LENGTH long.
+ */
+struct cmis_cdb_write_fw_block_epl_pl {
+ u8 fw_block[ETHTOOL_CMIS_CDB_EPL_MAX_PL_LENGTH];
+};
+
+/* Send the firmware image to the module with Write FW Block EPL commands.
+ *
+ * The image is walked from byte fw_mng->start_cmd_payload_size onwards in
+ * steps of ETHTOOL_CMIS_CDB_EPL_MAX_PL_LENGTH. For every step a progress
+ * notification is emitted, the chunk is copied into a temporary buffer and a
+ * command is composed whose LPL carries only the big-endian block address
+ * (chunk position relative to the start offset) and whose EPL carries the
+ * chunk itself.
+ *
+ * Returns 0 when all chunks were sent, -ENOMEM if the temporary buffer cannot
+ * be allocated, or the negative error from ethtool_cmis_cdb_execute_cmd()
+ * (after an error notification has been sent).
+ */
+static int
+cmis_fw_update_write_image_epl(struct ethtool_cmis_cdb *cdb,
+ struct ethtool_cmis_fw_update_params *fw_update,
+ struct cmis_fw_update_fw_mng_features *fw_mng)
+{
+ u8 start = fw_mng->start_cmd_payload_size;
+ u32 image_size = fw_update->fw->size;
+ u32 offset, lpl_len;
+ int err;
+
+ /* Only the block_address field of the LPL structure is transmitted. */
+ lpl_len = sizeof_field(struct cmis_cdb_write_fw_block_lpl_pl,
+ block_address);
+
+ for (offset = start; offset < image_size;
+ offset += ETHTOOL_CMIS_CDB_EPL_MAX_PL_LENGTH) {
+ struct cmis_cdb_write_fw_block_lpl_pl lpl = {
+ .block_address = cpu_to_be32(offset - start),
+ };
+ struct cmis_cdb_write_fw_block_epl_pl *epl;
+ struct ethtool_cmis_cdb_cmd_args args = {};
+ u32 epl_len;
+
+ ethnl_module_fw_flash_ntf_in_progress(fw_update->dev,
+ &fw_update->ntf_params,
+ offset - start,
+ image_size);
+
+ /* The final chunk may be shorter than a full EPL. */
+ epl_len = min_t(u32, ETHTOOL_CMIS_CDB_EPL_MAX_PL_LENGTH,
+ image_size - offset);
+ /* Only epl_len bytes are allocated, so only the first epl_len
+ * bytes of epl->fw_block may be touched.
+ */
+ epl = kmalloc_array(epl_len, sizeof(u8), GFP_KERNEL);
+ if (!epl)
+ return -ENOMEM;
+
+ memcpy(epl->fw_block, &fw_update->fw->data[offset], epl_len);
+
+ ethtool_cmis_cdb_compose_args(&args,
+ ETHTOOL_CMIS_CDB_CMD_WRITE_FW_BLOCK_EPL,
+ (u8 *)&lpl, lpl_len, (u8 *)epl,
+ epl_len,
+ fw_mng->max_duration_write,
+ cdb->read_write_len_ext, 1, 0,
+ CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID);
+
+ err = ethtool_cmis_cdb_execute_cmd(fw_update->dev, &args);
+ /* The buffer is released before the result is inspected, so the
+ * error path below does not need its own kfree().
+ */
+ kfree(epl);
+ if (err < 0) {
+ ethnl_module_fw_flash_ntf_err(fw_update->dev,
+ &fw_update->ntf_params,
+ "Write FW block EPL command failed",
+ args.err_msg);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
static int
cmis_fw_update_complete_download(struct ethtool_cmis_cdb *cdb,
struct net_device *dev,
@@ -212,7 +280,8 @@ cmis_fw_update_complete_download(struct ethtool_cmis_cdb *cdb,
ethtool_cmis_cdb_compose_args(&args,
ETHTOOL_CMIS_CDB_CMD_COMPLETE_FW_DOWNLOAD,
- NULL, 0, fw_mng->max_duration_complete,
+ NULL, 0, NULL, 0,
+ fw_mng->max_duration_complete,
cdb->read_write_len_ext, 1000, 0,
CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID);
@@ -236,9 +305,15 @@ cmis_fw_update_download_image(struct ethtool_cmis_cdb *cdb,
if (err < 0)
return err;
- err = cmis_fw_update_write_image(cdb, fw_update, fw_mng);
- if (err < 0)
- return err;
+ if (fw_mng->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_LPL) {
+ err = cmis_fw_update_write_image_lpl(cdb, fw_update, fw_mng);
+ if (err < 0)
+ return err;
+ } else {
+ err = cmis_fw_update_write_image_epl(cdb, fw_update, fw_mng);
+ if (err < 0)
+ return err;
+ }
err = cmis_fw_update_complete_download(cdb, fw_update->dev, fw_mng,
&fw_update->ntf_params);
@@ -294,7 +369,7 @@ cmis_fw_update_run_image(struct ethtool_cmis_cdb *cdb, struct net_device *dev,
int err;
ethtool_cmis_cdb_compose_args(&args, ETHTOOL_CMIS_CDB_CMD_RUN_FW_IMAGE,
- (u8 *)&pl, sizeof(pl),
+ (u8 *)&pl, sizeof(pl), NULL, 0,
cdb->max_completion_time,
cdb->read_write_len_ext, 1000, 0,
CDB_F_MODULE_STATE_VALID);
@@ -326,7 +401,8 @@ cmis_fw_update_commit_image(struct ethtool_cmis_cdb *cdb,
ethtool_cmis_cdb_compose_args(&args,
ETHTOOL_CMIS_CDB_CMD_COMMIT_FW_IMAGE,
- NULL, 0, cdb->max_completion_time,
+ NULL, 0, NULL, 0,
+ cdb->max_completion_time,
cdb->read_write_len_ext, 1000, 0,
CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID);
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index dd345efa114b..05ce4f8080b3 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -538,6 +538,20 @@ static int ethtool_get_rxnfc_rule_count(struct net_device *dev)
return info.rule_cnt;
}
+/* Max offset for one RSS context: the largest entry found in @ctx's
+ * indirection table. Warns once and returns 0 when @ctx is NULL; also returns
+ * 0 when the table has no entries.
+ */
+static u32 ethtool_get_rss_ctx_max_channel(struct ethtool_rxfh_context *ctx)
+{
+ u32 max_ring = 0;
+ u32 i, *tbl;
+
+ if (WARN_ON_ONCE(!ctx))
+ return 0;
+ tbl = ethtool_rxfh_context_indir(ctx);
+ for (i = 0; i < ctx->indir_size; i++)
+ max_ring = max(max_ring, tbl[i]);
+ return max_ring;
+}
+
static int ethtool_get_max_rxnfc_channel(struct net_device *dev, u64 *max)
{
const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -574,10 +588,18 @@ static int ethtool_get_max_rxnfc_channel(struct net_device *dev, u64 *max)
if (rule_info.fs.ring_cookie != RX_CLS_FLOW_DISC &&
rule_info.fs.ring_cookie != RX_CLS_FLOW_WAKE &&
- !(rule_info.flow_type & FLOW_RSS) &&
- !ethtool_get_flow_spec_ring_vf(rule_info.fs.ring_cookie))
- max_ring =
- max_t(u64, max_ring, rule_info.fs.ring_cookie);
+ !ethtool_get_flow_spec_ring_vf(rule_info.fs.ring_cookie)) {
+ u64 ring = rule_info.fs.ring_cookie;
+
+ if (rule_info.flow_type & FLOW_RSS) {
+ struct ethtool_rxfh_context *ctx;
+
+ ctx = xa_load(&dev->ethtool->rss_ctx,
+ rule_info.rss_context);
+ ring += ethtool_get_rss_ctx_max_channel(ctx);
+ }
+ max_ring = max_t(u64, max_ring, ring);
+ }
}
kvfree(info);
@@ -589,6 +611,7 @@ err_free_info:
return err;
}
+/* Max offset across all of a device's RSS contexts */
static u32 ethtool_get_max_rss_ctx_channel(struct net_device *dev)
{
struct ethtool_rxfh_context *ctx;
@@ -596,13 +619,8 @@ static u32 ethtool_get_max_rss_ctx_channel(struct net_device *dev)
u32 max_ring = 0;
mutex_lock(&dev->ethtool->rss_lock);
- xa_for_each(&dev->ethtool->rss_ctx, context, ctx) {
- u32 i, *tbl;
-
- tbl = ethtool_rxfh_context_indir(ctx);
- for (i = 0; i < ctx->indir_size; i++)
- max_ring = max(max_ring, tbl[i]);
- }
+ xa_for_each(&dev->ethtool->rss_ctx, context, ctx)
+ max_ring = max(max_ring, ethtool_get_rss_ctx_max_channel(ctx));
mutex_unlock(&dev->ethtool->rss_lock);
return max_ring;
@@ -611,7 +629,7 @@ static u32 ethtool_get_max_rss_ctx_channel(struct net_device *dev)
static u32 ethtool_get_max_rxfh_channel(struct net_device *dev)
{
struct ethtool_rxfh_param rxfh = {};
- u32 dev_size, current_max;
+ u32 dev_size, current_max = 0;
int ret;
/* While we do track whether RSS context has an indirection
@@ -684,6 +702,54 @@ int ethtool_check_max_channel(struct net_device *dev,
return 0;
}
+/* Check whether any RX classification rule reported by ops->get_rxnfc still
+ * refers to RSS context @rss_context.
+ *
+ * Returns 0 when the device has no get_rxnfc op, when no rules are installed
+ * or when no rule matches; -EBUSY when a rule has FLOW_RSS set in its flow
+ * type and names @rss_context; -EINVAL when the rule count is negative;
+ * -ENOMEM when the rule location array cannot be allocated; otherwise the
+ * non-zero value returned by ops->get_rxnfc.
+ */
+int ethtool_check_rss_ctx_busy(struct net_device *dev, u32 rss_context)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct ethtool_rxnfc *info;
+ int rc, i, rule_cnt;
+
+ if (!ops->get_rxnfc)
+ return 0;
+
+ rule_cnt = ethtool_get_rxnfc_rule_count(dev);
+ if (!rule_cnt)
+ return 0;
+
+ if (rule_cnt < 0)
+ return -EINVAL;
+
+ info = kvzalloc(struct_size(info, rule_locs, rule_cnt), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ /* First fetch the locations of all rules ... */
+ info->cmd = ETHTOOL_GRXCLSRLALL;
+ info->rule_cnt = rule_cnt;
+ rc = ops->get_rxnfc(dev, info, info->rule_locs);
+ if (rc)
+ goto out_free;
+
+ /* ... then query each rule individually and inspect it. */
+ for (i = 0; i < rule_cnt; i++) {
+ struct ethtool_rxnfc rule_info = {
+ .cmd = ETHTOOL_GRXCLSRULE,
+ .fs.location = info->rule_locs[i],
+ };
+
+ rc = ops->get_rxnfc(dev, &rule_info, NULL);
+ if (rc)
+ goto out_free;
+
+ if (rule_info.fs.flow_type & FLOW_RSS &&
+ rule_info.rss_context == rss_context) {
+ rc = -EBUSY;
+ goto out_free;
+ }
+ }
+
+ /* Falling out of the loop leaves rc at 0 from the last successful query. */
+out_free:
+ kvfree(info);
+ return rc;
+}
+
int ethtool_check_ops(const struct ethtool_ops *ops)
{
if (WARN_ON(ops->set_coalesce && !ops->supported_coalesce_params))
diff --git a/net/ethtool/common.h b/net/ethtool/common.h
index d55d5201b085..4a2de3ce7354 100644
--- a/net/ethtool/common.h
+++ b/net/ethtool/common.h
@@ -47,6 +47,7 @@ bool convert_legacy_settings_to_link_ksettings(
int ethtool_check_max_channel(struct net_device *dev,
struct ethtool_channels channels,
struct genl_info *info);
+int ethtool_check_rss_ctx_busy(struct net_device *dev, u32 rss_context);
int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info);
extern const struct ethtool_phy_ops *ethtool_phy_ops;
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 65cfe76dafbe..61df8ce44379 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -992,6 +992,11 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
if (rc)
return rc;
+ /* Nonzero ring with RSS only makes sense if NIC adds them together */
+ if (info.flow_type & FLOW_RSS && !ops->cap_rss_rxnfc_adds &&
+ ethtool_get_flow_spec_ring(info.fs.ring_cookie))
+ return -EINVAL;
+
if (ops->get_rxfh) {
struct ethtool_rxfh_param rxfh = {};
@@ -1462,6 +1467,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
mutex_lock(&dev->ethtool->rss_lock);
locked = true;
}
+
+ if (rxfh.rss_context && rxfh_dev.rss_delete) {
+ ret = ethtool_check_rss_ctx_busy(dev, rxfh.rss_context);
+ if (ret)
+ goto out;
+ }
+
if (create) {
if (rxfh_dev.rss_delete) {
ret = -EINVAL;
@@ -1505,6 +1517,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
extack);
/* Make sure driver populates defaults */
WARN_ON_ONCE(!ret && !rxfh_dev.key &&
+ ops->rxfh_per_ctx_key &&
!memchr_inv(ethtool_rxfh_context_key(ctx),
0, ctx->key_size));
} else if (rxfh_dev.rss_delete) {
diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c
index e07386275e14..7cb106b590ab 100644
--- a/net/ethtool/rss.c
+++ b/net/ethtool/rss.c
@@ -224,7 +224,7 @@ struct rss_nl_dump_ctx {
static struct rss_nl_dump_ctx *rss_dump_ctx(struct netlink_callback *cb)
{
- NL_ASSERT_DUMP_CTX_FITS(struct rss_nl_dump_ctx);
+ NL_ASSERT_CTX_FITS(struct rss_nl_dump_ctx);
return (struct rss_nl_dump_ctx *)cb->ctx;
}
diff --git a/net/handshake/request.c b/net/handshake/request.c
index 94d5cef3e048..274d2c89b6b2 100644
--- a/net/handshake/request.c
+++ b/net/handshake/request.c
@@ -13,7 +13,6 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/inet.h>
-#include <linux/fdtable.h>
#include <linux/rhashtable.h>
#include <net/sock.h>
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index ebdfd5b64e17..31a416ee21ad 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -268,6 +268,8 @@ static struct sk_buff *hsr_init_skb(struct hsr_port *master)
skb->dev = master->dev;
skb->priority = TC_PRIO_CONTROL;
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
if (dev_hard_header(skb, skb->dev, ETH_P_PRP,
hsr->sup_multicast_addr,
skb->dev->dev_addr, skb->len) <= 0)
@@ -275,8 +277,6 @@ static struct sk_buff *hsr_init_skb(struct hsr_port *master)
skb_reset_mac_header(skb);
skb_reset_mac_len(skb);
- skb_reset_network_header(skb);
- skb_reset_transport_header(skb);
return skb;
out:
@@ -515,6 +515,77 @@ static void hsr_change_rx_flags(struct net_device *dev, int change)
}
}
+/* ndo_vlan_rx_add_vid handler: add VLAN @vid (protocol @proto) to both slave
+ * ports of the HSR/PRP device @dev. Master and interlink ports are skipped.
+ *
+ * If adding the VID fails on one slave, the VID is removed again from the
+ * other slave when it had already been added there, so that both slaves end
+ * up in the same state, and the error from vlan_vid_add() is returned.
+ *
+ * Returns 0 on success or the negative error from vlan_vid_add().
+ */
+static int hsr_ndo_vlan_rx_add_vid(struct net_device *dev,
+				   __be16 proto, u16 vid)
+{
+	/* Devices on which the VID was added successfully; NULL until then.
+	 * The unwind must act on these, not on the port that just failed.
+	 */
+	struct net_device *slave_a_dev = NULL;
+	struct net_device *slave_b_dev = NULL;
+	struct hsr_port *port;
+	struct hsr_priv *hsr;
+	int ret = 0;
+
+	hsr = netdev_priv(dev);
+
+	hsr_for_each_port(hsr, port) {
+		if (port->type == HSR_PT_MASTER ||
+		    port->type == HSR_PT_INTERLINK)
+			continue;
+
+		ret = vlan_vid_add(port->dev, proto, vid);
+		switch (port->type) {
+		case HSR_PT_SLAVE_A:
+			if (ret) {
+				/* clean up Slave-B */
+				netdev_err(dev, "add vid failed for Slave-A\n");
+				if (slave_b_dev)
+					vlan_vid_del(slave_b_dev, proto, vid);
+				return ret;
+			}
+
+			slave_a_dev = port->dev;
+			break;
+
+		case HSR_PT_SLAVE_B:
+			if (ret) {
+				/* clean up Slave-A */
+				netdev_err(dev, "add vid failed for Slave-B\n");
+				if (slave_a_dev)
+					vlan_vid_del(slave_a_dev, proto, vid);
+				return ret;
+			}
+
+			slave_b_dev = port->dev;
+			break;
+		default:
+			break;
+		}
+	}
+
+	return 0;
+}
+
+/* ndo_vlan_rx_kill_vid handler: remove VLAN @vid (protocol @proto) from every
+ * Slave-A and Slave-B port of the HSR/PRP device @dev. Ports of any other
+ * type are left untouched. Always returns 0.
+ */
+static int hsr_ndo_vlan_rx_kill_vid(struct net_device *dev,
+ __be16 proto, u16 vid)
+{
+ struct hsr_port *port;
+ struct hsr_priv *hsr;
+
+ hsr = netdev_priv(dev);
+
+ hsr_for_each_port(hsr, port) {
+ switch (port->type) {
+ case HSR_PT_SLAVE_A:
+ case HSR_PT_SLAVE_B:
+ vlan_vid_del(port->dev, proto, vid);
+ break;
+ default:
+ break;
+ }
+ }
+
+ return 0;
+}
+
static const struct net_device_ops hsr_device_ops = {
.ndo_change_mtu = hsr_dev_change_mtu,
.ndo_open = hsr_dev_open,
@@ -523,6 +594,8 @@ static const struct net_device_ops hsr_device_ops = {
.ndo_change_rx_flags = hsr_change_rx_flags,
.ndo_fix_features = hsr_fix_features,
.ndo_set_rx_mode = hsr_set_rx_mode,
+ .ndo_vlan_rx_add_vid = hsr_ndo_vlan_rx_add_vid,
+ .ndo_vlan_rx_kill_vid = hsr_ndo_vlan_rx_kill_vid,
};
static const struct device_type hsr_type = {
@@ -569,14 +642,10 @@ void hsr_dev_setup(struct net_device *dev)
dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
NETIF_F_GSO_MASK | NETIF_F_HW_CSUM |
- NETIF_F_HW_VLAN_CTAG_TX;
+ NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_CTAG_FILTER;
dev->features = dev->hw_features;
-
- /* VLAN on top of HSR needs testing and probably some work on
- * hsr_header_create() etc.
- */
- dev->features |= NETIF_F_VLAN_CHALLENGED;
}
/* Return true if dev is a HSR master; return false otherwise.
@@ -652,6 +721,10 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
(slave[1]->features & NETIF_F_HW_HSR_FWD))
hsr->fwd_offloaded = true;
+ if ((slave[0]->features & NETIF_F_HW_VLAN_CTAG_FILTER) &&
+ (slave[1]->features & NETIF_F_HW_VLAN_CTAG_FILTER))
+ hsr_dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+
res = register_netdevice(hsr_dev);
if (res)
goto err_unregister;
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index b38060246e62..aa6acebc7c1e 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -280,6 +280,7 @@ static struct sk_buff *hsr_fill_tag(struct sk_buff *skb,
struct hsr_port *port, u8 proto_version)
{
struct hsr_ethhdr *hsr_ethhdr;
+ unsigned char *pc;
int lsdu_size;
/* pad to minimum packet size which is 60 + 6 (HSR tag) */
@@ -290,7 +291,18 @@ static struct sk_buff *hsr_fill_tag(struct sk_buff *skb,
if (frame->is_vlan)
lsdu_size -= 4;
- hsr_ethhdr = (struct hsr_ethhdr *)skb_mac_header(skb);
+ pc = skb_mac_header(skb);
+ if (frame->is_vlan)
+ /* This 4-byte shift (size of a vlan tag) does not
+ * mean that the ethhdr starts there. But rather it
+ * provides the proper environment for accessing
+ * the fields, such as hsr_tag etc., just like
+ * when the vlan tag is not there. This is because
+ * the hsr tag is after the vlan tag.
+ */
+ hsr_ethhdr = (struct hsr_ethhdr *)(pc + VLAN_HLEN);
+ else
+ hsr_ethhdr = (struct hsr_ethhdr *)pc;
hsr_set_path_id(hsr_ethhdr, port);
set_hsr_tag_LSDU_size(&hsr_ethhdr->hsr_tag, lsdu_size);
@@ -368,7 +380,7 @@ struct sk_buff *prp_create_tagged_frame(struct hsr_frame_info *frame,
return skb_clone(frame->skb_std, GFP_ATOMIC);
}
- skb = skb_copy_expand(frame->skb_std, 0,
+ skb = skb_copy_expand(frame->skb_std, skb_headroom(frame->skb_std),
skb_tailroom(frame->skb_std) + HSR_HLEN,
GFP_ATOMIC);
return prp_fill_rct(skb, frame, port);
@@ -690,9 +702,6 @@ static int fill_frame_info(struct hsr_frame_info *frame,
if (frame->is_vlan) {
vlan_hdr = (struct hsr_vlan_ethhdr *)ethhdr;
proto = vlan_hdr->vlanhdr.h_vlan_encapsulated_proto;
- /* FIXME: */
- netdev_warn_once(skb->dev, "VLAN not yet supported");
- return -EINVAL;
}
frame->is_from_san = false;
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index f6ff0b61e08a..b68f2f71d0e1 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -82,10 +82,7 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev,
return -EINVAL;
}
- if (!data[IFLA_HSR_MULTICAST_SPEC])
- multicast_spec = 0;
- else
- multicast_spec = nla_get_u8(data[IFLA_HSR_MULTICAST_SPEC]);
+ multicast_spec = nla_get_u8_default(data[IFLA_HSR_MULTICAST_SPEC], 0);
if (data[IFLA_HSR_PROTOCOL])
proto = nla_get_u8(data[IFLA_HSR_PROTOCOL]);
@@ -128,9 +125,9 @@ static void hsr_dellink(struct net_device *dev, struct list_head *head)
{
struct hsr_priv *hsr = netdev_priv(dev);
- del_timer_sync(&hsr->prune_timer);
- del_timer_sync(&hsr->prune_proxy_timer);
- del_timer_sync(&hsr->announce_timer);
+ timer_delete_sync(&hsr->prune_timer);
+ timer_delete_sync(&hsr->prune_proxy_timer);
+ timer_delete_sync(&hsr->announce_timer);
timer_delete_sync(&hsr->announce_proxy_timer);
hsr_debugfs_term(hsr);
diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c
index 29bf97640166..74ef0a310afb 100644
--- a/net/ieee802154/nl-mac.c
+++ b/net/ieee802154/nl-mac.c
@@ -202,10 +202,7 @@ int ieee802154_associate_req(struct sk_buff *skb, struct genl_info *info)
addr.pan_id = nla_get_shortaddr(
info->attrs[IEEE802154_ATTR_COORD_PAN_ID]);
- if (info->attrs[IEEE802154_ATTR_PAGE])
- page = nla_get_u8(info->attrs[IEEE802154_ATTR_PAGE]);
- else
- page = 0;
+ page = nla_get_u8_default(info->attrs[IEEE802154_ATTR_PAGE], 0);
ret = ieee802154_mlme_ops(dev)->assoc_req(dev, &addr,
nla_get_u8(info->attrs[IEEE802154_ATTR_CHANNEL]),
@@ -338,10 +335,7 @@ int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info)
blx = nla_get_u8(info->attrs[IEEE802154_ATTR_BAT_EXT]);
coord_realign = nla_get_u8(info->attrs[IEEE802154_ATTR_COORD_REALIGN]);
- if (info->attrs[IEEE802154_ATTR_PAGE])
- page = nla_get_u8(info->attrs[IEEE802154_ATTR_PAGE]);
- else
- page = 0;
+ page = nla_get_u8_default(info->attrs[IEEE802154_ATTR_PAGE], 0);
if (addr.short_addr == cpu_to_le16(IEEE802154_ADDR_BROADCAST)) {
ieee802154_nl_start_confirm(dev, IEEE802154_NO_SHORT_ADDRESS);
@@ -388,10 +382,7 @@ int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info)
channels = nla_get_u32(info->attrs[IEEE802154_ATTR_CHANNELS]);
duration = nla_get_u8(info->attrs[IEEE802154_ATTR_DURATION]);
- if (info->attrs[IEEE802154_ATTR_PAGE])
- page = nla_get_u8(info->attrs[IEEE802154_ATTR_PAGE]);
- else
- page = 0;
+ page = nla_get_u8_default(info->attrs[IEEE802154_ATTR_PAGE], 0);
ret = ieee802154_mlme_ops(dev)->scan_req(dev, type, channels,
page, duration);
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index 7eb37de3add2..5a024ca60d35 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -1438,22 +1438,18 @@ static int nl802154_trigger_scan(struct sk_buff *skb, struct genl_info *info)
}
/* Use current page by default */
- if (info->attrs[NL802154_ATTR_PAGE])
- request->page = nla_get_u8(info->attrs[NL802154_ATTR_PAGE]);
- else
- request->page = wpan_phy->current_page;
+ request->page = nla_get_u8_default(info->attrs[NL802154_ATTR_PAGE],
+ wpan_phy->current_page);
/* Scan all supported channels by default */
- if (info->attrs[NL802154_ATTR_SCAN_CHANNELS])
- request->channels = nla_get_u32(info->attrs[NL802154_ATTR_SCAN_CHANNELS]);
- else
- request->channels = wpan_phy->supported.channels[request->page];
+ request->channels =
+ nla_get_u32_default(info->attrs[NL802154_ATTR_SCAN_CHANNELS],
+ wpan_phy->supported.channels[request->page]);
/* Use maximum duration order by default */
- if (info->attrs[NL802154_ATTR_SCAN_DURATION])
- request->duration = nla_get_u8(info->attrs[NL802154_ATTR_SCAN_DURATION]);
- else
- request->duration = IEEE802154_MAX_SCAN_DURATION;
+ request->duration =
+ nla_get_u8_default(info->attrs[NL802154_ATTR_SCAN_DURATION],
+ IEEE802154_MAX_SCAN_DURATION);
err = rdev_trigger_scan(rdev, request);
if (err) {
@@ -1598,10 +1594,8 @@ nl802154_send_beacons(struct sk_buff *skb, struct genl_info *info)
request->wpan_phy = wpan_phy;
/* Use maximum duration order by default */
- if (info->attrs[NL802154_ATTR_BEACON_INTERVAL])
- request->interval = nla_get_u8(info->attrs[NL802154_ATTR_BEACON_INTERVAL]);
- else
- request->interval = IEEE802154_MAX_SCAN_DURATION;
+ request->interval = nla_get_u8_default(info->attrs[NL802154_ATTR_BEACON_INTERVAL],
+ IEEE802154_MAX_SCAN_DURATION);
err = rdev_send_beacons(rdev, request);
if (err) {
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index 990a83455dcf..18d267921bb5 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -1043,19 +1043,21 @@ static int ieee802154_create(struct net *net, struct socket *sock,
if (sk->sk_prot->hash) {
rc = sk->sk_prot->hash(sk);
- if (rc) {
- sk_common_release(sk);
- goto out;
- }
+ if (rc)
+ goto out_sk_release;
}
if (sk->sk_prot->init) {
rc = sk->sk_prot->init(sk);
if (rc)
- sk_common_release(sk);
+ goto out_sk_release;
}
out:
return rc;
+out_sk_release:
+ sk_common_release(sk);
+ sock->sk = NULL;
+ goto out;
}
static const struct net_proto_family ieee802154_family_ops = {
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index b24d74616637..8095e82de808 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -376,32 +376,30 @@ lookup_protocol:
inet->inet_sport = htons(inet->inet_num);
/* Add to protocol hash chains. */
err = sk->sk_prot->hash(sk);
- if (err) {
- sk_common_release(sk);
- goto out;
- }
+ if (err)
+ goto out_sk_release;
}
if (sk->sk_prot->init) {
err = sk->sk_prot->init(sk);
- if (err) {
- sk_common_release(sk);
- goto out;
- }
+ if (err)
+ goto out_sk_release;
}
if (!kern) {
err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
- if (err) {
- sk_common_release(sk);
- goto out;
- }
+ if (err)
+ goto out_sk_release;
}
out:
return err;
out_rcu_unlock:
rcu_read_unlock();
goto out;
+out_sk_release:
+ sk_common_release(sk);
+ sock->sk = NULL;
+ goto out;
}
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 11c1519b3699..cb9a7ed8abd3 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1215,7 +1215,7 @@ int arp_invalidate(struct net_device *dev, __be32 ip, bool force)
NEIGH_UPDATE_F_ADMIN, 0);
write_lock_bh(&tbl->lock);
neigh_release(neigh);
- neigh_remove_one(neigh, tbl);
+ neigh_remove_one(neigh);
write_unlock_bh(&tbl->lock);
}
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 7cf5f7d0d0de..c8b3cf5fba4c 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -119,11 +119,9 @@ struct inet_fill_args {
#define IN4_ADDR_HSIZE_SHIFT 8
#define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT)
-static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
-
static u32 inet_addr_hash(const struct net *net, __be32 addr)
{
- u32 val = (__force u32) addr ^ net_hash_mix(net);
+ u32 val = __ipv4_addr_hash(addr, net_hash_mix(net));
return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
}
@@ -133,13 +131,13 @@ static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
u32 hash = inet_addr_hash(net, ifa->ifa_local);
ASSERT_RTNL();
- hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
+ hlist_add_head_rcu(&ifa->addr_lst, &net->ipv4.inet_addr_lst[hash]);
}
static void inet_hash_remove(struct in_ifaddr *ifa)
{
ASSERT_RTNL();
- hlist_del_init_rcu(&ifa->hash);
+ hlist_del_init_rcu(&ifa->addr_lst);
}
/**
@@ -186,9 +184,8 @@ struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
u32 hash = inet_addr_hash(net, addr);
struct in_ifaddr *ifa;
- hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
- if (ifa->ifa_local == addr &&
- net_eq(dev_net(ifa->ifa_dev->dev), net))
+ hlist_for_each_entry_rcu(ifa, &net->ipv4.inet_addr_lst[hash], addr_lst)
+ if (ifa->ifa_local == addr)
return ifa;
return NULL;
@@ -227,7 +224,7 @@ static struct in_ifaddr *inet_alloc_ifa(struct in_device *in_dev)
in_dev_hold(in_dev);
ifa->ifa_dev = in_dev;
- INIT_HLIST_NODE(&ifa->hash);
+ INIT_HLIST_NODE(&ifa->addr_lst);
return ifa;
}
@@ -499,26 +496,18 @@ static void inet_del_ifa(struct in_device *in_dev,
__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
-static void check_lifetime(struct work_struct *work);
-
-static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
-
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
u32 portid, struct netlink_ext_ack *extack)
{
struct in_ifaddr __rcu **last_primary, **ifap;
struct in_device *in_dev = ifa->ifa_dev;
+ struct net *net = dev_net(in_dev->dev);
struct in_validator_info ivi;
struct in_ifaddr *ifa1;
int ret;
ASSERT_RTNL();
- if (!ifa->ifa_local) {
- inet_free_ifa(ifa);
- return 0;
- }
-
ifa->ifa_flags &= ~IFA_F_SECONDARY;
last_primary = &in_dev->ifa_list;
@@ -576,8 +565,8 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
inet_hash_insert(dev_net(in_dev->dev), ifa);
- cancel_delayed_work(&check_lifetime_work);
- queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
+ cancel_delayed_work(&net->ipv4.addr_chk_work);
+ queue_delayed_work(system_power_efficient_wq, &net->ipv4.addr_chk_work, 0);
/* Send message first, then call notifier.
Notifier will trigger FIB update, so that
@@ -590,14 +579,17 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
+ if (!ifa->ifa_local) {
+ inet_free_ifa(ifa);
+ return 0;
+ }
+
return __inet_insert_ifa(ifa, NULL, 0, NULL);
}
static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
{
- struct in_device *in_dev = __in_dev_get_rtnl(dev);
-
- ASSERT_RTNL();
+ struct in_device *in_dev = __in_dev_get_rtnl_net(dev);
ipv4_devconf_setall(in_dev);
neigh_parms_data_state_setall(in_dev->arp_parms);
@@ -651,7 +643,7 @@ static int ip_mc_autojoin_config(struct net *net, bool join,
struct sock *sk = net->ipv4.mc_autojoin_sk;
int ret;
- ASSERT_RTNL();
+ ASSERT_RTNL_NET(net);
lock_sock(sk);
if (join)
@@ -677,22 +669,24 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
struct in_ifaddr *ifa;
int err;
- ASSERT_RTNL();
-
err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
ifa_ipv4_policy, extack);
if (err < 0)
- goto errout;
+ goto out;
ifm = nlmsg_data(nlh);
+
+ rtnl_net_lock(net);
+
in_dev = inetdev_by_index(net, ifm->ifa_index);
if (!in_dev) {
NL_SET_ERR_MSG(extack, "ipv4: Device not found");
err = -ENODEV;
- goto errout;
+ goto unlock;
}
- for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
+ for (ifap = &in_dev->ifa_list;
+ (ifa = rtnl_net_dereference(net, *ifap)) != NULL;
ifap = &ifa->ifa_next) {
if (tb[IFA_LOCAL] &&
ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
@@ -708,13 +702,16 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
if (ipv4_is_multicast(ifa->ifa_address))
ip_mc_autojoin_config(net, false, ifa);
+
__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
- return 0;
+ goto unlock;
}
NL_SET_ERR_MSG(extack, "ipv4: Address not found");
err = -EADDRNOTAVAIL;
-errout:
+unlock:
+ rtnl_net_unlock(net);
+out:
return err;
}
@@ -723,16 +720,19 @@ static void check_lifetime(struct work_struct *work)
unsigned long now, next, next_sec, next_sched;
struct in_ifaddr *ifa;
struct hlist_node *n;
+ struct net *net;
int i;
+ net = container_of(to_delayed_work(work), struct net, ipv4.addr_chk_work);
now = jiffies;
next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
for (i = 0; i < IN4_ADDR_HSIZE; i++) {
+ struct hlist_head *head = &net->ipv4.inet_addr_lst[i];
bool change_needed = false;
rcu_read_lock();
- hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
+ hlist_for_each_entry_rcu(ifa, head, addr_lst) {
unsigned long age, tstamp;
u32 preferred_lft;
u32 valid_lft;
@@ -769,8 +769,9 @@ static void check_lifetime(struct work_struct *work)
rcu_read_unlock();
if (!change_needed)
continue;
- rtnl_lock();
- hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
+
+ rtnl_net_lock(net);
+ hlist_for_each_entry_safe(ifa, n, head, addr_lst) {
unsigned long age;
if (ifa->ifa_flags & IFA_F_PERMANENT)
@@ -786,7 +787,7 @@ static void check_lifetime(struct work_struct *work)
struct in_ifaddr *tmp;
ifap = &ifa->ifa_dev->ifa_list;
- tmp = rtnl_dereference(*ifap);
+ tmp = rtnl_net_dereference(net, *ifap);
while (tmp) {
if (tmp == ifa) {
inet_del_ifa(ifa->ifa_dev,
@@ -794,7 +795,7 @@ static void check_lifetime(struct work_struct *work)
break;
}
ifap = &tmp->ifa_next;
- tmp = rtnl_dereference(*ifap);
+ tmp = rtnl_net_dereference(net, *ifap);
}
} else if (ifa->ifa_preferred_lft !=
INFINITY_LIFE_TIME &&
@@ -804,7 +805,7 @@ static void check_lifetime(struct work_struct *work)
rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
}
}
- rtnl_unlock();
+ rtnl_net_unlock(net);
}
next_sec = round_jiffies_up(next);
@@ -819,8 +820,8 @@ static void check_lifetime(struct work_struct *work)
if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
- queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
- next_sched - now);
+ queue_delayed_work(system_power_efficient_wq, &net->ipv4.addr_chk_work,
+ next_sched - now);
}
static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
@@ -849,35 +850,54 @@ static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
WRITE_ONCE(ifa->ifa_cstamp, ifa->ifa_tstamp);
}
-static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
- __u32 *pvalid_lft, __u32 *pprefered_lft,
- struct netlink_ext_ack *extack)
+static int inet_validate_rtm(struct nlmsghdr *nlh, struct nlattr **tb,
+ struct netlink_ext_ack *extack,
+ __u32 *valid_lft, __u32 *prefered_lft)
{
- struct nlattr *tb[IFA_MAX+1];
- struct in_ifaddr *ifa;
- struct ifaddrmsg *ifm;
- struct net_device *dev;
- struct in_device *in_dev;
+ struct ifaddrmsg *ifm = nlmsg_data(nlh);
int err;
err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
ifa_ipv4_policy, extack);
if (err < 0)
- goto errout;
-
- ifm = nlmsg_data(nlh);
- err = -EINVAL;
+ return err;
if (ifm->ifa_prefixlen > 32) {
NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length");
- goto errout;
+ return -EINVAL;
}
if (!tb[IFA_LOCAL]) {
NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied");
- goto errout;
+ return -EINVAL;
+ }
+
+ if (tb[IFA_CACHEINFO]) {
+ struct ifa_cacheinfo *ci;
+
+ ci = nla_data(tb[IFA_CACHEINFO]);
+ if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
+ NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid");
+ return -EINVAL;
+ }
+
+ *valid_lft = ci->ifa_valid;
+ *prefered_lft = ci->ifa_prefered;
}
+ return 0;
+}
+
+static struct in_ifaddr *inet_rtm_to_ifa(struct net *net, struct nlmsghdr *nlh,
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack)
+{
+ struct ifaddrmsg *ifm = nlmsg_data(nlh);
+ struct in_device *in_dev;
+ struct net_device *dev;
+ struct in_ifaddr *ifa;
+ int err;
+
dev = __dev_get_by_index(net, ifm->ifa_index);
err = -ENODEV;
if (!dev) {
@@ -885,7 +905,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
goto errout;
}
- in_dev = __in_dev_get_rtnl(dev);
+ in_dev = __in_dev_get_rtnl_net(dev);
err = -ENOBUFS;
if (!in_dev)
goto errout;
@@ -906,8 +926,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
ifa->ifa_prefixlen = ifm->ifa_prefixlen;
ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
- ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
- ifm->ifa_flags;
+ ifa->ifa_flags = nla_get_u32_default(tb[IFA_FLAGS], ifm->ifa_flags);
ifa->ifa_scope = ifm->ifa_scope;
ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
@@ -926,76 +945,69 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
if (tb[IFA_PROTO])
ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
- if (tb[IFA_CACHEINFO]) {
- struct ifa_cacheinfo *ci;
-
- ci = nla_data(tb[IFA_CACHEINFO]);
- if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
- NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid");
- err = -EINVAL;
- goto errout_free;
- }
- *pvalid_lft = ci->ifa_valid;
- *pprefered_lft = ci->ifa_prefered;
- }
-
return ifa;
-errout_free:
- inet_free_ifa(ifa);
errout:
return ERR_PTR(err);
}
-static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
+static struct in_ifaddr *find_matching_ifa(struct net *net, struct in_ifaddr *ifa)
{
struct in_device *in_dev = ifa->ifa_dev;
struct in_ifaddr *ifa1;
- if (!ifa->ifa_local)
- return NULL;
-
- in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
+ in_dev_for_each_ifa_rtnl_net(net, ifa1, in_dev) {
if (ifa1->ifa_mask == ifa->ifa_mask &&
inet_ifa_match(ifa1->ifa_address, ifa) &&
ifa1->ifa_local == ifa->ifa_local)
return ifa1;
}
+
return NULL;
}
static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
+ __u32 prefered_lft = INFINITY_LIFE_TIME;
+ __u32 valid_lft = INFINITY_LIFE_TIME;
struct net *net = sock_net(skb->sk);
- struct in_ifaddr *ifa;
struct in_ifaddr *ifa_existing;
- __u32 valid_lft = INFINITY_LIFE_TIME;
- __u32 prefered_lft = INFINITY_LIFE_TIME;
+ struct nlattr *tb[IFA_MAX + 1];
+ struct in_ifaddr *ifa;
+ int ret;
- ASSERT_RTNL();
+ ret = inet_validate_rtm(nlh, tb, extack, &valid_lft, &prefered_lft);
+ if (ret < 0)
+ return ret;
+
+ if (!nla_get_in_addr(tb[IFA_LOCAL]))
+ return 0;
- ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
- if (IS_ERR(ifa))
- return PTR_ERR(ifa);
+ rtnl_net_lock(net);
- ifa_existing = find_matching_ifa(ifa);
+ ifa = inet_rtm_to_ifa(net, nlh, tb, extack);
+ if (IS_ERR(ifa)) {
+ ret = PTR_ERR(ifa);
+ goto unlock;
+ }
+
+ ifa_existing = find_matching_ifa(net, ifa);
if (!ifa_existing) {
/* It would be best to check for !NLM_F_CREATE here but
* userspace already relies on not having to provide this.
*/
set_ifa_lifetime(ifa, valid_lft, prefered_lft);
if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
- int ret = ip_mc_autojoin_config(net, true, ifa);
-
+ ret = ip_mc_autojoin_config(net, true, ifa);
if (ret < 0) {
NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed");
inet_free_ifa(ifa);
- return ret;
+ goto unlock;
}
}
- return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
- extack);
+
+ ret = __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid, extack);
} else {
u32 new_metric = ifa->ifa_rt_priority;
u8 new_proto = ifa->ifa_proto;
@@ -1005,7 +1017,8 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
if (nlh->nlmsg_flags & NLM_F_EXCL ||
!(nlh->nlmsg_flags & NLM_F_REPLACE)) {
NL_SET_ERR_MSG(extack, "ipv4: Address already assigned");
- return -EEXIST;
+ ret = -EEXIST;
+ goto unlock;
}
ifa = ifa_existing;
@@ -1017,12 +1030,16 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
ifa->ifa_proto = new_proto;
set_ifa_lifetime(ifa, valid_lft, prefered_lft);
- cancel_delayed_work(&check_lifetime_work);
+ cancel_delayed_work(&net->ipv4.addr_chk_work);
queue_delayed_work(system_power_efficient_wq,
- &check_lifetime_work, 0);
+ &net->ipv4.addr_chk_work, 0);
rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
}
- return 0;
+
+unlock:
+ rtnl_net_unlock(net);
+
+ return ret;
}
/*
@@ -1109,7 +1126,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
goto out;
}
- rtnl_lock();
+ rtnl_net_lock(net);
ret = -ENODEV;
dev = __dev_get_by_name(net, ifr->ifr_name);
@@ -1119,7 +1136,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
if (colon)
*colon = ':';
- in_dev = __in_dev_get_rtnl(dev);
+ in_dev = __in_dev_get_rtnl_net(dev);
if (in_dev) {
if (tryaddrmatch) {
/* Matthias Andree */
@@ -1129,7 +1146,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
This is checked above. */
for (ifap = &in_dev->ifa_list;
- (ifa = rtnl_dereference(*ifap)) != NULL;
+ (ifa = rtnl_net_dereference(net, *ifap)) != NULL;
ifap = &ifa->ifa_next) {
if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
sin_orig.sin_addr.s_addr ==
@@ -1143,7 +1160,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
comparing just the label */
if (!ifa) {
for (ifap = &in_dev->ifa_list;
- (ifa = rtnl_dereference(*ifap)) != NULL;
+ (ifa = rtnl_net_dereference(net, *ifap)) != NULL;
ifap = &ifa->ifa_next)
if (!strcmp(ifr->ifr_name, ifa->ifa_label))
break;
@@ -1185,6 +1202,9 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
inet_del_ifa(in_dev, ifap, 1);
break;
}
+
+ /* NETDEV_UP/DOWN/CHANGE could touch a peer dev */
+ ASSERT_RTNL();
ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
break;
@@ -1286,14 +1306,14 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
break;
}
done:
- rtnl_unlock();
+ rtnl_net_unlock(net);
out:
return ret;
}
int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
{
- struct in_device *in_dev = __in_dev_get_rtnl(dev);
+ struct in_device *in_dev = __in_dev_get_rtnl_net(dev);
const struct in_ifaddr *ifa;
struct ifreq ifr;
int done = 0;
@@ -1304,7 +1324,7 @@ int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
if (!in_dev)
goto out;
- in_dev_for_each_ifa_rtnl(ifa, in_dev) {
+ in_dev_for_each_ifa_rtnl_net(dev_net(dev), ifa, in_dev) {
if (!buf) {
done += size;
continue;
@@ -2375,7 +2395,7 @@ static void inet_forward_change(struct net *net)
if (on)
dev_disable_lro(dev);
- in_dev = __in_dev_get_rtnl(dev);
+ in_dev = __in_dev_get_rtnl_net(dev);
if (in_dev) {
IN_DEV_CONF_SET(in_dev, FORWARDING, on);
inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
@@ -2466,7 +2486,7 @@ static int devinet_sysctl_forward(const struct ctl_table *ctl, int write,
if (write && *valp != val) {
if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
- if (!rtnl_trylock()) {
+ if (!rtnl_net_trylock(net)) {
/* Restore the original values before restarting */
*valp = val;
*ppos = pos;
@@ -2485,7 +2505,7 @@ static int devinet_sysctl_forward(const struct ctl_table *ctl, int write,
idev->dev->ifindex,
cnf);
}
- rtnl_unlock();
+ rtnl_net_unlock(net);
rt_cache_flush(net);
} else
inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
@@ -2678,14 +2698,21 @@ static struct ctl_table ctl_forward_entry[] = {
static __net_init int devinet_init_net(struct net *net)
{
- int err;
- struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
- struct ctl_table *tbl;
struct ctl_table_header *forw_hdr;
+ struct ctl_table *tbl;
#endif
+ struct ipv4_devconf *all, *dflt;
+ int err;
+ int i;
err = -ENOMEM;
+ net->ipv4.inet_addr_lst = kmalloc_array(IN4_ADDR_HSIZE,
+ sizeof(struct hlist_head),
+ GFP_KERNEL);
+ if (!net->ipv4.inet_addr_lst)
+ goto err_alloc_hash;
+
all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
if (!all)
goto err_alloc_all;
@@ -2746,6 +2773,11 @@ static __net_init int devinet_init_net(struct net *net)
net->ipv4.forw_hdr = forw_hdr;
#endif
+ for (i = 0; i < IN4_ADDR_HSIZE; i++)
+ INIT_HLIST_HEAD(&net->ipv4.inet_addr_lst[i]);
+
+ INIT_DEFERRABLE_WORK(&net->ipv4.addr_chk_work, check_lifetime);
+
net->ipv4.devconf_all = all;
net->ipv4.devconf_dflt = dflt;
return 0;
@@ -2763,6 +2795,8 @@ err_alloc_ctl:
err_alloc_dflt:
kfree(all);
err_alloc_all:
+ kfree(net->ipv4.inet_addr_lst);
+err_alloc_hash:
return err;
}
@@ -2770,7 +2804,11 @@ static __net_exit void devinet_exit_net(struct net *net)
{
#ifdef CONFIG_SYSCTL
const struct ctl_table *tbl;
+#endif
+ cancel_delayed_work_sync(&net->ipv4.addr_chk_work);
+
+#ifdef CONFIG_SYSCTL
tbl = net->ipv4.forw_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->ipv4.forw_hdr);
__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
@@ -2781,6 +2819,7 @@ static __net_exit void devinet_exit_net(struct net *net)
#endif
kfree(net->ipv4.devconf_dflt);
kfree(net->ipv4.devconf_all);
+ kfree(net->ipv4.inet_addr_lst);
}
static __net_initdata struct pernet_operations devinet_ops = {
@@ -2796,25 +2835,25 @@ static struct rtnl_af_ops inet_af_ops __read_mostly = {
.set_link_af = inet_set_link_af,
};
+static const struct rtnl_msg_handler devinet_rtnl_msg_handlers[] __initconst = {
+ {.protocol = PF_INET, .msgtype = RTM_NEWADDR, .doit = inet_rtm_newaddr,
+ .flags = RTNL_FLAG_DOIT_PERNET},
+ {.protocol = PF_INET, .msgtype = RTM_DELADDR, .doit = inet_rtm_deladdr,
+ .flags = RTNL_FLAG_DOIT_PERNET},
+ {.protocol = PF_INET, .msgtype = RTM_GETADDR, .dumpit = inet_dump_ifaddr,
+ .flags = RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE},
+ {.protocol = PF_INET, .msgtype = RTM_GETNETCONF,
+ .doit = inet_netconf_get_devconf, .dumpit = inet_netconf_dump_devconf,
+ .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
+};
+
void __init devinet_init(void)
{
- int i;
-
- for (i = 0; i < IN4_ADDR_HSIZE; i++)
- INIT_HLIST_HEAD(&inet_addr_lst[i]);
-
register_pernet_subsys(&devinet_ops);
register_netdevice_notifier(&ip_netdev_notifier);
- queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
-
- rtnl_af_register(&inet_af_ops);
+ if (rtnl_af_register(&inet_af_ops))
+ panic("Unable to register inet_af_ops\n");
- rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
- rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
- rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr,
- RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE);
- rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
- inet_netconf_dump_devconf,
- RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED);
+ rtnl_register_many(devinet_rtnl_msg_handlers);
}
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 80c4ea0e12f4..e0d94270da28 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -53,9 +53,9 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
if (sp->len == XFRM_MAX_DEPTH)
goto out_reset;
- x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
- (xfrm_address_t *)&ip_hdr(skb)->daddr,
- spi, IPPROTO_ESP, AF_INET);
+ x = xfrm_input_state_lookup(dev_net(skb->dev), skb->mark,
+ (xfrm_address_t *)&ip_hdr(skb)->daddr,
+ spi, IPPROTO_ESP, AF_INET);
if (unlikely(x && x->dir && x->dir != XFRM_SA_DIR_IN)) {
/* non-offload path will record the error and audit log */
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 793e6781399a..272e42d81323 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -293,7 +293,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
.flowi4_iif = LOOPBACK_IFINDEX,
.flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev),
.daddr = ip_hdr(skb)->saddr,
- .flowi4_tos = ip_hdr(skb)->tos & INET_DSCP_MASK,
+ .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip_hdr(skb))),
.flowi4_scope = scope,
.flowi4_mark = vmark ? skb->mark : 0,
};
@@ -342,10 +342,11 @@ EXPORT_SYMBOL_GPL(fib_info_nh_uses_dev);
* called with rcu_read_lock()
*/
static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
- u8 tos, int oif, struct net_device *dev,
+ dscp_t dscp, int oif, struct net_device *dev,
int rpf, struct in_device *idev, u32 *itag)
{
struct net *net = dev_net(dev);
+ enum skb_drop_reason reason;
struct flow_keys flkeys;
int ret, no_addr;
struct fib_result res;
@@ -357,7 +358,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
fl4.daddr = src;
fl4.saddr = dst;
- fl4.flowi4_tos = tos;
+ fl4.flowi4_tos = inet_dscp_to_dsfield(dscp);
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
fl4.flowi4_tun_key.tun_id = 0;
fl4.flowi4_flags = 0;
@@ -377,9 +378,15 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
if (fib_lookup(net, &fl4, &res, 0))
goto last_resort;
- if (res.type != RTN_UNICAST &&
- (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev)))
- goto e_inval;
+ if (res.type != RTN_UNICAST) {
+ if (res.type != RTN_LOCAL) {
+ reason = SKB_DROP_REASON_IP_INVALID_SOURCE;
+ goto e_inval;
+ } else if (!IN_DEV_ACCEPT_LOCAL(idev)) {
+ reason = SKB_DROP_REASON_IP_LOCAL_SOURCE;
+ goto e_inval;
+ }
+ }
fib_combine_itag(itag, &res);
dev_match = fib_info_nh_uses_dev(res.fi, dev);
@@ -412,14 +419,14 @@ last_resort:
return 0;
e_inval:
- return -EINVAL;
+ return -reason;
e_rpf:
- return -EXDEV;
+ return -SKB_DROP_REASON_IP_RPFILTER;
}
/* Ignore rp_filter for packets protected by IPsec. */
int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
- u8 tos, int oif, struct net_device *dev,
+ dscp_t dscp, int oif, struct net_device *dev,
struct in_device *idev, u32 *itag)
{
int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev);
@@ -440,7 +447,7 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
* and the same host but different containers are not.
*/
if (inet_lookup_ifaddr_rcu(net, src))
- return -EINVAL;
+ return -SKB_DROP_REASON_IP_LOCAL_SOURCE;
ok:
*itag = 0;
@@ -448,7 +455,8 @@ ok:
}
full_check:
- return __fib_validate_source(skb, src, dst, tos, oif, dev, r, idev, itag);
+ return __fib_validate_source(skb, src, dst, dscp, oif, dev, r, idev,
+ itag);
}
static inline __be32 sk_extract_addr(struct sockaddr *addr)
@@ -1648,6 +1656,15 @@ static struct pernet_operations fib_net_ops = {
.exit_batch = fib_net_exit_batch,
};
+static const struct rtnl_msg_handler fib_rtnl_msg_handlers[] __initconst = {
+ {.protocol = PF_INET, .msgtype = RTM_NEWROUTE,
+ .doit = inet_rtm_newroute},
+ {.protocol = PF_INET, .msgtype = RTM_DELROUTE,
+ .doit = inet_rtm_delroute},
+ {.protocol = PF_INET, .msgtype = RTM_GETROUTE, .dumpit = inet_dump_fib,
+ .flags = RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE},
+};
+
void __init ip_fib_init(void)
{
fib_trie_init();
@@ -1657,8 +1674,5 @@ void __init ip_fib_init(void)
register_netdevice_notifier(&fib_netdev_notifier);
register_inetaddr_notifier(&fib_inetaddr_notifier);
- rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, 0);
- rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, 0);
- rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib,
- RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE);
+ rtnl_register_many(fib_rtnl_msg_handlers);
}
diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c
index 0e23ade74493..b1551c26554b 100644
--- a/net/ipv4/fib_notifier.c
+++ b/net/ipv4/fib_notifier.c
@@ -22,15 +22,15 @@ int call_fib4_notifiers(struct net *net, enum fib_event_type event_type,
ASSERT_RTNL();
info->family = AF_INET;
- net->ipv4.fib_seq++;
+ /* Paired with READ_ONCE() in fib4_seq_read() */
+ WRITE_ONCE(net->ipv4.fib_seq, net->ipv4.fib_seq + 1);
return call_fib_notifiers(net, event_type, info);
}
-static unsigned int fib4_seq_read(struct net *net)
+static unsigned int fib4_seq_read(const struct net *net)
{
- ASSERT_RTNL();
-
- return net->ipv4.fib_seq + fib4_rules_seq_read(net);
+ /* Paired with WRITE_ONCE() in call_fib4_notifiers() */
+ return READ_ONCE(net->ipv4.fib_seq) + fib4_rules_seq_read(net);
}
static int fib4_dump(struct net *net, struct notifier_block *nb,
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index b07292d50ee7..8325224ef072 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -74,7 +74,7 @@ int fib4_rules_dump(struct net *net, struct notifier_block *nb,
return fib_rules_dump(net, nb, AF_INET, extack);
}
-unsigned int fib4_rules_seq_read(struct net *net)
+unsigned int fib4_rules_seq_read(const struct net *net)
{
return fib_rules_seq_read(net, AF_INET);
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index ba2df3d2ac15..d2cee5c314f5 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -50,17 +50,12 @@
#include "fib_lookup.h"
-static DEFINE_SPINLOCK(fib_info_lock);
static struct hlist_head *fib_info_hash;
static struct hlist_head *fib_info_laddrhash;
static unsigned int fib_info_hash_size;
static unsigned int fib_info_hash_bits;
static unsigned int fib_info_cnt;
-#define DEVINDEX_HASHBITS 8
-#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
-static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
-
/* for_nexthops and change_nexthops only used when nexthop object
* is not set in a fib_info. The logic within can reference fib_nh.
*/
@@ -260,12 +255,11 @@ EXPORT_SYMBOL_GPL(free_fib_info);
void fib_release_info(struct fib_info *fi)
{
- spin_lock_bh(&fib_info_lock);
+ ASSERT_RTNL();
if (fi && refcount_dec_and_test(&fi->fib_treeref)) {
hlist_del(&fi->fib_hash);
- /* Paired with READ_ONCE() in fib_create_info(). */
- WRITE_ONCE(fib_info_cnt, fib_info_cnt - 1);
+ fib_info_cnt--;
if (fi->fib_prefsrc)
hlist_del(&fi->fib_lhash);
@@ -275,14 +269,13 @@ void fib_release_info(struct fib_info *fi)
change_nexthops(fi) {
if (!nexthop_nh->fib_nh_dev)
continue;
- hlist_del(&nexthop_nh->nh_hash);
+ hlist_del_rcu(&nexthop_nh->nh_hash);
} endfor_nexthops(fi)
}
/* Paired with READ_ONCE() from fib_table_lookup() */
WRITE_ONCE(fi->fib_dead, 1);
fib_info_put(fi);
}
- spin_unlock_bh(&fib_info_lock);
}
static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi)
@@ -322,17 +315,9 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi)
return 0;
}
-static inline unsigned int fib_devindex_hashfn(unsigned int val)
-{
- return hash_32(val, DEVINDEX_HASHBITS);
-}
-
-static struct hlist_head *
-fib_info_devhash_bucket(const struct net_device *dev)
+static struct hlist_head *fib_nh_head(struct net_device *dev)
{
- u32 val = net_hash_mix(dev_net(dev)) ^ dev->ifindex;
-
- return &fib_info_devhash[fib_devindex_hashfn(val)];
+ return &dev->fib_nh_head;
}
static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
@@ -347,11 +332,10 @@ static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
return val;
}
-static unsigned int fib_info_hashfn_result(unsigned int val)
+static unsigned int fib_info_hashfn_result(const struct net *net,
+ unsigned int val)
{
- unsigned int mask = (fib_info_hash_size - 1);
-
- return (val ^ (val >> 7) ^ (val >> 12)) & mask;
+ return hash_32(val ^ net_hash_mix(net), fib_info_hash_bits);
}
static inline unsigned int fib_info_hashfn(struct fib_info *fi)
@@ -363,14 +347,14 @@ static inline unsigned int fib_info_hashfn(struct fib_info *fi)
fi->fib_priority);
if (fi->nh) {
- val ^= fib_devindex_hashfn(fi->nh->id);
+ val ^= fi->nh->id;
} else {
for_nexthops(fi) {
- val ^= fib_devindex_hashfn(nh->fib_nh_oif);
+ val ^= nh->fib_nh_oif;
} endfor_nexthops(fi)
}
- return fib_info_hashfn_result(val);
+ return fib_info_hashfn_result(fi->fib_net, val);
}
/* no metrics, only nexthop id */
@@ -381,11 +365,11 @@ static struct fib_info *fib_find_info_nh(struct net *net,
struct fib_info *fi;
unsigned int hash;
- hash = fib_info_hashfn_1(fib_devindex_hashfn(cfg->fc_nh_id),
+ hash = fib_info_hashfn_1(cfg->fc_nh_id,
cfg->fc_protocol, cfg->fc_scope,
(__force u32)cfg->fc_prefsrc,
cfg->fc_priority);
- hash = fib_info_hashfn_result(hash);
+ hash = fib_info_hashfn_result(net, hash);
head = &fib_info_hash[hash];
hlist_for_each_entry(fi, head, fib_hash) {
@@ -437,28 +421,23 @@ static struct fib_info *fib_find_info(struct fib_info *nfi)
}
/* Check, that the gateway is already configured.
- * Used only by redirect accept routine.
+ * Used only by redirect accept routine, under rcu_read_lock();
*/
int ip_fib_check_default(__be32 gw, struct net_device *dev)
{
struct hlist_head *head;
struct fib_nh *nh;
- spin_lock(&fib_info_lock);
-
- head = fib_info_devhash_bucket(dev);
+ head = fib_nh_head(dev);
- hlist_for_each_entry(nh, head, nh_hash) {
- if (nh->fib_nh_dev == dev &&
- nh->fib_nh_gw4 == gw &&
+ hlist_for_each_entry_rcu(nh, head, nh_hash) {
+ DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
+ if (nh->fib_nh_gw4 == gw &&
!(nh->fib_nh_flags & RTNH_F_DEAD)) {
- spin_unlock(&fib_info_lock);
return 0;
}
}
- spin_unlock(&fib_info_lock);
-
return -1;
}
@@ -1277,7 +1256,7 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
unsigned int old_size = fib_info_hash_size;
unsigned int i;
- spin_lock_bh(&fib_info_lock);
+ ASSERT_RTNL();
old_info_hash = fib_info_hash;
old_laddrhash = fib_info_laddrhash;
fib_info_hash_size = new_size;
@@ -1314,8 +1293,6 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
}
}
- spin_unlock_bh(&fib_info_lock);
-
kvfree(old_info_hash);
kvfree(old_laddrhash);
}
@@ -1391,6 +1368,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
int nhs = 1;
struct net *net = cfg->fc_nlinfo.nl_net;
+ ASSERT_RTNL();
if (cfg->fc_type > RTN_MAX)
goto err_inval;
@@ -1433,8 +1411,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
err = -ENOBUFS;
- /* Paired with WRITE_ONCE() in fib_release_info() */
- if (READ_ONCE(fib_info_cnt) >= fib_info_hash_size) {
+ if (fib_info_cnt >= fib_info_hash_size) {
unsigned int new_size = fib_info_hash_size << 1;
struct hlist_head *new_info_hash;
struct hlist_head *new_laddrhash;
@@ -1593,7 +1570,7 @@ link_it:
refcount_set(&fi->fib_treeref, 1);
refcount_set(&fi->fib_clntref, 1);
- spin_lock_bh(&fib_info_lock);
+
fib_info_cnt++;
hlist_add_head(&fi->fib_hash,
&fib_info_hash[fib_info_hashfn(fi)]);
@@ -1611,11 +1588,10 @@ link_it:
if (!nexthop_nh->fib_nh_dev)
continue;
- head = fib_info_devhash_bucket(nexthop_nh->fib_nh_dev);
- hlist_add_head(&nexthop_nh->nh_hash, head);
+ head = fib_nh_head(nexthop_nh->fib_nh_dev);
+ hlist_add_head_rcu(&nexthop_nh->nh_hash, head);
} endfor_nexthops(fi)
}
- spin_unlock_bh(&fib_info_lock);
return fi;
err_inval:
@@ -1965,12 +1941,12 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)
void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
{
- struct hlist_head *head = fib_info_devhash_bucket(dev);
+ struct hlist_head *head = fib_nh_head(dev);
struct fib_nh *nh;
hlist_for_each_entry(nh, head, nh_hash) {
- if (nh->fib_nh_dev == dev)
- fib_nhc_update_mtu(&nh->nh_common, dev->mtu, orig_mtu);
+ DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
+ fib_nhc_update_mtu(&nh->nh_common, dev->mtu, orig_mtu);
}
}
@@ -1984,7 +1960,7 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
*/
int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
{
- struct hlist_head *head = fib_info_devhash_bucket(dev);
+ struct hlist_head *head = fib_nh_head(dev);
struct fib_info *prev_fi = NULL;
int scope = RT_SCOPE_NOWHERE;
struct fib_nh *nh;
@@ -1998,7 +1974,8 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
int dead;
BUG_ON(!fi->fib_nhs);
- if (nh->fib_nh_dev != dev || fi == prev_fi)
+ DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
+ if (fi == prev_fi)
continue;
prev_fi = fi;
dead = 0;
@@ -2148,7 +2125,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
}
prev_fi = NULL;
- head = fib_info_devhash_bucket(dev);
+ head = fib_nh_head(dev);
ret = 0;
hlist_for_each_entry(nh, head, nh_hash) {
@@ -2156,7 +2133,8 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
int alive;
BUG_ON(!fi->fib_nhs);
- if (nh->fib_nh_dev != dev || fi == prev_fi)
+ DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
+ if (fi == prev_fi)
continue;
prev_fi = fi;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 09e31757e96c..161f5526b86c 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -292,15 +292,9 @@ static const int inflate_threshold = 50;
static const int halve_threshold_root = 15;
static const int inflate_threshold_root = 30;
-static void __alias_free_mem(struct rcu_head *head)
-{
- struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
- kmem_cache_free(fn_alias_kmem, fa);
-}
-
static inline void alias_free_mem_rcu(struct fib_alias *fa)
{
- call_rcu(&fa->rcu, __alias_free_mem);
+ kfree_rcu(fa, rcu);
}
#define TNODE_VMALLOC_MAX \
diff --git a/net/ipv4/fou_nl.c b/net/ipv4/fou_nl.c
index 98b90107b5ab..3d9614609b2d 100644
--- a/net/ipv4/fou_nl.c
+++ b/net/ipv4/fou_nl.c
@@ -12,7 +12,7 @@
/* Global operation policy for fou */
const struct nla_policy fou_nl_policy[FOU_ATTR_IFINDEX + 1] = {
- [FOU_ATTR_PORT] = { .type = NLA_U16, },
+ [FOU_ATTR_PORT] = { .type = NLA_BE16, },
[FOU_ATTR_AF] = { .type = NLA_U8, },
[FOU_ATTR_IPPROTO] = { .type = NLA_U8, },
[FOU_ATTR_TYPE] = { .type = NLA_U8, },
@@ -21,7 +21,7 @@ const struct nla_policy fou_nl_policy[FOU_ATTR_IFINDEX + 1] = {
[FOU_ATTR_LOCAL_V6] = { .len = 16, },
[FOU_ATTR_PEER_V4] = { .type = NLA_U32, },
[FOU_ATTR_PEER_V6] = { .len = 16, },
- [FOU_ATTR_PEER_PORT] = { .type = NLA_U16, },
+ [FOU_ATTR_PEER_PORT] = { .type = NLA_BE16, },
[FOU_ATTR_IFINDEX] = { .type = NLA_S32, },
};
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index e1384e7331d8..4f088fa1c2f2 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -445,7 +445,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
fl4.saddr = saddr;
fl4.flowi4_mark = mark;
fl4.flowi4_uid = sock_net_uid(net, NULL);
- fl4.flowi4_tos = ip_hdr(skb)->tos & INET_DSCP_MASK;
+ fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip_hdr(skb)));
fl4.flowi4_proto = IPPROTO_ICMP;
fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev);
security_skb_classify_flow(skb, flowi4_to_flowi_common(&fl4));
@@ -478,13 +478,11 @@ static struct net_device *icmp_get_route_lookup_dev(struct sk_buff *skb)
return route_lookup_dev;
}
-static struct rtable *icmp_route_lookup(struct net *net,
- struct flowi4 *fl4,
+static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4,
struct sk_buff *skb_in,
- const struct iphdr *iph,
- __be32 saddr, u8 tos, u32 mark,
- int type, int code,
- struct icmp_bxm *param)
+ const struct iphdr *iph, __be32 saddr,
+ dscp_t dscp, u32 mark, int type,
+ int code, struct icmp_bxm *param)
{
struct net_device *route_lookup_dev;
struct dst_entry *dst, *dst2;
@@ -498,7 +496,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
fl4->saddr = saddr;
fl4->flowi4_mark = mark;
fl4->flowi4_uid = sock_net_uid(net, NULL);
- fl4->flowi4_tos = tos & INET_DSCP_MASK;
+ fl4->flowi4_tos = inet_dscp_to_dsfield(dscp);
fl4->flowi4_proto = IPPROTO_ICMP;
fl4->fl4_icmp_type = type;
fl4->fl4_icmp_code = code;
@@ -547,7 +545,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
orefdst = skb_in->_skb_refdst; /* save old refdst */
skb_dst_set(skb_in, NULL);
err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr,
- tos, rt2->dst.dev);
+ dscp, rt2->dst.dev) ? -EINVAL : 0;
dst_release(&rt2->dst);
rt2 = skb_rtable(skb_in);
@@ -741,8 +739,9 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
ipc.opt = &icmp_param.replyopts.opt;
ipc.sockc.mark = mark;
- rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark,
- type, code, &icmp_param);
+ rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr,
+ inet_dsfield_to_dscp(tos), mark, type, code,
+ &icmp_param);
if (IS_ERR(rt))
goto out_unlock;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 9bf09de6a2e7..6a238398acc9 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1437,16 +1437,32 @@ static void ip_mc_hash_remove(struct in_device *in_dev,
static void ____ip_mc_inc_group(struct in_device *in_dev, __be32 addr,
unsigned int mode, gfp_t gfp)
{
+ struct ip_mc_list __rcu **mc_hash;
struct ip_mc_list *im;
ASSERT_RTNL();
- for_each_pmc_rtnl(in_dev, im) {
- if (im->multiaddr == addr) {
- im->users++;
- ip_mc_add_src(in_dev, &addr, mode, 0, NULL, 0);
- goto out;
+ mc_hash = rtnl_dereference(in_dev->mc_hash);
+ if (mc_hash) {
+ u32 hash = hash_32((__force u32)addr, MC_HASH_SZ_LOG);
+
+ for (im = rtnl_dereference(mc_hash[hash]);
+ im;
+ im = rtnl_dereference(im->next_hash)) {
+ if (im->multiaddr == addr)
+ break;
}
+ } else {
+ for_each_pmc_rtnl(in_dev, im) {
+ if (im->multiaddr == addr)
+ break;
+ }
+ }
+
+ if (im) {
+ im->users++;
+ ip_mc_add_src(in_dev, &addr, mode, 0, NULL, 0);
+ goto out;
}
im = kzalloc(sizeof(*im), gfp);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 2b698f8419fe..6872b5aff73e 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -775,7 +775,8 @@ void inet_csk_clear_xmit_timers(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
- icsk->icsk_pending = icsk->icsk_ack.pending = 0;
+ smp_store_release(&icsk->icsk_pending, 0);
+ smp_store_release(&icsk->icsk_ack.pending, 0);
sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
sk_stop_timer(sk, &icsk->icsk_delack_timer);
@@ -790,7 +791,8 @@ void inet_csk_clear_xmit_timers_sync(struct sock *sk)
/* ongoing timer handlers need to acquire socket lock. */
sock_not_owned_by_me(sk);
- icsk->icsk_pending = icsk->icsk_ack.pending = 0;
+ smp_store_release(&icsk->icsk_pending, 0);
+ smp_store_release(&icsk->icsk_ack.pending, 0);
sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer);
sk_stop_timer_sync(sk, &icsk->icsk_delack_timer);
@@ -1189,7 +1191,7 @@ no_ownership:
drop:
__inet_csk_reqsk_queue_drop(sk_listener, oreq, true);
- reqsk_put(req);
+ reqsk_put(oreq);
}
static bool reqsk_queue_hash_req(struct request_sock *req,
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 67639309163d..321acc8abf17 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -247,6 +247,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
struct nlmsghdr *nlh;
struct nlattr *attr;
void *info = NULL;
+ u8 icsk_pending;
int protocol;
cb_data = cb->data;
@@ -307,14 +308,15 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
goto out;
}
- if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
- icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
- icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
+ icsk_pending = smp_load_acquire(&icsk->icsk_pending);
+ if (icsk_pending == ICSK_TIME_RETRANS ||
+ icsk_pending == ICSK_TIME_REO_TIMEOUT ||
+ icsk_pending == ICSK_TIME_LOSS_PROBE) {
r->idiag_timer = 1;
r->idiag_retrans = icsk->icsk_retransmits;
r->idiag_expires =
jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies);
- } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
+ } else if (icsk_pending == ICSK_TIME_PROBE0) {
r->idiag_timer = 4;
r->idiag_retrans = icsk->icsk_probes_out;
r->idiag_expires =
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 5bd759963451..5ab56f4cb529 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -128,11 +128,6 @@ static struct inet_peer *lookup(const struct inetpeer_addr *daddr,
return NULL;
}
-static void inetpeer_free_rcu(struct rcu_head *head)
-{
- kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu));
-}
-
/* perform garbage collect on all items stacked during a lookup */
static void inet_peer_gc(struct inet_peer_base *base,
struct inet_peer *gc_stack[],
@@ -168,7 +163,7 @@ static void inet_peer_gc(struct inet_peer_base *base,
if (p) {
rb_erase(&p->rb_node, &base->rb_root);
base->total--;
- call_rcu(&p->rcu, inetpeer_free_rcu);
+ kfree_rcu(p, rcu);
}
}
}
@@ -242,7 +237,7 @@ void inet_putpeer(struct inet_peer *p)
WRITE_ONCE(p->dtime, (__u32)jiffies);
if (refcount_dec_and_test(&p->refcnt))
- call_rcu(&p->rcu, inetpeer_free_rcu);
+ kfree_rcu(p, rcu);
}
EXPORT_SYMBOL_GPL(inet_putpeer);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index a92664a5ef2e..07036a2943c1 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -132,12 +132,12 @@ static bool frag_expire_skip_icmp(u32 user)
*/
static void ip_expire(struct timer_list *t)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_FRAG_REASM_TIMEOUT;
struct inet_frag_queue *frag = from_timer(frag, t, timer);
const struct iphdr *iph;
struct sk_buff *head = NULL;
struct net *net;
struct ipq *qp;
- int err;
qp = container_of(frag, struct ipq, q);
net = qp->q.fqdir->net;
@@ -175,14 +175,15 @@ static void ip_expire(struct timer_list *t)
/* skb has no dst, perform route lookup again */
iph = ip_hdr(head);
- err = ip_route_input_noref(head, iph->daddr, iph->saddr,
- iph->tos, head->dev);
- if (err)
+ reason = ip_route_input_noref(head, iph->daddr, iph->saddr,
+ ip4h_dscp(iph), head->dev);
+ if (reason)
goto out;
/* Only an end host needs to send an ICMP
* "Fragment Reassembly Timeout" message, per RFC792.
*/
+ reason = SKB_DROP_REASON_FRAG_REASM_TIMEOUT;
if (frag_expire_skip_icmp(qp->q.key.v4.user) &&
(skb_rtable(head)->rt_type != RTN_LOCAL))
goto out;
@@ -195,7 +196,7 @@ out:
spin_unlock(&qp->q.lock);
out_rcu_unlock:
rcu_read_unlock();
- kfree_skb_reason(head, SKB_DROP_REASON_FRAG_REASM_TIMEOUT);
+ kfree_skb_reason(head, reason);
ipq_put(qp);
}
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index b6e7d4921309..f0a4dda246ab 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -322,15 +322,14 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
int err, drop_reason;
struct rtable *rt;
- drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
-
if (ip_can_use_hint(skb, iph, hint)) {
- err = ip_route_use_hint(skb, iph->daddr, iph->saddr, iph->tos,
- dev, hint);
- if (unlikely(err))
+ drop_reason = ip_route_use_hint(skb, iph->daddr, iph->saddr,
+ ip4h_dscp(iph), dev, hint);
+ if (unlikely(drop_reason))
goto drop_error;
}
+ drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
!skb_dst(skb) &&
!skb->sk &&
@@ -362,10 +361,11 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
* how the packet travels inside Linux networking.
*/
if (!skb_valid_dst(skb)) {
- err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
- iph->tos, dev);
- if (unlikely(err))
+ drop_reason = ip_route_input_noref(skb, iph->daddr, iph->saddr,
+ ip4h_dscp(iph), dev);
+ if (unlikely(drop_reason))
goto drop_error;
+ drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
} else {
struct in_device *in_dev = __in_dev_get_rcu(dev);
@@ -425,10 +425,8 @@ drop:
return NET_RX_DROP;
drop_error:
- if (err == -EXDEV) {
- drop_reason = SKB_DROP_REASON_IP_RPFILTER;
+ if (drop_reason == SKB_DROP_REASON_IP_RPFILTER)
__NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
- }
goto drop;
}
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 68aedb8877b9..e3321932bec0 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -617,7 +617,8 @@ int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev)
orefdst = skb->_skb_refdst;
skb_dst_set(skb, NULL);
- err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, dev);
+ err = ip_route_input(skb, nexthop, iph->saddr, ip4h_dscp(iph),
+ dev) ? -EINVAL : 0;
rt2 = skb_rtable(skb);
if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) {
skb_dst_drop(skb);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 49811c9281d4..0065b1996c94 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -973,7 +973,7 @@ static int __ip_append_data(struct sock *sk,
unsigned int maxfraglen, fragheaderlen, maxnonfragsize;
int csummode = CHECKSUM_NONE;
struct rtable *rt = dst_rtable(cork->dst);
- bool paged, hold_tskey, extra_uref = false;
+ bool paged, hold_tskey = false, extra_uref = false;
unsigned int wmem_alloc_delta = 0;
u32 tskey = 0;
@@ -1049,10 +1049,15 @@ static int __ip_append_data(struct sock *sk,
cork->length += length;
- hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP &&
- READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID;
- if (hold_tskey)
- tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+ if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
+ READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
+ if (cork->flags & IPCORK_TS_OPT_ID) {
+ tskey = cork->ts_opt_id;
+ } else {
+ tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+ hold_tskey = true;
+ }
+ }
/* So, what's going on in the loop below?
*
@@ -1326,7 +1331,11 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
cork->priority = ipc->priority;
cork->transmit_time = ipc->sockc.transmit_time;
cork->tx_flags = 0;
- sock_tx_timestamp(sk, ipc->sockc.tsflags, &cork->tx_flags);
+ sock_tx_timestamp(sk, &ipc->sockc, &cork->tx_flags);
+ if (ipc->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) {
+ cork->flags |= IPCORK_TS_OPT_ID;
+ cork->ts_opt_id = ipc->sockc.ts_opt_id;
+ }
return 0;
}
@@ -1587,7 +1596,8 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset,
* Generic function to send a packet as reply to another packet.
* Used to send some TCP resets/acks so far.
*/
-void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
+void ip_send_unicast_reply(struct sock *sk, const struct sock *orig_sk,
+ struct sk_buff *skb,
const struct ip_options *sopt,
__be32 daddr, __be32 saddr,
const struct ip_reply_arg *arg,
@@ -1653,6 +1663,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
arg->csumoffset) = csum_fold(csum_add(nskb->csum,
arg->csum));
nskb->ip_summed = CHECKSUM_NONE;
+ if (orig_sk)
+ skb_set_owner_edemux(nskb, (struct sock *)orig_sk);
if (transmit_time)
nskb->tstamp_type = SKB_CLOCK_MONOTONIC;
if (txhash)
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 089864c6a35e..c5b8ec5c0a8c 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -120,6 +120,11 @@ static void ipmr_expire_process(struct timer_list *t);
lockdep_rtnl_is_held() || \
list_empty(&net->ipv4.mr_tables))
+static bool ipmr_can_free_table(struct net *net)
+{
+ return !check_net(net) || !net->ipv4.mr_rules_ops;
+}
+
static struct mr_table *ipmr_mr_table_iter(struct net *net,
struct mr_table *mrt)
{
@@ -137,7 +142,7 @@ static struct mr_table *ipmr_mr_table_iter(struct net *net,
return ret;
}
-static struct mr_table *ipmr_get_table(struct net *net, u32 id)
+static struct mr_table *__ipmr_get_table(struct net *net, u32 id)
{
struct mr_table *mrt;
@@ -148,6 +153,16 @@ static struct mr_table *ipmr_get_table(struct net *net, u32 id)
return NULL;
}
+static struct mr_table *ipmr_get_table(struct net *net, u32 id)
+{
+ struct mr_table *mrt;
+
+ rcu_read_lock();
+ mrt = __ipmr_get_table(net, id);
+ rcu_read_unlock();
+ return mrt;
+}
+
static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
struct mr_table **mrt)
{
@@ -189,7 +204,7 @@ static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
arg->table = fib_rule_get_table(rule, arg);
- mrt = ipmr_get_table(rule->fr_net, arg->table);
+ mrt = __ipmr_get_table(rule->fr_net, arg->table);
if (!mrt)
return -EAGAIN;
res->mrt = mrt;
@@ -288,7 +303,7 @@ static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR, extack);
}
-static unsigned int ipmr_rules_seq_read(struct net *net)
+static unsigned int ipmr_rules_seq_read(const struct net *net)
{
return fib_rules_seq_read(net, RTNL_FAMILY_IPMR);
}
@@ -302,6 +317,11 @@ EXPORT_SYMBOL(ipmr_rule_default);
#define ipmr_for_each_table(mrt, net) \
for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
+static bool ipmr_can_free_table(struct net *net)
+{
+ return !check_net(net);
+}
+
static struct mr_table *ipmr_mr_table_iter(struct net *net,
struct mr_table *mrt)
{
@@ -315,6 +335,8 @@ static struct mr_table *ipmr_get_table(struct net *net, u32 id)
return net->ipv4.mrt;
}
+#define __ipmr_get_table ipmr_get_table
+
static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
struct mr_table **mrt)
{
@@ -346,7 +368,7 @@ static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
return 0;
}
-static unsigned int ipmr_rules_seq_read(struct net *net)
+static unsigned int ipmr_rules_seq_read(const struct net *net)
{
return 0;
}
@@ -403,7 +425,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
if (id != RT_TABLE_DEFAULT && id >= 1000000000)
return ERR_PTR(-EINVAL);
- mrt = ipmr_get_table(net, id);
+ mrt = __ipmr_get_table(net, id);
if (mrt)
return mrt;
@@ -413,6 +435,10 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
static void ipmr_free_table(struct mr_table *mrt)
{
+ struct net *net = read_pnet(&mrt->net);
+
+ WARN_ON_ONCE(!ipmr_can_free_table(net));
+
timer_shutdown_sync(&mrt->ipmr_expire_timer);
mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC |
MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC);
@@ -1374,7 +1400,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
goto out_unlock;
}
- mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
+ mrt = __ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
if (!mrt) {
ret = -ENOENT;
goto out_unlock;
@@ -2081,7 +2107,7 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
struct flowi4 fl4 = {
.daddr = iph->daddr,
.saddr = iph->saddr,
- .flowi4_tos = iph->tos & INET_DSCP_MASK,
+ .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph)),
.flowi4_oif = (rt_is_output_route(rt) ?
skb->dev->ifindex : 0),
.flowi4_iif = (rt_is_output_route(rt) ?
@@ -2262,11 +2288,13 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
struct mr_table *mrt;
int err;
- mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
- if (!mrt)
+ rcu_read_lock();
+ mrt = __ipmr_get_table(net, RT_TABLE_DEFAULT);
+ if (!mrt) {
+ rcu_read_unlock();
return -ENOENT;
+ }
- rcu_read_lock();
cache = ipmr_cache_find(mrt, saddr, daddr);
if (!cache && skb->dev) {
int vif = ipmr_find_vif(mrt, skb->dev);
@@ -2546,11 +2574,11 @@ static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (err < 0)
goto errout;
- src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
- grp = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
- tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0;
+ src = nla_get_in_addr_default(tb[RTA_SRC], 0);
+ grp = nla_get_in_addr_default(tb[RTA_DST], 0);
+ tableid = nla_get_u32_default(tb[RTA_TABLE], 0);
- mrt = ipmr_get_table(net, tableid ? tableid : RT_TABLE_DEFAULT);
+ mrt = __ipmr_get_table(net, tableid ? tableid : RT_TABLE_DEFAULT);
if (!mrt) {
err = -ENOENT;
goto errout_free;
@@ -2604,7 +2632,7 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
if (filter.table_id) {
struct mr_table *mrt;
- mrt = ipmr_get_table(sock_net(skb->sk), filter.table_id);
+ mrt = __ipmr_get_table(sock_net(skb->sk), filter.table_id);
if (!mrt) {
if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IPMR)
return skb->len;
@@ -2712,7 +2740,7 @@ static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh,
break;
}
}
- mrt = ipmr_get_table(net, tblid);
+ mrt = __ipmr_get_table(net, tblid);
if (!mrt) {
ret = -ENOENT;
goto out;
@@ -2920,13 +2948,15 @@ static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
struct net *net = seq_file_net(seq);
struct mr_table *mrt;
- mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
- if (!mrt)
+ rcu_read_lock();
+ mrt = __ipmr_get_table(net, RT_TABLE_DEFAULT);
+ if (!mrt) {
+ rcu_read_unlock();
return ERR_PTR(-ENOENT);
+ }
iter->mrt = mrt;
- rcu_read_lock();
return mr_vif_seq_start(seq, pos);
}
@@ -3035,11 +3065,9 @@ static const struct net_protocol pim_protocol = {
};
#endif
-static unsigned int ipmr_seq_read(struct net *net)
+static unsigned int ipmr_seq_read(const struct net *net)
{
- ASSERT_RTNL();
-
- return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net);
+ return READ_ONCE(net->ipv4.ipmr_seq) + ipmr_rules_seq_read(net);
}
static int ipmr_dump(struct net *net, struct notifier_block *nb,
@@ -3139,6 +3167,17 @@ static struct pernet_operations ipmr_net_ops = {
.exit_batch = ipmr_net_exit_batch,
};
+static const struct rtnl_msg_handler ipmr_rtnl_msg_handlers[] __initconst = {
+ {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_GETLINK,
+ .dumpit = ipmr_rtm_dumplink},
+ {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_NEWROUTE,
+ .doit = ipmr_rtm_route},
+ {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_DELROUTE,
+ .doit = ipmr_rtm_route},
+ {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_GETROUTE,
+ .doit = ipmr_rtm_getroute, .dumpit = ipmr_rtm_dumproute},
+};
+
int __init ip_mr_init(void)
{
int err;
@@ -3159,15 +3198,8 @@ int __init ip_mr_init(void)
goto add_proto_fail;
}
#endif
- rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE,
- ipmr_rtm_getroute, ipmr_rtm_dumproute, 0);
- rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE,
- ipmr_rtm_route, NULL, 0);
- rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE,
- ipmr_rtm_route, NULL, 0);
-
- rtnl_register(RTNL_FAMILY_IPMR, RTM_GETLINK,
- NULL, ipmr_rtm_dumplink, 0);
+ rtnl_register_many(ipmr_rtnl_msg_handlers);
+
return 0;
#ifdef CONFIG_IP_PIMSM_V2
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index e0aab66cd925..08bc3f2c0078 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -44,7 +44,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un
*/
fl4.daddr = iph->daddr;
fl4.saddr = saddr;
- fl4.flowi4_tos = iph->tos & INET_DSCP_MASK;
+ fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph));
fl4.flowi4_oif = sk ? sk->sk_bound_dev_if : 0;
fl4.flowi4_l3mdev = l3mdev_master_ifindex(dev);
fl4.flowi4_mark = skb->mark;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 1b991b889506..ef8009281da5 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -12,7 +12,13 @@ config NF_DEFRAG_IPV4
# old sockopt interface and eval loop
config IP_NF_IPTABLES_LEGACY
- tristate
+ tristate "Legacy IP tables support"
+ default n
+ select NETFILTER_XTABLES
+ help
+ iptables is a legacy packet classifier.
+ This is not needed if you are using iptables over nftables
+ (iptables-nft).
config NF_SOCKET_IPV4
tristate "IPv4 socket lookup support"
@@ -318,7 +324,13 @@ endif # IP_NF_IPTABLES
# ARP tables
config IP_NF_ARPTABLES
- tristate
+ tristate "Legacy ARPTABLES support"
+ depends on NETFILTER_XTABLES
+ default n
+ help
+ arptables is a legacy packet classifier.
+ This is not needed if you are using arptables over nftables
+ (iptables-nft).
config NFT_COMPAT_ARP
tristate
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index 1ce7a1655b97..a27782d7653e 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -76,7 +76,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
flow.daddr = iph->saddr;
flow.saddr = rpfilter_get_saddr(iph->daddr);
flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
- flow.flowi4_tos = iph->tos & INET_DSCP_MASK;
+ flow.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph));
flow.flowi4_scope = RT_SCOPE_UNIVERSE;
flow.flowi4_l3mdev = l3mdev_master_ifindex_rcu(xt_in(par));
flow.flowi4_uid = sock_net_uid(xt_net(par), NULL);
diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c
index ec94ee1051c7..25e1e8eb18dd 100644
--- a/net/ipv4/netfilter/nf_dup_ipv4.c
+++ b/net/ipv4/netfilter/nf_dup_ipv4.c
@@ -33,7 +33,7 @@ static bool nf_dup_ipv4_route(struct net *net, struct sk_buff *skb,
fl4.flowi4_oif = oif;
fl4.daddr = gw->s_addr;
- fl4.flowi4_tos = iph->tos & INET_DSCP_MASK;
+ fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph));
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
fl4.flowi4_flags = FLOWI_FLAG_KNOWN_NH;
rt = ip_route_output_key(net, &fl4);
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index 09fff5d424ef..625adbc42037 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -11,6 +11,7 @@
#include <net/netfilter/nft_fib.h>
#include <net/inet_dscp.h>
+#include <net/ip.h>
#include <net/ip_fib.h>
#include <net/route.h>
@@ -107,7 +108,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
if (priv->flags & NFTA_FIB_F_MARK)
fl4.flowi4_mark = pkt->skb->mark;
- fl4.flowi4_tos = iph->tos & INET_DSCP_MASK;
+ fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph));
if (priv->flags & NFTA_FIB_F_DADDR) {
fl4.daddr = iph->daddr;
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 93aaea0006ba..09a3d73b45ba 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -3247,12 +3247,8 @@ static int nh_valid_get_del_req(const struct nlmsghdr *nlh,
return -EINVAL;
}
- if (op_flags) {
- if (tb[NHA_OP_FLAGS])
- *op_flags = nla_get_u32(tb[NHA_OP_FLAGS]);
- else
- *op_flags = 0;
- }
+ if (op_flags)
+ *op_flags = nla_get_u32_default(tb[NHA_OP_FLAGS], 0);
return 0;
}
@@ -3433,10 +3429,7 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh,
if (err < 0)
return err;
- if (tb[NHA_OP_FLAGS])
- filter->op_flags = nla_get_u32(tb[NHA_OP_FLAGS]);
- else
- filter->op_flags = 0;
+ filter->op_flags = nla_get_u32_default(tb[NHA_OP_FLAGS], 0);
return __nh_valid_dump_req(nlh, tb, filter, cb->extack);
}
@@ -4042,25 +4035,30 @@ static struct pernet_operations nexthop_net_ops = {
.exit_batch_rtnl = nexthop_net_exit_batch_rtnl,
};
+static const struct rtnl_msg_handler nexthop_rtnl_msg_handlers[] __initconst = {
+ {.msgtype = RTM_NEWNEXTHOP, .doit = rtm_new_nexthop},
+ {.msgtype = RTM_DELNEXTHOP, .doit = rtm_del_nexthop},
+ {.msgtype = RTM_GETNEXTHOP, .doit = rtm_get_nexthop,
+ .dumpit = rtm_dump_nexthop},
+ {.msgtype = RTM_GETNEXTHOPBUCKET, .doit = rtm_get_nexthop_bucket,
+ .dumpit = rtm_dump_nexthop_bucket},
+ {.protocol = PF_INET, .msgtype = RTM_NEWNEXTHOP,
+ .doit = rtm_new_nexthop},
+ {.protocol = PF_INET, .msgtype = RTM_GETNEXTHOP,
+ .dumpit = rtm_dump_nexthop},
+ {.protocol = PF_INET6, .msgtype = RTM_NEWNEXTHOP,
+ .doit = rtm_new_nexthop},
+ {.protocol = PF_INET6, .msgtype = RTM_GETNEXTHOP,
+ .dumpit = rtm_dump_nexthop},
+};
+
static int __init nexthop_init(void)
{
register_pernet_subsys(&nexthop_net_ops);
register_netdevice_notifier(&nh_netdev_notifier);
- rtnl_register(PF_UNSPEC, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_DELNEXTHOP, rtm_del_nexthop, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_GETNEXTHOP, rtm_get_nexthop,
- rtm_dump_nexthop, 0);
-
- rtnl_register(PF_INET, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
- rtnl_register(PF_INET, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);
-
- rtnl_register(PF_INET6, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
- rtnl_register(PF_INET6, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);
-
- rtnl_register(PF_UNSPEC, RTM_GETNEXTHOPBUCKET, rtm_get_nexthop_bucket,
- rtm_dump_nexthop_bucket, 0);
+ rtnl_register_many(nexthop_rtnl_msg_handlers);
return 0;
}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 474dfd263c8b..0e9e01967ec9 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -370,7 +370,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
skb->ip_summed = CHECKSUM_NONE;
- skb_setup_tx_timestamp(skb, sockc->tsflags);
+ skb_setup_tx_timestamp(skb, sockc);
if (flags & MSG_CONFIRM)
skb_set_dst_pending_confirm(skb, 1);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 723ac9181558..e5603e84b20d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1027,6 +1027,19 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
struct fib_nh_common *nhc;
fib_select_path(net, &res, fl4, NULL);
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ if (fib_info_num_path(res.fi) > 1) {
+ int nhsel;
+
+ for (nhsel = 0; nhsel < fib_info_num_path(res.fi); nhsel++) {
+ nhc = fib_info_nhc(res.fi, nhsel);
+ update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
+ jiffies + net->ipv4.ip_rt_mtu_expires);
+ }
+ rcu_read_unlock();
+ return;
+ }
+#endif /* CONFIG_IP_ROUTE_MULTIPATH */
nhc = FIB_RES_NHC(res);
update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
jiffies + net->ipv4.ip_rt_mtu_expires);
@@ -1263,7 +1276,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
struct flowi4 fl4 = {
.daddr = iph->daddr,
.saddr = iph->saddr,
- .flowi4_tos = iph->tos & INET_DSCP_MASK,
+ .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph)),
.flowi4_oif = rt->dst.dev->ifindex,
.flowi4_iif = skb->dev->ifindex,
.flowi4_mark = skb->mark,
@@ -1665,49 +1678,54 @@ struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt)
EXPORT_SYMBOL(rt_dst_clone);
/* called in rcu_read_lock() section */
-int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev,
- struct in_device *in_dev, u32 *itag)
+enum skb_drop_reason
+ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ dscp_t dscp, struct net_device *dev,
+ struct in_device *in_dev, u32 *itag)
{
- int err;
+ enum skb_drop_reason reason;
/* Primary sanity checks. */
if (!in_dev)
- return -EINVAL;
+ return SKB_DROP_REASON_NOT_SPECIFIED;
- if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
- skb->protocol != htons(ETH_P_IP))
- return -EINVAL;
+ if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
+ return SKB_DROP_REASON_IP_INVALID_SOURCE;
+
+ if (skb->protocol != htons(ETH_P_IP))
+ return SKB_DROP_REASON_INVALID_PROTO;
if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
- return -EINVAL;
+ return SKB_DROP_REASON_IP_LOCALNET;
if (ipv4_is_zeronet(saddr)) {
if (!ipv4_is_local_multicast(daddr) &&
ip_hdr(skb)->protocol != IPPROTO_IGMP)
- return -EINVAL;
+ return SKB_DROP_REASON_IP_INVALID_SOURCE;
} else {
- err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
- in_dev, itag);
- if (err < 0)
- return err;
+ reason = fib_validate_source_reason(skb, saddr, 0, dscp, 0,
+ dev, in_dev, itag);
+ if (reason)
+ return reason;
}
- return 0;
+ return SKB_NOT_DROPPED_YET;
}
/* called in rcu_read_lock() section */
-static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev, int our)
+static enum skb_drop_reason
+ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ dscp_t dscp, struct net_device *dev, int our)
{
struct in_device *in_dev = __in_dev_get_rcu(dev);
unsigned int flags = RTCF_MULTICAST;
+ enum skb_drop_reason reason;
struct rtable *rth;
u32 itag = 0;
- int err;
- err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag);
- if (err)
- return err;
+ reason = ip_mc_validate_source(skb, daddr, saddr, dscp, dev, in_dev,
+ &itag);
+ if (reason)
+ return reason;
if (our)
flags |= RTCF_LOCAL;
@@ -1718,7 +1736,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
false);
if (!rth)
- return -ENOBUFS;
+ return SKB_DROP_REASON_NOMEM;
#ifdef CONFIG_IP_ROUTE_CLASSID
rth->dst.tclassid = itag;
@@ -1734,7 +1752,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
skb_dst_drop(skb);
skb_dst_set(skb, &rth->dst);
- return 0;
+ return SKB_NOT_DROPPED_YET;
}
@@ -1764,11 +1782,12 @@ static void ip_handle_martian_source(struct net_device *dev,
}
/* called in rcu_read_lock() section */
-static int __mkroute_input(struct sk_buff *skb,
- const struct fib_result *res,
- struct in_device *in_dev,
- __be32 daddr, __be32 saddr, u32 tos)
+static enum skb_drop_reason
+__mkroute_input(struct sk_buff *skb, const struct fib_result *res,
+ struct in_device *in_dev, __be32 daddr,
+ __be32 saddr, dscp_t dscp)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct fib_nh_common *nhc = FIB_RES_NHC(*res);
struct net_device *dev = nhc->nhc_dev;
struct fib_nh_exception *fnhe;
@@ -1782,12 +1801,13 @@ static int __mkroute_input(struct sk_buff *skb,
out_dev = __in_dev_get_rcu(dev);
if (!out_dev) {
net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
- return -EINVAL;
+ return reason;
}
- err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
+ err = fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res),
in_dev->dev, in_dev, &itag);
if (err < 0) {
+ reason = -err;
ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
saddr);
@@ -1815,7 +1835,7 @@ static int __mkroute_input(struct sk_buff *skb,
*/
if (out_dev == in_dev &&
IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
- err = -EINVAL;
+ reason = SKB_DROP_REASON_ARP_PVLAN_DISABLE;
goto cleanup;
}
}
@@ -1838,7 +1858,7 @@ static int __mkroute_input(struct sk_buff *skb,
rth = rt_dst_alloc(out_dev->dev, 0, res->type,
IN_DEV_ORCONF(out_dev, NOXFRM));
if (!rth) {
- err = -ENOBUFS;
+ reason = SKB_DROP_REASON_NOMEM;
goto cleanup;
}
@@ -1852,9 +1872,9 @@ static int __mkroute_input(struct sk_buff *skb,
lwtunnel_set_redirect(&rth->dst);
skb_dst_set(skb, &rth->dst);
out:
- err = 0;
- cleanup:
- return err;
+ reason = SKB_NOT_DROPPED_YET;
+cleanup:
+ return reason;
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -2112,11 +2132,10 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
}
#endif /* CONFIG_IP_ROUTE_MULTIPATH */
-static int ip_mkroute_input(struct sk_buff *skb,
- struct fib_result *res,
- struct in_device *in_dev,
- __be32 daddr, __be32 saddr, u32 tos,
- struct flow_keys *hkeys)
+static enum skb_drop_reason
+ip_mkroute_input(struct sk_buff *skb, struct fib_result *res,
+ struct in_device *in_dev, __be32 daddr,
+ __be32 saddr, dscp_t dscp, struct flow_keys *hkeys)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi && fib_info_num_path(res->fi) > 1) {
@@ -2128,50 +2147,57 @@ static int ip_mkroute_input(struct sk_buff *skb,
#endif
/* create a routing cache entry */
- return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
+ return __mkroute_input(skb, res, in_dev, daddr, saddr, dscp);
}
/* Implements all the saddr-related checks as ip_route_input_slow(),
* assuming daddr is valid and the destination is not a local broadcast one.
* Uses the provided hint instead of performing a route lookup.
*/
-int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev,
- const struct sk_buff *hint)
+enum skb_drop_reason
+ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ dscp_t dscp, struct net_device *dev,
+ const struct sk_buff *hint)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct in_device *in_dev = __in_dev_get_rcu(dev);
struct rtable *rt = skb_rtable(hint);
struct net *net = dev_net(dev);
- int err = -EINVAL;
u32 tag = 0;
if (!in_dev)
- return -EINVAL;
+ return reason;
- if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
+ if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) {
+ reason = SKB_DROP_REASON_IP_INVALID_SOURCE;
goto martian_source;
+ }
- if (ipv4_is_zeronet(saddr))
+ if (ipv4_is_zeronet(saddr)) {
+ reason = SKB_DROP_REASON_IP_INVALID_SOURCE;
goto martian_source;
+ }
- if (ipv4_is_loopback(saddr) && !IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
+ if (ipv4_is_loopback(saddr) && !IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) {
+ reason = SKB_DROP_REASON_IP_LOCALNET;
goto martian_source;
+ }
if (rt->rt_type != RTN_LOCAL)
goto skip_validate_source;
- tos &= INET_DSCP_MASK;
- err = fib_validate_source(skb, saddr, daddr, tos, 0, dev, in_dev, &tag);
- if (err < 0)
+ reason = fib_validate_source_reason(skb, saddr, daddr, dscp, 0, dev,
+ in_dev, &tag);
+ if (reason)
goto martian_source;
skip_validate_source:
skb_dst_copy(skb, hint);
- return 0;
+ return SKB_NOT_DROPPED_YET;
martian_source:
ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
- return err;
+ return reason;
}
/* get device for dst_alloc with local routes */
@@ -2200,10 +2226,12 @@ static struct net_device *ip_rt_get_dev(struct net *net,
* called with rcu_read_lock()
*/
-static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev,
- struct fib_result *res)
+static enum skb_drop_reason
+ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ dscp_t dscp, struct net_device *dev,
+ struct fib_result *res)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct in_device *in_dev = __in_dev_get_rcu(dev);
struct flow_keys *flkeys = NULL, _flkeys;
struct net *net = dev_net(dev);
@@ -2231,8 +2259,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
fl4.flowi4_tun_key.tun_id = 0;
skb_dst_drop(skb);
- if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
+ if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) {
+ reason = SKB_DROP_REASON_IP_INVALID_SOURCE;
goto martian_source;
+ }
res->fi = NULL;
res->table = NULL;
@@ -2242,21 +2272,29 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
/* Accept zero addresses only to limited broadcast;
* I even do not know to fix it or not. Waiting for complains :-)
*/
- if (ipv4_is_zeronet(saddr))
+ if (ipv4_is_zeronet(saddr)) {
+ reason = SKB_DROP_REASON_IP_INVALID_SOURCE;
goto martian_source;
+ }
- if (ipv4_is_zeronet(daddr))
+ if (ipv4_is_zeronet(daddr)) {
+ reason = SKB_DROP_REASON_IP_INVALID_DEST;
goto martian_destination;
+ }
/* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
* and call it once if daddr or/and saddr are loopback addresses
*/
if (ipv4_is_loopback(daddr)) {
- if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
+ if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) {
+ reason = SKB_DROP_REASON_IP_LOCALNET;
goto martian_destination;
+ }
} else if (ipv4_is_loopback(saddr)) {
- if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
+ if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) {
+ reason = SKB_DROP_REASON_IP_LOCALNET;
goto martian_source;
+ }
}
/*
@@ -2266,7 +2304,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
fl4.flowi4_oif = 0;
fl4.flowi4_iif = dev->ifindex;
fl4.flowi4_mark = skb->mark;
- fl4.flowi4_tos = tos;
+ fl4.flowi4_tos = inet_dscp_to_dsfield(dscp);
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
fl4.flowi4_flags = 0;
fl4.daddr = daddr;
@@ -2298,10 +2336,11 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
goto brd_input;
}
+ err = -EINVAL;
if (res->type == RTN_LOCAL) {
- err = fib_validate_source(skb, saddr, daddr, tos,
- 0, dev, in_dev, &itag);
- if (err < 0)
+ reason = fib_validate_source_reason(skb, saddr, daddr, dscp,
+ 0, dev, in_dev, &itag);
+ if (reason)
goto martian_source;
goto local_input;
}
@@ -2310,21 +2349,28 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
err = -EHOSTUNREACH;
goto no_route;
}
- if (res->type != RTN_UNICAST)
+ if (res->type != RTN_UNICAST) {
+ reason = SKB_DROP_REASON_IP_INVALID_DEST;
goto martian_destination;
+ }
make_route:
- err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
-out: return err;
+ reason = ip_mkroute_input(skb, res, in_dev, daddr, saddr, dscp,
+ flkeys);
+
+out:
+ return reason;
brd_input:
- if (skb->protocol != htons(ETH_P_IP))
- goto e_inval;
+ if (skb->protocol != htons(ETH_P_IP)) {
+ reason = SKB_DROP_REASON_INVALID_PROTO;
+ goto out;
+ }
if (!ipv4_is_zeronet(saddr)) {
- err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
- in_dev, &itag);
- if (err < 0)
+ reason = fib_validate_source_reason(skb, saddr, 0, dscp, 0,
+ dev, in_dev, &itag);
+ if (reason)
goto martian_source;
}
flags |= RTCF_BROADCAST;
@@ -2342,7 +2388,7 @@ local_input:
rth = rcu_dereference(nhc->nhc_rth_input);
if (rt_cache_valid(rth)) {
skb_dst_set_noref(skb, &rth->dst);
- err = 0;
+ reason = SKB_NOT_DROPPED_YET;
goto out;
}
}
@@ -2379,7 +2425,7 @@ local_input:
rt_add_uncached_list(rth);
}
skb_dst_set(skb, &rth->dst);
- err = 0;
+ reason = SKB_NOT_DROPPED_YET;
goto out;
no_route:
@@ -2400,12 +2446,8 @@ martian_destination:
&daddr, &saddr, dev->name);
#endif
-e_inval:
- err = -EINVAL;
- goto out;
-
e_nobufs:
- err = -ENOBUFS;
+ reason = SKB_DROP_REASON_NOMEM;
goto out;
martian_source:
@@ -2414,8 +2456,10 @@ martian_source:
}
/* called with rcu_read_lock held */
-static int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev, struct fib_result *res)
+static enum skb_drop_reason
+ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ dscp_t dscp, struct net_device *dev,
+ struct fib_result *res)
{
/* Multicast recognition logic is moved from route cache to here.
* The problem was that too many Ethernet cards have broken/missing
@@ -2429,12 +2473,12 @@ static int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
* route cache entry is created eventually.
*/
if (ipv4_is_multicast(daddr)) {
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct in_device *in_dev = __in_dev_get_rcu(dev);
int our = 0;
- int err = -EINVAL;
if (!in_dev)
- return err;
+ return -EINVAL;
our = ip_check_mc_rcu(in_dev, daddr, saddr,
ip_hdr(skb)->protocol);
@@ -2455,27 +2499,27 @@ static int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
IN_DEV_MFORWARD(in_dev))
#endif
) {
- err = ip_route_input_mc(skb, daddr, saddr,
- tos, dev, our);
+ reason = ip_route_input_mc(skb, daddr, saddr, dscp,
+ dev, our);
}
- return err;
+ return reason;
}
- return ip_route_input_slow(skb, daddr, saddr, tos, dev, res);
+ return ip_route_input_slow(skb, daddr, saddr, dscp, dev, res);
}
-int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev)
+enum skb_drop_reason ip_route_input_noref(struct sk_buff *skb, __be32 daddr,
+ __be32 saddr, dscp_t dscp,
+ struct net_device *dev)
{
+ enum skb_drop_reason reason;
struct fib_result res;
- int err;
- tos &= INET_DSCP_MASK;
rcu_read_lock();
- err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res);
+ reason = ip_route_input_rcu(skb, daddr, saddr, dscp, dev, &res);
rcu_read_unlock();
- return err;
+ return reason;
}
EXPORT_SYMBOL(ip_route_input_noref);
@@ -3233,10 +3277,10 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
return err;
rtm = nlmsg_data(nlh);
- src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
- dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
- iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
- mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
+ src = nla_get_in_addr_default(tb[RTA_SRC], 0);
+ dst = nla_get_in_addr_default(tb[RTA_DST], 0);
+ iif = nla_get_u32_default(tb[RTA_IIF], 0);
+ mark = nla_get_u32_default(tb[RTA_MARK], 0);
if (tb[RTA_UID])
uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID]));
else
@@ -3262,7 +3306,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
fl4.daddr = dst;
fl4.saddr = src;
fl4.flowi4_tos = rtm->rtm_tos & INET_DSCP_MASK;
- fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
+ fl4.flowi4_oif = nla_get_u32_default(tb[RTA_OIF], 0);
fl4.flowi4_mark = mark;
fl4.flowi4_uid = uid;
if (sport)
@@ -3286,8 +3330,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
skb->dev = dev;
skb->mark = mark;
err = ip_route_input_rcu(skb, dst, src,
- rtm->rtm_tos & INET_DSCP_MASK, dev,
- &res);
+ inet_dsfield_to_dscp(rtm->rtm_tos),
+ dev, &res) ? -EINVAL : 0;
rt = skb_rtable(skb);
if (err == 0 && rt->dst.error)
@@ -3634,6 +3678,11 @@ static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif /* CONFIG_IP_ROUTE_CLASSID */
+static const struct rtnl_msg_handler ip_rt_rtnl_msg_handlers[] __initconst = {
+ {.protocol = PF_INET, .msgtype = RTM_GETROUTE,
+ .doit = inet_rtm_getroute, .flags = RTNL_FLAG_DOIT_UNLOCKED},
+};
+
int __init ip_rt_init(void)
{
void *idents_hash;
@@ -3691,8 +3740,7 @@ int __init ip_rt_init(void)
xfrm_init();
xfrm4_init();
#endif
- rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL,
- RTNL_FLAG_DOIT_UNLOCKED);
+ rtnl_register_many(ip_rt_rtnl_msg_handlers);
#ifdef CONFIG_SYSCTL
register_pernet_subsys(&sysctl_route_ops);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4f77bd862e95..0d704bda6c41 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -477,15 +477,16 @@ void tcp_init_sock(struct sock *sk)
}
EXPORT_SYMBOL(tcp_init_sock);
-static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
+static void tcp_tx_timestamp(struct sock *sk, struct sockcm_cookie *sockc)
{
struct sk_buff *skb = tcp_write_queue_tail(sk);
+ u32 tsflags = sockc->tsflags;
if (tsflags && skb) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
- sock_tx_timestamp(sk, tsflags, &shinfo->tx_flags);
+ sock_tx_timestamp(sk, sockc, &shinfo->tx_flags);
if (tsflags & SOF_TIMESTAMPING_TX_ACK)
tcb->txstamp_ack = 1;
if (tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK)
@@ -1321,7 +1322,7 @@ wait_for_space:
out:
if (copied) {
- tcp_tx_timestamp(sk, sockc.tsflags);
+ tcp_tx_timestamp(sk, &sockc);
tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
}
out_nopush:
@@ -3335,7 +3336,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->window_clamp = 0;
tp->delivered = 0;
tp->delivered_ce = 0;
- if (icsk->icsk_ca_ops->release)
+ if (icsk->icsk_ca_initialized && icsk->icsk_ca_ops->release)
icsk->icsk_ca_ops->release(sk);
memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
icsk->icsk_ca_initialized = 0;
diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c
index db6516092daf..bbb8d5f0eae7 100644
--- a/net/ipv4/tcp_ao.c
+++ b/net/ipv4/tcp_ao.c
@@ -109,12 +109,13 @@ bool tcp_ao_ignore_icmp(const struct sock *sk, int family, int type, int code)
* it's known that the keys in ao_info are matching peer's
* family/address/VRF/etc.
*/
-struct tcp_ao_key *tcp_ao_established_key(struct tcp_ao_info *ao,
+struct tcp_ao_key *tcp_ao_established_key(const struct sock *sk,
+ struct tcp_ao_info *ao,
int sndid, int rcvid)
{
struct tcp_ao_key *key;
- hlist_for_each_entry_rcu(key, &ao->head, node) {
+ hlist_for_each_entry_rcu(key, &ao->head, node, lockdep_sock_is_held(sk)) {
if ((sndid >= 0 && key->sndid != sndid) ||
(rcvid >= 0 && key->rcvid != rcvid))
continue;
@@ -205,7 +206,7 @@ static struct tcp_ao_key *__tcp_ao_do_lookup(const struct sock *sk, int l3index,
if (!ao)
return NULL;
- hlist_for_each_entry_rcu(key, &ao->head, node) {
+ hlist_for_each_entry_rcu(key, &ao->head, node, lockdep_sock_is_held(sk)) {
u8 prefixlen = min(prefix, key->prefixlen);
if (!tcp_ao_key_cmp(key, l3index, addr, prefixlen,
@@ -793,7 +794,7 @@ int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb,
if (!ao_info)
return -ENOENT;
- *key = tcp_ao_established_key(ao_info, aoh->rnext_keyid, -1);
+ *key = tcp_ao_established_key(sk, ao_info, aoh->rnext_keyid, -1);
if (!*key)
return -ENOENT;
*traffic_key = snd_other_key(*key);
@@ -979,7 +980,7 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb,
*/
key = READ_ONCE(info->rnext_key);
if (key->rcvid != aoh->keyid) {
- key = tcp_ao_established_key(info, -1, aoh->keyid);
+ key = tcp_ao_established_key(sk, info, -1, aoh->keyid);
if (!key)
goto key_not_found;
}
@@ -1003,7 +1004,7 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb,
aoh->rnext_keyid,
tcp_ao_hdr_maclen(aoh));
/* If the key is not found we do nothing. */
- key = tcp_ao_established_key(info, aoh->rnext_keyid, -1);
+ key = tcp_ao_established_key(sk, info, aoh->rnext_keyid, -1);
if (key)
/* pairs with tcp_ao_del_cmd */
WRITE_ONCE(info->current_key, key);
@@ -1163,7 +1164,7 @@ void tcp_ao_established(struct sock *sk)
if (!ao)
return;
- hlist_for_each_entry_rcu(key, &ao->head, node)
+ hlist_for_each_entry_rcu(key, &ao->head, node, lockdep_sock_is_held(sk))
tcp_ao_cache_traffic_keys(sk, ao, key);
}
@@ -1180,7 +1181,7 @@ void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb)
WRITE_ONCE(ao->risn, tcp_hdr(skb)->seq);
ao->rcv_sne = 0;
- hlist_for_each_entry_rcu(key, &ao->head, node)
+ hlist_for_each_entry_rcu(key, &ao->head, node, lockdep_sock_is_held(sk))
tcp_ao_cache_traffic_keys(sk, ao, key);
}
@@ -1256,14 +1257,14 @@ int tcp_ao_copy_all_matching(const struct sock *sk, struct sock *newsk,
key_head = rcu_dereference(hlist_first_rcu(&new_ao->head));
first_key = hlist_entry_safe(key_head, struct tcp_ao_key, node);
- key = tcp_ao_established_key(new_ao, tcp_rsk(req)->ao_keyid, -1);
+ key = tcp_ao_established_key(req_to_sk(req), new_ao, tcp_rsk(req)->ao_keyid, -1);
if (key)
new_ao->current_key = key;
else
new_ao->current_key = first_key;
/* set rnext_key */
- key = tcp_ao_established_key(new_ao, -1, tcp_rsk(req)->ao_rcv_next);
+ key = tcp_ao_established_key(req_to_sk(req), new_ao, -1, tcp_rsk(req)->ao_rcv_next);
if (key)
new_ao->rnext_key = key;
else
@@ -1857,12 +1858,12 @@ static int tcp_ao_del_cmd(struct sock *sk, unsigned short int family,
* if there's any.
*/
if (cmd.set_current) {
- new_current = tcp_ao_established_key(ao_info, cmd.current_key, -1);
+ new_current = tcp_ao_established_key(sk, ao_info, cmd.current_key, -1);
if (!new_current)
return -ENOENT;
}
if (cmd.set_rnext) {
- new_rnext = tcp_ao_established_key(ao_info, -1, cmd.rnext);
+ new_rnext = tcp_ao_established_key(sk, ao_info, -1, cmd.rnext);
if (!new_rnext)
return -ENOENT;
}
@@ -1902,7 +1903,8 @@ static int tcp_ao_del_cmd(struct sock *sk, unsigned short int family,
* "It is presumed that an MKT affecting a particular
* connection cannot be destroyed during an active connection"
*/
- hlist_for_each_entry_rcu(key, &ao_info->head, node) {
+ hlist_for_each_entry_rcu(key, &ao_info->head, node,
+ lockdep_sock_is_held(sk)) {
if (cmd.sndid != key->sndid ||
cmd.rcvid != key->rcvid)
continue;
@@ -2000,14 +2002,14 @@ static int tcp_ao_info_cmd(struct sock *sk, unsigned short int family,
* if there's any.
*/
if (cmd.set_current) {
- new_current = tcp_ao_established_key(ao_info, cmd.current_key, -1);
+ new_current = tcp_ao_established_key(sk, ao_info, cmd.current_key, -1);
if (!new_current) {
err = -ENOENT;
goto out;
}
}
if (cmd.set_rnext) {
- new_rnext = tcp_ao_established_key(ao_info, -1, cmd.rnext);
+ new_rnext = tcp_ao_established_key(sk, ao_info, -1, cmd.rnext);
if (!new_rnext) {
err = -ENOENT;
goto out;
@@ -2101,7 +2103,8 @@ int tcp_v4_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen)
* The layout of the fields in the user and kernel structures is expected to
* be the same (including in the 32bit vs 64bit case).
*/
-static int tcp_ao_copy_mkts_to_user(struct tcp_ao_info *ao_info,
+static int tcp_ao_copy_mkts_to_user(const struct sock *sk,
+ struct tcp_ao_info *ao_info,
sockptr_t optval, sockptr_t optlen)
{
struct tcp_ao_getsockopt opt_in, opt_out;
@@ -2229,7 +2232,8 @@ static int tcp_ao_copy_mkts_to_user(struct tcp_ao_info *ao_info,
/* May change in RX, while we're dumping, pre-fetch it */
current_key = READ_ONCE(ao_info->current_key);
- hlist_for_each_entry_rcu(key, &ao_info->head, node) {
+ hlist_for_each_entry_rcu(key, &ao_info->head, node,
+ lockdep_sock_is_held(sk)) {
if (opt_in.get_all)
goto match;
@@ -2309,7 +2313,7 @@ int tcp_ao_get_mkts(struct sock *sk, sockptr_t optval, sockptr_t optlen)
if (!ao_info)
return -ENOENT;
- return tcp_ao_copy_mkts_to_user(ao_info, optval, optlen);
+ return tcp_ao_copy_mkts_to_user(sk, ao_info, optval, optlen);
}
int tcp_ao_get_sock_info(struct sock *sk, sockptr_t optval, sockptr_t optlen)
@@ -2396,7 +2400,7 @@ int tcp_ao_set_repair(struct sock *sk, sockptr_t optval, unsigned int optlen)
WRITE_ONCE(ao->snd_sne, cmd.snd_sne);
WRITE_ONCE(ao->rcv_sne, cmd.rcv_sne);
- hlist_for_each_entry_rcu(key, &ao->head, node)
+ hlist_for_each_entry_rcu(key, &ao->head, node, lockdep_sock_is_held(sk))
tcp_ao_cache_traffic_keys(sk, ao, key);
return 0;
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 0306d257fa64..df758adbb445 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -270,8 +270,9 @@ void tcp_cleanup_congestion_control(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
- if (icsk->icsk_ca_ops->release)
+ if (icsk->icsk_ca_initialized && icsk->icsk_ca_ops->release)
icsk->icsk_ca_ops->release(sk);
+ icsk->icsk_ca_initialized = 0;
bpf_module_put(icsk->icsk_ca_ops, icsk->icsk_ca_ops->owner);
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2d844e1f867f..5bdf13ac26ef 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4921,8 +4921,8 @@ static bool tcp_ooo_try_coalesce(struct sock *sk,
return res;
}
-static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb,
- enum skb_drop_reason reason)
+noinline_for_tracing static void
+tcp_drop_reason(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason reason)
{
sk_drops_add(sk, skb);
sk_skb_reason_drop(sk, skb, reason);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5afe5e57c89b..a38c8b1f44db 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -907,7 +907,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb,
ctl_sk->sk_mark = 0;
ctl_sk->sk_priority = 0;
}
- ip_send_unicast_reply(ctl_sk,
+ ip_send_unicast_reply(ctl_sk, sk,
skb, &TCP_SKB_CB(skb)->header.h4.opt,
ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
&arg, arg.iov[0].iov_len,
@@ -1021,7 +1021,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
inet_twsk(sk)->tw_priority : READ_ONCE(sk->sk_priority);
transmit_time = tcp_transmit_time(sk);
- ip_send_unicast_reply(ctl_sk,
+ ip_send_unicast_reply(ctl_sk, sk,
skb, &TCP_SKB_CB(skb)->header.h4.opt,
ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
&arg, arg.iov[0].iov_len,
@@ -1053,7 +1053,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
}
if (aoh)
- key.ao_key = tcp_ao_established_key(ao_info, aoh->rnext_keyid, -1);
+ key.ao_key = tcp_ao_established_key(sk, ao_info,
+ aoh->rnext_keyid, -1);
}
}
if (key.ao_key) {
@@ -2900,15 +2901,17 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
__be32 src = inet->inet_rcv_saddr;
__u16 destp = ntohs(inet->inet_dport);
__u16 srcp = ntohs(inet->inet_sport);
+ u8 icsk_pending;
int rx_queue;
int state;
- if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
- icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
- icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
+ icsk_pending = smp_load_acquire(&icsk->icsk_pending);
+ if (icsk_pending == ICSK_TIME_RETRANS ||
+ icsk_pending == ICSK_TIME_REO_TIMEOUT ||
+ icsk_pending == ICSK_TIME_LOSS_PROBE) {
timer_active = 1;
timer_expires = icsk->icsk_timeout;
- } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
+ } else if (icsk_pending == ICSK_TIME_PROBE0) {
timer_active = 4;
timer_expires = icsk->icsk_timeout;
} else if (timer_pending(&sk->sk_timer)) {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 68804fd01daf..5485a70b5fe5 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2954,10 +2954,8 @@ void tcp_send_loss_probe(struct sock *sk)
}
skb = skb_rb_last(&sk->tcp_rtx_queue);
if (unlikely(!skb)) {
- WARN_ONCE(tp->packets_out,
- "invalid inflight: %u state %u cwnd %u mss %d\n",
- tp->packets_out, sk->sk_state, tcp_snd_cwnd(tp), mss);
- inet_csk(sk)->icsk_pending = 0;
+ tcp_warn_once(sk, tp->packets_out, "invalid inflight: ");
+ smp_store_release(&inet_csk(sk)->icsk_pending, 0);
return;
}
@@ -2990,7 +2988,7 @@ probe_sent:
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSPROBES);
/* Reset s.t. tcp_rearm_rto will restart timer from now */
- inet_csk(sk)->icsk_pending = 0;
+ smp_store_release(&inet_csk(sk)->icsk_pending, 0);
rearm_timer:
tcp_rearm_rto(sk);
}
@@ -3728,7 +3726,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
switch (synack_type) {
case TCP_SYNACK_NORMAL:
- skb_set_owner_w(skb, req_to_sk(req));
+ skb_set_owner_edemux(skb, req_to_sk(req));
break;
case TCP_SYNACK_COOKIE:
/* Under synflood, we do not attach skb to a socket,
@@ -4131,7 +4129,10 @@ int tcp_connect(struct sock *sk)
if (unlikely(!buff))
return -ENOBUFS;
- tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
+ /* SYN eats a sequence byte, write_seq updated by
+ * tcp_connect_queue_skb().
+ */
+ tcp_init_nondata_skb(buff, tp->write_seq, TCPHDR_SYN);
tcp_mstamp_refresh(tp);
tp->retrans_stamp = tcp_time_stamp_ts(tp);
tcp_connect_queue_skb(sk, buff);
@@ -4221,7 +4222,8 @@ void tcp_send_delayed_ack(struct sock *sk)
if (!time_before(timeout, icsk->icsk_ack.timeout))
timeout = icsk->icsk_ack.timeout;
}
- icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
+ smp_store_release(&icsk->icsk_ack.pending,
+ icsk->icsk_ack.pending | ICSK_ACK_SCHED | ICSK_ACK_TIMER);
icsk->icsk_ack.timeout = timeout;
sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 79064580c8c0..b412ed88ccd9 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -361,6 +361,14 @@ static void tcp_delack_timer(struct timer_list *t)
from_timer(icsk, t, icsk_delack_timer);
struct sock *sk = &icsk->icsk_inet.sk;
+ /* Avoid taking socket spinlock if there is no ACK to send.
+ * The compressed_ack check is racy, but a separate hrtimer
+ * will take care of it eventually.
+ */
+ if (!(smp_load_acquire(&icsk->icsk_ack.pending) & ICSK_ACK_TIMER) &&
+ !READ_ONCE(tcp_sk(sk)->compressed_ack))
+ goto out;
+
bh_lock_sock(sk);
if (!sock_owned_by_user(sk)) {
tcp_delack_timer_handler(sk);
@@ -371,6 +379,7 @@ static void tcp_delack_timer(struct timer_list *t)
sock_hold(sk);
}
bh_unlock_sock(sk);
+out:
sock_put(sk);
}
@@ -701,11 +710,11 @@ void tcp_write_timer_handler(struct sock *sk)
tcp_send_loss_probe(sk);
break;
case ICSK_TIME_RETRANS:
- icsk->icsk_pending = 0;
+ smp_store_release(&icsk->icsk_pending, 0);
tcp_retransmit_timer(sk);
break;
case ICSK_TIME_PROBE0:
- icsk->icsk_pending = 0;
+ smp_store_release(&icsk->icsk_pending, 0);
tcp_probe_timer(sk);
break;
}
@@ -717,6 +726,10 @@ static void tcp_write_timer(struct timer_list *t)
from_timer(icsk, t, icsk_retransmit_timer);
struct sock *sk = &icsk->icsk_inet.sk;
+ /* Avoid locking the socket when there is no pending event. */
+ if (!smp_load_acquire(&icsk->icsk_pending))
+ goto out;
+
bh_lock_sock(sk);
if (!sock_owned_by_user(sk)) {
tcp_write_timer_handler(sk);
@@ -726,6 +739,7 @@ static void tcp_write_timer(struct timer_list *t)
sock_hold(sk);
}
bh_unlock_sock(sk);
+out:
sock_put(sk);
}
@@ -851,6 +865,7 @@ static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer)
* LINUX_MIB_TCPACKCOMPRESSED accurate.
*/
tp->compressed_ack--;
+ tcp_mstamp_refresh(tp);
tcp_send_ack(sk);
}
} else {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 2849b273b131..6a01905d379f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -100,6 +100,7 @@
#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
+#include <net/ip.h>
#include <net/ip_tunnels.h>
#include <net/route.h>
#include <net/checksum.h>
@@ -115,7 +116,6 @@
#include <net/addrconf.h>
#include <net/udp_tunnel.h>
#include <net/gro.h>
-#include <net/inet_dscp.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6_stubs.h>
#endif
@@ -410,7 +410,6 @@ static int compute_score(struct sock *sk, const struct net *net,
return score;
}
-INDIRECT_CALLABLE_SCOPE
u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport,
const __be32 faddr, const __be16 fport)
{
@@ -419,6 +418,7 @@ u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport,
return __inet_ehashfn(laddr, lport, faddr, fport,
udp_ehash_secret + net_hash_mix(net));
}
+EXPORT_SYMBOL(udp_ehashfn);
/* called with rcu_read_lock() */
static struct sock *udp4_lib_lookup2(const struct net *net,
@@ -478,6 +478,159 @@ rescore:
return result;
}
+#if IS_ENABLED(CONFIG_BASE_SMALL)
+static struct sock *udp4_lib_lookup4(const struct net *net,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, unsigned int hnum,
+ int dif, int sdif,
+ struct udp_table *udptable)
+{
+ return NULL;
+}
+
+static void udp_rehash4(struct udp_table *udptable, struct sock *sk,
+ u16 newhash4)
+{
+}
+
+static void udp_unhash4(struct udp_table *udptable, struct sock *sk)
+{
+}
+#else /* !CONFIG_BASE_SMALL */
+static struct sock *udp4_lib_lookup4(const struct net *net,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, unsigned int hnum,
+ int dif, int sdif,
+ struct udp_table *udptable)
+{
+ const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
+ const struct hlist_nulls_node *node;
+ struct udp_hslot *hslot4;
+ unsigned int hash4, slot;
+ struct udp_sock *up;
+ struct sock *sk;
+
+ hash4 = udp_ehashfn(net, daddr, hnum, saddr, sport);
+ slot = hash4 & udptable->mask;
+ hslot4 = &udptable->hash4[slot];
+ INET_ADDR_COOKIE(acookie, saddr, daddr);
+
+begin:
+ /* SLAB_TYPESAFE_BY_RCU not used, so we don't need to touch sk_refcnt */
+ udp_lrpa_for_each_entry_rcu(up, node, &hslot4->nulls_head) {
+ sk = (struct sock *)up;
+ if (inet_match(net, sk, acookie, ports, dif, sdif))
+ return sk;
+ }
+
+ /* if the nulls value we got at the end of this lookup is not the
+ * expected one, we must restart lookup. We probably met an item that
+ * was moved to another chain due to rehash.
+ */
+ if (get_nulls_value(node) != slot)
+ goto begin;
+
+ return NULL;
+}
+
+/* In hash4, rehash can happen in connect(), where hash4_cnt keeps unchanged. */
+static void udp_rehash4(struct udp_table *udptable, struct sock *sk,
+ u16 newhash4)
+{
+ struct udp_hslot *hslot4, *nhslot4;
+
+ hslot4 = udp_hashslot4(udptable, udp_sk(sk)->udp_lrpa_hash);
+ nhslot4 = udp_hashslot4(udptable, newhash4);
+ udp_sk(sk)->udp_lrpa_hash = newhash4;
+
+ if (hslot4 != nhslot4) {
+ spin_lock_bh(&hslot4->lock);
+ hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_lrpa_node);
+ hslot4->count--;
+ spin_unlock_bh(&hslot4->lock);
+
+ spin_lock_bh(&nhslot4->lock);
+ hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_lrpa_node,
+ &nhslot4->nulls_head);
+ nhslot4->count++;
+ spin_unlock_bh(&nhslot4->lock);
+ }
+}
+
+static void udp_unhash4(struct udp_table *udptable, struct sock *sk)
+{
+ struct udp_hslot *hslot2, *hslot4;
+
+ if (udp_hashed4(sk)) {
+ hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
+ hslot4 = udp_hashslot4(udptable, udp_sk(sk)->udp_lrpa_hash);
+
+ spin_lock(&hslot4->lock);
+ hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_lrpa_node);
+ hslot4->count--;
+ spin_unlock(&hslot4->lock);
+
+ spin_lock(&hslot2->lock);
+ udp_hash4_dec(hslot2);
+ spin_unlock(&hslot2->lock);
+ }
+}
+
+void udp_lib_hash4(struct sock *sk, u16 hash)
+{
+ struct udp_hslot *hslot, *hslot2, *hslot4;
+ struct net *net = sock_net(sk);
+ struct udp_table *udptable;
+
+ /* Connected udp socket can re-connect to another remote address,
+ * so rehash4 is needed.
+ */
+ udptable = net->ipv4.udp_table;
+ if (udp_hashed4(sk)) {
+ udp_rehash4(udptable, sk, hash);
+ return;
+ }
+
+ hslot = udp_hashslot(udptable, net, udp_sk(sk)->udp_port_hash);
+ hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
+ hslot4 = udp_hashslot4(udptable, hash);
+ udp_sk(sk)->udp_lrpa_hash = hash;
+
+ spin_lock_bh(&hslot->lock);
+ if (rcu_access_pointer(sk->sk_reuseport_cb))
+ reuseport_detach_sock(sk);
+
+ spin_lock(&hslot4->lock);
+ hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_lrpa_node,
+ &hslot4->nulls_head);
+ hslot4->count++;
+ spin_unlock(&hslot4->lock);
+
+ spin_lock(&hslot2->lock);
+ udp_hash4_inc(hslot2);
+ spin_unlock(&hslot2->lock);
+
+ spin_unlock_bh(&hslot->lock);
+}
+EXPORT_SYMBOL(udp_lib_hash4);
+
+/* call with sock lock */
+void udp4_hash4(struct sock *sk)
+{
+ struct net *net = sock_net(sk);
+ unsigned int hash;
+
+ if (sk_unhashed(sk) || sk->sk_rcv_saddr == htonl(INADDR_ANY))
+ return;
+
+ hash = udp_ehashfn(net, sk->sk_rcv_saddr, sk->sk_num,
+ sk->sk_daddr, sk->sk_dport);
+
+ udp_lib_hash4(sk, hash);
+}
+EXPORT_SYMBOL(udp4_hash4);
+#endif /* CONFIG_BASE_SMALL */
+
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
* harder than this. -DaveM
*/
@@ -486,13 +639,19 @@ struct sock *__udp4_lib_lookup(const struct net *net, __be32 saddr,
int sdif, struct udp_table *udptable, struct sk_buff *skb)
{
unsigned short hnum = ntohs(dport);
- unsigned int hash2, slot2;
struct udp_hslot *hslot2;
struct sock *result, *sk;
+ unsigned int hash2;
hash2 = ipv4_portaddr_hash(net, daddr, hnum);
- slot2 = hash2 & udptable->mask;
- hslot2 = &udptable->hash2[slot2];
+ hslot2 = udp_hashslot2(udptable, hash2);
+
+ if (udp_has_hash4(hslot2)) {
+ result = udp4_lib_lookup4(net, saddr, sport, daddr, hnum,
+ dif, sdif, udptable);
+ if (result) /* udp4_lib_lookup4 return sk or NULL */
+ return result;
+ }
/* Lookup connected or non-wildcard socket */
result = udp4_lib_lookup2(net, saddr, sport,
@@ -519,8 +678,7 @@ struct sock *__udp4_lib_lookup(const struct net *net, __be32 saddr,
/* Lookup wildcard sockets */
hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
- slot2 = hash2 & udptable->mask;
- hslot2 = &udptable->hash2[slot2];
+ hslot2 = udp_hashslot2(udptable, hash2);
result = udp4_lib_lookup2(net, saddr, sport,
htonl(INADDR_ANY), hnum, dif, sdif,
@@ -1935,6 +2093,18 @@ int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
}
EXPORT_SYMBOL(udp_pre_connect);
+static int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+ int res;
+
+ lock_sock(sk);
+ res = __ip4_datagram_connect(sk, uaddr, addr_len);
+ if (!res)
+ udp4_hash4(sk);
+ release_sock(sk);
+ return res;
+}
+
int __udp_disconnect(struct sock *sk, int flags)
{
struct inet_sock *inet = inet_sk(sk);
@@ -1994,6 +2164,8 @@ void udp_lib_unhash(struct sock *sk)
hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
hslot2->count--;
spin_unlock(&hslot2->lock);
+
+ udp_unhash4(udptable, sk);
}
spin_unlock_bh(&hslot->lock);
}
@@ -2003,7 +2175,7 @@ EXPORT_SYMBOL(udp_lib_unhash);
/*
* inet_rcv_saddr was changed, we must rehash secondary hash
*/
-void udp_lib_rehash(struct sock *sk, u16 newhash)
+void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4)
{
if (sk_hashed(sk)) {
struct udp_table *udptable = udp_get_table_prot(sk);
@@ -2035,6 +2207,19 @@ void udp_lib_rehash(struct sock *sk, u16 newhash)
spin_unlock(&nhslot2->lock);
}
+ if (udp_hashed4(sk)) {
+ udp_rehash4(udptable, sk, newhash4);
+
+ if (hslot2 != nhslot2) {
+ spin_lock(&hslot2->lock);
+ udp_hash4_dec(hslot2);
+ spin_unlock(&hslot2->lock);
+
+ spin_lock(&nhslot2->lock);
+ udp_hash4_inc(nhslot2);
+ spin_unlock(&nhslot2->lock);
+ }
+ }
spin_unlock_bh(&hslot->lock);
}
}
@@ -2046,7 +2231,11 @@ void udp_v4_rehash(struct sock *sk)
u16 new_hash = ipv4_portaddr_hash(sock_net(sk),
inet_sk(sk)->inet_rcv_saddr,
inet_sk(sk)->inet_num);
- udp_lib_rehash(sk, new_hash);
+ u16 new_hash4 = udp_ehashfn(sock_net(sk),
+ sk->sk_rcv_saddr, sk->sk_num,
+ sk->sk_daddr, sk->sk_dport);
+
+ udp_lib_rehash(sk, new_hash, new_hash4);
}
static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
@@ -2268,7 +2457,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
udptable->mask;
hash2 = ipv4_portaddr_hash(net, daddr, hnum) & udptable->mask;
start_lookup:
- hslot = &udptable->hash2[hash2];
+ hslot = &udptable->hash2[hash2].hslot;
offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
}
@@ -2539,14 +2728,13 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
struct udp_table *udptable = net->ipv4.udp_table;
INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr);
unsigned short hnum = ntohs(loc_port);
- unsigned int hash2, slot2;
struct udp_hslot *hslot2;
+ unsigned int hash2;
__portpair ports;
struct sock *sk;
hash2 = ipv4_portaddr_hash(net, loc_addr, hnum);
- slot2 = hash2 & udptable->mask;
- hslot2 = &udptable->hash2[slot2];
+ hslot2 = udp_hashslot2(udptable, hash2);
ports = INET_COMBINED_PORTS(rmt_port, hnum);
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
@@ -2621,7 +2809,7 @@ int udp_v4_early_demux(struct sk_buff *skb)
if (!inet_sk(sk)->inet_daddr && in_dev)
return ip_mc_validate_source(skb, iph->daddr,
iph->saddr,
- iph->tos & INET_DSCP_MASK,
+ ip4h_dscp(iph),
skb->dev, in_dev, &itag);
}
return 0;
@@ -2940,7 +3128,7 @@ struct proto udp_prot = {
.owner = THIS_MODULE,
.close = udp_lib_close,
.pre_connect = udp_pre_connect,
- .connect = ip4_datagram_connect,
+ .connect = udp_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
.init = udp_init_sock,
@@ -3187,7 +3375,7 @@ again:
batch_sks = 0;
for (; state->bucket <= udptable->mask; state->bucket++) {
- struct udp_hslot *hslot2 = &udptable->hash2[state->bucket];
+ struct udp_hslot *hslot2 = &udptable->hash2[state->bucket].hslot;
if (hlist_empty(&hslot2->head))
continue;
@@ -3428,10 +3616,12 @@ __setup("uhash_entries=", set_uhash_entries);
void __init udp_table_init(struct udp_table *table, const char *name)
{
- unsigned int i;
+ unsigned int i, slot_size;
+ slot_size = sizeof(struct udp_hslot) + sizeof(struct udp_hslot_main) +
+ udp_hash4_slot_size();
table->hash = alloc_large_system_hash(name,
- 2 * sizeof(struct udp_hslot),
+ slot_size,
uhash_entries,
21, /* one slot per 2 MB */
0,
@@ -3440,17 +3630,18 @@ void __init udp_table_init(struct udp_table *table, const char *name)
UDP_HTABLE_SIZE_MIN,
UDP_HTABLE_SIZE_MAX);
- table->hash2 = table->hash + (table->mask + 1);
+ table->hash2 = (void *)(table->hash + (table->mask + 1));
for (i = 0; i <= table->mask; i++) {
INIT_HLIST_HEAD(&table->hash[i].head);
table->hash[i].count = 0;
spin_lock_init(&table->hash[i].lock);
}
for (i = 0; i <= table->mask; i++) {
- INIT_HLIST_HEAD(&table->hash2[i].head);
- table->hash2[i].count = 0;
- spin_lock_init(&table->hash2[i].lock);
+ INIT_HLIST_HEAD(&table->hash2[i].hslot.head);
+ table->hash2[i].hslot.count = 0;
+ spin_lock_init(&table->hash2[i].hslot.lock);
}
+ udp_table_hash4_init(table);
}
u32 udp_flow_hashrnd(void)
@@ -3476,18 +3667,21 @@ static void __net_init udp_sysctl_init(struct net *net)
static struct udp_table __net_init *udp_pernet_table_alloc(unsigned int hash_entries)
{
struct udp_table *udptable;
+ unsigned int slot_size;
int i;
udptable = kmalloc(sizeof(*udptable), GFP_KERNEL);
if (!udptable)
goto out;
- udptable->hash = vmalloc_huge(hash_entries * 2 * sizeof(struct udp_hslot),
+ slot_size = sizeof(struct udp_hslot) + sizeof(struct udp_hslot_main) +
+ udp_hash4_slot_size();
+ udptable->hash = vmalloc_huge(hash_entries * slot_size,
GFP_KERNEL_ACCOUNT);
if (!udptable->hash)
goto free_table;
- udptable->hash2 = udptable->hash + hash_entries;
+ udptable->hash2 = (void *)(udptable->hash + hash_entries);
udptable->mask = hash_entries - 1;
udptable->log = ilog2(hash_entries);
@@ -3496,10 +3690,11 @@ static struct udp_table __net_init *udp_pernet_table_alloc(unsigned int hash_ent
udptable->hash[i].count = 0;
spin_lock_init(&udptable->hash[i].lock);
- INIT_HLIST_HEAD(&udptable->hash2[i].head);
- udptable->hash2[i].count = 0;
- spin_lock_init(&udptable->hash2[i].lock);
+ INIT_HLIST_HEAD(&udptable->hash2[i].hslot.head);
+ udptable->hash2[i].hslot.count = 0;
+ spin_lock_init(&udptable->hash2[i].hslot.lock);
}
+ udp_table_hash4_init(udptable);
return udptable;
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index a620618cc568..b5b06323cfd9 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -33,7 +33,7 @@ static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk,
const struct iphdr *iph = ip_hdr(skb);
if (ip_route_input_noref(skb, iph->daddr, iph->saddr,
- iph->tos, skb->dev))
+ ip4h_dscp(iph), skb->dev))
goto drop;
}
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 7e1c2faed1ff..7fb6205619e7 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -14,6 +14,7 @@
#include <linux/inetdevice.h>
#include <net/dst.h>
#include <net/xfrm.h>
+#include <net/inet_dscp.h>
#include <net/ip.h>
#include <net/l3mdev.h>
@@ -24,7 +25,7 @@ static struct dst_entry *__xfrm4_dst_lookup(struct flowi4 *fl4,
memset(fl4, 0, sizeof(*fl4));
fl4->daddr = params->daddr->a4;
- fl4->flowi4_tos = params->tos;
+ fl4->flowi4_tos = inet_dscp_to_dsfield(params->dscp);
fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(params->net,
params->oif);
fl4->flowi4_mark = params->mark;
diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c
index b146ce88c5d0..4ee624d8e66f 100644
--- a/net/ipv4/xfrm4_protocol.c
+++ b/net/ipv4/xfrm4_protocol.c
@@ -76,7 +76,7 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
const struct iphdr *iph = ip_hdr(skb);
if (ip_route_input_noref(skb, iph->daddr, iph->saddr,
- iph->tos, skb->dev))
+ ip4h_dscp(iph), skb->dev))
goto drop;
}
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 94dceac52884..c489a1e6aec9 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1016,7 +1016,7 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
static u32 inet6_addr_hash(const struct net *net, const struct in6_addr *addr)
{
- u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
+ u32 val = __ipv6_addr_jhash(addr, net_hash_mix(net));
return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
}
@@ -2570,6 +2570,24 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
return idev;
}
+static void delete_tempaddrs(struct inet6_dev *idev,
+ struct inet6_ifaddr *ifp)
+{
+ struct inet6_ifaddr *ift, *tmp;
+
+ write_lock_bh(&idev->lock);
+ list_for_each_entry_safe(ift, tmp, &idev->tempaddr_list, tmp_list) {
+ if (ift->ifpub != ifp)
+ continue;
+
+ in6_ifa_hold(ift);
+ write_unlock_bh(&idev->lock);
+ ipv6_del_addr(ift);
+ write_lock_bh(&idev->lock);
+ }
+ write_unlock_bh(&idev->lock);
+}
+
static void manage_tempaddrs(struct inet6_dev *idev,
struct inet6_ifaddr *ifp,
__u32 valid_lft, __u32 prefered_lft,
@@ -3124,11 +3142,12 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
in6_ifa_hold(ifp);
read_unlock_bh(&idev->lock);
- if (!(ifp->flags & IFA_F_TEMPORARY) &&
- (ifa_flags & IFA_F_MANAGETEMPADDR))
- manage_tempaddrs(idev, ifp, 0, 0, false,
- jiffies);
ipv6_del_addr(ifp);
+
+ if (!(ifp->flags & IFA_F_TEMPORARY) &&
+ (ifp->flags & IFA_F_MANAGETEMPADDR))
+ delete_tempaddrs(idev, ifp);
+
addrconf_verify_rtnl(net);
if (ipv6_addr_is_multicast(pfx)) {
ipv6_mc_config(net->ipv6.mc_autojoin_sk,
@@ -4793,7 +4812,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!pfx)
return -EINVAL;
- ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags;
+ ifa_flags = nla_get_u32_default(tb[IFA_FLAGS], ifm->ifa_flags);
/* We ignore other flags so far. */
ifa_flags &= IFA_F_MANAGETEMPADDR;
@@ -4952,14 +4971,12 @@ static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp,
}
if (was_managetempaddr || ifp->flags & IFA_F_MANAGETEMPADDR) {
- if (was_managetempaddr &&
- !(ifp->flags & IFA_F_MANAGETEMPADDR)) {
- cfg->valid_lft = 0;
- cfg->preferred_lft = 0;
- }
- manage_tempaddrs(ifp->idev, ifp, cfg->valid_lft,
- cfg->preferred_lft, !was_managetempaddr,
- jiffies);
+ if (was_managetempaddr && !(ifp->flags & IFA_F_MANAGETEMPADDR))
+ delete_tempaddrs(ifp->idev, ifp);
+ else
+ manage_tempaddrs(ifp->idev, ifp, cfg->valid_lft,
+ cfg->preferred_lft, !was_managetempaddr,
+ jiffies);
}
addrconf_verify_rtnl(net);
@@ -5018,10 +5035,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
return -ENODEV;
}
- if (tb[IFA_FLAGS])
- cfg.ifa_flags = nla_get_u32(tb[IFA_FLAGS]);
- else
- cfg.ifa_flags = ifm->ifa_flags;
+ cfg.ifa_flags = nla_get_u32_default(tb[IFA_FLAGS], ifm->ifa_flags);
/* We ignore other flags so far. */
cfg.ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS |
@@ -7406,6 +7420,27 @@ static struct rtnl_af_ops inet6_ops __read_mostly = {
.set_link_af = inet6_set_link_af,
};
+static const struct rtnl_msg_handler addrconf_rtnl_msg_handlers[] __initconst_or_module = {
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETLINK,
+ .dumpit = inet6_dump_ifinfo, .flags = RTNL_FLAG_DUMP_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWADDR,
+ .doit = inet6_rtm_newaddr},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_DELADDR,
+ .doit = inet6_rtm_deladdr},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETADDR,
+ .doit = inet6_rtm_getaddr, .dumpit = inet6_dump_ifaddr,
+ .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETMULTICAST,
+ .dumpit = inet6_dump_ifmcaddr,
+ .flags = RTNL_FLAG_DUMP_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETANYCAST,
+ .dumpit = inet6_dump_ifacaddr,
+ .flags = RTNL_FLAG_DUMP_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETNETCONF,
+ .doit = inet6_netconf_get_devconf, .dumpit = inet6_netconf_dump_devconf,
+ .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
+};
+
/*
* Init / cleanup code
*/
@@ -7447,44 +7482,14 @@ int __init addrconf_init(void)
addrconf_verify(&init_net);
- rtnl_af_register(&inet6_ops);
+ err = rtnl_af_register(&inet6_ops);
+ if (err)
+ goto erraf;
- err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETLINK,
- NULL, inet6_dump_ifinfo, RTNL_FLAG_DUMP_UNLOCKED);
- if (err < 0)
+ err = rtnl_register_many(addrconf_rtnl_msg_handlers);
+ if (err)
goto errout;
- err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWADDR,
- inet6_rtm_newaddr, NULL, 0);
- if (err < 0)
- goto errout;
- err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELADDR,
- inet6_rtm_deladdr, NULL, 0);
- if (err < 0)
- goto errout;
- err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDR,
- inet6_rtm_getaddr, inet6_dump_ifaddr,
- RTNL_FLAG_DOIT_UNLOCKED |
- RTNL_FLAG_DUMP_UNLOCKED);
- if (err < 0)
- goto errout;
- err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETMULTICAST,
- NULL, inet6_dump_ifmcaddr,
- RTNL_FLAG_DUMP_UNLOCKED);
- if (err < 0)
- goto errout;
- err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETANYCAST,
- NULL, inet6_dump_ifacaddr,
- RTNL_FLAG_DUMP_UNLOCKED);
- if (err < 0)
- goto errout;
- err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETNETCONF,
- inet6_netconf_get_devconf,
- inet6_netconf_dump_devconf,
- RTNL_FLAG_DOIT_UNLOCKED |
- RTNL_FLAG_DUMP_UNLOCKED);
- if (err < 0)
- goto errout;
err = ipv6_addr_label_rtnl_register();
if (err < 0)
goto errout;
@@ -7493,6 +7498,7 @@ int __init addrconf_init(void)
errout:
rtnl_unregister_all(PF_INET6);
rtnl_af_unregister(&inet6_ops);
+erraf:
unregister_netdevice_notifier(&ipv6_dev_notf);
errlo:
destroy_workqueue(addrconf_wq);
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index acd70b5992a7..ab054f329e12 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -634,23 +634,17 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
return err;
}
+static const struct rtnl_msg_handler ipv6_adddr_label_rtnl_msg_handlers[] __initconst_or_module = {
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWADDRLABEL,
+ .doit = ip6addrlbl_newdel, .flags = RTNL_FLAG_DOIT_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_DELADDRLABEL,
+ .doit = ip6addrlbl_newdel, .flags = RTNL_FLAG_DOIT_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETADDRLABEL,
+ .doit = ip6addrlbl_get, .dumpit = ip6addrlbl_dump,
+ .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
+};
+
int __init ipv6_addr_label_rtnl_register(void)
{
- int ret;
-
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWADDRLABEL,
- ip6addrlbl_newdel,
- NULL, RTNL_FLAG_DOIT_UNLOCKED);
- if (ret < 0)
- return ret;
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELADDRLABEL,
- ip6addrlbl_newdel,
- NULL, RTNL_FLAG_DOIT_UNLOCKED);
- if (ret < 0)
- return ret;
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDRLABEL,
- ip6addrlbl_get,
- ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED |
- RTNL_FLAG_DUMP_UNLOCKED);
- return ret;
+ return rtnl_register_many(ipv6_adddr_label_rtnl_msg_handlers);
}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index ba69b86f1c7d..f60ec8b0f8ea 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -252,31 +252,29 @@ lookup_protocol:
*/
inet->inet_sport = htons(inet->inet_num);
err = sk->sk_prot->hash(sk);
- if (err) {
- sk_common_release(sk);
- goto out;
- }
+ if (err)
+ goto out_sk_release;
}
if (sk->sk_prot->init) {
err = sk->sk_prot->init(sk);
- if (err) {
- sk_common_release(sk);
- goto out;
- }
+ if (err)
+ goto out_sk_release;
}
if (!kern) {
err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
- if (err) {
- sk_common_release(sk);
- goto out;
- }
+ if (err)
+ goto out_sk_release;
}
out:
return err;
out_rcu_unlock:
rcu_read_unlock();
goto out;
+out_sk_release:
+ sk_common_release(sk);
+ sock->sk = NULL;
+ goto out;
}
static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 0627c4c18d1a..562cace50ca9 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -49,9 +49,10 @@ static DEFINE_SPINLOCK(acaddr_hash_lock);
static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
-static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr)
+static u32 inet6_acaddr_hash(const struct net *net,
+ const struct in6_addr *addr)
{
- u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
+ u32 val = __ipv6_addr_jhash(addr, net_hash_mix(net));
return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
}
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 919ebfabbe4e..7b41fb4f00b5 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -80,9 +80,9 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
if (sp->len == XFRM_MAX_DEPTH)
goto out_reset;
- x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
- (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
- spi, IPPROTO_ESP, AF_INET6);
+ x = xfrm_input_state_lookup(dev_net(skb->dev), skb->mark,
+ (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
+ spi, IPPROTO_ESP, AF_INET6);
if (unlikely(x && x->dir && x->dir != XFRM_SA_DIR_IN)) {
/* non-offload path will record the error and audit log */
diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c
index f87ae33e1d01..949b72610df7 100644
--- a/net/ipv6/fib6_notifier.c
+++ b/net/ipv6/fib6_notifier.c
@@ -22,7 +22,7 @@ int call_fib6_notifiers(struct net *net, enum fib_event_type event_type,
return call_fib_notifiers(net, event_type, info);
}
-static unsigned int fib6_seq_read(struct net *net)
+static unsigned int fib6_seq_read(const struct net *net)
{
return fib6_tables_seq_read(net) + fib6_rules_seq_read(net);
}
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 04a9ed5e8310..c85c1627cb16 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -56,7 +56,7 @@ int fib6_rules_dump(struct net *net, struct notifier_block *nb,
return fib_rules_dump(net, nb, AF_INET6, extack);
}
-unsigned int fib6_rules_seq_read(struct net *net)
+unsigned int fib6_rules_seq_read(const struct net *net)
{
return fib_rules_seq_read(net, AF_INET6);
}
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 534a4498e280..7646e401c630 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -105,16 +105,11 @@ static int parse_nl_config(struct genl_info *info,
xp->ip.locator_match.v64 = (__force __be64)nla_get_u64(
info->attrs[ILA_ATTR_LOCATOR_MATCH]);
- if (info->attrs[ILA_ATTR_CSUM_MODE])
- xp->ip.csum_mode = nla_get_u8(info->attrs[ILA_ATTR_CSUM_MODE]);
- else
- xp->ip.csum_mode = ILA_CSUM_NO_ACTION;
-
- if (info->attrs[ILA_ATTR_IDENT_TYPE])
- xp->ip.ident_type = nla_get_u8(
- info->attrs[ILA_ATTR_IDENT_TYPE]);
- else
- xp->ip.ident_type = ILA_ATYPE_USE_FORMAT;
+ xp->ip.csum_mode = nla_get_u8_default(info->attrs[ILA_ATTR_CSUM_MODE],
+ ILA_CSUM_NO_ACTION);
+
+ xp->ip.ident_type = nla_get_u8_default(info->attrs[ILA_ATTR_IDENT_TYPE],
+ ILA_ATYPE_USE_FORMAT);
if (info->attrs[ILA_ATTR_IFINDEX])
xp->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]);
diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c
index 08c929513065..a84d332f952f 100644
--- a/net/ipv6/ioam6.c
+++ b/net/ipv6/ioam6.c
@@ -135,15 +135,11 @@ static int ioam6_genl_addns(struct sk_buff *skb, struct genl_info *info)
ns->id = id;
- if (!info->attrs[IOAM6_ATTR_NS_DATA])
- data32 = IOAM6_U32_UNAVAILABLE;
- else
- data32 = nla_get_u32(info->attrs[IOAM6_ATTR_NS_DATA]);
-
- if (!info->attrs[IOAM6_ATTR_NS_DATA_WIDE])
- data64 = IOAM6_U64_UNAVAILABLE;
- else
- data64 = nla_get_u64(info->attrs[IOAM6_ATTR_NS_DATA_WIDE]);
+ data32 = nla_get_u32_default(info->attrs[IOAM6_ATTR_NS_DATA],
+ IOAM6_U32_UNAVAILABLE);
+
+ data64 = nla_get_u64_default(info->attrs[IOAM6_ATTR_NS_DATA_WIDE],
+ IOAM6_U64_UNAVAILABLE);
ns->data = cpu_to_be32(data32);
ns->data_wide = cpu_to_be64(data64);
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
index beb6b4cfc551..9d8422e350f8 100644
--- a/net/ipv6/ioam6_iptunnel.c
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -142,10 +142,8 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
}
}
- if (!tb[IOAM6_IPTUNNEL_MODE])
- mode = IOAM6_IPTUNNEL_MODE_INLINE;
- else
- mode = nla_get_u8(tb[IOAM6_IPTUNNEL_MODE]);
+ mode = nla_get_u8_default(tb[IOAM6_IPTUNNEL_MODE],
+ IOAM6_IPTUNNEL_MODE_INLINE);
if (tb[IOAM6_IPTUNNEL_SRC] && mode == IOAM6_IPTUNNEL_MODE_INLINE) {
NL_SET_ERR_MSG(extack, "no tunnel src expected with this mode");
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index eb111d20615c..c134ba202c4c 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -198,16 +198,9 @@ static void node_free_immediate(struct net *net, struct fib6_node *fn)
net->ipv6.rt6_stats->fib_nodes--;
}
-static void node_free_rcu(struct rcu_head *head)
-{
- struct fib6_node *fn = container_of(head, struct fib6_node, rcu);
-
- kmem_cache_free(fib6_node_kmem, fn);
-}
-
static void node_free(struct net *net, struct fib6_node *fn)
{
- call_rcu(&fn->rcu, node_free_rcu);
+ kfree_rcu(fn, rcu);
net->ipv6.rt6_stats->fib_nodes--;
}
@@ -345,17 +338,17 @@ static void __net_init fib6_tables_init(struct net *net)
#endif
-unsigned int fib6_tables_seq_read(struct net *net)
+unsigned int fib6_tables_seq_read(const struct net *net)
{
unsigned int h, fib_seq = 0;
rcu_read_lock();
for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
- struct hlist_head *head = &net->ipv6.fib_table_hash[h];
- struct fib6_table *tb;
+ const struct hlist_head *head = &net->ipv6.fib_table_hash[h];
+ const struct fib6_table *tb;
hlist_for_each_entry_rcu(tb, head, tb6_hlist)
- fib_seq += tb->fib_seq;
+ fib_seq += READ_ONCE(tb->fib_seq);
}
rcu_read_unlock();
@@ -400,7 +393,7 @@ int call_fib6_entry_notifiers(struct net *net,
.rt = rt,
};
- rt->fib6_table->fib_seq++;
+ WRITE_ONCE(rt->fib6_table->fib_seq, rt->fib6_table->fib_seq + 1);
return call_fib6_notifiers(net, event_type, &info.info);
}
@@ -416,7 +409,7 @@ int call_fib6_multipath_entry_notifiers(struct net *net,
.nsiblings = nsiblings,
};
- rt->fib6_table->fib_seq++;
+ WRITE_ONCE(rt->fib6_table->fib_seq, rt->fib6_table->fib_seq + 1);
return call_fib6_notifiers(net, event_type, &info.info);
}
@@ -427,7 +420,7 @@ int call_fib6_entry_notifiers_replace(struct net *net, struct fib6_info *rt)
.nsiblings = rt->fib6_nsiblings,
};
- rt->fib6_table->fib_seq++;
+ WRITE_ONCE(rt->fib6_table->fib_seq, rt->fib6_table->fib_seq + 1);
return call_fib6_notifiers(net, FIB_EVENT_ENTRY_REPLACE, &info.info);
}
@@ -1190,8 +1183,8 @@ next_iter:
while (sibling) {
if (sibling->fib6_metric == rt->fib6_metric &&
rt6_qualify_for_ecmp(sibling)) {
- list_add_tail(&rt->fib6_siblings,
- &sibling->fib6_siblings);
+ list_add_tail_rcu(&rt->fib6_siblings,
+ &sibling->fib6_siblings);
break;
}
sibling = rcu_dereference_protected(sibling->fib6_next,
@@ -1252,7 +1245,7 @@ add:
fib6_siblings)
sibling->fib6_nsiblings--;
rt->fib6_nsiblings = 0;
- list_del_init(&rt->fib6_siblings);
+ list_del_rcu(&rt->fib6_siblings);
rt6_multipath_rebalance(next_sibling);
return err;
}
@@ -1970,7 +1963,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
&rt->fib6_siblings, fib6_siblings)
sibling->fib6_nsiblings--;
rt->fib6_nsiblings = 0;
- list_del_init(&rt->fib6_siblings);
+ list_del_rcu(&rt->fib6_siblings);
rt6_multipath_rebalance(next_sibling);
}
@@ -2500,6 +2493,12 @@ static struct pernet_operations fib6_net_ops = {
.exit = fib6_net_exit,
};
+static const struct rtnl_msg_handler fib6_rtnl_msg_handlers[] __initconst_or_module = {
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETROUTE,
+ .dumpit = inet6_dump_fib,
+ .flags = RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE},
+};
+
int __init fib6_init(void)
{
int ret = -ENOMEM;
@@ -2513,9 +2512,7 @@ int __init fib6_init(void)
if (ret)
goto out_kmem_cache_create;
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, NULL,
- inet6_dump_fib, RTNL_FLAG_DUMP_UNLOCKED |
- RTNL_FLAG_DUMP_SPLIT_NLM_DONE);
+ ret = rtnl_register_many(fib6_rtnl_msg_handlers);
if (ret)
goto out_unregister_subsys;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index f26841f1490f..f7b4608bb316 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -127,7 +127,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
nexthop = rt6_nexthop(dst_rt6_info(dst), daddr);
neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
- if (unlikely(IS_ERR_OR_NULL(neigh))) {
+ if (IS_ERR_OR_NULL(neigh)) {
if (unlikely(!neigh))
neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
if (IS_ERR(neigh)) {
@@ -1401,8 +1401,11 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
cork->base.gso_size = ipc6->gso_size;
cork->base.tx_flags = 0;
cork->base.mark = ipc6->sockc.mark;
- sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
-
+ sock_tx_timestamp(sk, &ipc6->sockc, &cork->base.tx_flags);
+ if (ipc6->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) {
+ cork->base.flags |= IPCORK_TS_OPT_ID;
+ cork->base.ts_opt_id = ipc6->sockc.ts_opt_id;
+ }
cork->base.length = 0;
cork->base.transmit_time = ipc6->sockc.transmit_time;
@@ -1433,7 +1436,7 @@ static int __ip6_append_data(struct sock *sk,
bool zc = false;
u32 tskey = 0;
struct rt6_info *rt = dst_rt6_info(cork->dst);
- bool paged, hold_tskey, extra_uref = false;
+ bool paged, hold_tskey = false, extra_uref = false;
struct ipv6_txoptions *opt = v6_cork->opt;
int csummode = CHECKSUM_NONE;
unsigned int maxnonfragsize, headersize;
@@ -1543,10 +1546,15 @@ emsgsize:
flags &= ~MSG_SPLICE_PAGES;
}
- hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP &&
- READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID;
- if (hold_tskey)
- tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+ if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
+ READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
+ if (cork->flags & IPCORK_TS_OPT_ID) {
+ tskey = cork->ts_opt_id;
+ } else {
+ tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+ hold_tskey = true;
+ }
+ }
/*
* Let's try using as much space as possible.
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index b60e13c42bca..48fd53b98972 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -630,8 +630,8 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
skb_dst_set(skb2, &rt->dst);
} else {
- if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
- skb2->dev) ||
+ if (ip_route_input(skb2, eiph->daddr, eiph->saddr,
+ ip4h_dscp(eiph), skb2->dev) ||
skb_dst(skb2)->dev->type != ARPHRD_TUNNEL6)
goto out;
}
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 2ce4ae0d8dc3..7f1902ac3586 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -108,6 +108,11 @@ static void ipmr_expire_process(struct timer_list *t);
lockdep_rtnl_is_held() || \
list_empty(&net->ipv6.mr6_tables))
+static bool ip6mr_can_free_table(struct net *net)
+{
+ return !check_net(net) || !net->ipv6.mr6_rules_ops;
+}
+
static struct mr_table *ip6mr_mr_table_iter(struct net *net,
struct mr_table *mrt)
{
@@ -125,7 +130,7 @@ static struct mr_table *ip6mr_mr_table_iter(struct net *net,
return ret;
}
-static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
+static struct mr_table *__ip6mr_get_table(struct net *net, u32 id)
{
struct mr_table *mrt;
@@ -136,6 +141,16 @@ static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
return NULL;
}
+static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
+{
+ struct mr_table *mrt;
+
+ rcu_read_lock();
+ mrt = __ip6mr_get_table(net, id);
+ rcu_read_unlock();
+ return mrt;
+}
+
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
struct mr_table **mrt)
{
@@ -177,7 +192,7 @@ static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
arg->table = fib_rule_get_table(rule, arg);
- mrt = ip6mr_get_table(rule->fr_net, arg->table);
+ mrt = __ip6mr_get_table(rule->fr_net, arg->table);
if (!mrt)
return -EAGAIN;
res->mrt = mrt;
@@ -276,7 +291,7 @@ static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack);
}
-static unsigned int ip6mr_rules_seq_read(struct net *net)
+static unsigned int ip6mr_rules_seq_read(const struct net *net)
{
return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
}
@@ -291,6 +306,11 @@ EXPORT_SYMBOL(ip6mr_rule_default);
#define ip6mr_for_each_table(mrt, net) \
for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
+static bool ip6mr_can_free_table(struct net *net)
+{
+ return !check_net(net);
+}
+
static struct mr_table *ip6mr_mr_table_iter(struct net *net,
struct mr_table *mrt)
{
@@ -304,6 +324,8 @@ static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
return net->ipv6.mrt6;
}
+#define __ip6mr_get_table ip6mr_get_table
+
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
struct mr_table **mrt)
{
@@ -335,7 +357,7 @@ static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
return 0;
}
-static unsigned int ip6mr_rules_seq_read(struct net *net)
+static unsigned int ip6mr_rules_seq_read(const struct net *net)
{
return 0;
}
@@ -382,7 +404,7 @@ static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
{
struct mr_table *mrt;
- mrt = ip6mr_get_table(net, id);
+ mrt = __ip6mr_get_table(net, id);
if (mrt)
return mrt;
@@ -392,6 +414,10 @@ static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
static void ip6mr_free_table(struct mr_table *mrt)
{
+ struct net *net = read_pnet(&mrt->net);
+
+ WARN_ON_ONCE(!ip6mr_can_free_table(net));
+
timer_shutdown_sync(&mrt->ipmr_expire_timer);
mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC |
MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC);
@@ -411,13 +437,15 @@ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
struct net *net = seq_file_net(seq);
struct mr_table *mrt;
- mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
- if (!mrt)
+ rcu_read_lock();
+ mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT);
+ if (!mrt) {
+ rcu_read_unlock();
return ERR_PTR(-ENOENT);
+ }
iter->mrt = mrt;
- rcu_read_lock();
return mr_vif_seq_start(seq, pos);
}
@@ -1260,11 +1288,9 @@ static int ip6mr_device_event(struct notifier_block *this,
return NOTIFY_DONE;
}
-static unsigned int ip6mr_seq_read(struct net *net)
+static unsigned int ip6mr_seq_read(const struct net *net)
{
- ASSERT_RTNL();
-
- return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
+ return READ_ONCE(net->ipv6.ipmr_seq) + ip6mr_rules_seq_read(net);
}
static int ip6mr_dump(struct net *net, struct notifier_block *nb,
@@ -1369,6 +1395,12 @@ static struct pernet_operations ip6mr_net_ops = {
.exit_batch = ip6mr_net_exit_batch,
};
+static const struct rtnl_msg_handler ip6mr_rtnl_msg_handlers[] __initconst_or_module = {
+ {.owner = THIS_MODULE, .protocol = RTNL_FAMILY_IP6MR,
+ .msgtype = RTM_GETROUTE,
+ .doit = ip6mr_rtm_getroute, .dumpit = ip6mr_rtm_dumproute},
+};
+
int __init ip6_mr_init(void)
{
int err;
@@ -1391,9 +1423,8 @@ int __init ip6_mr_init(void)
goto add_proto_fail;
}
#endif
- err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
- ip6mr_rtm_getroute, ip6mr_rtm_dumproute, 0);
- if (err == 0)
+ err = rtnl_register_many(ip6mr_rtnl_msg_handlers);
+ if (!err)
return 0;
#ifdef CONFIG_IPV6_PIMSM_V2
@@ -1408,9 +1439,9 @@ reg_pernet_fail:
return err;
}
-void ip6_mr_cleanup(void)
+void __init ip6_mr_cleanup(void)
{
- rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
+ rtnl_unregister_many(ip6mr_rtnl_msg_handlers);
#ifdef CONFIG_IPV6_PIMSM_V2
inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
@@ -2275,11 +2306,13 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
struct mfc6_cache *cache;
struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
- mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
- if (!mrt)
+ rcu_read_lock();
+ mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT);
+ if (!mrt) {
+ rcu_read_unlock();
return -ENOENT;
+ }
- rcu_read_lock();
cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
if (!cache && skb->dev) {
int vif = ip6mr_find_vif(mrt, skb->dev);
@@ -2557,9 +2590,9 @@ static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
src = nla_get_in6_addr(tb[RTA_SRC]);
if (tb[RTA_DST])
grp = nla_get_in6_addr(tb[RTA_DST]);
- tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0;
+ tableid = nla_get_u32_default(tb[RTA_TABLE], 0);
- mrt = ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT);
+ mrt = __ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT);
if (!mrt) {
NL_SET_ERR_MSG_MOD(extack, "MR table does not exist");
return -ENOENT;
@@ -2606,7 +2639,7 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
if (filter.table_id) {
struct mr_table *mrt;
- mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id);
+ mrt = __ip6mr_get_table(sock_net(skb->sk), filter.table_id);
if (!mrt) {
if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR)
return skb->len;
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index f3c8e2d918e1..e087a8e97ba7 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -8,7 +8,14 @@ menu "IPv6: Netfilter Configuration"
# old sockopt interface and eval loop
config IP6_NF_IPTABLES_LEGACY
- tristate
+ tristate "Legacy IP6 tables support"
+ depends on INET && IPV6
+ select NETFILTER_XTABLES
+ default n
+ help
+ ip6tables is a legacy packet classifier.
+ This is not needed if you are using iptables over nftables
+ (iptables-nft).
config NF_SOCKET_IPV6
tristate "IPv6 socket lookup support"
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 608fa9d05b55..8476a3944a88 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -629,7 +629,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
skb->ip_summed = CHECKSUM_NONE;
- skb_setup_tx_timestamp(skb, sockc->tsflags);
+ skb_setup_tx_timestamp(skb, sockc);
if (flags & MSG_CONFIRM)
skb_set_dst_pending_confirm(skb, 1);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index b4251915585f..63d7681c929f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -374,6 +374,7 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
struct rt6_info *rt = dst_rt6_info(dst);
struct inet6_dev *idev = rt->rt6i_idev;
+ struct fib6_info *from;
if (idev && idev->dev != blackhole_netdev) {
struct inet6_dev *blackhole_idev = in6_dev_get(blackhole_netdev);
@@ -383,6 +384,8 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
in6_dev_put(idev);
}
}
+ from = unrcu_pointer(xchg(&rt->from, NULL));
+ fib6_info_release(from);
}
static bool __rt6_check_expired(const struct rt6_info *rt)
@@ -413,8 +416,8 @@ void fib6_select_path(const struct net *net, struct fib6_result *res,
struct flowi6 *fl6, int oif, bool have_oif_match,
const struct sk_buff *skb, int strict)
{
- struct fib6_info *sibling, *next_sibling;
struct fib6_info *match = res->f6i;
+ struct fib6_info *sibling;
if (!match->nh && (!match->fib6_nsiblings || have_oif_match))
goto out;
@@ -440,8 +443,8 @@ void fib6_select_path(const struct net *net, struct fib6_result *res,
if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound))
goto out;
- list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
- fib6_siblings) {
+ list_for_each_entry_rcu(sibling, &match->fib6_siblings,
+ fib6_siblings) {
const struct fib6_nh *nh = sibling->fib6_nh;
int nh_upper_bound;
@@ -1455,7 +1458,6 @@ static DEFINE_SPINLOCK(rt6_exception_lock);
static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
struct rt6_exception *rt6_ex)
{
- struct fib6_info *from;
struct net *net;
if (!bucket || !rt6_ex)
@@ -1467,8 +1469,6 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
/* purge completely the exception to allow releasing the held resources:
* some [sk] cache may keep the dst around for unlimited time
*/
- from = unrcu_pointer(xchg(&rt6_ex->rt6i->from, NULL));
- fib6_info_release(from);
dst_dev_put(&rt6_ex->rt6i->dst);
hlist_del_rcu(&rt6_ex->hlist);
@@ -5195,14 +5195,18 @@ static void ip6_route_mpath_notify(struct fib6_info *rt,
* nexthop. Since sibling routes are always added at the end of
* the list, find the first sibling of the last route appended
*/
+ rcu_read_lock();
+
if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
- rt = list_first_entry(&rt_last->fib6_siblings,
- struct fib6_info,
- fib6_siblings);
+ rt = list_first_or_null_rcu(&rt_last->fib6_siblings,
+ struct fib6_info,
+ fib6_siblings);
}
if (rt)
inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
+
+ rcu_read_unlock();
}
static bool ip6_route_mpath_should_notify(const struct fib6_info *rt)
@@ -5547,17 +5551,21 @@ static size_t rt6_nlmsg_size(struct fib6_info *f6i)
nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size,
&nexthop_len);
} else {
- struct fib6_info *sibling, *next_sibling;
struct fib6_nh *nh = f6i->fib6_nh;
+ struct fib6_info *sibling;
nexthop_len = 0;
if (f6i->fib6_nsiblings) {
rt6_nh_nlmsg_size(nh, &nexthop_len);
- list_for_each_entry_safe(sibling, next_sibling,
- &f6i->fib6_siblings, fib6_siblings) {
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(sibling, &f6i->fib6_siblings,
+ fib6_siblings) {
rt6_nh_nlmsg_size(sibling->fib6_nh, &nexthop_len);
}
+
+ rcu_read_unlock();
}
nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
}
@@ -5721,7 +5729,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
lwtunnel_fill_encap(skb, dst->lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0)
goto nla_put_failure;
} else if (rt->fib6_nsiblings) {
- struct fib6_info *sibling, *next_sibling;
+ struct fib6_info *sibling;
struct nlattr *mp;
mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
@@ -5733,14 +5741,21 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
0) < 0)
goto nla_put_failure;
- list_for_each_entry_safe(sibling, next_sibling,
- &rt->fib6_siblings, fib6_siblings) {
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(sibling, &rt->fib6_siblings,
+ fib6_siblings) {
if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common,
sibling->fib6_nh->fib_nh_weight,
- AF_INET6, 0) < 0)
+ AF_INET6, 0) < 0) {
+ rcu_read_unlock();
+
goto nla_put_failure;
+ }
}
+ rcu_read_unlock();
+
nla_nest_end(skb, mp);
} else if (rt->nh) {
if (nla_put_u32(skb, RTA_NH_ID, rt->nh->id))
@@ -6177,7 +6192,7 @@ void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
err = -ENOBUFS;
seq = info->nlh ? info->nlh->nlmsg_seq : 0;
- skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
+ skb = nlmsg_new(rt6_nlmsg_size(rt), GFP_ATOMIC);
if (!skb)
goto errout;
@@ -6190,7 +6205,7 @@ void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
goto errout;
}
rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
- info->nlh, gfp_any());
+ info->nlh, GFP_ATOMIC);
return;
errout:
rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
@@ -6680,6 +6695,15 @@ static void bpf_iter_unregister(void)
#endif
#endif
+static const struct rtnl_msg_handler ip6_route_rtnl_msg_handlers[] __initconst_or_module = {
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWROUTE,
+ .doit = inet6_rtm_newroute},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_DELROUTE,
+ .doit = inet6_rtm_delroute},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETROUTE,
+ .doit = inet6_rtm_getroute, .flags = RTNL_FLAG_DOIT_UNLOCKED},
+};
+
int __init ip6_route_init(void)
{
int ret;
@@ -6722,19 +6746,7 @@ int __init ip6_route_init(void)
if (ret)
goto fib6_rules_init;
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
- inet6_rtm_newroute, NULL, 0);
- if (ret < 0)
- goto out_register_late_subsys;
-
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
- inet6_rtm_delroute, NULL, 0);
- if (ret < 0)
- goto out_register_late_subsys;
-
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
- inet6_rtm_getroute, NULL,
- RTNL_FLAG_DOIT_UNLOCKED);
+ ret = rtnl_register_many(ip6_route_rtnl_msg_handlers);
if (ret < 0)
goto out_register_late_subsys;
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index c74705ead984..ac1dbd492c22 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -954,10 +954,10 @@ static int input_action_end_dx4_finish(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
+ enum skb_drop_reason reason;
struct seg6_local_lwt *slwt;
struct iphdr *iph;
__be32 nhaddr;
- int err;
slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
@@ -967,9 +967,9 @@ static int input_action_end_dx4_finish(struct net *net, struct sock *sk,
skb_dst_drop(skb);
- err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
- if (err) {
- kfree_skb(skb);
+ reason = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
+ if (reason) {
+ kfree_skb_reason(skb, reason);
return -EINVAL;
}
@@ -1174,8 +1174,8 @@ drop:
static int input_action_end_dt4(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
+ enum skb_drop_reason reason;
struct iphdr *iph;
- int err;
if (!decap_and_validate(skb, IPPROTO_IPIP))
goto drop;
@@ -1193,8 +1193,8 @@ static int input_action_end_dt4(struct sk_buff *skb,
iph = ip_hdr(skb);
- err = ip_route_input(skb, iph->daddr, iph->saddr, 0, skb->dev);
- if (unlikely(err))
+ reason = ip_route_input(skb, iph->daddr, iph->saddr, 0, skb->dev);
+ if (unlikely(reason))
goto drop;
return dst_input(skb);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index c9de5ef8f267..2debdf085a3b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -967,6 +967,9 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
}
if (sk) {
+ /* unconstify the socket only to attach it to buff with care. */
+ skb_set_owner_edemux(buff, (struct sock *)sk);
+
if (sk->sk_state == TCP_TIME_WAIT)
mark = inet_twsk(sk)->tw_mark;
else
@@ -1169,8 +1172,8 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
goto out;
if (aoh)
- key.ao_key = tcp_ao_established_key(ao_info,
- aoh->rnext_keyid, -1);
+ key.ao_key = tcp_ao_established_key(sk, ao_info,
+ aoh->rnext_keyid, -1);
}
}
if (key.ao_key) {
@@ -2175,6 +2178,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
const struct tcp_sock *tp = tcp_sk(sp);
const struct inet_connection_sock *icsk = inet_csk(sp);
const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
+ u8 icsk_pending;
int rx_queue;
int state;
@@ -2183,12 +2187,13 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
destp = ntohs(inet->inet_dport);
srcp = ntohs(inet->inet_sport);
- if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
- icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
- icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
+ icsk_pending = smp_load_acquire(&icsk->icsk_pending);
+ if (icsk_pending == ICSK_TIME_RETRANS ||
+ icsk_pending == ICSK_TIME_REO_TIMEOUT ||
+ icsk_pending == ICSK_TIME_LOSS_PROBE) {
timer_active = 1;
timer_expires = icsk->icsk_timeout;
- } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
+ } else if (icsk_pending == ICSK_TIME_PROBE0) {
timer_active = 4;
timer_expires = icsk->icsk_timeout;
} else if (timer_pending(&sp->sk_timer)) {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 0cef8ae5d1ea..d766fd798ecf 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -110,8 +110,19 @@ void udp_v6_rehash(struct sock *sk)
u16 new_hash = ipv6_portaddr_hash(sock_net(sk),
&sk->sk_v6_rcv_saddr,
inet_sk(sk)->inet_num);
+ u16 new_hash4;
- udp_lib_rehash(sk, new_hash);
+ if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) {
+ new_hash4 = udp_ehashfn(sock_net(sk),
+ sk->sk_rcv_saddr, sk->sk_num,
+ sk->sk_daddr, sk->sk_dport);
+ } else {
+ new_hash4 = udp6_ehashfn(sock_net(sk),
+ &sk->sk_v6_rcv_saddr, sk->sk_num,
+ &sk->sk_v6_daddr, sk->sk_dport);
+ }
+
+ udp_lib_rehash(sk, new_hash, new_hash4);
}
static int compute_score(struct sock *sk, const struct net *net,
@@ -216,6 +227,74 @@ rescore:
return result;
}
+#if IS_ENABLED(CONFIG_BASE_SMALL)
+static struct sock *udp6_lib_lookup4(const struct net *net,
+ const struct in6_addr *saddr, __be16 sport,
+ const struct in6_addr *daddr,
+ unsigned int hnum, int dif, int sdif,
+ struct udp_table *udptable)
+{
+ return NULL;
+}
+
+static void udp6_hash4(struct sock *sk)
+{
+}
+#else /* !CONFIG_BASE_SMALL */
+static struct sock *udp6_lib_lookup4(const struct net *net,
+ const struct in6_addr *saddr, __be16 sport,
+ const struct in6_addr *daddr,
+ unsigned int hnum, int dif, int sdif,
+ struct udp_table *udptable)
+{
+ const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
+ const struct hlist_nulls_node *node;
+ struct udp_hslot *hslot4;
+ unsigned int hash4, slot;
+ struct udp_sock *up;
+ struct sock *sk;
+
+ hash4 = udp6_ehashfn(net, daddr, hnum, saddr, sport);
+ slot = hash4 & udptable->mask;
+ hslot4 = &udptable->hash4[slot];
+
+begin:
+ udp_lrpa_for_each_entry_rcu(up, node, &hslot4->nulls_head) {
+ sk = (struct sock *)up;
+ if (inet6_match(net, sk, saddr, daddr, ports, dif, sdif))
+ return sk;
+ }
+
+ /* if the nulls value we got at the end of this lookup is not the
+ * expected one, we must restart lookup. We probably met an item that
+ * was moved to another chain due to rehash.
+ */
+ if (get_nulls_value(node) != slot)
+ goto begin;
+
+ return NULL;
+}
+
+static void udp6_hash4(struct sock *sk)
+{
+ struct net *net = sock_net(sk);
+ unsigned int hash;
+
+ if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) {
+ udp4_hash4(sk);
+ return;
+ }
+
+ if (sk_unhashed(sk) || ipv6_addr_any(&sk->sk_v6_rcv_saddr))
+ return;
+
+ hash = udp6_ehashfn(net, &sk->sk_v6_rcv_saddr, sk->sk_num,
+ &sk->sk_v6_daddr, sk->sk_dport);
+
+ udp_lib_hash4(sk, hash);
+}
+#endif /* CONFIG_BASE_SMALL */
+
/* rcu_read_lock() must be held */
struct sock *__udp6_lib_lookup(const struct net *net,
const struct in6_addr *saddr, __be16 sport,
@@ -224,13 +303,19 @@ struct sock *__udp6_lib_lookup(const struct net *net,
struct sk_buff *skb)
{
unsigned short hnum = ntohs(dport);
- unsigned int hash2, slot2;
struct udp_hslot *hslot2;
struct sock *result, *sk;
+ unsigned int hash2;
hash2 = ipv6_portaddr_hash(net, daddr, hnum);
- slot2 = hash2 & udptable->mask;
- hslot2 = &udptable->hash2[slot2];
+ hslot2 = udp_hashslot2(udptable, hash2);
+
+ if (udp_has_hash4(hslot2)) {
+ result = udp6_lib_lookup4(net, saddr, sport, daddr, hnum,
+ dif, sdif, udptable);
+ if (result) /* udp6_lib_lookup4 return sk or NULL */
+ return result;
+ }
/* Lookup connected or non-wildcard sockets */
result = udp6_lib_lookup2(net, saddr, sport,
@@ -257,8 +342,7 @@ struct sock *__udp6_lib_lookup(const struct net *net,
/* Lookup wildcard sockets */
hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
- slot2 = hash2 & udptable->mask;
- hslot2 = &udptable->hash2[slot2];
+ hslot2 = udp_hashslot2(udptable, hash2);
result = udp6_lib_lookup2(net, saddr, sport,
&in6addr_any, hnum, dif, sdif,
@@ -859,7 +943,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
udptable->mask;
hash2 = ipv6_portaddr_hash(net, daddr, hnum) & udptable->mask;
start_lookup:
- hslot = &udptable->hash2[hash2];
+ hslot = &udptable->hash2[hash2].hslot;
offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
}
@@ -1065,14 +1149,13 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
{
struct udp_table *udptable = net->ipv4.udp_table;
unsigned short hnum = ntohs(loc_port);
- unsigned int hash2, slot2;
struct udp_hslot *hslot2;
+ unsigned int hash2;
__portpair ports;
struct sock *sk;
hash2 = ipv6_portaddr_hash(net, loc_addr, hnum);
- slot2 = hash2 & udptable->mask;
- hslot2 = &udptable->hash2[slot2];
+ hslot2 = udp_hashslot2(udptable, hash2);
ports = INET_COMBINED_PORTS(rmt_port, hnum);
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
@@ -1169,6 +1252,18 @@ static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len);
}
+static int udpv6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+ int res;
+
+ lock_sock(sk);
+ res = __ip6_datagram_connect(sk, uaddr, addr_len);
+ if (!res)
+ udp6_hash4(sk);
+ release_sock(sk);
+ return res;
+}
+
/**
* udp6_hwcsum_outgoing - handle outgoing HW checksumming
* @sk: socket we are sending on
@@ -1764,7 +1859,7 @@ struct proto udpv6_prot = {
.owner = THIS_MODULE,
.close = udp_lib_close,
.pre_connect = udpv6_pre_connect,
- .connect = ip6_datagram_connect,
+ .connect = udpv6_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
.init = udpv6_init_sock,
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index c00323fa9eb6..7929df08d4e0 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1236,7 +1236,9 @@ static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg,
return -EOPNOTSUPP;
/* receive/dequeue next skb:
- * the function understands MSG_PEEK and, thus, does not dequeue skb */
+ * the function understands MSG_PEEK and, thus, does not dequeue skb
+ * only refcount is increased.
+ */
skb = skb_recv_datagram(sk, flags, &err);
if (!skb) {
if (sk->sk_shutdown & RCV_SHUTDOWN)
@@ -1252,9 +1254,8 @@ static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg,
cskb = skb;
if (skb_copy_datagram_msg(cskb, offset, msg, copied)) {
- if (!(flags & MSG_PEEK))
- skb_queue_head(&sk->sk_receive_queue, skb);
- return -EFAULT;
+ err = -EFAULT;
+ goto err_out;
}
/* SOCK_SEQPACKET: set MSG_TRUNC if recv buf size is too small */
@@ -1271,11 +1272,8 @@ static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg,
err = put_cmsg(msg, SOL_IUCV, SCM_IUCV_TRGCLS,
sizeof(IUCV_SKB_CB(skb)->class),
(void *)&IUCV_SKB_CB(skb)->class);
- if (err) {
- if (!(flags & MSG_PEEK))
- skb_queue_head(&sk->sk_receive_queue, skb);
- return err;
- }
+ if (err)
+ goto err_out;
/* Mark read part of skb as used */
if (!(flags & MSG_PEEK)) {
@@ -1331,8 +1329,18 @@ done:
/* SOCK_SEQPACKET: return real length if MSG_TRUNC is set */
if (sk->sk_type == SOCK_SEQPACKET && (flags & MSG_TRUNC))
copied = rlen;
+ if (flags & MSG_PEEK)
+ skb_unref(skb);
return copied;
+
+err_out:
+ if (!(flags & MSG_PEEK))
+ skb_queue_head(&sk->sk_receive_queue, skb);
+ else
+ skb_unref(skb);
+
+ return err;
}
static inline __poll_t iucv_accept_poll(struct sock *parent)
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index d4118c796290..24aec295a51c 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1584,14 +1584,6 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
return err;
}
-static void free_mux(struct rcu_head *rcu)
-{
- struct kcm_mux *mux = container_of(rcu,
- struct kcm_mux, rcu);
-
- kmem_cache_free(kcm_muxp, mux);
-}
-
static void release_mux(struct kcm_mux *mux)
{
struct kcm_net *knet = mux->knet;
@@ -1619,7 +1611,7 @@ static void release_mux(struct kcm_mux *mux)
knet->count--;
mutex_unlock(&knet->mutex);
- call_rcu(&mux->rcu, free_mux);
+ kfree_rcu(mux, rcu);
}
static void kcm_done(struct kcm_sock *kcm)
diff --git a/net/key/af_key.c b/net/key/af_key.c
index f79fb99271ed..c56bb4f451e6 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1354,7 +1354,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_
}
if (hdr->sadb_msg_seq) {
- x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq);
+ x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq, UINT_MAX);
if (x && !xfrm_addr_equal(&x->id.daddr, xdaddr, family)) {
xfrm_state_put(x);
x = NULL;
@@ -1362,7 +1362,8 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_
}
if (!x)
- x = xfrm_find_acq(net, &dummy_mark, mode, reqid, 0, proto, xdaddr, xsaddr, 1, family);
+ x = xfrm_find_acq(net, &dummy_mark, mode, reqid, 0, UINT_MAX,
+ proto, xdaddr, xsaddr, 1, family);
if (x == NULL)
return -ENOENT;
@@ -1417,7 +1418,7 @@ static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, const struct sadb
if (hdr->sadb_msg_seq == 0 || hdr->sadb_msg_errno == 0)
return 0;
- x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq);
+ x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq, UINT_MAX);
if (x == NULL)
return 0;
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 3eec23ac5ab1..369a2f2e459c 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1870,15 +1870,31 @@ static __net_exit void l2tp_pre_exit_net(struct net *net)
}
}
+static int l2tp_idr_item_unexpected(int id, void *p, void *data)
+{
+ const char *idr_name = data;
+
+ pr_err("l2tp: %s IDR not empty at net %d exit\n", idr_name, id);
+ WARN_ON_ONCE(1);
+ return 1;
+}
+
static __net_exit void l2tp_exit_net(struct net *net)
{
struct l2tp_net *pn = l2tp_pernet(net);
- WARN_ON_ONCE(!idr_is_empty(&pn->l2tp_v2_session_idr));
+ /* Our per-net IDRs should be empty. Check that is so, to
+ * help catch cleanup races or refcnt leaks.
+ */
+ idr_for_each(&pn->l2tp_v2_session_idr, l2tp_idr_item_unexpected,
+ "v2_session");
+ idr_for_each(&pn->l2tp_v3_session_idr, l2tp_idr_item_unexpected,
+ "v3_session");
+ idr_for_each(&pn->l2tp_tunnel_idr, l2tp_idr_item_unexpected,
+ "tunnel");
+
idr_destroy(&pn->l2tp_v2_session_idr);
- WARN_ON_ONCE(!idr_is_empty(&pn->l2tp_v3_session_idr));
idr_destroy(&pn->l2tp_v3_session_idr);
- WARN_ON_ONCE(!idr_is_empty(&pn->l2tp_tunnel_idr));
idr_destroy(&pn->l2tp_tunnel_idr);
}
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 4eb52add7103..0259cde394ba 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -1098,7 +1098,7 @@ static int llc_ui_setsockopt(struct socket *sock, int level, int optname,
lock_sock(sk);
if (unlikely(level != SOL_LLC || optlen != sizeof(int)))
goto out;
- rc = copy_from_sockptr(&opt, optval, sizeof(opt));
+ rc = copy_safe_from_sockptr(&opt, sizeof(opt), optval, optlen);
if (rc)
goto out;
rc = -EINVAL;
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index fe7eab4b681b..f3fbe5a4395e 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -170,28 +170,63 @@ static void sta_rx_agg_reorder_timer_expired(struct timer_list *t)
rcu_read_unlock();
}
-static void ieee80211_add_addbaext(struct ieee80211_sub_if_data *sdata,
- struct sk_buff *skb,
- const struct ieee80211_addba_ext_ie *req,
- u16 buf_size)
+void ieee80211_add_addbaext(struct sk_buff *skb,
+ const u8 req_addba_ext_data,
+ u16 buf_size)
{
- struct ieee80211_addba_ext_ie *resp;
+ struct ieee80211_addba_ext_ie *addba_ext;
u8 *pos;
pos = skb_put_zero(skb, 2 + sizeof(struct ieee80211_addba_ext_ie));
*pos++ = WLAN_EID_ADDBA_EXT;
*pos++ = sizeof(struct ieee80211_addba_ext_ie);
- resp = (struct ieee80211_addba_ext_ie *)pos;
- resp->data = req->data & IEEE80211_ADDBA_EXT_NO_FRAG;
+ addba_ext = (struct ieee80211_addba_ext_ie *)pos;
- resp->data |= u8_encode_bits(buf_size >> IEEE80211_ADDBA_EXT_BUF_SIZE_SHIFT,
- IEEE80211_ADDBA_EXT_BUF_SIZE_MASK);
+ addba_ext->data = IEEE80211_ADDBA_EXT_NO_FRAG;
+ if (req_addba_ext_data)
+ addba_ext->data &= req_addba_ext_data;
+
+ addba_ext->data |=
+ u8_encode_bits(buf_size >> IEEE80211_ADDBA_EXT_BUF_SIZE_SHIFT,
+ IEEE80211_ADDBA_EXT_BUF_SIZE_MASK);
+}
+
+u8 ieee80211_retrieve_addba_ext_data(struct sta_info *sta,
+ const void *elem_data, ssize_t elem_len,
+ u16 *buf_size)
+{
+ struct ieee802_11_elems *elems;
+ u8 buf_size_1k, data = 0;
+
+ if (!sta->sta.deflink.he_cap.has_he)
+ return 0;
+
+ if (elem_len <= 0)
+ return 0;
+
+ elems = ieee802_11_parse_elems(elem_data, elem_len, true, NULL);
+
+ if (elems && !elems->parse_error && elems->addba_ext_ie) {
+ data = elems->addba_ext_ie->data;
+
+ if (!sta->sta.deflink.eht_cap.has_eht || !buf_size)
+ goto free;
+
+ buf_size_1k = u8_get_bits(elems->addba_ext_ie->data,
+ IEEE80211_ADDBA_EXT_BUF_SIZE_MASK);
+ *buf_size |= (u16)buf_size_1k <<
+ IEEE80211_ADDBA_EXT_BUF_SIZE_SHIFT;
+ }
+free:
+ kfree(elems);
+
+ return data;
}
static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid,
u8 dialog_token, u16 status, u16 policy,
u16 buf_size, u16 timeout,
- const struct ieee80211_addba_ext_ie *addbaext)
+ const u8 req_addba_ext_data)
{
struct ieee80211_sub_if_data *sdata = sta->sdata;
struct ieee80211_local *local = sdata->local;
@@ -223,8 +258,8 @@ static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid,
mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout);
mgmt->u.action.u.addba_resp.status = cpu_to_le16(status);
- if (sta->sta.deflink.he_cap.has_he && addbaext)
- ieee80211_add_addbaext(sdata, skb, addbaext, buf_size);
+ if (sta->sta.deflink.he_cap.has_he)
+ ieee80211_add_addbaext(skb, req_addba_ext_data, buf_size);
ieee80211_tx_skb(sdata, skb);
}
@@ -233,7 +268,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
u8 dialog_token, u16 timeout,
u16 start_seq_num, u16 ba_policy, u16 tid,
u16 buf_size, bool tx, bool auto_seq,
- const struct ieee80211_addba_ext_ie *addbaext)
+ const u8 addba_ext_data)
{
struct ieee80211_local *local = sta->sdata->local;
struct tid_ampdu_rx *tid_agg_rx;
@@ -419,7 +454,7 @@ end:
if (tx)
ieee80211_send_addba_resp(sta, sta->sta.addr, tid,
dialog_token, status, 1, buf_size,
- timeout, addbaext);
+ timeout, addba_ext_data);
}
void ieee80211_process_addba_request(struct ieee80211_local *local,
@@ -428,9 +463,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
size_t len)
{
u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num;
- struct ieee802_11_elems *elems = NULL;
- u8 dialog_token;
- int ies_len;
+ u8 dialog_token, addba_ext_data;
/* extract session parameters from addba request frame */
dialog_token = mgmt->u.action.u.addba_req.dialog_token;
@@ -443,28 +476,17 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
- ies_len = len - offsetof(struct ieee80211_mgmt,
- u.action.u.addba_req.variable);
- if (ies_len) {
- elems = ieee802_11_parse_elems(mgmt->u.action.u.addba_req.variable,
- ies_len, true, NULL);
- if (!elems || elems->parse_error)
- goto free;
- }
-
- if (sta->sta.deflink.eht_cap.has_eht && elems && elems->addba_ext_ie) {
- u8 buf_size_1k = u8_get_bits(elems->addba_ext_ie->data,
- IEEE80211_ADDBA_EXT_BUF_SIZE_MASK);
-
- buf_size |= buf_size_1k << IEEE80211_ADDBA_EXT_BUF_SIZE_SHIFT;
- }
+ addba_ext_data =
+ ieee80211_retrieve_addba_ext_data(sta,
+ mgmt->u.action.u.addba_req.variable,
+ len -
+ offsetof(typeof(*mgmt),
+ u.action.u.addba_req.variable),
+ &buf_size);
__ieee80211_start_rx_ba_session(sta, dialog_token, timeout,
start_seq_num, ba_policy, tid,
- buf_size, true, false,
- elems ? elems->addba_ext_ie : NULL);
-free:
- kfree(elems);
+ buf_size, true, false, addba_ext_data);
}
void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif,
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 1c18b862ef8c..61f2cac37728 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -58,23 +58,24 @@
* complete.
*/
-static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata,
- const u8 *da, u16 tid,
+static void ieee80211_send_addba_request(struct sta_info *sta, u16 tid,
u8 dialog_token, u16 start_seq_num,
u16 agg_size, u16 timeout)
{
+ struct ieee80211_sub_if_data *sdata = sta->sdata;
struct ieee80211_local *local = sdata->local;
struct sk_buff *skb;
struct ieee80211_mgmt *mgmt;
u16 capab;
- skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom);
-
+ skb = dev_alloc_skb(sizeof(*mgmt) +
+ 2 + sizeof(struct ieee80211_addba_ext_ie) +
+ local->hw.extra_tx_headroom);
if (!skb)
return;
skb_reserve(skb, local->hw.extra_tx_headroom);
- mgmt = ieee80211_mgmt_ba(skb, da, sdata);
+ mgmt = ieee80211_mgmt_ba(skb, sta->sta.addr, sdata);
skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_req));
@@ -93,6 +94,9 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata,
mgmt->u.action.u.addba_req.start_seq_num =
cpu_to_le16(start_seq_num << 4);
+ if (sta->sta.deflink.he_cap.has_he)
+ ieee80211_add_addbaext(skb, 0, agg_size);
+
ieee80211_tx_skb_tid(sdata, skb, tid, -1);
}
@@ -460,8 +464,11 @@ static void ieee80211_send_addba_with_timeout(struct sta_info *sta,
sta->ampdu_mlme.addba_req_num[tid]++;
spin_unlock_bh(&sta->lock);
- if (sta->sta.deflink.he_cap.has_he) {
+ if (sta->sta.deflink.eht_cap.has_eht) {
buf_size = local->hw.max_tx_aggregation_subframes;
+ } else if (sta->sta.deflink.he_cap.has_he) {
+ buf_size = min_t(u16, local->hw.max_tx_aggregation_subframes,
+ IEEE80211_MAX_AMPDU_BUF_HE);
} else {
/*
* We really should use what the driver told us it will
@@ -473,9 +480,8 @@ static void ieee80211_send_addba_with_timeout(struct sta_info *sta,
}
/* send AddBA request */
- ieee80211_send_addba_request(sdata, sta->sta.addr, tid,
- tid_tx->dialog_token, tid_tx->ssn,
- buf_size, tid_tx->timeout);
+ ieee80211_send_addba_request(sta, tid, tid_tx->dialog_token,
+ tid_tx->ssn, buf_size, tid_tx->timeout);
WARN_ON(test_and_set_bit(HT_AGG_STATE_SENT_ADDBA, &tid_tx->state));
}
@@ -797,7 +803,7 @@ void ieee80211_start_tx_ba_cb(struct sta_info *sta, int tid,
if (!test_bit(HT_AGG_STATE_SENT_ADDBA, &tid_tx->state)) {
ieee80211_send_addba_with_timeout(sta, tid_tx);
- /* RESPONSE_RECEIVED state whould trigger the flow again */
+ /* RESPONSE_RECEIVED state would trigger the flow again */
return;
}
@@ -970,6 +976,13 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
amsdu = capab & IEEE80211_ADDBA_PARAM_AMSDU_MASK;
tid = u16_get_bits(capab, IEEE80211_ADDBA_PARAM_TID_MASK);
buf_size = u16_get_bits(capab, IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK);
+
+ ieee80211_retrieve_addba_ext_data(sta,
+ mgmt->u.action.u.addba_resp.variable,
+ len - offsetof(typeof(*mgmt),
+ u.action.u.addba_resp.variable),
+ &buf_size);
+
buf_size = min(buf_size, local->hw.max_tx_aggregation_subframes);
txq = sta->sta.txq[tid];
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 6dfc61a9acd4..61a824ec33da 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -105,8 +105,11 @@ static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata,
}
/* also validate MU-MIMO change */
- monitor_sdata = wiphy_dereference(local->hw.wiphy,
- local->monitor_sdata);
+ if (ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR))
+ monitor_sdata = sdata;
+ else
+ monitor_sdata = wiphy_dereference(local->hw.wiphy,
+ local->monitor_sdata);
if (!monitor_sdata &&
(params->vht_mumimo_groups || params->vht_mumimo_follow_addr))
@@ -114,7 +117,9 @@ static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata,
/* apply all changes now - no failures allowed */
- if (monitor_sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF))
+ if (monitor_sdata &&
+ (ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) ||
+ ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)))
ieee80211_set_mu_mimo_follow(monitor_sdata, params);
if (params->flags) {
@@ -138,7 +143,7 @@ static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata,
}
static int ieee80211_set_ap_mbssid_options(struct ieee80211_sub_if_data *sdata,
- struct cfg80211_mbssid_config params,
+ struct cfg80211_mbssid_config *params,
struct ieee80211_bss_conf *link_conf)
{
struct ieee80211_sub_if_data *tx_sdata;
@@ -149,10 +154,10 @@ static int ieee80211_set_ap_mbssid_options(struct ieee80211_sub_if_data *sdata,
link_conf->ema_ap = false;
link_conf->bssid_indicator = 0;
- if (sdata->vif.type != NL80211_IFTYPE_AP || !params.tx_wdev)
+ if (sdata->vif.type != NL80211_IFTYPE_AP || !params->tx_wdev)
return -EINVAL;
- tx_sdata = IEEE80211_WDEV_TO_SUB_IF(params.tx_wdev);
+ tx_sdata = IEEE80211_WDEV_TO_SUB_IF(params->tx_wdev);
if (!tx_sdata)
return -EINVAL;
@@ -161,9 +166,9 @@ static int ieee80211_set_ap_mbssid_options(struct ieee80211_sub_if_data *sdata,
} else {
sdata->vif.mbssid_tx_vif = &tx_sdata->vif;
link_conf->nontransmitted = true;
- link_conf->bssid_index = params.index;
+ link_conf->bssid_index = params->index;
}
- if (params.ema)
+ if (params->ema)
link_conf->ema_ap = true;
return 0;
@@ -194,6 +199,24 @@ static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy,
}
}
+ /* Let the driver know that an interface is going to be added.
+ * Indicate so only for interface types that will be added to the
+ * driver.
+ */
+ switch (type) {
+ case NL80211_IFTYPE_AP_VLAN:
+ break;
+ case NL80211_IFTYPE_MONITOR:
+ if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) ||
+ !(params->flags & MONITOR_FLAG_ACTIVE))
+ break;
+ fallthrough;
+ default:
+ drv_prep_add_interface(local,
+ ieee80211_vif_type_p2p(&sdata->vif));
+ break;
+ }
+
return wdev;
}
@@ -879,6 +902,7 @@ static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev,
}
static int ieee80211_set_monitor_channel(struct wiphy *wiphy,
+ struct net_device *dev,
struct cfg80211_chan_def *chandef)
{
struct ieee80211_local *local = wiphy_priv(wiphy);
@@ -888,22 +912,25 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy,
lockdep_assert_wiphy(local->hw.wiphy);
- if (cfg80211_chandef_identical(&local->monitor_chanreq.oper,
- &chanreq.oper))
- return 0;
+ sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ if (!ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) {
+ if (cfg80211_chandef_identical(&local->monitor_chanreq.oper,
+ &chanreq.oper))
+ return 0;
- sdata = wiphy_dereference(local->hw.wiphy,
- local->monitor_sdata);
- if (!sdata)
- goto done;
+ sdata = wiphy_dereference(wiphy, local->monitor_sdata);
+ if (!sdata)
+ goto done;
+ }
- if (cfg80211_chandef_identical(&sdata->vif.bss_conf.chanreq.oper,
+ if (rcu_access_pointer(sdata->deflink.conf->chanctx_conf) &&
+ cfg80211_chandef_identical(&sdata->vif.bss_conf.chanreq.oper,
&chanreq.oper))
return 0;
ieee80211_link_release_channel(&sdata->deflink);
ret = ieee80211_link_use_channel(&sdata->deflink, &chanreq,
- IEEE80211_CHANCTX_EXCLUSIVE);
+ IEEE80211_CHANCTX_SHARED);
if (ret)
return ret;
done:
@@ -1294,9 +1321,6 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
if (old)
return -EALREADY;
- if (params->smps_mode != NL80211_SMPS_OFF)
- return -EOPNOTSUPP;
-
link->smps_mode = IEEE80211_SMPS_OFF;
link->needed_rx_chains = sdata->local->rx_chains;
@@ -1390,7 +1414,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
if (sdata->vif.type == NL80211_IFTYPE_AP &&
params->mbssid_config.tx_wdev) {
err = ieee80211_set_ap_mbssid_options(sdata,
- params->mbssid_config,
+ &params->mbssid_config,
link_conf);
if (err)
return err;
@@ -1705,7 +1729,7 @@ static int sta_apply_auth_flags(struct ieee80211_local *local,
* before drv_sta_state() is called.
*/
if (!test_sta_flag(sta, WLAN_STA_RATE_CONTROL))
- rate_control_rate_init(sta);
+ rate_control_rate_init_all_links(sta);
ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC);
if (ret)
@@ -2134,7 +2158,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
*/
if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER) &&
test_sta_flag(sta, WLAN_STA_ASSOC))
- rate_control_rate_init(sta);
+ rate_control_rate_init_all_links(sta);
return sta_info_insert(sta);
}
@@ -3046,14 +3070,31 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy,
enum nl80211_tx_power_setting txp_type = type;
bool update_txp_type = false;
bool has_monitor = false;
+ int user_power_level;
int old_power = local->user_power_level;
lockdep_assert_wiphy(local->hw.wiphy);
+ switch (type) {
+ case NL80211_TX_POWER_AUTOMATIC:
+ user_power_level = IEEE80211_UNSET_POWER_LEVEL;
+ txp_type = NL80211_TX_POWER_LIMITED;
+ break;
+ case NL80211_TX_POWER_LIMITED:
+ case NL80211_TX_POWER_FIXED:
+ if (mbm < 0 || (mbm % 100))
+ return -EOPNOTSUPP;
+ user_power_level = MBM_TO_DBM(mbm);
+ break;
+ default:
+ return -EINVAL;
+ }
+
if (wdev) {
sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
- if (sdata->vif.type == NL80211_IFTYPE_MONITOR) {
+ if (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
+ !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) {
if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF))
return -EOPNOTSUPP;
@@ -3063,57 +3104,67 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy,
return -EOPNOTSUPP;
}
- switch (type) {
- case NL80211_TX_POWER_AUTOMATIC:
- sdata->deflink.user_power_level =
- IEEE80211_UNSET_POWER_LEVEL;
- txp_type = NL80211_TX_POWER_LIMITED;
- break;
- case NL80211_TX_POWER_LIMITED:
- case NL80211_TX_POWER_FIXED:
- if (mbm < 0 || (mbm % 100))
- return -EOPNOTSUPP;
- sdata->deflink.user_power_level = MBM_TO_DBM(mbm);
- break;
- }
+ for (int link_id = 0;
+ link_id < ARRAY_SIZE(sdata->link);
+ link_id++) {
+ struct ieee80211_link_data *link =
+ wiphy_dereference(wiphy, sdata->link[link_id]);
- if (txp_type != sdata->vif.bss_conf.txpower_type) {
- update_txp_type = true;
- sdata->vif.bss_conf.txpower_type = txp_type;
- }
+ if (!link)
+ continue;
- ieee80211_recalc_txpower(sdata, update_txp_type);
+ link->user_power_level = user_power_level;
+
+ if (txp_type != link->conf->txpower_type) {
+ update_txp_type = true;
+ link->conf->txpower_type = txp_type;
+ }
+ ieee80211_recalc_txpower(link, update_txp_type);
+ }
return 0;
}
- switch (type) {
- case NL80211_TX_POWER_AUTOMATIC:
- local->user_power_level = IEEE80211_UNSET_POWER_LEVEL;
- txp_type = NL80211_TX_POWER_LIMITED;
- break;
- case NL80211_TX_POWER_LIMITED:
- case NL80211_TX_POWER_FIXED:
- if (mbm < 0 || (mbm % 100))
- return -EOPNOTSUPP;
- local->user_power_level = MBM_TO_DBM(mbm);
- break;
- }
+ local->user_power_level = user_power_level;
list_for_each_entry(sdata, &local->interfaces, list) {
- if (sdata->vif.type == NL80211_IFTYPE_MONITOR) {
+ if (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
+ !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) {
has_monitor = true;
continue;
}
- sdata->deflink.user_power_level = local->user_power_level;
- if (txp_type != sdata->vif.bss_conf.txpower_type)
- update_txp_type = true;
- sdata->vif.bss_conf.txpower_type = txp_type;
+
+ for (int link_id = 0;
+ link_id < ARRAY_SIZE(sdata->link);
+ link_id++) {
+ struct ieee80211_link_data *link =
+ wiphy_dereference(wiphy, sdata->link[link_id]);
+
+ if (!link)
+ continue;
+
+ link->user_power_level = local->user_power_level;
+ if (txp_type != link->conf->txpower_type)
+ update_txp_type = true;
+ link->conf->txpower_type = txp_type;
+ }
}
list_for_each_entry(sdata, &local->interfaces, list) {
- if (sdata->vif.type == NL80211_IFTYPE_MONITOR)
+ if (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
+ !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR))
continue;
- ieee80211_recalc_txpower(sdata, update_txp_type);
+
+ for (int link_id = 0;
+ link_id < ARRAY_SIZE(sdata->link);
+ link_id++) {
+ struct ieee80211_link_data *link =
+ wiphy_dereference(wiphy, sdata->link[link_id]);
+
+ if (!link)
+ continue;
+
+ ieee80211_recalc_txpower(link, update_txp_type);
+ }
}
if (has_monitor) {
@@ -3125,7 +3176,8 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy,
update_txp_type = true;
sdata->vif.bss_conf.txpower_type = txp_type;
- ieee80211_recalc_txpower(sdata, update_txp_type);
+ ieee80211_recalc_txpower(&sdata->deflink,
+ update_txp_type);
}
}
@@ -4307,7 +4359,8 @@ static int ieee80211_cfg_get_channel(struct wiphy *wiphy,
if (chanctx_conf) {
*chandef = link->conf->chanreq.oper;
ret = 0;
- } else if (local->open_count > 0 &&
+ } else if (!ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR) &&
+ local->open_count > 0 &&
local->open_count == local->monitors &&
sdata->vif.type == NL80211_IFTYPE_MONITOR) {
*chandef = local->monitor_chanreq.oper;
@@ -5030,6 +5083,13 @@ ieee80211_add_link_station(struct wiphy *wiphy, struct net_device *dev,
return ret;
}
+ if (test_sta_flag(sta, WLAN_STA_ASSOC)) {
+ struct link_sta_info *link_sta;
+
+ link_sta = sdata_dereference(sta->link[params->link_id], sdata);
+ rate_control_rate_init(link_sta);
+ }
+
/* ieee80211_sta_activate_link frees the link upon failure */
return ieee80211_sta_activate_link(sta, params->link_id);
}
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index cca6d14084d2..a442cb667520 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -323,22 +323,34 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local,
continue;
switch (link->sdata->vif.type) {
- case NL80211_IFTYPE_AP:
- case NL80211_IFTYPE_AP_VLAN:
- width = ieee80211_get_max_required_bw(link);
- break;
case NL80211_IFTYPE_STATION:
+ if (!link->sdata->vif.cfg.assoc) {
+ /*
+ * The AP's sta->bandwidth may not yet be set
+ * at this point (pre-association), so simply
+ * take the width from the chandef. We cannot
+ * have TDLS peers yet (only after association).
+ */
+ width = link->conf->chanreq.oper.width;
+ break;
+ }
/*
- * The ap's sta->bandwidth is not set yet at this
- * point, so take the width from the chandef, but
- * account also for TDLS peers
+ * otherwise just use min_def like in AP, depending on what
+ * we currently think the AP STA (and possibly TDLS peers)
+ * require(s)
*/
- width = max(link->conf->chanreq.oper.width,
- ieee80211_get_max_required_bw(link));
+ fallthrough;
+ case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_AP_VLAN:
+ width = ieee80211_get_max_required_bw(link);
break;
case NL80211_IFTYPE_P2P_DEVICE:
case NL80211_IFTYPE_NAN:
continue;
+ case NL80211_IFTYPE_MONITOR:
+ WARN_ON_ONCE(!ieee80211_hw_check(&local->hw,
+ NO_VIRTUAL_MONITOR));
+ fallthrough;
case NL80211_IFTYPE_ADHOC:
case NL80211_IFTYPE_MESH_POINT:
case NL80211_IFTYPE_OCB:
@@ -347,7 +359,6 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local,
case NL80211_IFTYPE_WDS:
case NL80211_IFTYPE_UNSPECIFIED:
case NUM_NL80211_IFTYPES:
- case NL80211_IFTYPE_MONITOR:
case NL80211_IFTYPE_P2P_CLIENT:
case NL80211_IFTYPE_P2P_GO:
WARN_ON_ONCE(1);
@@ -409,7 +420,7 @@ _ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
if (!ctx->driver_present)
return 0;
- return IEEE80211_CHANCTX_CHANGE_MIN_WIDTH;
+ return IEEE80211_CHANCTX_CHANGE_MIN_DEF;
}
static void ieee80211_chan_bw_change(struct ieee80211_local *local,
@@ -462,12 +473,12 @@ static void ieee80211_chan_bw_change(struct ieee80211_local *local,
continue;
/* vif changed to narrow BW and narrow BW for station wasn't
- * requested or vise versa */
+ * requested or vice versa */
if ((new_sta_bw < link_sta->pub->bandwidth) == !narrowed)
continue;
link_sta->pub->bandwidth = new_sta_bw;
- rate_control_rate_update(local, sband, sta, link_id,
+ rate_control_rate_update(local, sband, link_sta,
IEEE80211_RC_BW_CHANGED);
}
}
@@ -905,7 +916,7 @@ static int ieee80211_assign_link_chanctx(struct ieee80211_link_data *link,
}
if (new_ctx && ieee80211_chanctx_num_assigned(local, new_ctx) > 0) {
- ieee80211_recalc_txpower(sdata, false);
+ ieee80211_recalc_txpower(link, false);
ieee80211_recalc_chanctx_min_def(local, new_ctx, NULL, false);
}
@@ -956,6 +967,10 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local,
if (!link->sdata->u.mgd.associated)
continue;
break;
+ case NL80211_IFTYPE_MONITOR:
+ if (!ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR))
+ continue;
+ break;
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_ADHOC:
case NL80211_IFTYPE_MESH_POINT:
@@ -968,6 +983,11 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local,
if (rcu_access_pointer(link->conf->chanctx_conf) != &chanctx->conf)
continue;
+ if (link->sdata->vif.type == NL80211_IFTYPE_MONITOR) {
+ rx_chains_dynamic = rx_chains_static = local->rx_chains;
+ break;
+ }
+
switch (link->smps_mode) {
default:
WARN_ONCE(1, "Invalid SMPS mode %d\n",
@@ -1118,7 +1138,7 @@ ieee80211_replace_chanctx(struct ieee80211_local *local,
*
* Consider ctx1..3, link1..6, each ctx has 2 links. link1 and
* link2 from ctx1 request new different chandefs starting 2
- * in-place reserations with ctx4 and ctx5 replacing ctx1 and
+ * in-place reservations with ctx4 and ctx5 replacing ctx1 and
* ctx2 respectively. Next link5 and link6 from ctx3 reserve
* ctx4. If link3 and link4 remain on ctx2 as they are then this
* fails unless `replace_ctx` from ctx5 is replaced with ctx3.
@@ -1169,7 +1189,7 @@ ieee80211_replace_chanctx(struct ieee80211_local *local,
static bool
ieee80211_find_available_radio(struct ieee80211_local *local,
const struct ieee80211_chan_req *chanreq,
- int *radio_idx)
+ u32 radio_mask, int *radio_idx)
{
struct wiphy *wiphy = local->hw.wiphy;
const struct wiphy_radio *radio;
@@ -1180,6 +1200,9 @@ ieee80211_find_available_radio(struct ieee80211_local *local,
return true;
for (i = 0; i < wiphy->n_radio; i++) {
+ if (!(radio_mask & BIT(i)))
+ continue;
+
radio = &wiphy->radio[i];
if (!cfg80211_radio_chandef_valid(radio, &chanreq->oper))
continue;
@@ -1213,7 +1236,9 @@ int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link,
new_ctx = ieee80211_find_reservation_chanctx(local, chanreq, mode);
if (!new_ctx) {
if (ieee80211_can_create_new_chanctx(local, -1) &&
- ieee80211_find_available_radio(local, chanreq, &radio_idx))
+ ieee80211_find_available_radio(local, chanreq,
+ sdata->wdev.radio_mask,
+ &radio_idx))
new_ctx = ieee80211_new_chanctx(local, chanreq, mode,
false, radio_idx);
else
@@ -1712,7 +1737,7 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
link,
changed);
- ieee80211_recalc_txpower(sdata, false);
+ ieee80211_recalc_txpower(link, false);
}
ieee80211_recalc_chanctx_chantype(local, ctx);
@@ -1883,7 +1908,9 @@ int _ieee80211_link_use_channel(struct ieee80211_link_data *link,
/* Note: context is now reserved */
if (ctx)
reserved = true;
- else if (!ieee80211_find_available_radio(local, chanreq, &radio_idx))
+ else if (!ieee80211_find_available_radio(local, chanreq,
+ sdata->wdev.radio_mask,
+ &radio_idx))
ctx = ERR_PTR(-EBUSY);
else
ctx = ieee80211_new_chanctx(local, chanreq, mode,
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 02b5476a4376..be2e486907f9 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -42,9 +42,8 @@ static ssize_t name## _read(struct file *file, char __user *userbuf, \
}
#define DEBUGFS_READONLY_FILE_OPS(name) \
-static const struct file_operations name## _ops = { \
+static const struct debugfs_short_fops name## _ops = { \
.read = name## _read, \
- .open = simple_open, \
.llseek = generic_file_llseek, \
};
@@ -142,10 +141,9 @@ static ssize_t aqm_write(struct file *file,
return -EINVAL;
}
-static const struct file_operations aqm_ops = {
+static const struct debugfs_short_fops aqm_ops = {
.write = aqm_write,
.read = aqm_read,
- .open = simple_open,
.llseek = default_llseek,
};
@@ -194,10 +192,9 @@ static ssize_t airtime_flags_write(struct file *file,
return count;
}
-static const struct file_operations airtime_flags_ops = {
+static const struct debugfs_short_fops airtime_flags_ops = {
.write = airtime_flags_write,
.read = airtime_flags_read,
- .open = simple_open,
.llseek = default_llseek,
};
@@ -225,9 +222,8 @@ static ssize_t aql_pending_read(struct file *file,
buf, len);
}
-static const struct file_operations aql_pending_ops = {
+static const struct debugfs_short_fops aql_pending_ops = {
.read = aql_pending_read,
- .open = simple_open,
.llseek = default_llseek,
};
@@ -305,10 +301,9 @@ static ssize_t aql_txq_limit_write(struct file *file,
return count;
}
-static const struct file_operations aql_txq_limit_ops = {
+static const struct debugfs_short_fops aql_txq_limit_ops = {
.write = aql_txq_limit_write,
.read = aql_txq_limit_read,
- .open = simple_open,
.llseek = default_llseek,
};
@@ -355,10 +350,9 @@ static ssize_t aql_enable_write(struct file *file, const char __user *user_buf,
return count;
}
-static const struct file_operations aql_enable_ops = {
+static const struct debugfs_short_fops aql_enable_ops = {
.write = aql_enable_write,
.read = aql_enable_read,
- .open = simple_open,
.llseek = default_llseek,
};
@@ -406,10 +400,9 @@ static ssize_t force_tx_status_write(struct file *file,
return count;
}
-static const struct file_operations force_tx_status_ops = {
+static const struct debugfs_short_fops force_tx_status_ops = {
.write = force_tx_status_write,
.read = force_tx_status_read,
- .open = simple_open,
.llseek = default_llseek,
};
@@ -434,9 +427,8 @@ static ssize_t reset_write(struct file *file, const char __user *user_buf,
return count;
}
-static const struct file_operations reset_ops = {
+static const struct debugfs_short_fops reset_ops = {
.write = reset_write,
- .open = simple_open,
.llseek = noop_llseek,
};
#endif
@@ -456,6 +448,7 @@ static const char *hw_flag_names[] = {
FLAG(SUPPORTS_DYNAMIC_PS),
FLAG(MFP_CAPABLE),
FLAG(WANT_MONITOR_VIF),
+ FLAG(NO_VIRTUAL_MONITOR),
FLAG(NO_AUTO_VIF),
FLAG(SW_CRYPTO_CONTROL),
FLAG(SUPPORT_FAST_XMIT),
@@ -623,9 +616,8 @@ static ssize_t stats_ ##name## _read(struct file *file, \
print_devstats_##name); \
} \
\
-static const struct file_operations stats_ ##name## _ops = { \
+static const struct debugfs_short_fops stats_ ##name## _ops = { \
.read = stats_ ##name## _read, \
- .open = simple_open, \
.llseek = generic_file_llseek, \
};
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index 7e54da508765..b3a64edea0f2 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -26,17 +26,15 @@ static ssize_t key_##name##_read(struct file *file, \
#define KEY_READ_X(name) KEY_READ(name, name, "0x%x\n")
#define KEY_OPS(name) \
-static const struct file_operations key_ ##name## _ops = { \
+static const struct debugfs_short_fops key_ ##name## _ops = { \
.read = key_##name##_read, \
- .open = simple_open, \
.llseek = generic_file_llseek, \
}
#define KEY_OPS_W(name) \
-static const struct file_operations key_ ##name## _ops = { \
+static const struct debugfs_short_fops key_ ##name## _ops = { \
.read = key_##name##_read, \
.write = key_##name##_write, \
- .open = simple_open, \
.llseek = generic_file_llseek, \
}
@@ -49,9 +47,8 @@ static const struct file_operations key_ ##name## _ops = { \
#define KEY_CONF_READ_D(name) KEY_CONF_READ(name, "%d\n")
#define KEY_CONF_OPS(name) \
-static const struct file_operations key_ ##name## _ops = { \
+static const struct debugfs_short_fops key_ ##name## _ops = { \
.read = key_conf_##name##_read, \
- .open = simple_open, \
.llseek = generic_file_llseek, \
}
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 68596ef78b15..a9bc2fd59f55 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -221,10 +221,9 @@ static ssize_t ieee80211_if_fmt_##name( \
}
#define _IEEE80211_IF_FILE_OPS(name, _read, _write) \
-static const struct file_operations name##_ops = { \
+static const struct debugfs_short_fops name##_ops = { \
.read = (_read), \
.write = (_write), \
- .open = simple_open, \
.llseek = generic_file_llseek, \
}
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 1e9389c49a57..a67a9d316008 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -30,17 +30,15 @@ static ssize_t sta_ ##name## _read(struct file *file, \
#define STA_READ_D(name, field) STA_READ(name, field, "%d\n")
#define STA_OPS(name) \
-static const struct file_operations sta_ ##name## _ops = { \
+static const struct debugfs_short_fops sta_ ##name## _ops = { \
.read = sta_##name##_read, \
- .open = simple_open, \
.llseek = generic_file_llseek, \
}
#define STA_OPS_RW(name) \
-static const struct file_operations sta_ ##name## _ops = { \
+static const struct debugfs_short_fops sta_ ##name## _ops = { \
.read = sta_##name##_read, \
.write = sta_##name##_write, \
- .open = simple_open, \
.llseek = generic_file_llseek, \
}
@@ -450,9 +448,8 @@ STA_OPS_RW(agg_status);
/* link sta attributes */
#define LINK_STA_OPS(name) \
-static const struct file_operations link_sta_ ##name## _ops = { \
+static const struct debugfs_short_fops link_sta_ ##name## _ops = { \
.read = link_sta_##name##_read, \
- .open = simple_open, \
.llseek = generic_file_llseek, \
}
diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c
index fe868b521622..299d38e9e863 100644
--- a/net/mac80211/driver-ops.c
+++ b/net/mac80211/driver-ops.c
@@ -65,6 +65,7 @@ int drv_add_interface(struct ieee80211_local *local,
if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
(sdata->vif.type == NL80211_IFTYPE_MONITOR &&
!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) &&
+ !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR) &&
!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE))))
return -EINVAL;
@@ -181,9 +182,10 @@ int drv_sta_set_txpwr(struct ieee80211_local *local,
return ret;
}
-void drv_sta_rc_update(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- struct ieee80211_sta *sta, u32 changed)
+void drv_link_sta_rc_update(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_link_sta *link_sta,
+ u32 changed)
{
sdata = get_bss_sdata(sdata);
if (!check_sdata_in_driver(sdata))
@@ -193,10 +195,10 @@ void drv_sta_rc_update(struct ieee80211_local *local,
(sdata->vif.type != NL80211_IFTYPE_ADHOC &&
sdata->vif.type != NL80211_IFTYPE_MESH_POINT));
- trace_drv_sta_rc_update(local, sdata, sta, changed);
- if (local->ops->sta_rc_update)
- local->ops->sta_rc_update(&local->hw, &sdata->vif,
- sta, changed);
+ trace_drv_link_sta_rc_update(local, sdata, link_sta, changed);
+ if (local->ops->link_sta_rc_update)
+ local->ops->link_sta_rc_update(&local->hw, &sdata->vif,
+ link_sta, changed);
trace_drv_return_void(local);
}
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index d382d9729e85..edd1e4d4ad9d 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -594,9 +594,9 @@ int drv_sta_set_txpwr(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
struct sta_info *sta);
-void drv_sta_rc_update(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- struct ieee80211_sta *sta, u32 changed);
+void drv_link_sta_rc_update(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_link_sta *link_sta, u32 changed);
static inline void drv_sta_rate_tbl_update(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
@@ -1728,4 +1728,16 @@ drv_can_neg_ttlm(struct ieee80211_local *local,
return res;
}
+
+static inline void
+drv_prep_add_interface(struct ieee80211_local *local,
+ enum nl80211_iftype type)
+{
+ trace_drv_prep_add_interface(local, type);
+ if (local->ops->prep_add_interface)
+ local->ops->prep_add_interface(&local->hw, type);
+
+ trace_drv_return_void(local);
+}
+
#endif /* __MAC80211_DRIVER_OPS */
diff --git a/net/mac80211/eht.c b/net/mac80211/eht.c
index ddc7acc68335..7a3116c36df9 100644
--- a/net/mac80211/eht.c
+++ b/net/mac80211/eht.c
@@ -2,7 +2,7 @@
/*
* EHT handling
*
- * Copyright(c) 2021-2023 Intel Corporation
+ * Copyright(c) 2021-2024 Intel Corporation
*/
#include "ieee80211_i.h"
@@ -75,4 +75,23 @@ ieee80211_eht_cap_ie_to_sta_eht_cap(struct ieee80211_sub_if_data *sdata,
link_sta->cur_max_bandwidth = ieee80211_sta_cap_rx_bw(link_sta);
link_sta->pub->bandwidth = ieee80211_sta_cur_vht_bw(link_sta);
+
+ switch (u8_get_bits(eht_cap->eht_cap_elem.mac_cap_info[0],
+ IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_MASK)) {
+ case IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_11454:
+ link_sta->pub->agg.max_amsdu_len =
+ IEEE80211_MAX_MPDU_LEN_VHT_11454;
+ break;
+ case IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_7991:
+ link_sta->pub->agg.max_amsdu_len =
+ IEEE80211_MAX_MPDU_LEN_VHT_7991;
+ break;
+ case IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_3895:
+ default:
+ link_sta->pub->agg.max_amsdu_len =
+ IEEE80211_MAX_MPDU_LEN_VHT_3895;
+ break;
+ }
+
+ ieee80211_sta_recalc_aggregates(&link_sta->sta->sta);
}
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 1c2b7dd8976a..32390d8a9d75 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -379,7 +379,7 @@ void ieee80211_ba_session_work(struct wiphy *wiphy, struct wiphy_work *work)
sta->ampdu_mlme.tid_rx_manage_offl))
__ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid,
IEEE80211_MAX_AMPDU_BUF_HT,
- false, true, NULL);
+ false, true, 0);
if (test_and_clear_bit(tid + IEEE80211_NUM_TIDS,
sta->ampdu_mlme.tid_rx_manage_offl))
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 3f74bbceeca5..a1b4178deccf 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -569,7 +569,7 @@ static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta)
if (!sta->sdata->u.ibss.control_port)
sta_info_pre_move_state(sta, IEEE80211_STA_AUTHORIZED);
- rate_control_rate_init(sta);
+ rate_control_rate_init(&sta->deflink);
/* If it fails, maybe we raced another insertion? */
if (sta_info_insert_rcu(sta))
@@ -1068,11 +1068,12 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
/* Force rx_nss recalculation */
sta->sta.deflink.rx_nss = 0;
- rate_control_rate_init(sta);
+ rate_control_rate_init(&sta->deflink);
if (sta->sta.deflink.rx_nss != rx_nss)
changed |= IEEE80211_RC_NSS_CHANGED;
- drv_sta_rc_update(local, sdata, &sta->sta, changed);
+ drv_link_sta_rc_update(local, sdata, &sta->sta.deflink,
+ changed);
}
rcu_read_unlock();
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 3d3c9139ff5e..a00096dd787b 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1016,8 +1016,6 @@ struct ieee80211_link_data_managed {
int wmm_last_param_set;
int mu_edca_last_param_set;
-
- u8 bss_param_ch_cnt;
};
struct ieee80211_link_data_ap {
@@ -1371,7 +1369,7 @@ struct ieee80211_local {
spinlock_t queue_stop_reason_lock;
int open_count;
- int monitors, cooked_mntrs;
+ int monitors, cooked_mntrs, tx_mntrs;
/* number of interfaces with corresponding FIF_ flags */
int fif_fcsfail, fif_plcpfail, fif_control, fif_other_bss, fif_pspoll,
fif_probe_req;
@@ -2037,8 +2035,8 @@ void ieee80211_sdata_stop(struct ieee80211_sub_if_data *sdata);
int ieee80211_add_virtual_monitor(struct ieee80211_local *local);
void ieee80211_del_virtual_monitor(struct ieee80211_local *local);
-bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata);
-void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata,
+bool __ieee80211_recalc_txpower(struct ieee80211_link_data *link);
+void ieee80211_recalc_txpower(struct ieee80211_link_data *link,
bool update_bss);
void ieee80211_recalc_offload(struct ieee80211_local *local);
@@ -2115,14 +2113,19 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
const u8 *bssid, int link_id);
bool ieee80211_smps_is_restrictive(enum ieee80211_smps_mode smps_mode_old,
enum ieee80211_smps_mode smps_mode_new);
-
+void ieee80211_add_addbaext(struct sk_buff *skb,
+ const u8 req_addba_ext_data,
+ u16 buf_size);
+u8 ieee80211_retrieve_addba_ext_data(struct sta_info *sta,
+ const void *elem_data, ssize_t elem_len,
+ u16 *buf_size);
void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
u16 initiator, u16 reason, bool stop);
void __ieee80211_start_rx_ba_session(struct sta_info *sta,
u8 dialog_token, u16 timeout,
u16 start_seq_num, u16 ba_policy, u16 tid,
u16 buf_size, bool tx, bool auto_seq,
- const struct ieee80211_addba_ext_ie *addbaext);
+ const u8 addba_ext_data);
void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
enum ieee80211_agg_stop_reason reason);
void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata,
@@ -2198,8 +2201,6 @@ ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta)
return _ieee80211_sta_cur_vht_bw(link_sta, NULL);
}
void ieee80211_sta_init_nss(struct link_sta_info *link_sta);
-enum ieee80211_sta_rx_bandwidth
-ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width);
enum nl80211_chan_width
ieee80211_sta_cap_chan_bw(struct link_sta_info *link_sta);
void ieee80211_process_mu_groups(struct ieee80211_sub_if_data *sdata,
@@ -2454,7 +2455,7 @@ static inline bool ieee80211_can_run_worker(struct ieee80211_local *local)
/*
* If quiescing is set, we are racing with __ieee80211_suspend.
* __ieee80211_suspend flushes the workers after setting quiescing,
- * and we check quiescing / suspended before enqueing new workers.
+ * and we check quiescing / suspended before enqueuing new workers.
* We should abort the worker to avoid the races below.
*/
if (local->quiescing)
@@ -2547,8 +2548,8 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
const struct cfg80211_chan_def *chandef);
u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata);
-u8 *ieee80211_ie_build_he_oper(u8 *pos, struct cfg80211_chan_def *chandef);
-u8 *ieee80211_ie_build_eht_oper(u8 *pos, struct cfg80211_chan_def *chandef,
+u8 *ieee80211_ie_build_he_oper(u8 *pos, const struct cfg80211_chan_def *chandef);
+u8 *ieee80211_ie_build_eht_oper(u8 *pos, const struct cfg80211_chan_def *chandef,
const struct ieee80211_sta_eht_cap *eht_cap);
int ieee80211_parse_bitrates(enum nl80211_chan_width width,
const struct ieee80211_supported_band *sband,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 6ef0990d3d29..a8fbedd530f4 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -44,13 +44,13 @@
static void ieee80211_iface_work(struct wiphy *wiphy, struct wiphy_work *work);
-bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata)
+bool __ieee80211_recalc_txpower(struct ieee80211_link_data *link)
{
struct ieee80211_chanctx_conf *chanctx_conf;
int power;
rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf);
+ chanctx_conf = rcu_dereference(link->conf->chanctx_conf);
if (!chanctx_conf) {
rcu_read_unlock();
return false;
@@ -59,27 +59,26 @@ bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata)
power = ieee80211_chandef_max_power(&chanctx_conf->def);
rcu_read_unlock();
- if (sdata->deflink.user_power_level != IEEE80211_UNSET_POWER_LEVEL)
- power = min(power, sdata->deflink.user_power_level);
+ if (link->user_power_level != IEEE80211_UNSET_POWER_LEVEL)
+ power = min(power, link->user_power_level);
- if (sdata->deflink.ap_power_level != IEEE80211_UNSET_POWER_LEVEL)
- power = min(power, sdata->deflink.ap_power_level);
+ if (link->ap_power_level != IEEE80211_UNSET_POWER_LEVEL)
+ power = min(power, link->ap_power_level);
- if (power != sdata->vif.bss_conf.txpower) {
- sdata->vif.bss_conf.txpower = power;
- ieee80211_hw_config(sdata->local, 0);
+ if (power != link->conf->txpower) {
+ link->conf->txpower = power;
return true;
}
return false;
}
-void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata,
+void ieee80211_recalc_txpower(struct ieee80211_link_data *link,
bool update_bss)
{
- if (__ieee80211_recalc_txpower(sdata) ||
- (update_bss && ieee80211_sdata_running(sdata)))
- ieee80211_link_info_change_notify(sdata, &sdata->deflink,
+ if (__ieee80211_recalc_txpower(link) ||
+ (update_bss && ieee80211_sdata_running(link->sdata)))
+ ieee80211_link_info_change_notify(link->sdata, link,
BSS_CHANGED_TXPOWER);
}
@@ -279,8 +278,13 @@ static int _ieee80211_change_mac(struct ieee80211_sub_if_data *sdata,
ret = eth_mac_addr(sdata->dev, sa);
if (ret == 0) {
- memcpy(sdata->vif.addr, sa->sa_data, ETH_ALEN);
- ether_addr_copy(sdata->vif.bss_conf.addr, sdata->vif.addr);
+ if (check_dup) {
+ memcpy(sdata->vif.addr, sa->sa_data, ETH_ALEN);
+ ether_addr_copy(sdata->vif.bss_conf.addr, sdata->vif.addr);
+ } else {
+ memset(sdata->vif.addr, 0, ETH_ALEN);
+ memset(sdata->vif.bss_conf.addr, 0, ETH_ALEN);
+ }
}
/* Regardless of eth_mac_addr() return we still want to add the
@@ -699,9 +703,11 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
ieee80211_recalc_idle(local);
ieee80211_recalc_offload(local);
- if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE))
+ if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) &&
+ !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR))
break;
+ ieee80211_link_release_channel(&sdata->deflink);
fallthrough;
default:
if (!going_down)
@@ -1087,6 +1093,8 @@ void ieee80211_adjust_monitor_flags(struct ieee80211_sub_if_data *sdata,
ADJUST(CONTROL, control);
ADJUST(CONTROL, pspoll);
ADJUST(OTHER_BSS, other_bss);
+ if (!(flags & MONITOR_FLAG_SKIP_TX))
+ local->tx_mntrs += offset;
#undef ADJUST
}
@@ -1131,7 +1139,8 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
ASSERT_RTNL();
lockdep_assert_wiphy(local->hw.wiphy);
- if (local->monitor_sdata)
+ if (local->monitor_sdata ||
+ ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR))
return 0;
sdata = kzalloc(sizeof(*sdata) + local->hw.vif_data_size, GFP_KERNEL);
@@ -1193,6 +1202,9 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local)
{
struct ieee80211_sub_if_data *sdata;
+ if (ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR))
+ return;
+
ASSERT_RTNL();
lockdep_assert_wiphy(local->hw.wiphy);
@@ -1328,7 +1340,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
break;
}
- if (sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) {
+ if ((sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) ||
+ ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) {
res = drv_add_interface(local, sdata);
if (res)
goto err_stop;
@@ -2176,9 +2189,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
ieee80211_set_default_queues(sdata);
- sdata->deflink.ap_power_level = IEEE80211_UNSET_POWER_LEVEL;
- sdata->deflink.user_power_level = local->user_power_level;
-
/* setup type-dependent data */
ieee80211_setup_sdata(sdata, type);
diff --git a/net/mac80211/link.c b/net/mac80211/link.c
index 46092fbcde90..58a76bcd6ae6 100644
--- a/net/mac80211/link.c
+++ b/net/mac80211/link.c
@@ -36,6 +36,9 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata,
link->conf = link_conf;
link_conf->link_id = link_id;
link_conf->vif = &sdata->vif;
+ link->ap_power_level = IEEE80211_UNSET_POWER_LEVEL;
+ link->user_power_level = sdata->local->user_power_level;
+ link_conf->txpower = INT_MIN;
wiphy_work_init(&link->csa.finalize_work,
ieee80211_csa_finalize_work);
@@ -386,6 +389,37 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata,
jiffies);
}
+ for_each_set_bit(link_id, &add, IEEE80211_MLD_MAX_NUM_LINKS) {
+ struct ieee80211_link_data *link;
+
+ link = sdata_dereference(sdata->link[link_id], sdata);
+
+ /*
+ * This call really should not fail. Unfortunately, it appears
+ * that this may happen occasionally with some drivers. Should
+ * it happen, we are stuck in a bad place as going backwards is
+ * not really feasible.
+ *
+ * So lets just tell link_use_channel that it must not fail to
+ * assign the channel context (from mac80211's perspective) and
+ * assume the driver is going to trigger a recovery flow if it
+ * had a failure.
+ * That really is not great nor guaranteed to work. But at least
+ * the internal mac80211 state remains consistent and there is
+ * a chance that we can recover.
+ */
+ ret = _ieee80211_link_use_channel(link,
+ &link->conf->chanreq,
+ IEEE80211_CHANCTX_SHARED,
+ true);
+ WARN_ON_ONCE(ret);
+
+ /*
+ * inform about the link info changed parameters after all
+ * stations are also added
+ */
+ }
+
list_for_each_entry(sta, &local->sta_list, list) {
if (sdata != sta->sdata)
continue;
@@ -429,26 +463,6 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata,
link = sdata_dereference(sdata->link[link_id], sdata);
- /*
- * This call really should not fail. Unfortunately, it appears
- * that this may happen occasionally with some drivers. Should
- * it happen, we are stuck in a bad place as going backwards is
- * not really feasible.
- *
- * So lets just tell link_use_channel that it must not fail to
- * assign the channel context (from mac80211's perspective) and
- * assume the driver is going to trigger a recovery flow if it
- * had a failure.
- * That really is not great nor guaranteed to work. But at least
- * the internal mac80211 state remains consistent and there is
- * a chance that we can recover.
- */
- ret = _ieee80211_link_use_channel(link,
- &link->conf->chanreq,
- IEEE80211_CHANCTX_SHARED,
- true);
- WARN_ON_ONCE(ret);
-
ieee80211_mgd_set_link_qos_params(link);
ieee80211_link_info_change_notify(sdata, link,
BSS_CHANGED_ERP_CTS_PROT |
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 640239f4425b..cb5f16366b9c 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -1482,7 +1482,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
if (!elems)
return;
- /* ignore non-mesh or secure / unsecure mismatch */
+ /* ignore non-mesh or secure / insecure mismatch */
if ((!elems->mesh_id || !elems->mesh_config) ||
(elems->rsn && sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) ||
(!elems->rsn && sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE))
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 579d0f24ac9d..4e9546e998b6 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -220,12 +220,12 @@ static void prepare_frame_for_deferred_tx(struct ieee80211_sub_if_data *sdata,
/**
* mesh_path_error_tx - Sends a PERR mesh management frame
*
+ * @sdata: local mesh subif
* @ttl: allowed remaining hops
* @target: broken destination
* @target_sn: SN of the broken destination
* @target_rcode: reason code for this PERR
* @ra: node this frame is addressed to
- * @sdata: local mesh subif
*
* Note: This function may be called with driver locks taken that the driver
* also acquires in the TX path. To avoid a deadlock we don't transmit the
@@ -1137,8 +1137,8 @@ enddiscovery:
/**
* mesh_nexthop_resolve - lookup next hop; conditionally start path discovery
*
- * @skb: 802.11 frame to be sent
* @sdata: network subif the frame will be sent through
+ * @skb: 802.11 frame to be sent
*
* Lookup next hop for given skb and start path discovery if no
* forwarding information is found.
@@ -1245,8 +1245,8 @@ void mesh_path_refresh(struct ieee80211_sub_if_data *sdata,
* this function is considered "using" the associated mpath, so preempt a path
* refresh if this mpath expires soon.
*
- * @skb: 802.11 frame to be sent
* @sdata: network subif the frame will be sent through
+ * @skb: 802.11 frame to be sent
*
* Returns: 0 if the next hop was found. Nonzero otherwise.
*/
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 30c0d89203af..9f9cb5af0a97 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -300,8 +300,8 @@ __mesh_path_lookup_by_idx(struct mesh_table *tbl, int idx)
/**
* mesh_path_lookup_by_idx - look up a path in the mesh path table by its index
- * @idx: index
* @sdata: local subif, or NULL for all entries
+ * @idx: index
*
* Returns: pointer to the mesh path structure, or NULL if not found.
*
@@ -315,8 +315,8 @@ mesh_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx)
/**
* mpp_path_lookup_by_idx - look up a path in the proxy path table by its index
- * @idx: index
* @sdata: local subif, or NULL for all entries
+ * @idx: index
*
* Returns: pointer to the proxy path structure, or NULL if not found.
*
@@ -670,8 +670,8 @@ void mesh_fast_tx_flush_addr(struct ieee80211_sub_if_data *sdata,
/**
* mesh_path_add - allocate and add a new path to the mesh path table
- * @dst: destination address of the path (ETH_ALEN length)
* @sdata: local subif
+ * @dst: destination address of the path (ETH_ALEN length)
*
* Returns: 0 on success
*
@@ -916,8 +916,8 @@ static int table_path_del(struct mesh_table *tbl,
/**
* mesh_path_del - delete a mesh path from the table
*
- * @addr: dst address (ETH_ALEN length)
* @sdata: local subif
+ * @addr: dst address (ETH_ALEN length)
*
* Returns: 0 if successful
*/
@@ -996,8 +996,8 @@ int mesh_path_send_to_gates(struct mesh_path *mpath)
/**
* mesh_path_discard_frame - discard a frame whose path could not be resolved
*
- * @skb: frame to discard
* @sdata: network subif the frame was to be sent through
+ * @skb: frame to discard
*
* Locking: the function must me called within a rcu_read_lock region
*/
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 8f2b492a9fe9..6ea35c88dc48 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -486,10 +486,11 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
sta->sta.deflink.bandwidth = IEEE80211_STA_RX_BW_20;
}
+ /* FIXME: this check is wrong without SW rate control */
if (!test_sta_flag(sta, WLAN_STA_RATE_CONTROL))
- rate_control_rate_init(sta);
+ rate_control_rate_init(&sta->deflink);
else
- rate_control_rate_update(local, sband, sta, 0, changed);
+ rate_control_rate_update(local, sband, &sta->deflink, changed);
out:
spin_unlock_bh(&sta->mesh->plink_lock);
}
@@ -667,7 +668,7 @@ void mesh_plink_timer(struct timer_list *t)
/*
* This STA is valid because sta_info_destroy() will
* del_timer_sync() this timer after having made sure
- * it cannot be readded (by deleting the plink.)
+ * it cannot be re-added (by deleting the plink.)
*/
sta = mesh->plink_sta;
diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c
index 8cf3f395f52f..3a66b4cefca7 100644
--- a/net/mac80211/mesh_sync.c
+++ b/net/mac80211/mesh_sync.c
@@ -175,7 +175,7 @@ static void mesh_sync_offset_adjust_tsf(struct ieee80211_sub_if_data *sdata,
spin_lock_bh(&ifmsh->sync_offset_lock);
if (ifmsh->sync_offset_clockdrift_max > TOFFSET_MINIMUM_ADJUSTMENT) {
- /* Since ajusting the tsf here would
+ /* Since adjusting the tsf here would
* require a possibly blocking call
* to the driver tsf setter, we punt
* the tsf adjustment to the mesh tasklet
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 0303972c23e4..480b664151c9 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -31,6 +31,8 @@
#include "led.h"
#include "fils_aead.h"
+#include <kunit/static_stub.h>
+
#define IEEE80211_AUTH_TIMEOUT (HZ / 5)
#define IEEE80211_AUTH_TIMEOUT_LONG (HZ / 2)
#define IEEE80211_AUTH_TIMEOUT_SHORT (HZ / 10)
@@ -2643,9 +2645,91 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
&ifmgd->csa_connection_drop_work);
}
+struct sta_bss_param_ch_cnt_data {
+ struct ieee80211_sub_if_data *sdata;
+ u8 reporting_link_id;
+ u8 mld_id;
+};
+
+static enum cfg80211_rnr_iter_ret
+ieee80211_sta_bss_param_ch_cnt_iter(void *_data, u8 type,
+ const struct ieee80211_neighbor_ap_info *info,
+ const u8 *tbtt_info, u8 tbtt_info_len)
+{
+ struct sta_bss_param_ch_cnt_data *data = _data;
+ struct ieee80211_sub_if_data *sdata = data->sdata;
+ const struct ieee80211_tbtt_info_ge_11 *ti;
+ u8 bss_param_ch_cnt;
+ int link_id;
+
+ if (type != IEEE80211_TBTT_INFO_TYPE_TBTT)
+ return RNR_ITER_CONTINUE;
+
+ if (tbtt_info_len < sizeof(*ti))
+ return RNR_ITER_CONTINUE;
+
+ ti = (const void *)tbtt_info;
+
+ if (ti->mld_params.mld_id != data->mld_id)
+ return RNR_ITER_CONTINUE;
+
+ link_id = le16_get_bits(ti->mld_params.params,
+ IEEE80211_RNR_MLD_PARAMS_LINK_ID);
+ bss_param_ch_cnt =
+ le16_get_bits(ti->mld_params.params,
+ IEEE80211_RNR_MLD_PARAMS_BSS_CHANGE_COUNT);
+
+ if (bss_param_ch_cnt != 255 &&
+ link_id < ARRAY_SIZE(sdata->link)) {
+ struct ieee80211_link_data *link =
+ sdata_dereference(sdata->link[link_id], sdata);
+
+ if (link && link->conf->bss_param_ch_cnt != bss_param_ch_cnt) {
+ link->conf->bss_param_ch_cnt = bss_param_ch_cnt;
+ link->conf->bss_param_ch_cnt_link_id =
+ data->reporting_link_id;
+ }
+ }
+
+ return RNR_ITER_CONTINUE;
+}
+
+static void
+ieee80211_mgd_update_bss_param_ch_cnt(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_bss_conf *bss_conf,
+ struct ieee802_11_elems *elems)
+{
+ struct sta_bss_param_ch_cnt_data data = {
+ .reporting_link_id = bss_conf->link_id,
+ .sdata = sdata,
+ };
+ int bss_param_ch_cnt;
+
+ if (!elems->ml_basic)
+ return;
+
+ data.mld_id = ieee80211_mle_get_mld_id((const void *)elems->ml_basic);
+
+ cfg80211_iter_rnr(elems->ie_start, elems->total_len,
+ ieee80211_sta_bss_param_ch_cnt_iter, &data);
+
+ bss_param_ch_cnt =
+ ieee80211_mle_get_bss_param_ch_cnt((const void *)elems->ml_basic);
+
+ /*
+ * Update bss_param_ch_cnt_link_id even if bss_param_ch_cnt
+ * didn't change to indicate that we got a beacon on our own
+ * link.
+ */
+ if (bss_param_ch_cnt >= 0 && bss_param_ch_cnt != 255) {
+ bss_conf->bss_param_ch_cnt = bss_param_ch_cnt;
+ bss_conf->bss_param_ch_cnt_link_id =
+ bss_conf->link_id;
+ }
+}
+
static bool
-ieee80211_find_80211h_pwr_constr(struct ieee80211_sub_if_data *sdata,
- struct ieee80211_channel *channel,
+ieee80211_find_80211h_pwr_constr(struct ieee80211_channel *channel,
const u8 *country_ie, u8 country_ie_len,
const u8 *pwr_constr_elem,
int *chan_pwr, int *pwr_reduction)
@@ -2715,8 +2799,7 @@ ieee80211_find_80211h_pwr_constr(struct ieee80211_sub_if_data *sdata,
return have_chan_pwr;
}
-static void ieee80211_find_cisco_dtpc(struct ieee80211_sub_if_data *sdata,
- struct ieee80211_channel *channel,
+static void ieee80211_find_cisco_dtpc(struct ieee80211_channel *channel,
const u8 *cisco_dtpc_ie,
int *pwr_level)
{
@@ -2750,7 +2833,7 @@ static u64 ieee80211_handle_pwr_constr(struct ieee80211_link_data *link,
(capab & cpu_to_le16(WLAN_CAPABILITY_SPECTRUM_MGMT) ||
capab & cpu_to_le16(WLAN_CAPABILITY_RADIO_MEASURE))) {
has_80211h_pwr = ieee80211_find_80211h_pwr_constr(
- sdata, channel, country_ie, country_ie_len,
+ channel, country_ie, country_ie_len,
pwr_constr_ie, &chan_pwr, &pwr_reduction_80211h);
pwr_level_80211h =
max_t(int, 0, chan_pwr - pwr_reduction_80211h);
@@ -2758,7 +2841,7 @@ static u64 ieee80211_handle_pwr_constr(struct ieee80211_link_data *link,
if (cisco_dtpc_ie) {
ieee80211_find_cisco_dtpc(
- sdata, channel, cisco_dtpc_ie, &pwr_level_cisco);
+ channel, cisco_dtpc_ie, &pwr_level_cisco);
has_cisco_pwr = true;
}
@@ -2791,7 +2874,7 @@ static u64 ieee80211_handle_pwr_constr(struct ieee80211_link_data *link,
}
link->ap_power_level = new_ap_level;
- if (__ieee80211_recalc_txpower(sdata))
+ if (__ieee80211_recalc_txpower(link))
return BSS_CHANGED_TXPOWER;
return 0;
}
@@ -4101,8 +4184,13 @@ EXPORT_SYMBOL(ieee80211_beacon_loss);
void ieee80211_connection_loss(struct ieee80211_vif *vif)
{
- struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
- struct ieee80211_hw *hw = &sdata->local->hw;
+ struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_hw *hw;
+
+ KUNIT_STATIC_STUB_REDIRECT(ieee80211_connection_loss, vif);
+
+ sdata = vif_to_sdata(vif);
+ hw = &sdata->local->hw;
trace_api_connection_loss(sdata);
@@ -4667,7 +4755,8 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
ret = false;
goto out;
}
- link->u.mgd.bss_param_ch_cnt = bss_param_ch_cnt;
+ bss_conf->bss_param_ch_cnt = bss_param_ch_cnt;
+ bss_conf->bss_param_ch_cnt_link_id = link_id;
}
} else if (elems->parse_error & IEEE80211_PARSE_ERR_DUP_NEST_ML_BASIC ||
!elems->prof ||
@@ -4677,6 +4766,7 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
} else {
const u8 *ptr = elems->prof->variable +
elems->prof->sta_info_len - 1;
+ int bss_param_ch_cnt;
/*
* During parsing, we validated that these fields exist,
@@ -4684,8 +4774,10 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
*/
capab_info = get_unaligned_le16(ptr);
assoc_data->link[link_id].status = get_unaligned_le16(ptr + 2);
- link->u.mgd.bss_param_ch_cnt =
+ bss_param_ch_cnt =
ieee80211_mle_basic_sta_prof_bss_param_ch_cnt(elems->prof);
+ bss_conf->bss_param_ch_cnt = bss_param_ch_cnt;
+ bss_conf->bss_param_ch_cnt_link_id = link_id;
if (assoc_data->link[link_id].status != WLAN_STATUS_SUCCESS) {
link_info(link, "association response status code=%u\n",
@@ -5665,7 +5757,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
/* links might have changed due to rejected ones, set them again */
ieee80211_vif_set_links(sdata, valid_links, dormant_links);
- rate_control_rate_init(sta);
+ rate_control_rate_init_all_links(sta);
if (ifmgd->flags & IEEE80211_STA_MFP_ENABLED) {
set_sta_flag(sta, WLAN_STA_MFP);
@@ -6913,6 +7005,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
/* note that after this elems->ml_basic can no longer be used fully */
ieee80211_mgd_check_cross_link_csa(sdata, rx_status->link_id, elems);
+ ieee80211_mgd_update_bss_param_ch_cnt(sdata, bss_conf, elems);
+
if (!link->u.mgd.disable_wmm_tracking &&
ieee80211_sta_wmm_params(local, link, elems->wmm_param,
elems->wmm_param_len,
diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c
index f4c51e4a1e29..6218abc3e441 100644
--- a/net/mac80211/ocb.c
+++ b/net/mac80211/ocb.c
@@ -4,7 +4,7 @@
*
* Copyright: (c) 2014 Czech Technical University in Prague
* (c) 2014 Volkswagen Group Research
- * Copyright (C) 2022 - 2023 Intel Corporation
+ * Copyright (C) 2022 - 2024 Intel Corporation
* Author: Rostislav Lisovy <rostislav.lisovy@fel.cvut.cz>
* Funded by: Volkswagen Group Research
*/
@@ -96,7 +96,7 @@ static struct sta_info *ieee80211_ocb_finish_sta(struct sta_info *sta)
sta_info_move_state(sta, IEEE80211_STA_ASSOC);
sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED);
- rate_control_rate_init(sta);
+ rate_control_rate_init(&sta->deflink);
/* If it fails, maybe we raced another insertion? */
if (sta_info_insert_rcu(sta))
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 3dc9752188d5..0d056db9f81e 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -28,8 +28,9 @@ module_param(ieee80211_default_rc_algo, charp, 0644);
MODULE_PARM_DESC(ieee80211_default_rc_algo,
"Default rate control algorithm for mac80211 to use");
-void rate_control_rate_init(struct sta_info *sta)
+void rate_control_rate_init(struct link_sta_info *link_sta)
{
+ struct sta_info *sta = link_sta->sta;
struct ieee80211_local *local = sta->sdata->local;
struct rate_control_ref *ref = sta->rate_ctrl;
struct ieee80211_sta *ista = &sta->sta;
@@ -37,11 +38,15 @@ void rate_control_rate_init(struct sta_info *sta)
struct ieee80211_supported_band *sband;
struct ieee80211_chanctx_conf *chanctx_conf;
- ieee80211_sta_init_nss(&sta->deflink);
+ ieee80211_sta_init_nss(link_sta);
if (!ref)
return;
+ /* SW rate control isn't supported with MLO right now */
+ if (WARN_ON(ieee80211_vif_is_mld(&sta->sdata->vif)))
+ return;
+
rcu_read_lock();
chanctx_conf = rcu_dereference(sta->sdata->vif.bss_conf.chanctx_conf);
@@ -67,6 +72,21 @@ void rate_control_rate_init(struct sta_info *sta)
set_sta_flag(sta, WLAN_STA_RATE_CONTROL);
}
+void rate_control_rate_init_all_links(struct sta_info *sta)
+{
+ int link_id;
+
+ for (link_id = 0; link_id < ARRAY_SIZE(sta->link); link_id++) {
+ struct link_sta_info *link_sta;
+
+ link_sta = sdata_dereference(sta->link[link_id], sta->sdata);
+ if (!link_sta)
+ continue;
+
+ rate_control_rate_init(link_sta);
+ }
+}
+
void rate_control_tx_status(struct ieee80211_local *local,
struct ieee80211_tx_status *st)
{
@@ -93,16 +113,15 @@ void rate_control_tx_status(struct ieee80211_local *local,
void rate_control_rate_update(struct ieee80211_local *local,
struct ieee80211_supported_band *sband,
- struct sta_info *sta, unsigned int link_id,
+ struct link_sta_info *link_sta,
u32 changed)
{
struct rate_control_ref *ref = local->rate_ctrl;
+ struct sta_info *sta = link_sta->sta;
struct ieee80211_sta *ista = &sta->sta;
void *priv_sta = sta->rate_ctrl_priv;
struct ieee80211_chanctx_conf *chanctx_conf;
- WARN_ON(link_id != 0);
-
if (ref && ref->ops->rate_update) {
rcu_read_lock();
@@ -120,7 +139,8 @@ void rate_control_rate_update(struct ieee80211_local *local,
}
if (sta->uploaded)
- drv_sta_rc_update(local, sta->sdata, &sta->sta, changed);
+ drv_link_sta_rc_update(local, sta->sdata, link_sta->pub,
+ changed);
}
int ieee80211_rate_control_register(const struct rate_control_ops *ops)
@@ -229,9 +249,8 @@ static ssize_t rcname_read(struct file *file, char __user *userbuf,
ref->ops->name, len);
}
-const struct file_operations rcname_ops = {
+const struct debugfs_short_fops rcname_ops = {
.read = rcname_read,
- .open = simple_open,
.llseek = default_llseek,
};
#endif
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index d6190f10fe7c..5e4bde598212 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -3,7 +3,7 @@
* Copyright 2002-2005, Instant802 Networks, Inc.
* Copyright 2005, Devicescape Software, Inc.
* Copyright (c) 2006 Jiri Benc <jbenc@suse.cz>
- * Copyright (C) 2022 Intel Corporation
+ * Copyright (C) 2022, 2024 Intel Corporation
*/
#ifndef IEEE80211_RATE_H
@@ -29,11 +29,11 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata,
void rate_control_tx_status(struct ieee80211_local *local,
struct ieee80211_tx_status *st);
-void rate_control_rate_init(struct sta_info *sta);
+void rate_control_rate_init(struct link_sta_info *link_sta);
+void rate_control_rate_init_all_links(struct sta_info *sta);
void rate_control_rate_update(struct ieee80211_local *local,
struct ieee80211_supported_band *sband,
- struct sta_info *sta,
- unsigned int link_id,
+ struct link_sta_info *link_sta,
u32 changed);
static inline void *rate_control_alloc_sta(struct rate_control_ref *ref,
@@ -62,7 +62,7 @@ static inline void rate_control_add_sta_debugfs(struct sta_info *sta)
#endif
}
-extern const struct file_operations rcname_ops;
+extern const struct debugfs_short_fops rcname_ops;
static inline void rate_control_add_debugfs(struct ieee80211_local *local)
{
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 6bf3b4444a43..706cbc99f718 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -1053,7 +1053,7 @@ minstrel_ht_refill_sample_rates(struct minstrel_ht_sta *mi)
* - max_prob_rate must use only one stream, as a tradeoff between delivery
* probability and throughput during strong fluctuations
* - as long as the max prob rate has a probability of more than 75%, pick
- * higher throughput rates, even if the probablity is a bit lower
+ * higher throughput rates, even if the probability is a bit lower
*/
static void
minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 694b43091fec..2bec18fc1b03 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -508,18 +508,13 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
flags |= IEEE80211_RADIOTAP_AMPDU_IS_LAST;
if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_ERROR)
flags |= IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_ERR;
- if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_KNOWN)
- flags |= IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_KNOWN;
if (status->flag & RX_FLAG_AMPDU_EOF_BIT_KNOWN)
flags |= IEEE80211_RADIOTAP_AMPDU_EOF_KNOWN;
if (status->flag & RX_FLAG_AMPDU_EOF_BIT)
flags |= IEEE80211_RADIOTAP_AMPDU_EOF;
put_unaligned_le16(flags, pos);
pos += 2;
- if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_KNOWN)
- *pos++ = status->ampdu_delimiter_crc;
- else
- *pos++ = 0;
+ *pos++ = 0;
*pos++ = 0;
}
@@ -767,8 +762,8 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
struct ieee80211_rate *rate)
{
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(origskb);
- struct ieee80211_sub_if_data *sdata;
- struct sk_buff *monskb = NULL;
+ struct ieee80211_sub_if_data *sdata, *prev_sdata = NULL;
+ struct sk_buff *skb, *monskb = NULL;
int present_fcs_len = 0;
unsigned int rtap_space = 0;
struct ieee80211_sub_if_data *monitor_sdata =
@@ -842,40 +837,52 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
ieee80211_handle_mu_mimo_mon(monitor_sdata, origskb, rtap_space);
list_for_each_entry_rcu(sdata, &local->mon_list, u.mntr.list) {
- bool last_monitor = list_is_last(&sdata->u.mntr.list,
- &local->mon_list);
+ struct cfg80211_chan_def *chandef;
+
+ chandef = &sdata->vif.bss_conf.chanreq.oper;
+ if (chandef->chan &&
+ chandef->chan->center_freq != status->freq)
+ continue;
+
+ if (!prev_sdata) {
+ prev_sdata = sdata;
+ continue;
+ }
+
+ if (ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR))
+ ieee80211_handle_mu_mimo_mon(sdata, origskb, rtap_space);
if (!monskb)
monskb = ieee80211_make_monitor_skb(local, &origskb,
rate, rtap_space,
- only_monitor &&
- last_monitor);
+ false);
+ if (!monskb)
+ continue;
- if (monskb) {
- struct sk_buff *skb;
+ skb = skb_clone(monskb, GFP_ATOMIC);
+ if (!skb)
+ continue;
- if (last_monitor) {
- skb = monskb;
- monskb = NULL;
- } else {
- skb = skb_clone(monskb, GFP_ATOMIC);
- }
+ skb->dev = prev_sdata->dev;
+ dev_sw_netstats_rx_add(skb->dev, skb->len);
+ netif_receive_skb(skb);
+ prev_sdata = sdata;
+ }
- if (skb) {
- skb->dev = sdata->dev;
- dev_sw_netstats_rx_add(skb->dev, skb->len);
- netif_receive_skb(skb);
- }
+ if (prev_sdata) {
+ if (monskb)
+ skb = monskb;
+ else
+ skb = ieee80211_make_monitor_skb(local, &origskb,
+ rate, rtap_space,
+ only_monitor);
+ if (skb) {
+ skb->dev = prev_sdata->dev;
+ dev_sw_netstats_rx_add(skb->dev, skb->len);
+ netif_receive_skb(skb);
}
-
- if (last_monitor)
- break;
}
- /* this happens if last_monitor was erroneously false */
- dev_kfree_skb(monskb);
-
- /* ditto */
if (!origskb)
return NULL;
@@ -3568,7 +3575,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
sband = rx->local->hw.wiphy->bands[status->band];
- rate_control_rate_update(local, sband, rx->sta, 0,
+ rate_control_rate_update(local, sband, rx->link_sta,
IEEE80211_RC_SMPS_CHANGED);
cfg80211_sta_opmode_change_notify(sdata->dev,
rx->sta->addr,
@@ -3605,7 +3612,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
ieee80211_sta_rx_bw_to_chan_width(rx->link_sta);
sta_opmode.changed = STA_OPMODE_MAX_BW_CHANGED;
- rate_control_rate_update(local, sband, rx->sta, 0,
+ rate_control_rate_update(local, sband, rx->link_sta,
IEEE80211_RC_BW_CHANGED);
cfg80211_sta_opmode_change_notify(sdata->dev,
rx->sta->addr,
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index adb88c06b598..cb7079071885 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -1176,14 +1176,14 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata,
unsigned int n_channels)
{
struct ieee80211_local *local = sdata->local;
- int ret = -EBUSY, i, n_ch = 0;
+ int i, n_ch = 0;
enum nl80211_band band;
lockdep_assert_wiphy(local->hw.wiphy);
/* busy scanning */
if (local->scan_req)
- goto unlock;
+ return -EBUSY;
/* fill internal scan request */
if (!channels) {
@@ -1200,7 +1200,9 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata,
&local->hw.wiphy->bands[band]->channels[i];
if (tmp_ch->flags & (IEEE80211_CHAN_NO_IR |
- IEEE80211_CHAN_DISABLED))
+ IEEE80211_CHAN_DISABLED) ||
+ !cfg80211_wdev_channel_allowed(&sdata->wdev,
+ tmp_ch))
continue;
local->int_scan_req->channels[n_ch] = tmp_ch;
@@ -1209,21 +1211,23 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata,
}
if (WARN_ON_ONCE(n_ch == 0))
- goto unlock;
+ return -EINVAL;
local->int_scan_req->n_channels = n_ch;
} else {
for (i = 0; i < n_channels; i++) {
if (channels[i]->flags & (IEEE80211_CHAN_NO_IR |
- IEEE80211_CHAN_DISABLED))
+ IEEE80211_CHAN_DISABLED) ||
+ !cfg80211_wdev_channel_allowed(&sdata->wdev,
+ channels[i]))
continue;
local->int_scan_req->channels[n_ch] = channels[i];
n_ch++;
}
- if (WARN_ON_ONCE(n_ch == 0))
- goto unlock;
+ if (n_ch == 0)
+ return -EINVAL;
local->int_scan_req->n_channels = n_ch;
}
@@ -1233,9 +1237,7 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata,
memcpy(local->int_scan_req->ssids[0].ssid, ssid, IEEE80211_MAX_SSID_LEN);
local->int_scan_req->ssids[0].ssid_len = ssid_len;
- ret = __ieee80211_start_scan(sdata, sdata->local->int_scan_req);
- unlock:
- return ret;
+ return __ieee80211_start_scan(sdata, sdata->local->int_scan_req);
}
void ieee80211_scan_cancel(struct ieee80211_local *local)
diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c
index 073ff9e0f397..c6015cd00372 100644
--- a/net/mac80211/spectmgmt.c
+++ b/net/mac80211/spectmgmt.c
@@ -377,13 +377,8 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
/* capture the AP chandef before (potential) downgrading */
csa_ie->chanreq.ap = new_chandef;
- if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_320 &&
- new_chandef.width == NL80211_CHAN_WIDTH_320)
- ieee80211_chandef_downgrade(&new_chandef, NULL);
-
- if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_160 &&
- (new_chandef.width == NL80211_CHAN_WIDTH_80P80 ||
- new_chandef.width == NL80211_CHAN_WIDTH_160))
+ while (conn->bw_limit <
+ ieee80211_min_bw_limit_from_chandef(&new_chandef))
ieee80211_chandef_downgrade(&new_chandef, NULL);
if (!cfg80211_chandef_compatible(&new_chandef,
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 9195d5a2de0a..9f89fb5bee37 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -169,7 +169,7 @@ struct sta_info;
* @buf_size: reorder buffer size at receiver
* @failed_bar_ssn: ssn of the last failed BAR tx attempt
* @bar_pending: BAR needs to be re-sent
- * @amsdu: support A-MSDU withing A-MDPU
+ * @amsdu: support A-MSDU within A-MPDU
* @ssn: starting sequence number of the session
*
* This structure's lifetime is managed by RCU, assignments to
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index b41b867f43b2..5f28f3633fa0 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -927,6 +927,9 @@ void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb,
if (!ieee80211_sdata_running(sdata))
continue;
+ if (sdata->u.mntr.flags & MONITOR_FLAG_SKIP_TX)
+ continue;
+
if ((sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES) &&
!send_to_cooked)
continue;
@@ -1099,7 +1102,7 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw,
* This is a bit racy but we can avoid a lot of work
* with this test...
*/
- if (!local->monitors && (!send_to_cooked || !local->cooked_mntrs)) {
+ if (!local->tx_mntrs && (!send_to_cooked || !local->cooked_mntrs)) {
if (status->free_list)
list_add_tail(&skb->list, status->free_list);
else
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index f07b40916485..2f92e7c7f203 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -1342,7 +1342,8 @@ static void iee80211_tdls_recalc_chanctx(struct ieee80211_sub_if_data *sdata,
bw = min(bw, ieee80211_sta_cap_rx_bw(&sta->deflink));
if (bw != sta->sta.deflink.bandwidth) {
sta->sta.deflink.bandwidth = bw;
- rate_control_rate_update(local, sband, sta, 0,
+ rate_control_rate_update(local, sband,
+ &sta->deflink,
IEEE80211_RC_BW_CHANGED);
/*
* if a TDLS peer BW was updated, we need to
diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c
index 880a1fa8705a..94c00e71f6f8 100644
--- a/net/mac80211/tkip.c
+++ b/net/mac80211/tkip.c
@@ -313,7 +313,7 @@ int ieee80211_tkip_decrypt_data(struct arc4_ctx *ctx,
* Record previously received IV, will be copied into the
* key information after MIC verification. It is possible
* that we don't catch replays of fragments but that's ok
- * because the Michael MIC verication will then fail.
+ * because the Michael MIC verification will then fail.
*/
*out_iv32 = iv32;
*out_iv16 = iv16;
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index dc498cd8cd91..7a4985fc2b16 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -939,31 +939,34 @@ TRACE_EVENT(drv_sta_set_txpwr,
)
);
-TRACE_EVENT(drv_sta_rc_update,
+TRACE_EVENT(drv_link_sta_rc_update,
TP_PROTO(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
- struct ieee80211_sta *sta,
+ struct ieee80211_link_sta *link_sta,
u32 changed),
- TP_ARGS(local, sdata, sta, changed),
+ TP_ARGS(local, sdata, link_sta, changed),
TP_STRUCT__entry(
LOCAL_ENTRY
VIF_ENTRY
STA_ENTRY
__field(u32, changed)
+ __field(u32, link_id)
),
TP_fast_assign(
LOCAL_ASSIGN;
VIF_ASSIGN;
- STA_ASSIGN;
+ STA_NAMED_ASSIGN(link_sta->sta);
__entry->changed = changed;
+ __entry->link_id = link_sta->link_id;
),
TP_printk(
- LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " changed: 0x%x",
- LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->changed
+ LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " (link %d) changed: 0x%x",
+ LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->link_id,
+ __entry->changed
)
);
@@ -3154,6 +3157,25 @@ TRACE_EVENT(drv_neg_ttlm_res,
LOCAL_PR_ARG, VIF_PR_ARG, __entry->res
)
);
+
+TRACE_EVENT(drv_prep_add_interface,
+ TP_PROTO(struct ieee80211_local *local,
+ enum nl80211_iftype type),
+
+ TP_ARGS(local, type),
+ TP_STRUCT__entry(LOCAL_ENTRY
+ __field(u32, type)
+ ),
+
+ TP_fast_assign(LOCAL_ASSIGN;
+ __entry->type = type;
+ ),
+
+ TP_printk(LOCAL_PR_FMT " type: %u\n ",
+ LOCAL_PR_ARG, __entry->type
+ )
+);
+
#endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */
#undef TRACE_INCLUDE_PATH
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 0ff8b56f5807..a24636bda679 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1763,7 +1763,8 @@ static bool __ieee80211_tx(struct ieee80211_local *local,
switch (sdata->vif.type) {
case NL80211_IFTYPE_MONITOR:
- if (sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) {
+ if ((sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) ||
+ ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) {
vif = &sdata->vif;
break;
}
@@ -3952,7 +3953,8 @@ begin:
switch (tx.sdata->vif.type) {
case NL80211_IFTYPE_MONITOR:
- if (tx.sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) {
+ if ((tx.sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) ||
+ ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) {
vif = &tx.sdata->vif;
break;
}
@@ -6214,7 +6216,7 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
goto start_xmit;
/* update QoS header to prioritize control port frames if possible,
- * priorization also happens for control port frames send over
+ * prioritization also happens for control port frames sent over
* AF_PACKET
*/
rcu_read_lock();
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index f94faa86ba8a..a4e1301cc999 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -756,7 +756,8 @@ static void __iterate_interfaces(struct ieee80211_local *local,
lockdep_is_held(&local->hw.wiphy->mtx)) {
switch (sdata->vif.type) {
case NL80211_IFTYPE_MONITOR:
- if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE))
+ if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) &&
+ !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR))
continue;
break;
case NL80211_IFTYPE_AP_VLAN:
@@ -1010,7 +1011,7 @@ void ieee80211_set_wmm_default(struct ieee80211_link_data *link,
else
aCWmin = 15;
- /* Confiure old 802.11b/g medium access rules. */
+ /* Configure old 802.11b/g medium access rules. */
qparam.cw_max = aCWmax;
qparam.cw_min = aCWmin;
qparam.txop = 0;
@@ -1873,8 +1874,10 @@ int ieee80211_reconfig(struct ieee80211_local *local)
}
list_for_each_entry(sdata, &local->interfaces, list) {
+ if (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
+ !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR))
+ continue;
if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN &&
- sdata->vif.type != NL80211_IFTYPE_MONITOR &&
ieee80211_sdata_running(sdata)) {
res = drv_add_interface(local, sdata);
if (WARN_ON(res))
@@ -1887,11 +1890,14 @@ int ieee80211_reconfig(struct ieee80211_local *local)
*/
if (res) {
list_for_each_entry_continue_reverse(sdata, &local->interfaces,
- list)
+ list) {
+ if (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
+ !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR))
+ continue;
if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN &&
- sdata->vif.type != NL80211_IFTYPE_MONITOR &&
ieee80211_sdata_running(sdata))
drv_remove_interface(local, sdata);
+ }
ieee80211_handle_reconfig_failure(local);
return res;
}
@@ -2752,7 +2758,7 @@ u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
return pos + sizeof(struct ieee80211_vht_operation);
}
-u8 *ieee80211_ie_build_he_oper(u8 *pos, struct cfg80211_chan_def *chandef)
+u8 *ieee80211_ie_build_he_oper(u8 *pos, const struct cfg80211_chan_def *chandef)
{
struct ieee80211_he_operation *he_oper;
struct ieee80211_he_6ghz_oper *he_6ghz_op;
@@ -2844,7 +2850,7 @@ out:
return pos;
}
-u8 *ieee80211_ie_build_eht_oper(u8 *pos, struct cfg80211_chan_def *chandef,
+u8 *ieee80211_ie_build_eht_oper(u8 *pos, const struct cfg80211_chan_def *chandef,
const struct ieee80211_sta_eht_cap *eht_cap)
{
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index bf6ef45af757..6a20fa099190 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -280,10 +280,10 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
/*
* This is a workaround for VHT-enabled STAs which break the spec
* and have the VHT-MCS Rx map filled in with value 3 for all eight
- * spacial streams, an example is AR9462.
+ * spatial streams, an example is AR9462.
*
* As per spec, in section 22.1.1 Introduction to the VHT PHY
- * A VHT STA shall support at least single spactial stream VHT-MCSs
+ * A VHT STA shall support at least single spatial stream VHT-MCSs
* 0 to 7 (transmit and receive) in all supported channel widths.
*/
if (vht_cap->vht_mcs.rx_mcs_map == cpu_to_le16(0xFFFF)) {
@@ -479,28 +479,6 @@ ieee80211_sta_rx_bw_to_chan_width(struct link_sta_info *link_sta)
}
}
-enum ieee80211_sta_rx_bandwidth
-ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width)
-{
- switch (width) {
- case NL80211_CHAN_WIDTH_20_NOHT:
- case NL80211_CHAN_WIDTH_20:
- return IEEE80211_STA_RX_BW_20;
- case NL80211_CHAN_WIDTH_40:
- return IEEE80211_STA_RX_BW_40;
- case NL80211_CHAN_WIDTH_80:
- return IEEE80211_STA_RX_BW_80;
- case NL80211_CHAN_WIDTH_160:
- case NL80211_CHAN_WIDTH_80P80:
- return IEEE80211_STA_RX_BW_160;
- case NL80211_CHAN_WIDTH_320:
- return IEEE80211_STA_RX_BW_320;
- default:
- WARN_ON_ONCE(1);
- return IEEE80211_STA_RX_BW_20;
- }
-}
-
/* FIXME: rename/move - this deals with everything not just VHT */
enum ieee80211_sta_rx_bandwidth
_ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta,
@@ -766,8 +744,7 @@ void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
if (changed > 0) {
ieee80211_recalc_min_chandef(sdata, link_sta->link_id);
- rate_control_rate_update(local, sband, link_sta->sta,
- link_sta->link_id, changed);
+ rate_control_rate_update(local, sband, link_sta, changed);
}
}
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 293afa3f57c5..40d5d9e48479 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -598,9 +598,6 @@ static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad,
memcpy(j_0, hdr->addr2, ETH_ALEN);
memcpy(&j_0[ETH_ALEN], pn, IEEE80211_GCMP_PN_LEN);
- j_0[13] = 0;
- j_0[14] = 0;
- j_0[AES_BLOCK_SIZE - 1] = 0x01;
ccmp_gcmp_aad(skb, aad, spp_amsdu);
}
diff --git a/net/mctp/device.c b/net/mctp/device.c
index 85cc5f31f1e7..26ce34b7e88e 100644
--- a/net/mctp/device.c
+++ b/net/mctp/device.c
@@ -371,6 +371,8 @@ static int mctp_fill_link_af(struct sk_buff *skb,
return -ENODATA;
if (nla_put_u32(skb, IFLA_MCTP_NET, mdev->net))
return -EMSGSIZE;
+ if (nla_put_u8(skb, IFLA_MCTP_PHYS_BINDING, mdev->binding))
+ return -EMSGSIZE;
return 0;
}
@@ -385,6 +387,7 @@ static size_t mctp_get_link_af_size(const struct net_device *dev,
if (!mdev)
return 0;
ret = nla_total_size(4); /* IFLA_MCTP_NET */
+ ret += nla_total_size(1); /* IFLA_MCTP_PHYS_BINDING */
mctp_dev_put(mdev);
return ret;
}
@@ -480,7 +483,8 @@ static int mctp_dev_notify(struct notifier_block *this, unsigned long event,
}
static int mctp_register_netdevice(struct net_device *dev,
- const struct mctp_netdev_ops *ops)
+ const struct mctp_netdev_ops *ops,
+ enum mctp_phys_binding binding)
{
struct mctp_dev *mdev;
@@ -489,17 +493,19 @@ static int mctp_register_netdevice(struct net_device *dev,
return PTR_ERR(mdev);
mdev->ops = ops;
+ mdev->binding = binding;
return register_netdevice(dev);
}
int mctp_register_netdev(struct net_device *dev,
- const struct mctp_netdev_ops *ops)
+ const struct mctp_netdev_ops *ops,
+ enum mctp_phys_binding binding)
{
int rc;
rtnl_lock();
- rc = mctp_register_netdevice(dev, ops);
+ rc = mctp_register_netdevice(dev, ops, binding);
rtnl_unlock();
return rc;
@@ -535,14 +541,20 @@ int __init mctp_device_init(void)
int err;
register_netdevice_notifier(&mctp_dev_nb);
- rtnl_af_register(&mctp_af_ops);
+
+ err = rtnl_af_register(&mctp_af_ops);
+ if (err)
+ goto err_notifier;
err = rtnl_register_many(mctp_device_rtnl_msg_handlers);
- if (err) {
- rtnl_af_unregister(&mctp_af_ops);
- unregister_netdevice_notifier(&mctp_dev_nb);
- }
+ if (err)
+ goto err_af;
+ return 0;
+err_af:
+ rtnl_af_unregister(&mctp_af_ops);
+err_notifier:
+ unregister_netdevice_notifier(&mctp_dev_nb);
return err;
}
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index df62638b6498..1f63b32d76d6 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1664,7 +1664,7 @@ static int nla_put_via(struct sk_buff *skb,
u8 table, const void *addr, int alen)
{
static const int table_to_family[NEIGH_NR_TABLES + 1] = {
- AF_INET, AF_INET6, AF_DECnet, AF_PACKET,
+ AF_INET, AF_INET6, AF_PACKET,
};
struct nlattr *nla;
struct rtvia *via;
@@ -2753,7 +2753,9 @@ static int __init mpls_init(void)
dev_add_pack(&mpls_packet_type);
- rtnl_af_register(&mpls_af_ops);
+ err = rtnl_af_register(&mpls_af_ops);
+ if (err)
+ goto out_unregister_dev_type;
err = rtnl_register_many(mpls_rtnl_msg_handlers);
if (err)
@@ -2773,6 +2775,7 @@ out_unregister_rtnl:
rtnl_unregister_many(mpls_rtnl_msg_handlers);
out_unregister_rtnl_af:
rtnl_af_unregister(&mpls_af_ops);
+out_unregister_dev_type:
dev_remove_pack(&mpls_packet_type);
out_unregister_pernet:
unregister_pernet_subsys(&mpls_net_ops);
diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c
index 2d3efb405437..02205f7994d7 100644
--- a/net/mptcp/diag.c
+++ b/net/mptcp/diag.c
@@ -47,7 +47,7 @@ static int subflow_get_info(struct sock *sk, struct sk_buff *skb)
flags |= MPTCP_SUBFLOW_FLAG_BKUP_REM;
if (sf->request_bkup)
flags |= MPTCP_SUBFLOW_FLAG_BKUP_LOC;
- if (sf->fully_established)
+ if (READ_ONCE(sf->fully_established))
flags |= MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED;
if (sf->conn_finished)
flags |= MPTCP_SUBFLOW_FLAG_CONNECTED;
diff --git a/net/mptcp/mptcp_pm_gen.c b/net/mptcp/mptcp_pm_gen.c
index bfb37c5a88c4..dcffd847af33 100644
--- a/net/mptcp/mptcp_pm_gen.c
+++ b/net/mptcp/mptcp_pm_gen.c
@@ -14,7 +14,7 @@
const struct nla_policy mptcp_pm_address_nl_policy[MPTCP_PM_ADDR_ATTR_IF_IDX + 1] = {
[MPTCP_PM_ADDR_ATTR_FAMILY] = { .type = NLA_U16, },
[MPTCP_PM_ADDR_ATTR_ID] = { .type = NLA_U8, },
- [MPTCP_PM_ADDR_ATTR_ADDR4] = { .type = NLA_U32, },
+ [MPTCP_PM_ADDR_ATTR_ADDR4] = { .type = NLA_BE32, },
[MPTCP_PM_ADDR_ATTR_ADDR6] = NLA_POLICY_EXACT_LEN(16),
[MPTCP_PM_ADDR_ATTR_PORT] = { .type = NLA_U16, },
[MPTCP_PM_ADDR_ATTR_FLAGS] = { .type = NLA_U32, },
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 370c3836b771..1603b3702e22 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -461,7 +461,7 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
return false;
/* MPC/MPJ needed only on 3rd ack packet, DATA_FIN and TCP shutdown take precedence */
- if (subflow->fully_established || snd_data_fin_enable ||
+ if (READ_ONCE(subflow->fully_established) || snd_data_fin_enable ||
subflow->snd_isn != TCP_SKB_CB(skb)->seq ||
sk->sk_state != TCP_ESTABLISHED)
return false;
@@ -930,7 +930,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
/* here we can process OoO, in-window pkts, only in-sequence 4th ack
* will make the subflow fully established
*/
- if (likely(subflow->fully_established)) {
+ if (likely(READ_ONCE(subflow->fully_established))) {
/* on passive sockets, check for 3rd ack retransmission
* note that msk is always set by subflow_syn_recv_sock()
* for mp_join subflows
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 620264c75dc2..16c336c51940 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -154,6 +154,9 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk)
void mptcp_pm_connection_closed(struct mptcp_sock *msk)
{
pr_debug("msk=%p\n", msk);
+
+ if (msk->token)
+ mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
}
void mptcp_pm_subflow_established(struct mptcp_sock *msk)
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 45a2b5f05d38..7a0f7998376a 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -512,7 +512,8 @@ __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
{
struct mptcp_pm_addr_entry *entry;
- list_for_each_entry(entry, &pernet->local_addr_list, list) {
+ list_for_each_entry_rcu(entry, &pernet->local_addr_list, list,
+ lockdep_is_held(&pernet->lock)) {
if (entry->addr.id == id)
return entry;
}
@@ -782,7 +783,7 @@ bool mptcp_pm_nl_is_init_remote_addr(struct mptcp_sock *msk,
void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
{
- struct mptcp_subflow_context *subflow;
+ struct mptcp_subflow_context *subflow, *alt = NULL;
msk_owned_by_me(msk);
lockdep_assert_held(&msk->pm.lock);
@@ -793,10 +794,18 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
mptcp_for_each_subflow(msk, subflow) {
if (__mptcp_subflow_active(subflow)) {
- mptcp_pm_send_ack(msk, subflow, false, false);
- break;
+ if (!subflow->stale) {
+ mptcp_pm_send_ack(msk, subflow, false, false);
+ return;
+ }
+
+ if (!alt)
+ alt = subflow;
}
}
+
+ if (alt)
+ mptcp_pm_send_ack(msk, alt, false, false);
}
int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
@@ -1134,17 +1143,13 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc
{
struct mptcp_pm_addr_entry *entry;
struct pm_nl_pernet *pernet;
- int ret = -1;
+ int ret;
pernet = pm_nl_get_pernet_from_msk(msk);
rcu_read_lock();
- list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
- if (mptcp_addresses_equal(&entry->addr, skc, entry->addr.port)) {
- ret = entry->addr.id;
- break;
- }
- }
+ entry = __lookup_addr(pernet, skc);
+ ret = entry ? entry->addr.id : -1;
rcu_read_unlock();
if (ret >= 0)
return ret;
@@ -1171,15 +1176,11 @@ bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc)
{
struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
struct mptcp_pm_addr_entry *entry;
- bool backup = false;
+ bool backup;
rcu_read_lock();
- list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
- if (mptcp_addresses_equal(&entry->addr, skc, entry->addr.port)) {
- backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
- break;
- }
- }
+ entry = __lookup_addr(pernet, skc);
+ backup = entry && !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
rcu_read_unlock();
return backup;
@@ -1816,7 +1817,7 @@ int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info)
goto fail;
}
- spin_lock_bh(&pernet->lock);
+ rcu_read_lock();
entry = __lookup_addr_by_id(pernet, addr.addr.id);
if (!entry) {
GENL_SET_ERR_MSG(info, "address not found");
@@ -1830,11 +1831,11 @@ int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info)
genlmsg_end(msg, reply);
ret = genlmsg_reply(msg, info);
- spin_unlock_bh(&pernet->lock);
+ rcu_read_unlock();
return ret;
unlock_fail:
- spin_unlock_bh(&pernet->lock);
+ rcu_read_unlock();
fail:
nlmsg_free(msg);
@@ -1858,7 +1859,7 @@ int mptcp_pm_nl_dump_addr(struct sk_buff *msg,
pernet = pm_nl_get_pernet(net);
- spin_lock_bh(&pernet->lock);
+ rcu_read_lock();
for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) {
if (test_bit(i, pernet->id_bitmap)) {
entry = __lookup_addr_by_id(pernet, i);
@@ -1883,7 +1884,7 @@ int mptcp_pm_nl_dump_addr(struct sk_buff *msg,
genlmsg_end(msg, hdr);
}
}
- spin_unlock_bh(&pernet->lock);
+ rcu_read_unlock();
cb->args[0] = id;
return msg->len;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 48d480982b78..08a72242428c 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2728,8 +2728,8 @@ void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout)
if (!fail_tout && !inet_csk(sk)->icsk_mtup.probe_timestamp)
return;
- close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies +
- mptcp_close_timeout(sk);
+ close_timeout = (unsigned long)inet_csk(sk)->icsk_mtup.probe_timestamp -
+ tcp_jiffies32 + jiffies + mptcp_close_timeout(sk);
/* the close timeout takes precedence on the fail one, and here at least one of
* them is active
@@ -3147,8 +3147,7 @@ cleanup:
sock_hold(sk);
pr_debug("msk=%p state=%d\n", sk, sk->sk_state);
- if (msk->token)
- mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
+ mptcp_pm_connection_closed(msk);
if (sk->sk_state == TCP_CLOSE) {
__mptcp_destroy_sock(sk);
@@ -3214,8 +3213,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
mptcp_stop_rtx_timer(sk);
mptcp_stop_tout_timer(sk);
- if (msk->token)
- mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
+ mptcp_pm_connection_closed(msk);
/* msk->subflow is still intact, the following will not free the first
* subflow
@@ -3519,7 +3517,7 @@ static void schedule_3rdack_retransmission(struct sock *ssk)
struct tcp_sock *tp = tcp_sk(ssk);
unsigned long timeout;
- if (mptcp_subflow_ctx(ssk)->fully_established)
+ if (READ_ONCE(mptcp_subflow_ctx(ssk)->fully_established))
return;
/* reschedule with a timeout above RTT, as we must look only for drop */
@@ -3530,7 +3528,8 @@ static void schedule_3rdack_retransmission(struct sock *ssk)
timeout += jiffies;
WARN_ON_ONCE(icsk->icsk_ack.pending & ICSK_ACK_TIMER);
- icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
+ smp_store_release(&icsk->icsk_ack.pending,
+ icsk->icsk_ack.pending | ICSK_ACK_SCHED | ICSK_ACK_TIMER);
icsk->icsk_ack.timeout = timeout;
sk_reset_timer(ssk, &icsk->icsk_delack_timer, timeout);
}
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 568a72702b08..a93e661ef5c4 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -513,7 +513,6 @@ struct mptcp_subflow_context {
request_bkup : 1,
mp_capable : 1, /* remote is MPTCP capable */
mp_join : 1, /* remote is JOINing */
- fully_established : 1, /* path validated */
pm_notified : 1, /* PM hook called for established status */
conn_finished : 1,
map_valid : 1,
@@ -532,10 +531,11 @@ struct mptcp_subflow_context {
is_mptfo : 1, /* subflow is doing TFO */
close_event_done : 1, /* has done the post-closed part */
mpc_drop : 1, /* the MPC option has been dropped in a rtx */
- __unused : 8;
+ __unused : 9;
bool data_avail;
bool scheduled;
bool pm_listener; /* a listener managed by the kernel PM? */
+ bool fully_established; /* path validated */
u32 remote_nonce;
u64 thmac;
u32 local_nonce;
@@ -780,7 +780,7 @@ static inline bool __tcp_can_send(const struct sock *ssk)
static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow)
{
/* can't send if JOIN hasn't completed yet (i.e. is usable for mptcp) */
- if (subflow->request_join && !subflow->fully_established)
+ if (subflow->request_join && !READ_ONCE(subflow->fully_established))
return false;
return __tcp_can_send(mptcp_subflow_tcp_sock(subflow));
diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
index 78ed508ebc1b..df7dbcfa3b71 100644
--- a/net/mptcp/sched.c
+++ b/net/mptcp/sched.c
@@ -60,7 +60,6 @@ void mptcp_get_available_schedulers(char *buf, size_t maxlen)
size_t offs = 0;
rcu_read_lock();
- spin_lock(&mptcp_sched_list_lock);
list_for_each_entry_rcu(sched, &mptcp_sched_list, list) {
offs += snprintf(buf + offs, maxlen - offs,
"%s%s",
@@ -69,7 +68,6 @@ void mptcp_get_available_schedulers(char *buf, size_t maxlen)
if (WARN_ON_ONCE(offs >= maxlen))
break;
}
- spin_unlock(&mptcp_sched_list_lock);
rcu_read_unlock();
}
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 6170f2fff71e..fd021cf8286e 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -800,7 +800,7 @@ void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
const struct mptcp_options_received *mp_opt)
{
subflow_set_remote_key(msk, subflow, mp_opt);
- subflow->fully_established = 1;
+ WRITE_ONCE(subflow->fully_established, true);
WRITE_ONCE(msk->fully_established, true);
if (subflow->is_mptfo)
@@ -971,7 +971,8 @@ enum mapping_status {
MAPPING_EMPTY,
MAPPING_DATA_FIN,
MAPPING_DUMMY,
- MAPPING_BAD_CSUM
+ MAPPING_BAD_CSUM,
+ MAPPING_NODSS
};
static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
@@ -1128,8 +1129,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
return MAPPING_EMPTY;
}
+ /* If the required DSS has likely been dropped by a middlebox */
if (!subflow->map_valid)
- return MAPPING_INVALID;
+ return MAPPING_NODSS;
goto validate_seq;
}
@@ -1343,7 +1345,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
status = get_mapping_status(ssk, msk);
trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue));
if (unlikely(status == MAPPING_INVALID || status == MAPPING_DUMMY ||
- status == MAPPING_BAD_CSUM))
+ status == MAPPING_BAD_CSUM || status == MAPPING_NODSS))
goto fallback;
if (status != MAPPING_OK)
@@ -1396,7 +1398,9 @@ fallback:
* subflow_error_report() will introduce the appropriate barriers
*/
subflow->reset_transient = 0;
- subflow->reset_reason = MPTCP_RST_EMPTCP;
+ subflow->reset_reason = status == MAPPING_NODSS ?
+ MPTCP_RST_EMIDDLEBOX :
+ MPTCP_RST_EMPTCP;
reset:
WRITE_ONCE(ssk->sk_err, EBADMSG);
@@ -2045,7 +2049,6 @@ static void subflow_ulp_clone(const struct request_sock *req,
new_ctx->tcp_state_change = old_ctx->tcp_state_change;
new_ctx->tcp_error_report = old_ctx->tcp_error_report;
new_ctx->rel_write_seq = 1;
- new_ctx->tcp_sock = newsk;
if (subflow_req->mp_capable) {
/* see comments in subflow_syn_recv_sock(), MPTCP connection
@@ -2062,7 +2065,7 @@ static void subflow_ulp_clone(const struct request_sock *req,
} else if (subflow_req->mp_join) {
new_ctx->ssn_offset = subflow_req->ssn_offset;
new_ctx->mp_join = 1;
- new_ctx->fully_established = 1;
+ WRITE_ONCE(new_ctx->fully_established, true);
new_ctx->remote_key_valid = 1;
new_ctx->backup = subflow_req->backup;
new_ctx->request_bkup = subflow_req->request_bkup;
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index e4fa00abde6a..5988b9bb9029 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -163,11 +163,8 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
if (ret)
return ret;
- if (ip > ip_to) {
+ if (ip > ip_to)
swap(ip, ip_to);
- if (ip < map->first_ip)
- return -IPSET_ERR_BITMAP_RANGE;
- }
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
@@ -178,7 +175,7 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
ip_to = ip;
}
- if (ip_to > map->last_ip)
+ if (ip < map->first_ip || ip_to > map->last_ip)
return -IPSET_ERR_BITMAP_RANGE;
for (; !before(ip_to, ip); ip += map->hosts) {
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index dc6ddc4abbe2..7d13110ce188 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3662,10 +3662,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
udest->port = nla_get_be16(nla_port);
- if (nla_addr_family)
- udest->af = nla_get_u16(nla_addr_family);
- else
- udest->af = 0;
+ udest->af = nla_get_u16_default(nla_addr_family, 0);
/* If a full entry was requested, check for the additional fields */
if (full_entry) {
diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
index 3d64a4511fcf..06b084844700 100644
--- a/net/netfilter/nf_bpf_link.c
+++ b/net/netfilter/nf_bpf_link.c
@@ -43,7 +43,7 @@ get_proto_defrag_hook(struct bpf_nf_link *link,
hook = rcu_dereference(*ptr_global_hook);
if (!hook) {
rcu_read_unlock();
- err = request_module(mod);
+ err = request_module("%s", mod);
if (err)
return ERR_PTR(err < 0 ? err : -EINVAL);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 6a1239433830..36168f8b6efa 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -3870,7 +3870,7 @@ static int __init ctnetlink_init(void)
{
int ret;
- NL_ASSERT_DUMP_CTX_FITS(struct ctnetlink_list_dump_ctx);
+ NL_ASSERT_CTX_FITS(struct ctnetlink_list_dump_ctx);
ret = nfnetlink_subsys_register(&ctnl_subsys);
if (ret < 0) {
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 4085c436e306..aad84aabd7f1 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -1090,10 +1090,8 @@ static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[],
range->flags |= NF_NAT_RANGE_MAP_IPS;
}
- if (tb[CTA_NAT_V4_MAXIP])
- range->max_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MAXIP]);
- else
- range->max_addr.ip = range->min_addr.ip;
+ range->max_addr.ip = nla_get_be32_default(tb[CTA_NAT_V4_MAXIP],
+ range->min_addr.ip);
return 0;
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 588a2757986c..21b6f7410a1f 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -26,6 +26,9 @@
#define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-"))
#define NFT_SET_MAX_ANONLEN 16
+/* limit compaction to avoid huge kmalloc/krealloc sizes. */
+#define NFT_MAX_SET_NELEMS ((2048 - sizeof(struct nft_trans_elem)) / sizeof(struct nft_trans_one_elem))
+
unsigned int nf_tables_net_id __read_mostly;
static LIST_HEAD(nf_tables_expressions);
@@ -391,6 +394,86 @@ static void nf_tables_unregister_hook(struct net *net,
return __nf_tables_unregister_hook(net, table, chain, false);
}
+static bool nft_trans_collapse_set_elem_allowed(const struct nft_trans_elem *a, const struct nft_trans_elem *b)
+{
+ /* NB: the ->bound equality check is defensive, at this time we only merge
+ * a new nft_trans_elem transaction request with the transaction tail
+ * element, but a->bound != b->bound would imply a NEWRULE transaction
+ * is queued in-between.
+ *
+ * The set check is mandatory, the NFT_MAX_SET_NELEMS check prevents
+ * huge krealloc() requests.
+ */
+ return a->set == b->set && a->bound == b->bound && a->nelems < NFT_MAX_SET_NELEMS;
+}
+
+static bool nft_trans_collapse_set_elem(struct nftables_pernet *nft_net,
+ struct nft_trans_elem *tail,
+ struct nft_trans_elem *trans,
+ gfp_t gfp)
+{
+ unsigned int nelems, old_nelems = tail->nelems;
+ struct nft_trans_elem *new_trans;
+
+ if (!nft_trans_collapse_set_elem_allowed(tail, trans))
+ return false;
+
+ /* "cannot happen", at this time userspace element add
+ * requests always allocate a new transaction element.
+ *
+ * This serves as a reminder to adjust the list_add_tail
+ * logic below in case this ever changes.
+ */
+ if (WARN_ON_ONCE(trans->nelems != 1))
+ return false;
+
+ if (check_add_overflow(old_nelems, trans->nelems, &nelems))
+ return false;
+
+ /* krealloc might free tail which invalidates list pointers */
+ list_del_init(&tail->nft_trans.list);
+
+ new_trans = krealloc(tail, struct_size(tail, elems, nelems), gfp);
+ if (!new_trans) {
+ list_add_tail(&tail->nft_trans.list, &nft_net->commit_list);
+ return false;
+ }
+
+ /*
+ * new_trans->nft_trans.list contains garbage, but
+ * list_add_tail() doesn't care.
+ */
+ new_trans->nelems = nelems;
+ new_trans->elems[old_nelems] = trans->elems[0];
+ list_add_tail(&new_trans->nft_trans.list, &nft_net->commit_list);
+
+ return true;
+}
+
+static bool nft_trans_try_collapse(struct nftables_pernet *nft_net,
+ struct nft_trans *trans, gfp_t gfp)
+{
+ struct nft_trans *tail;
+
+ if (list_empty(&nft_net->commit_list))
+ return false;
+
+ tail = list_last_entry(&nft_net->commit_list, struct nft_trans, list);
+
+ if (tail->msg_type != trans->msg_type)
+ return false;
+
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWSETELEM:
+ case NFT_MSG_DELSETELEM:
+ return nft_trans_collapse_set_elem(nft_net,
+ nft_trans_container_elem(tail),
+ nft_trans_container_elem(trans), gfp);
+ }
+
+ return false;
+}
+
static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans)
{
struct nftables_pernet *nft_net = nft_pernet(net);
@@ -421,6 +504,24 @@ static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *tr
}
}
+static void nft_trans_commit_list_add_elem(struct net *net, struct nft_trans *trans,
+ gfp_t gfp)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+
+ WARN_ON_ONCE(trans->msg_type != NFT_MSG_NEWSETELEM &&
+ trans->msg_type != NFT_MSG_DELSETELEM);
+
+ might_alloc(gfp);
+
+ if (nft_trans_try_collapse(nft_net, trans, gfp)) {
+ kfree(trans);
+ return;
+ }
+
+ nft_trans_commit_list_add_tail(net, trans);
+}
+
static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
{
struct nft_trans *trans;
@@ -1825,7 +1926,8 @@ nla_put_failure:
return -ENOSPC;
}
-static int nft_dump_basechain_hook(struct sk_buff *skb, int family,
+static int nft_dump_basechain_hook(struct sk_buff *skb,
+ const struct net *net, int family,
const struct nft_base_chain *basechain,
const struct list_head *hook_list)
{
@@ -1850,7 +1952,8 @@ static int nft_dump_basechain_hook(struct sk_buff *skb, int family,
if (!hook_list)
hook_list = &basechain->hook_list;
- list_for_each_entry_rcu(hook, hook_list, list) {
+ list_for_each_entry_rcu(hook, hook_list, list,
+ lockdep_commit_lock_is_held(net)) {
if (!first)
first = hook;
@@ -1901,7 +2004,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
const struct nft_base_chain *basechain = nft_base_chain(chain);
struct nft_stats __percpu *stats;
- if (nft_dump_basechain_hook(skb, family, basechain, hook_list))
+ if (nft_dump_basechain_hook(skb, net, family, basechain, hook_list))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_CHAIN_POLICY,
@@ -2083,14 +2186,14 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr)
err = nla_parse_nested_deprecated(tb, NFTA_COUNTER_MAX, attr,
nft_counter_policy, NULL);
if (err < 0)
- return ERR_PTR(err);
+ return ERR_PTR_PCPU(err);
if (!tb[NFTA_COUNTER_BYTES] || !tb[NFTA_COUNTER_PACKETS])
- return ERR_PTR(-EINVAL);
+ return ERR_PTR_PCPU(-EINVAL);
newstats = netdev_alloc_pcpu_stats(struct nft_stats);
if (newstats == NULL)
- return ERR_PTR(-ENOMEM);
+ return ERR_PTR_PCPU(-ENOMEM);
/* Restore old counters on this cpu, no problem. Per-cpu statistics
* are not exposed to userspace.
@@ -2534,10 +2637,10 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
if (nla[NFTA_CHAIN_COUNTERS]) {
stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
- if (IS_ERR(stats)) {
+ if (IS_ERR_PCPU(stats)) {
nft_chain_release_hook(&hook);
kfree(basechain);
- return PTR_ERR(stats);
+ return PTR_ERR_PCPU(stats);
}
rcu_assign_pointer(basechain->stats, stats);
}
@@ -2651,7 +2754,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
struct nft_table *table = ctx->table;
struct nft_chain *chain = ctx->chain;
struct nft_chain_hook hook = {};
- struct nft_stats *stats = NULL;
+ struct nft_stats __percpu *stats = NULL;
struct nft_hook *h, *next;
struct nf_hook_ops *ops;
struct nft_trans *trans;
@@ -2747,8 +2850,8 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
}
stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
- if (IS_ERR(stats)) {
- err = PTR_ERR(stats);
+ if (IS_ERR_PCPU(stats)) {
+ err = PTR_ERR_PCPU(stats);
goto err_hooks;
}
}
@@ -3295,25 +3398,37 @@ int nft_expr_inner_parse(const struct nft_ctx *ctx, const struct nlattr *nla,
if (!tb[NFTA_EXPR_DATA] || !tb[NFTA_EXPR_NAME])
return -EINVAL;
+ rcu_read_lock();
+
type = __nft_expr_type_get(ctx->family, tb[NFTA_EXPR_NAME]);
- if (!type)
- return -ENOENT;
+ if (!type) {
+ err = -ENOENT;
+ goto out_unlock;
+ }
- if (!type->inner_ops)
- return -EOPNOTSUPP;
+ if (!type->inner_ops) {
+ err = -EOPNOTSUPP;
+ goto out_unlock;
+ }
err = nla_parse_nested_deprecated(info->tb, type->maxattr,
tb[NFTA_EXPR_DATA],
type->policy, NULL);
if (err < 0)
- goto err_nla_parse;
+ goto out_unlock;
info->attr = nla;
info->ops = type->inner_ops;
+ /* No module reference will be taken on type->owner.
+ * Presence of type->inner_ops implies that the expression
+ * is builtin, so it cannot go away.
+ */
+ rcu_read_unlock();
return 0;
-err_nla_parse:
+out_unlock:
+ rcu_read_unlock();
return err;
}
@@ -3412,13 +3527,15 @@ void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr)
* Rules
*/
-static struct nft_rule *__nft_rule_lookup(const struct nft_chain *chain,
+static struct nft_rule *__nft_rule_lookup(const struct net *net,
+ const struct nft_chain *chain,
u64 handle)
{
struct nft_rule *rule;
// FIXME: this sucks
- list_for_each_entry_rcu(rule, &chain->rules, list) {
+ list_for_each_entry_rcu(rule, &chain->rules, list,
+ lockdep_commit_lock_is_held(net)) {
if (handle == rule->handle)
return rule;
}
@@ -3426,13 +3543,14 @@ static struct nft_rule *__nft_rule_lookup(const struct nft_chain *chain,
return ERR_PTR(-ENOENT);
}
-static struct nft_rule *nft_rule_lookup(const struct nft_chain *chain,
+static struct nft_rule *nft_rule_lookup(const struct net *net,
+ const struct nft_chain *chain,
const struct nlattr *nla)
{
if (nla == NULL)
return ERR_PTR(-EINVAL);
- return __nft_rule_lookup(chain, be64_to_cpu(nla_get_be64(nla)));
+ return __nft_rule_lookup(net, chain, be64_to_cpu(nla_get_be64(nla)));
}
static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
@@ -3733,7 +3851,7 @@ static int nf_tables_dump_rules_done(struct netlink_callback *cb)
return 0;
}
-/* called with rcu_read_lock held */
+/* Caller must hold rcu read lock or transaction mutex */
static struct sk_buff *
nf_tables_getrule_single(u32 portid, const struct nfnl_info *info,
const struct nlattr * const nla[], bool reset)
@@ -3760,7 +3878,7 @@ nf_tables_getrule_single(u32 portid, const struct nfnl_info *info,
return ERR_CAST(chain);
}
- rule = nft_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
+ rule = nft_rule_lookup(net, chain, nla[NFTA_RULE_HANDLE]);
if (IS_ERR(rule)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
return ERR_CAST(rule);
@@ -3984,7 +4102,8 @@ int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set)
struct nft_set_ext *ext;
int ret = 0;
- list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+ list_for_each_entry_rcu(catchall, &set->catchall_list, list,
+ lockdep_commit_lock_is_held(ctx->net)) {
ext = nft_set_elem_ext(set, catchall->elem);
if (!nft_set_elem_active(ext, dummy_iter.genmask))
continue;
@@ -4058,7 +4177,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
if (nla[NFTA_RULE_HANDLE]) {
handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE]));
- rule = __nft_rule_lookup(chain, handle);
+ rule = __nft_rule_lookup(net, chain, handle);
if (IS_ERR(rule)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
return PTR_ERR(rule);
@@ -4080,7 +4199,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
if (nla[NFTA_RULE_POSITION]) {
pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
- old_rule = __nft_rule_lookup(chain, pos_handle);
+ old_rule = __nft_rule_lookup(net, chain, pos_handle);
if (IS_ERR(old_rule)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]);
return PTR_ERR(old_rule);
@@ -4297,7 +4416,7 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info,
if (chain) {
if (nla[NFTA_RULE_HANDLE]) {
- rule = nft_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
+ rule = nft_rule_lookup(info->net, chain, nla[NFTA_RULE_HANDLE]);
if (IS_ERR(rule)) {
if (PTR_ERR(rule) == -ENOENT &&
NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYRULE)
@@ -4457,7 +4576,8 @@ static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
[NFTA_SET_DESC_CONCAT] = NLA_POLICY_NESTED_ARRAY(nft_concat_policy),
};
-static struct nft_set *nft_set_lookup(const struct nft_table *table,
+static struct nft_set *nft_set_lookup(const struct net *net,
+ const struct nft_table *table,
const struct nlattr *nla, u8 genmask)
{
struct nft_set *set;
@@ -4465,7 +4585,8 @@ static struct nft_set *nft_set_lookup(const struct nft_table *table,
if (nla == NULL)
return ERR_PTR(-EINVAL);
- list_for_each_entry_rcu(set, &table->sets, list) {
+ list_for_each_entry_rcu(set, &table->sets, list,
+ lockdep_commit_lock_is_held(net)) {
if (!nla_strcmp(nla, set->name) &&
nft_active_genmask(set, genmask))
return set;
@@ -4515,7 +4636,7 @@ struct nft_set *nft_set_lookup_global(const struct net *net,
{
struct nft_set *set;
- set = nft_set_lookup(table, nla_set_name, genmask);
+ set = nft_set_lookup(net, table, nla_set_name, genmask);
if (IS_ERR(set)) {
if (!nla_set_id)
return set;
@@ -4891,7 +5012,7 @@ static int nf_tables_getset(struct sk_buff *skb, const struct nfnl_info *info,
if (!nla[NFTA_SET_TABLE])
return -EINVAL;
- set = nft_set_lookup(table, nla[NFTA_SET_NAME], genmask);
+ set = nft_set_lookup(net, table, nla[NFTA_SET_NAME], genmask);
if (IS_ERR(set)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
return PTR_ERR(set);
@@ -5227,7 +5348,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
- set = nft_set_lookup(table, nla[NFTA_SET_NAME], genmask);
+ set = nft_set_lookup(net, table, nla[NFTA_SET_NAME], genmask);
if (IS_ERR(set)) {
if (PTR_ERR(set) != -ENOENT) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
@@ -5429,7 +5550,7 @@ static int nf_tables_delset(struct sk_buff *skb, const struct nfnl_info *info,
set = nft_set_lookup_byhandle(table, attr, genmask);
} else {
attr = nla[NFTA_SET_NAME];
- set = nft_set_lookup(table, attr, genmask);
+ set = nft_set_lookup(net, table, attr, genmask);
}
if (IS_ERR(set)) {
@@ -5493,7 +5614,8 @@ static int nft_set_catchall_bind_check(const struct nft_ctx *ctx,
struct nft_set_ext *ext;
int ret = 0;
- list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+ list_for_each_entry_rcu(catchall, &set->catchall_list, list,
+ lockdep_commit_lock_is_held(ctx->net)) {
ext = nft_set_elem_ext(set, catchall->elem);
if (!nft_set_elem_active(ext, genmask))
continue;
@@ -6259,7 +6381,7 @@ static int nft_set_dump_ctx_init(struct nft_set_dump_ctx *dump_ctx,
return PTR_ERR(table);
}
- set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
+ set = nft_set_lookup(net, table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
if (IS_ERR(set)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]);
return PTR_ERR(set);
@@ -6410,17 +6532,21 @@ err:
nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS);
}
-static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx,
+static struct nft_trans *nft_trans_elem_alloc(const struct nft_ctx *ctx,
int msg_type,
struct nft_set *set)
{
+ struct nft_trans_elem *te;
struct nft_trans *trans;
- trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_elem));
+ trans = nft_trans_alloc(ctx, msg_type, struct_size(te, elems, 1));
if (trans == NULL)
return NULL;
- nft_trans_elem_set(trans) = set;
+ te = nft_trans_container_elem(trans);
+ te->nelems = 1;
+ te->set = set;
+
return trans;
}
@@ -6542,28 +6668,52 @@ static void nft_set_elem_expr_destroy(const struct nft_ctx *ctx,
}
/* Drop references and destroy. Called from gc, dynset and abort path. */
-void nft_set_elem_destroy(const struct nft_set *set,
- const struct nft_elem_priv *elem_priv,
- bool destroy_expr)
+static void __nft_set_elem_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set,
+ const struct nft_elem_priv *elem_priv,
+ bool destroy_expr)
{
struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
- struct nft_ctx ctx = {
- .net = read_pnet(&set->net),
- .family = set->table->family,
- };
nft_data_release(nft_set_ext_key(ext), NFT_DATA_VALUE);
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
nft_data_release(nft_set_ext_data(ext), set->dtype);
if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS))
- nft_set_elem_expr_destroy(&ctx, nft_set_ext_expr(ext));
+ nft_set_elem_expr_destroy(ctx, nft_set_ext_expr(ext));
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
nft_use_dec(&(*nft_set_ext_obj(ext))->use);
kfree(elem_priv);
}
+
+/* Drop references and destroy. Called from gc and dynset. */
+void nft_set_elem_destroy(const struct nft_set *set,
+ const struct nft_elem_priv *elem_priv,
+ bool destroy_expr)
+{
+ struct nft_ctx ctx = {
+ .net = read_pnet(&set->net),
+ .family = set->table->family,
+ };
+
+ __nft_set_elem_destroy(&ctx, set, elem_priv, destroy_expr);
+}
EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
+/* Drop references and destroy. Called from abort path. */
+static void nft_trans_set_elem_destroy(const struct nft_ctx *ctx, struct nft_trans_elem *te)
+{
+ int i;
+
+ for (i = 0; i < te->nelems; i++) {
+ /* skip update request, see nft_trans_elems_new_abort() */
+ if (!te->elems[i].priv)
+ continue;
+
+ __nft_set_elem_destroy(ctx, te->set, te->elems[i].priv, true);
+ }
+}
+
/* Destroy element. References have been already dropped in the preparation
* path via nft_setelem_data_deactivate().
*/
@@ -6579,6 +6729,15 @@ void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
kfree(elem_priv);
}
+static void nft_trans_elems_destroy(const struct nft_ctx *ctx,
+ const struct nft_trans_elem *te)
+{
+ int i;
+
+ for (i = 0; i < te->nelems; i++)
+ nf_tables_set_elem_destroy(ctx, te->set, te->elems[i].priv);
+}
+
int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_expr *expr_array[])
{
@@ -6735,6 +6894,38 @@ static void nft_setelem_activate(struct net *net, struct nft_set *set,
}
}
+static void nft_trans_elem_update(const struct nft_set *set,
+ const struct nft_trans_one_elem *elem)
+{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ const struct nft_elem_update *update = elem->update;
+
+ if (update->flags & NFT_TRANS_UPD_TIMEOUT)
+ WRITE_ONCE(nft_set_ext_timeout(ext)->timeout, update->timeout);
+
+ if (update->flags & NFT_TRANS_UPD_EXPIRATION)
+ WRITE_ONCE(nft_set_ext_timeout(ext)->expiration, get_jiffies_64() + update->expiration);
+}
+
+static void nft_trans_elems_add(const struct nft_ctx *ctx,
+ struct nft_trans_elem *te)
+{
+ int i;
+
+ for (i = 0; i < te->nelems; i++) {
+ struct nft_trans_one_elem *elem = &te->elems[i];
+
+ if (elem->update)
+ nft_trans_elem_update(te->set, elem);
+ else
+ nft_setelem_activate(ctx->net, te->set, elem->priv);
+
+ nf_tables_setelem_notify(ctx, te->set, elem->priv,
+ NFT_MSG_NEWSETELEM);
+ kfree(elem->update);
+ }
+}
+
static int nft_setelem_catchall_deactivate(const struct net *net,
struct nft_set *set,
struct nft_set_elem *elem)
@@ -6817,6 +7008,26 @@ static void nft_setelem_remove(const struct net *net,
set->ops->remove(net, set, elem_priv);
}
+static void nft_trans_elems_remove(const struct nft_ctx *ctx,
+ const struct nft_trans_elem *te)
+{
+ int i;
+
+ for (i = 0; i < te->nelems; i++) {
+ WARN_ON_ONCE(te->elems[i].update);
+
+ nf_tables_setelem_notify(ctx, te->set,
+ te->elems[i].priv,
+ te->nft_trans.msg_type);
+
+ nft_setelem_remove(ctx->net, te->set, te->elems[i].priv);
+ if (!nft_setelem_is_catchall(te->set, te->elems[i].priv)) {
+ atomic_dec(&te->set->nelems);
+ te->set->ndeact--;
+ }
+ }
+}
+
static bool nft_setelem_valid_key_end(const struct nft_set *set,
struct nlattr **nla, u32 flags)
{
@@ -6853,7 +7064,6 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
struct nft_data_desc desc;
enum nft_registers dreg;
struct nft_trans *trans;
- u8 update_flags;
u64 expiration;
u64 timeout;
int err, i;
@@ -7168,23 +7378,33 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
else if (!(nlmsg_flags & NLM_F_EXCL)) {
err = 0;
if (nft_set_ext_exists(ext2, NFT_SET_EXT_TIMEOUT)) {
- update_flags = 0;
+ struct nft_elem_update update = { };
+
if (timeout != nft_set_ext_timeout(ext2)->timeout) {
- nft_trans_elem_timeout(trans) = timeout;
+ update.timeout = timeout;
if (expiration == 0)
expiration = timeout;
- update_flags |= NFT_TRANS_UPD_TIMEOUT;
+ update.flags |= NFT_TRANS_UPD_TIMEOUT;
}
if (expiration) {
- nft_trans_elem_expiration(trans) = expiration;
- update_flags |= NFT_TRANS_UPD_EXPIRATION;
+ update.expiration = expiration;
+ update.flags |= NFT_TRANS_UPD_EXPIRATION;
}
- if (update_flags) {
- nft_trans_elem_priv(trans) = elem_priv;
- nft_trans_elem_update_flags(trans) = update_flags;
- nft_trans_commit_list_add_tail(ctx->net, trans);
+ if (update.flags) {
+ struct nft_trans_one_elem *ue;
+
+ ue = &nft_trans_container_elem(trans)->elems[0];
+
+ ue->update = kmemdup(&update, sizeof(update), GFP_KERNEL);
+ if (!ue->update) {
+ err = -ENOMEM;
+ goto err_element_clash;
+ }
+
+ ue->priv = elem_priv;
+ nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL);
goto err_elem_free;
}
}
@@ -7207,8 +7427,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
}
}
- nft_trans_elem_priv(trans) = elem.priv;
- nft_trans_commit_list_add_tail(ctx->net, trans);
+ nft_trans_container_elem(trans)->elems[0].priv = elem.priv;
+ nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL);
return 0;
err_set_full:
@@ -7345,6 +7565,55 @@ void nft_setelem_data_deactivate(const struct net *net,
nft_use_dec(&(*nft_set_ext_obj(ext))->use);
}
+/* similar to nft_trans_elems_remove, but called from abort path to undo newsetelem.
+ * No notifications and no ndeact changes.
+ *
+ * Returns true if set had been added to (i.e., elements need to be removed again).
+ */
+static bool nft_trans_elems_new_abort(const struct nft_ctx *ctx,
+ struct nft_trans_elem *te)
+{
+ bool removed = false;
+ int i;
+
+ for (i = 0; i < te->nelems; i++) {
+ if (te->elems[i].update) {
+ kfree(te->elems[i].update);
+ te->elems[i].update = NULL;
+ /* Update request, so do not release this element */
+ te->elems[i].priv = NULL;
+ continue;
+ }
+
+ if (!te->set->ops->abort || nft_setelem_is_catchall(te->set, te->elems[i].priv))
+ nft_setelem_remove(ctx->net, te->set, te->elems[i].priv);
+
+ if (!nft_setelem_is_catchall(te->set, te->elems[i].priv))
+ atomic_dec(&te->set->nelems);
+
+ removed = true;
+ }
+
+ return removed;
+}
+
+/* Called from abort path to undo DELSETELEM/DESTROYSETELEM. */
+static void nft_trans_elems_destroy_abort(const struct nft_ctx *ctx,
+ const struct nft_trans_elem *te)
+{
+ int i;
+
+ for (i = 0; i < te->nelems; i++) {
+ if (!nft_setelem_active_next(ctx->net, te->set, te->elems[i].priv)) {
+ nft_setelem_data_activate(ctx->net, te->set, te->elems[i].priv);
+ nft_setelem_activate(ctx->net, te->set, te->elems[i].priv);
+ }
+
+ if (!nft_setelem_is_catchall(te->set, te->elems[i].priv))
+ te->set->ndeact--;
+ }
+}
+
static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr)
{
@@ -7424,8 +7693,8 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
nft_setelem_data_deactivate(ctx->net, set, elem.priv);
- nft_trans_elem_priv(trans) = elem.priv;
- nft_trans_commit_list_add_tail(ctx->net, trans);
+ nft_trans_container_elem(trans)->elems[0].priv = elem.priv;
+ nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL);
return 0;
fail_ops:
@@ -7451,7 +7720,8 @@ static int nft_setelem_flush(const struct nft_ctx *ctx,
return 0;
trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM,
- sizeof(struct nft_trans_elem), GFP_ATOMIC);
+ struct_size_t(struct nft_trans_elem, elems, 1),
+ GFP_ATOMIC);
if (!trans)
return -ENOMEM;
@@ -7460,8 +7730,9 @@ static int nft_setelem_flush(const struct nft_ctx *ctx,
nft_setelem_data_deactivate(ctx->net, set, elem_priv);
nft_trans_elem_set(trans) = set;
- nft_trans_elem_priv(trans) = elem_priv;
- nft_trans_commit_list_add_tail(ctx->net, trans);
+ nft_trans_container_elem(trans)->nelems = 1;
+ nft_trans_container_elem(trans)->elems[0].priv = elem_priv;
+ nft_trans_commit_list_add_elem(ctx->net, trans, GFP_ATOMIC);
return 0;
}
@@ -7472,15 +7743,13 @@ static int __nft_set_catchall_flush(const struct nft_ctx *ctx,
{
struct nft_trans *trans;
- trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM,
- sizeof(struct nft_trans_elem), GFP_KERNEL);
+ trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set);
if (!trans)
return -ENOMEM;
nft_setelem_data_deactivate(ctx->net, set, elem_priv);
- nft_trans_elem_set(trans) = set;
- nft_trans_elem_priv(trans) = elem_priv;
- nft_trans_commit_list_add_tail(ctx->net, trans);
+ nft_trans_container_elem(trans)->elems[0].priv = elem_priv;
+ nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL);
return 0;
}
@@ -7493,7 +7762,8 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx,
struct nft_set_ext *ext;
int ret = 0;
- list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+ list_for_each_entry_rcu(catchall, &set->catchall_list, list,
+ lockdep_commit_lock_is_held(ctx->net)) {
ext = nft_set_elem_ext(set, catchall->elem);
if (!nft_set_elem_active(ext, genmask))
continue;
@@ -7543,7 +7813,7 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
return PTR_ERR(table);
}
- set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
+ set = nft_set_lookup(net, table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
if (IS_ERR(set)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]);
return PTR_ERR(set);
@@ -7790,9 +8060,7 @@ static int nf_tables_updobj(const struct nft_ctx *ctx,
struct nft_trans *trans;
int err = -ENOMEM;
- if (!try_module_get(type->owner))
- return -ENOENT;
-
+ /* caller must have obtained type->owner reference. */
trans = nft_trans_alloc(ctx, NFT_MSG_NEWOBJ,
sizeof(struct nft_trans_obj));
if (!trans)
@@ -7860,15 +8128,16 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
- type = __nft_obj_type_get(objtype, family);
- if (WARN_ON_ONCE(!type))
- return -ENOENT;
-
if (!obj->ops->update)
return 0;
+ type = nft_obj_type_get(net, objtype, family);
+ if (WARN_ON_ONCE(IS_ERR(type)))
+ return PTR_ERR(type);
+
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+ /* type->owner reference is put when transaction object is released. */
return nf_tables_updobj(&ctx, type, nla[NFTA_OBJ_DATA], obj);
}
@@ -8104,7 +8373,7 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb)
return 0;
}
-/* called with rcu_read_lock held */
+/* Caller must hold rcu read lock or transaction mutex */
static struct sk_buff *
nf_tables_getobj_single(u32 portid, const struct nfnl_info *info,
const struct nlattr * const nla[], bool reset)
@@ -8373,12 +8642,14 @@ static const struct nla_policy nft_flowtable_policy[NFTA_FLOWTABLE_MAX + 1] = {
[NFTA_FLOWTABLE_FLAGS] = { .type = NLA_U32 },
};
-struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table,
+struct nft_flowtable *nft_flowtable_lookup(const struct net *net,
+ const struct nft_table *table,
const struct nlattr *nla, u8 genmask)
{
struct nft_flowtable *flowtable;
- list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
+ list_for_each_entry_rcu(flowtable, &table->flowtables, list,
+ lockdep_commit_lock_is_held(net)) {
if (!nla_strcmp(nla, flowtable->name) &&
nft_active_genmask(flowtable, genmask))
return flowtable;
@@ -8734,7 +9005,7 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
return PTR_ERR(table);
}
- flowtable = nft_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
+ flowtable = nft_flowtable_lookup(net, table, nla[NFTA_FLOWTABLE_NAME],
genmask);
if (IS_ERR(flowtable)) {
err = PTR_ERR(flowtable);
@@ -8928,7 +9199,7 @@ static int nf_tables_delflowtable(struct sk_buff *skb,
flowtable = nft_flowtable_lookup_byhandle(table, attr, genmask);
} else {
attr = nla[NFTA_FLOWTABLE_NAME];
- flowtable = nft_flowtable_lookup(table, attr, genmask);
+ flowtable = nft_flowtable_lookup(net, table, attr, genmask);
}
if (IS_ERR(flowtable)) {
@@ -8998,7 +9269,8 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
if (!hook_list)
hook_list = &flowtable->hook_list;
- list_for_each_entry_rcu(hook, hook_list, list) {
+ list_for_each_entry_rcu(hook, hook_list, list,
+ lockdep_commit_lock_is_held(net)) {
if (nla_put_string(skb, NFTA_DEVICE_NAME, hook->ops.dev->name))
goto nla_put_failure;
}
@@ -9140,7 +9412,7 @@ static int nf_tables_getflowtable(struct sk_buff *skb,
return PTR_ERR(table);
}
- flowtable = nft_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
+ flowtable = nft_flowtable_lookup(net, table, nla[NFTA_FLOWTABLE_NAME],
genmask);
if (IS_ERR(flowtable)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_NAME]);
@@ -9658,9 +9930,7 @@ static void nft_commit_release(struct nft_trans *trans)
break;
case NFT_MSG_DELSETELEM:
case NFT_MSG_DESTROYSETELEM:
- nf_tables_set_elem_destroy(&ctx,
- nft_trans_elem_set(trans),
- nft_trans_elem_priv(trans));
+ nft_trans_elems_destroy(&ctx, nft_trans_container_elem(trans));
break;
case NFT_MSG_DELOBJ:
case NFT_MSG_DESTROYOBJ:
@@ -10233,9 +10503,24 @@ static void nf_tables_commit_audit_free(struct list_head *adl)
}
}
+/* nft audit emits the number of elements that get added/removed/updated,
+ * so NEW/DELSETELEM needs to increment based on the total elem count.
+ */
+static unsigned int nf_tables_commit_audit_entrycount(const struct nft_trans *trans)
+{
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWSETELEM:
+ case NFT_MSG_DELSETELEM:
+ return nft_trans_container_elem(trans)->nelems;
+ }
+
+ return 1;
+}
+
static void nf_tables_commit_audit_collect(struct list_head *adl,
- struct nft_table *table, u32 op)
+ const struct nft_trans *trans, u32 op)
{
+ const struct nft_table *table = trans->table;
struct nft_audit_data *adp;
list_for_each_entry(adp, adl, list) {
@@ -10245,7 +10530,7 @@ static void nf_tables_commit_audit_collect(struct list_head *adl,
WARN_ONCE(1, "table=%s not expected in commit list", table->name);
return;
found:
- adp->entries++;
+ adp->entries += nf_tables_commit_audit_entrycount(trans);
if (!adp->op || adp->op > op)
adp->op = op;
}
@@ -10404,7 +10689,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_ctx_update(&ctx, trans);
- nf_tables_commit_audit_collect(&adl, table, trans->msg_type);
+ nf_tables_commit_audit_collect(&adl, trans, trans->msg_type);
switch (trans->msg_type) {
case NFT_MSG_NEWTABLE:
if (nft_trans_table_update(trans)) {
@@ -10513,25 +10798,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
case NFT_MSG_NEWSETELEM:
te = nft_trans_container_elem(trans);
- if (te->update_flags) {
- const struct nft_set_ext *ext =
- nft_set_elem_ext(te->set, te->elem_priv);
+ nft_trans_elems_add(&ctx, te);
- if (te->update_flags & NFT_TRANS_UPD_TIMEOUT) {
- WRITE_ONCE(nft_set_ext_timeout(ext)->timeout,
- te->timeout);
- }
- if (te->update_flags & NFT_TRANS_UPD_EXPIRATION) {
- WRITE_ONCE(nft_set_ext_timeout(ext)->expiration,
- get_jiffies_64() + te->expiration);
- }
- } else {
- nft_setelem_activate(net, te->set, te->elem_priv);
- }
-
- nf_tables_setelem_notify(&ctx, te->set,
- te->elem_priv,
- NFT_MSG_NEWSETELEM);
if (te->set->ops->commit &&
list_empty(&te->set->pending_update)) {
list_add_tail(&te->set->pending_update,
@@ -10543,14 +10811,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
case NFT_MSG_DESTROYSETELEM:
te = nft_trans_container_elem(trans);
- nf_tables_setelem_notify(&ctx, te->set,
- te->elem_priv,
- trans->msg_type);
- nft_setelem_remove(net, te->set, te->elem_priv);
- if (!nft_setelem_is_catchall(te->set, te->elem_priv)) {
- atomic_dec(&te->set->nelems);
- te->set->ndeact--;
- }
+ nft_trans_elems_remove(&ctx, te);
+
if (te->set->ops->commit &&
list_empty(&te->set->pending_update)) {
list_add_tail(&te->set->pending_update,
@@ -10670,8 +10932,7 @@ static void nf_tables_abort_release(struct nft_trans *trans)
nft_set_destroy(&ctx, nft_trans_set(trans));
break;
case NFT_MSG_NEWSETELEM:
- nft_set_elem_destroy(nft_trans_elem_set(trans),
- nft_trans_elem_priv(trans), true);
+ nft_trans_set_elem_destroy(&ctx, nft_trans_container_elem(trans));
break;
case NFT_MSG_NEWOBJ:
nft_obj_destroy(&ctx, nft_trans_obj(trans));
@@ -10828,18 +11089,15 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSETELEM:
- if (nft_trans_elem_update_flags(trans) ||
- nft_trans_elem_set_bound(trans)) {
+ if (nft_trans_elem_set_bound(trans)) {
nft_trans_destroy(trans);
break;
}
te = nft_trans_container_elem(trans);
- if (!te->set->ops->abort ||
- nft_setelem_is_catchall(te->set, te->elem_priv))
- nft_setelem_remove(net, te->set, te->elem_priv);
-
- if (!nft_setelem_is_catchall(te->set, te->elem_priv))
- atomic_dec(&te->set->nelems);
+ if (!nft_trans_elems_new_abort(&ctx, te)) {
+ nft_trans_destroy(trans);
+ break;
+ }
if (te->set->ops->abort &&
list_empty(&te->set->pending_update)) {
@@ -10851,12 +11109,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
case NFT_MSG_DESTROYSETELEM:
te = nft_trans_container_elem(trans);
- if (!nft_setelem_active_next(net, te->set, te->elem_priv)) {
- nft_setelem_data_activate(net, te->set, te->elem_priv);
- nft_setelem_activate(net, te->set, te->elem_priv);
- }
- if (!nft_setelem_is_catchall(te->set, te->elem_priv))
- te->set->ndeact--;
+ nft_trans_elems_destroy_abort(&ctx, te);
if (te->set->ops->abort &&
list_empty(&te->set->pending_update)) {
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 7784ec094097..e598a2a252b0 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -517,7 +517,7 @@ replay_abort:
err = nla_parse_deprecated(cda,
ss->cb[cb_id].attr_count,
attr, attrlen,
- ss->cb[cb_id].policy, NULL);
+ ss->cb[cb_id].policy, &extack);
if (err < 0)
goto ack;
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index 7de95674fd8c..d550910aabec 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -17,6 +17,7 @@
struct nft_bitwise {
u8 sreg;
+ u8 sreg2;
u8 dreg;
enum nft_bitwise_ops op:8;
u8 len;
@@ -25,8 +26,8 @@ struct nft_bitwise {
struct nft_data data;
};
-static void nft_bitwise_eval_bool(u32 *dst, const u32 *src,
- const struct nft_bitwise *priv)
+static void nft_bitwise_eval_mask_xor(u32 *dst, const u32 *src,
+ const struct nft_bitwise *priv)
{
unsigned int i;
@@ -60,28 +61,72 @@ static void nft_bitwise_eval_rshift(u32 *dst, const u32 *src,
}
}
+static void nft_bitwise_eval_and(u32 *dst, const u32 *src, const u32 *src2,
+ const struct nft_bitwise *priv)
+{
+ unsigned int i, n;
+
+ for (i = 0, n = DIV_ROUND_UP(priv->len, sizeof(u32)); i < n; i++)
+ dst[i] = src[i] & src2[i];
+}
+
+static void nft_bitwise_eval_or(u32 *dst, const u32 *src, const u32 *src2,
+ const struct nft_bitwise *priv)
+{
+ unsigned int i, n;
+
+ for (i = 0, n = DIV_ROUND_UP(priv->len, sizeof(u32)); i < n; i++)
+ dst[i] = src[i] | src2[i];
+}
+
+static void nft_bitwise_eval_xor(u32 *dst, const u32 *src, const u32 *src2,
+ const struct nft_bitwise *priv)
+{
+ unsigned int i, n;
+
+ for (i = 0, n = DIV_ROUND_UP(priv->len, sizeof(u32)); i < n; i++)
+ dst[i] = src[i] ^ src2[i];
+}
+
void nft_bitwise_eval(const struct nft_expr *expr,
struct nft_regs *regs, const struct nft_pktinfo *pkt)
{
const struct nft_bitwise *priv = nft_expr_priv(expr);
- const u32 *src = &regs->data[priv->sreg];
+ const u32 *src = &regs->data[priv->sreg], *src2;
u32 *dst = &regs->data[priv->dreg];
- switch (priv->op) {
- case NFT_BITWISE_BOOL:
- nft_bitwise_eval_bool(dst, src, priv);
- break;
- case NFT_BITWISE_LSHIFT:
+ if (priv->op == NFT_BITWISE_MASK_XOR) {
+ nft_bitwise_eval_mask_xor(dst, src, priv);
+ return;
+ }
+ if (priv->op == NFT_BITWISE_LSHIFT) {
nft_bitwise_eval_lshift(dst, src, priv);
- break;
- case NFT_BITWISE_RSHIFT:
+ return;
+ }
+ if (priv->op == NFT_BITWISE_RSHIFT) {
nft_bitwise_eval_rshift(dst, src, priv);
- break;
+ return;
+ }
+
+ src2 = priv->sreg2 ? &regs->data[priv->sreg2] : priv->data.data;
+
+ if (priv->op == NFT_BITWISE_AND) {
+ nft_bitwise_eval_and(dst, src, src2, priv);
+ return;
+ }
+ if (priv->op == NFT_BITWISE_OR) {
+ nft_bitwise_eval_or(dst, src, src2, priv);
+ return;
+ }
+ if (priv->op == NFT_BITWISE_XOR) {
+ nft_bitwise_eval_xor(dst, src, src2, priv);
+ return;
}
}
static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = {
[NFTA_BITWISE_SREG] = { .type = NLA_U32 },
+ [NFTA_BITWISE_SREG2] = { .type = NLA_U32 },
[NFTA_BITWISE_DREG] = { .type = NLA_U32 },
[NFTA_BITWISE_LEN] = { .type = NLA_U32 },
[NFTA_BITWISE_MASK] = { .type = NLA_NESTED },
@@ -90,8 +135,8 @@ static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = {
[NFTA_BITWISE_DATA] = { .type = NLA_NESTED },
};
-static int nft_bitwise_init_bool(struct nft_bitwise *priv,
- const struct nlattr *const tb[])
+static int nft_bitwise_init_mask_xor(struct nft_bitwise *priv,
+ const struct nlattr *const tb[])
{
struct nft_data_desc mask = {
.type = NFT_DATA_VALUE,
@@ -105,7 +150,8 @@ static int nft_bitwise_init_bool(struct nft_bitwise *priv,
};
int err;
- if (tb[NFTA_BITWISE_DATA])
+ if (tb[NFTA_BITWISE_DATA] ||
+ tb[NFTA_BITWISE_SREG2])
return -EINVAL;
if (!tb[NFTA_BITWISE_MASK] ||
@@ -139,7 +185,8 @@ static int nft_bitwise_init_shift(struct nft_bitwise *priv,
int err;
if (tb[NFTA_BITWISE_MASK] ||
- tb[NFTA_BITWISE_XOR])
+ tb[NFTA_BITWISE_XOR] ||
+ tb[NFTA_BITWISE_SREG2])
return -EINVAL;
if (!tb[NFTA_BITWISE_DATA])
@@ -157,6 +204,41 @@ static int nft_bitwise_init_shift(struct nft_bitwise *priv,
return 0;
}
+static int nft_bitwise_init_bool(const struct nft_ctx *ctx,
+ struct nft_bitwise *priv,
+ const struct nlattr *const tb[])
+{
+ int err;
+
+ if (tb[NFTA_BITWISE_MASK] ||
+ tb[NFTA_BITWISE_XOR])
+ return -EINVAL;
+
+ if ((!tb[NFTA_BITWISE_DATA] && !tb[NFTA_BITWISE_SREG2]) ||
+ (tb[NFTA_BITWISE_DATA] && tb[NFTA_BITWISE_SREG2]))
+ return -EINVAL;
+
+ if (tb[NFTA_BITWISE_DATA]) {
+ struct nft_data_desc desc = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(priv->data),
+ .len = priv->len,
+ };
+
+ err = nft_data_init(NULL, &priv->data, &desc,
+ tb[NFTA_BITWISE_DATA]);
+ if (err < 0)
+ return err;
+ } else {
+ err = nft_parse_register_load(ctx, tb[NFTA_BITWISE_SREG2],
+ &priv->sreg2, priv->len);
+ if (err < 0)
+ return err;
+ }
+
+ return 0;
+}
+
static int nft_bitwise_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
@@ -185,32 +267,40 @@ static int nft_bitwise_init(const struct nft_ctx *ctx,
if (tb[NFTA_BITWISE_OP]) {
priv->op = ntohl(nla_get_be32(tb[NFTA_BITWISE_OP]));
switch (priv->op) {
- case NFT_BITWISE_BOOL:
+ case NFT_BITWISE_MASK_XOR:
case NFT_BITWISE_LSHIFT:
case NFT_BITWISE_RSHIFT:
+ case NFT_BITWISE_AND:
+ case NFT_BITWISE_OR:
+ case NFT_BITWISE_XOR:
break;
default:
return -EOPNOTSUPP;
}
} else {
- priv->op = NFT_BITWISE_BOOL;
+ priv->op = NFT_BITWISE_MASK_XOR;
}
switch(priv->op) {
- case NFT_BITWISE_BOOL:
- err = nft_bitwise_init_bool(priv, tb);
+ case NFT_BITWISE_MASK_XOR:
+ err = nft_bitwise_init_mask_xor(priv, tb);
break;
case NFT_BITWISE_LSHIFT:
case NFT_BITWISE_RSHIFT:
err = nft_bitwise_init_shift(priv, tb);
break;
+ case NFT_BITWISE_AND:
+ case NFT_BITWISE_OR:
+ case NFT_BITWISE_XOR:
+ err = nft_bitwise_init_bool(ctx, priv, tb);
+ break;
}
return err;
}
-static int nft_bitwise_dump_bool(struct sk_buff *skb,
- const struct nft_bitwise *priv)
+static int nft_bitwise_dump_mask_xor(struct sk_buff *skb,
+ const struct nft_bitwise *priv)
{
if (nft_data_dump(skb, NFTA_BITWISE_MASK, &priv->mask,
NFT_DATA_VALUE, priv->len) < 0)
@@ -232,6 +322,21 @@ static int nft_bitwise_dump_shift(struct sk_buff *skb,
return 0;
}
+static int nft_bitwise_dump_bool(struct sk_buff *skb,
+ const struct nft_bitwise *priv)
+{
+ if (priv->sreg2) {
+ if (nft_dump_register(skb, NFTA_BITWISE_SREG2, priv->sreg2))
+ return -1;
+ } else {
+ if (nft_data_dump(skb, NFTA_BITWISE_DATA, &priv->data,
+ NFT_DATA_VALUE, sizeof(u32)) < 0)
+ return -1;
+ }
+
+ return 0;
+}
+
static int nft_bitwise_dump(struct sk_buff *skb,
const struct nft_expr *expr, bool reset)
{
@@ -248,13 +353,18 @@ static int nft_bitwise_dump(struct sk_buff *skb,
return -1;
switch (priv->op) {
- case NFT_BITWISE_BOOL:
- err = nft_bitwise_dump_bool(skb, priv);
+ case NFT_BITWISE_MASK_XOR:
+ err = nft_bitwise_dump_mask_xor(skb, priv);
break;
case NFT_BITWISE_LSHIFT:
case NFT_BITWISE_RSHIFT:
err = nft_bitwise_dump_shift(skb, priv);
break;
+ case NFT_BITWISE_AND:
+ case NFT_BITWISE_OR:
+ case NFT_BITWISE_XOR:
+ err = nft_bitwise_dump_bool(skb, priv);
+ break;
}
return err;
@@ -269,7 +379,7 @@ static int nft_bitwise_offload(struct nft_offload_ctx *ctx,
const struct nft_bitwise *priv = nft_expr_priv(expr);
struct nft_offload_reg *reg = &ctx->regs[priv->dreg];
- if (priv->op != NFT_BITWISE_BOOL)
+ if (priv->op != NFT_BITWISE_MASK_XOR)
return -EOPNOTSUPP;
if (memcmp(&priv->xor, &zero, sizeof(priv->xor)) ||
@@ -299,6 +409,7 @@ static bool nft_bitwise_reduce(struct nft_regs_track *track,
track->regs[priv->dreg].bitwise &&
track->regs[priv->dreg].bitwise->ops == expr->ops &&
priv->sreg == bitwise->sreg &&
+ priv->sreg2 == bitwise->sreg2 &&
priv->dreg == bitwise->dreg &&
priv->op == bitwise->op &&
priv->len == bitwise->len &&
@@ -375,7 +486,8 @@ static int nft_bitwise_fast_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
- if (tb[NFTA_BITWISE_DATA])
+ if (tb[NFTA_BITWISE_DATA] ||
+ tb[NFTA_BITWISE_SREG2])
return -EINVAL;
if (!tb[NFTA_BITWISE_MASK] ||
@@ -406,7 +518,7 @@ nft_bitwise_fast_dump(struct sk_buff *skb,
return -1;
if (nla_put_be32(skb, NFTA_BITWISE_LEN, htonl(sizeof(u32))))
return -1;
- if (nla_put_be32(skb, NFTA_BITWISE_OP, htonl(NFT_BITWISE_BOOL)))
+ if (nla_put_be32(skb, NFTA_BITWISE_OP, htonl(NFT_BITWISE_MASK_XOR)))
return -1;
data.data[0] = priv->mask;
@@ -501,7 +613,7 @@ nft_bitwise_select_ops(const struct nft_ctx *ctx,
return &nft_bitwise_ops;
if (tb[NFTA_BITWISE_OP] &&
- ntohl(nla_get_be32(tb[NFTA_BITWISE_OP])) != NFT_BITWISE_BOOL)
+ ntohl(nla_get_be32(tb[NFTA_BITWISE_OP])) != NFT_BITWISE_MASK_XOR)
return &nft_bitwise_ops;
return &nft_bitwise_fast_ops;
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 2f732fae5a83..3b474d235663 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -8,7 +8,7 @@
#include <linux/spinlock.h>
#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/netfilter/nf_tables.h>
-#include <net/ip.h> /* for ipv4 options. */
+#include <net/ip.h>
#include <net/inet_dscp.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
@@ -236,7 +236,7 @@ static int nft_flow_route(const struct nft_pktinfo *pkt,
fl.u.ip4.saddr = ct->tuplehash[!dir].tuple.src.u3.ip;
fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex;
fl.u.ip4.flowi4_iif = this_dst->dev->ifindex;
- fl.u.ip4.flowi4_tos = ip_hdr(pkt->skb)->tos & INET_DSCP_MASK;
+ fl.u.ip4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip_hdr(pkt->skb)));
fl.u.ip4.flowi4_mark = pkt->skb->mark;
fl.u.ip4.flowi4_flags = FLOWI_FLAG_ANYSRC;
break;
@@ -409,8 +409,8 @@ static int nft_flow_offload_init(const struct nft_ctx *ctx,
if (!tb[NFTA_FLOW_TABLE_NAME])
return -EINVAL;
- flowtable = nft_flowtable_lookup(ctx->table, tb[NFTA_FLOW_TABLE_NAME],
- genmask);
+ flowtable = nft_flowtable_lookup(ctx->net, ctx->table,
+ tb[NFTA_FLOW_TABLE_NAME], genmask);
if (IS_ERR(flowtable))
return PTR_ERR(flowtable);
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 1caa04619dc6..12390d2e994f 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -88,13 +88,15 @@ bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set,
}
static struct nft_bitmap_elem *
-nft_bitmap_elem_find(const struct nft_set *set, struct nft_bitmap_elem *this,
+nft_bitmap_elem_find(const struct net *net,
+ const struct nft_set *set, struct nft_bitmap_elem *this,
u8 genmask)
{
const struct nft_bitmap *priv = nft_set_priv(set);
struct nft_bitmap_elem *be;
- list_for_each_entry_rcu(be, &priv->list, head) {
+ list_for_each_entry_rcu(be, &priv->list, head,
+ lockdep_is_held(&nft_pernet(net)->commit_mutex)) {
if (memcmp(nft_set_ext_key(&be->ext),
nft_set_ext_key(&this->ext), set->klen) ||
!nft_set_elem_active(&be->ext, genmask))
@@ -132,7 +134,7 @@ static int nft_bitmap_insert(const struct net *net, const struct nft_set *set,
u8 genmask = nft_genmask_next(net);
u32 idx, off;
- be = nft_bitmap_elem_find(set, new, genmask);
+ be = nft_bitmap_elem_find(net, set, new, genmask);
if (be) {
*elem_priv = &be->priv;
return -EEXIST;
@@ -201,7 +203,7 @@ nft_bitmap_deactivate(const struct net *net, const struct nft_set *set,
nft_bitmap_location(set, elem->key.val.data, &idx, &off);
- be = nft_bitmap_elem_find(set, this, genmask);
+ be = nft_bitmap_elem_find(net, set, this, genmask);
if (!be)
return NULL;
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index daa56dda737a..65bd291318f2 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -647,7 +647,8 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set,
int i;
for (i = 0; i < priv->buckets; i++) {
- hlist_for_each_entry_rcu(he, &priv->table[i], node) {
+ hlist_for_each_entry_rcu(he, &priv->table[i], node,
+ lockdep_is_held(&nft_pernet(ctx->net)->commit_mutex)) {
if (iter->count < iter->skip)
goto cont;
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index 5c6ed68cc6e0..681301b46aa4 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -497,10 +497,7 @@ static int nft_tunnel_obj_init(const struct nft_ctx *ctx,
}
if (tb[NFTA_TUNNEL_KEY_TOS])
info.key.tos = nla_get_u8(tb[NFTA_TUNNEL_KEY_TOS]);
- if (tb[NFTA_TUNNEL_KEY_TTL])
- info.key.ttl = nla_get_u8(tb[NFTA_TUNNEL_KEY_TTL]);
- else
- info.key.ttl = U8_MAX;
+ info.key.ttl = nla_get_u8_default(tb[NFTA_TUNNEL_KEY_TTL], U8_MAX);
if (tb[NFTA_TUNNEL_KEY_OPTS]) {
err = nft_tunnel_obj_opts_init(ctx, tb[NFTA_TUNNEL_KEY_OPTS],
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index f8b25b6f5da7..85f017e37cfc 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -107,14 +107,12 @@ static void idletimer_tg_expired(struct timer_list *t)
schedule_work(&timer->work);
}
-static enum alarmtimer_restart idletimer_tg_alarmproc(struct alarm *alarm,
- ktime_t now)
+static void idletimer_tg_alarmproc(struct alarm *alarm, ktime_t now)
{
struct idletimer_tg *timer = alarm->data;
pr_debug("alarm %s expired\n", timer->attr.attr.name);
schedule_work(&timer->work);
- return ALARMTIMER_NORESTART;
}
static int idletimer_check_sysfs_name(const char *name, unsigned int size)
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index 689eaa2afbec..079fe72a6384 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -107,11 +107,9 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
switch (entry->def.type) {
case NETLBL_NLTYPE_UNLABELED:
- if (info->attrs[NLBL_MGMT_A_FAMILY])
- entry->family =
- nla_get_u16(info->attrs[NLBL_MGMT_A_FAMILY]);
- else
- entry->family = AF_UNSPEC;
+ entry->family =
+ nla_get_u16_default(info->attrs[NLBL_MGMT_A_FAMILY],
+ AF_UNSPEC);
break;
case NETLBL_NLTYPE_CIPSOV4:
if (!info->attrs[NLBL_MGMT_A_CV4DOI])
@@ -601,10 +599,7 @@ static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info)
struct netlbl_dom_map *entry;
u16 family;
- if (info->attrs[NLBL_MGMT_A_FAMILY])
- family = nla_get_u16(info->attrs[NLBL_MGMT_A_FAMILY]);
- else
- family = AF_INET;
+ family = nla_get_u16_default(info->attrs[NLBL_MGMT_A_FAMILY], AF_INET);
ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (ans_skb == NULL)
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 9996883bf2b7..1bc2d0890a9f 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -1538,7 +1538,7 @@ int __init netlbl_unlabel_defconf(void)
/* Only the kernel is allowed to call this function and the only time
* it is called is at bootup before the audit subsystem is reporting
* messages so don't worry to much about these values. */
- security_current_getsecid_subj(&audit_info.secid);
+ security_current_getlsmprop_subj(&audit_info.prop);
audit_info.loginuid = GLOBAL_ROOT_UID;
audit_info.sessionid = 0;
diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c
index 3ed4fea2a2de..81635a13987b 100644
--- a/net/netlabel/netlabel_user.c
+++ b/net/netlabel/netlabel_user.c
@@ -98,10 +98,9 @@ struct audit_buffer *netlbl_audit_start_common(int type,
from_kuid(&init_user_ns, audit_info->loginuid),
audit_info->sessionid);
- if (audit_info->secid != 0 &&
- security_secid_to_secctx(audit_info->secid,
- &secctx,
- &secctx_len) == 0) {
+ if (lsmprop_is_set(&audit_info->prop) &&
+ security_lsmprop_to_secctx(&audit_info->prop, &secctx,
+ &secctx_len) == 0) {
audit_log_format(audit_buf, " subj=%s", secctx);
security_release_secctx(secctx, secctx_len);
}
diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h
index d6c5b31eb4eb..d4c434956212 100644
--- a/net/netlabel/netlabel_user.h
+++ b/net/netlabel/netlabel_user.h
@@ -32,7 +32,7 @@
*/
static inline void netlbl_netlink_auditinfo(struct netlbl_audit *audit_info)
{
- security_current_getsecid_subj(&audit_info->secid);
+ security_current_getlsmprop_subj(&audit_info->prop);
audit_info->loginuid = audit_get_loginuid(current);
audit_info->sessionid = audit_get_sessionid(current);
}
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index f84aad420d44..f4e7b5e4bb59 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1165,11 +1165,16 @@ static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
return sock;
}
-struct sock *netlink_getsockbyfilp(struct file *filp)
+struct sock *netlink_getsockbyfd(int fd)
{
- struct inode *inode = file_inode(filp);
+ CLASS(fd, f)(fd);
+ struct inode *inode;
struct sock *sock;
+ if (fd_empty(f))
+ return ERR_PTR(-EBADF);
+
+ inode = file_inode(fd_file(f));
if (!S_ISSOCK(inode->i_mode))
return ERR_PTR(-ENOTSOCK);
@@ -2176,9 +2181,14 @@ netlink_ack_tlv_len(struct netlink_sock *nlk, int err,
return tlvlen;
}
+static bool nlmsg_check_in_payload(const struct nlmsghdr *nlh, const void *addr)
+{
+ return !WARN_ON(addr < nlmsg_data(nlh) ||
+ addr - (const void *) nlh >= nlh->nlmsg_len);
+}
+
static void
-netlink_ack_tlv_fill(struct sk_buff *in_skb, struct sk_buff *skb,
- const struct nlmsghdr *nlh, int err,
+netlink_ack_tlv_fill(struct sk_buff *skb, const struct nlmsghdr *nlh, int err,
const struct netlink_ext_ack *extack)
{
if (extack->_msg)
@@ -2190,9 +2200,7 @@ netlink_ack_tlv_fill(struct sk_buff *in_skb, struct sk_buff *skb,
if (!err)
return;
- if (extack->bad_attr &&
- !WARN_ON((u8 *)extack->bad_attr < in_skb->data ||
- (u8 *)extack->bad_attr >= in_skb->data + in_skb->len))
+ if (extack->bad_attr && nlmsg_check_in_payload(nlh, extack->bad_attr))
WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS,
(u8 *)extack->bad_attr - (const u8 *)nlh));
if (extack->policy)
@@ -2201,9 +2209,7 @@ netlink_ack_tlv_fill(struct sk_buff *in_skb, struct sk_buff *skb,
if (extack->miss_type)
WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_TYPE,
extack->miss_type));
- if (extack->miss_nest &&
- !WARN_ON((u8 *)extack->miss_nest < in_skb->data ||
- (u8 *)extack->miss_nest > in_skb->data + in_skb->len))
+ if (extack->miss_nest && nlmsg_check_in_payload(nlh, extack->miss_nest))
WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_NEST,
(u8 *)extack->miss_nest - (const u8 *)nlh));
}
@@ -2232,7 +2238,7 @@ static int netlink_dump_done(struct netlink_sock *nlk, struct sk_buff *skb,
if (extack_len) {
nlh->nlmsg_flags |= NLM_F_ACK_TLVS;
if (skb_tailroom(skb) >= extack_len) {
- netlink_ack_tlv_fill(cb->skb, skb, cb->nlh,
+ netlink_ack_tlv_fill(skb, cb->nlh,
nlk->dump_done_errno, extack);
nlmsg_end(skb, nlh);
}
@@ -2264,7 +2270,7 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
goto errout_skb;
/* NLMSG_GOODSIZE is small to avoid high order allocations being
- * required, but it makes sense to _attempt_ a 16K bytes allocation
+ * required, but it makes sense to _attempt_ a 32KiB allocation
* to reduce number of system calls on dump operations, if user
* ever provided a big enough buffer.
*/
@@ -2286,7 +2292,7 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
goto errout_skb;
/* Trim skb to allocated size. User is expected to provide buffer as
- * large as max(min_dump_alloc, 16KiB (mac_recvmsg_len capped at
+ * large as max(min_dump_alloc, 32KiB (max_recvmsg_len capped at
* netlink_recvmsg())). dump will pack as many smaller messages as
* could fit within the allocated skb. skb is typically allocated
* with larger space than required (could be as much as near 2x the
@@ -2491,7 +2497,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
}
if (tlvlen)
- netlink_ack_tlv_fill(in_skb, skb, nlh, err, extack);
+ netlink_ack_tlv_fill(skb, nlh, err, extack);
nlmsg_end(skb, rep);
@@ -2920,12 +2926,8 @@ static int __init netlink_proto_init(void)
for (i = 0; i < MAX_LINKS; i++) {
if (rhashtable_init(&nl_table[i].hash,
- &netlink_rhashtable_params) < 0) {
- while (--i > 0)
- rhashtable_destroy(&nl_table[i].hash);
- kfree(nl_table);
+ &netlink_rhashtable_params) < 0)
goto panic;
- }
}
netlink_add_usersock_entry();
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 07ad65774fe2..104732d34543 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -997,7 +997,7 @@ static int genl_start(struct netlink_callback *cb)
info->info.attrs = attrs;
genl_info_net_set(&info->info, sock_net(cb->skb->sk));
info->info.extack = cb->extack;
- memset(&info->info.user_ptr, 0, sizeof(info->info.user_ptr));
+ memset(&info->info.ctx, 0, sizeof(info->info.ctx));
cb->data = info;
if (ops->start) {
@@ -1104,7 +1104,7 @@ static int genl_family_rcv_msg_doit(const struct genl_family *family,
info.attrs = attrbuf;
info.extack = extack;
genl_info_net_set(&info, net);
- memset(&info.user_ptr, 0, sizeof(info.user_ptr));
+ memset(&info.ctx, 0, sizeof(info.ctx));
if (ops->pre_doit) {
err = ops->pre_doit(ops, skb, &info);
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index f456a5911e7d..1ec5955fe469 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -757,6 +757,14 @@ int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id)
}
EXPORT_SYMBOL(nci_core_conn_close);
+static void nci_set_target_ats(struct nfc_target *target, struct nci_dev *ndev)
+{
+ if (ndev->target_ats_len > 0) {
+ target->ats_len = ndev->target_ats_len;
+ memcpy(target->ats, ndev->target_ats, target->ats_len);
+ }
+}
+
static int nci_set_local_general_bytes(struct nfc_dev *nfc_dev)
{
struct nci_dev *ndev = nfc_get_drvdata(nfc_dev);
@@ -939,8 +947,11 @@ static int nci_activate_target(struct nfc_dev *nfc_dev,
msecs_to_jiffies(NCI_RF_DISC_SELECT_TIMEOUT));
}
- if (!rc)
+ if (!rc) {
ndev->target_active_prot = protocol;
+ if (protocol == NFC_PROTO_ISO14443)
+ nci_set_target_ats(target, ndev);
+ }
return rc;
}
diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c
index 994a0a1efb58..a818eff27e6b 100644
--- a/net/nfc/nci/ntf.c
+++ b/net/nfc/nci/ntf.c
@@ -402,7 +402,7 @@ static int nci_extract_activation_params_iso_dep(struct nci_dev *ndev,
switch (ntf->activation_rf_tech_and_mode) {
case NCI_NFC_A_PASSIVE_POLL_MODE:
nfca_poll = &ntf->activation_params.nfca_poll_iso_dep;
- nfca_poll->rats_res_len = min_t(__u8, *data++, 20);
+ nfca_poll->rats_res_len = min_t(__u8, *data++, NFC_ATS_MAXSIZE);
pr_debug("rats_res_len %d\n", nfca_poll->rats_res_len);
if (nfca_poll->rats_res_len > 0) {
memcpy(nfca_poll->rats_res,
@@ -531,6 +531,28 @@ static int nci_store_general_bytes_nfc_dep(struct nci_dev *ndev,
return NCI_STATUS_OK;
}
+static int nci_store_ats_nfc_iso_dep(struct nci_dev *ndev,
+ const struct nci_rf_intf_activated_ntf *ntf)
+{
+ ndev->target_ats_len = 0;
+
+ if (ntf->activation_params_len <= 0)
+ return NCI_STATUS_OK;
+
+ if (ntf->activation_params.nfca_poll_iso_dep.rats_res_len > NFC_ATS_MAXSIZE) {
+ pr_debug("ATS too long\n");
+ return NCI_STATUS_RF_PROTOCOL_ERROR;
+ }
+
+ if (ntf->activation_params.nfca_poll_iso_dep.rats_res_len > 0) {
+ ndev->target_ats_len = ntf->activation_params.nfca_poll_iso_dep.rats_res_len;
+ memcpy(ndev->target_ats, ntf->activation_params.nfca_poll_iso_dep.rats_res,
+ ndev->target_ats_len);
+ }
+
+ return NCI_STATUS_OK;
+}
+
static void nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev,
const struct sk_buff *skb)
{
@@ -660,6 +682,14 @@ exit:
if (err != NCI_STATUS_OK)
pr_err("unable to store general bytes\n");
}
+
+ /* store ATS to be reported later in nci_activate_target */
+ if (ntf.rf_interface == NCI_RF_INTERFACE_ISO_DEP &&
+ ntf.activation_rf_tech_and_mode == NCI_NFC_A_PASSIVE_POLL_MODE) {
+ err = nci_store_ats_nfc_iso_dep(ndev, &ntf);
+ if (err != NCI_STATUS_OK)
+ pr_err("unable to store ATS\n");
+ }
}
if (!(ntf.activation_rf_tech_and_mode & NCI_RF_TECH_MODE_LISTEN_MASK)) {
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index dd2ce73a24fb..6a40b8d0350d 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -96,6 +96,11 @@ static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target,
goto nla_put_failure;
}
+ if (target->ats_len > 0 &&
+ nla_put(msg, NFC_ATTR_TARGET_ATS, target->ats_len,
+ target->ats))
+ goto nla_put_failure;
+
genlmsg_end(msg, hdr);
return 0;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 78d9961fcd44..225f6048867f 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1828,8 +1828,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
parms.dp = dp;
parms.port_no = OVSP_LOCAL;
parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
- parms.desired_ifindex = a[OVS_DP_ATTR_IFINDEX]
- ? nla_get_s32(a[OVS_DP_ATTR_IFINDEX]) : 0;
+ parms.desired_ifindex = nla_get_s32_default(a[OVS_DP_ATTR_IFINDEX], 0);
/* So far only local changes have been made, now need the lock. */
ovs_lock();
@@ -2266,8 +2265,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
if (a[OVS_VPORT_ATTR_IFINDEX] && parms.type != OVS_VPORT_TYPE_INTERNAL)
return -EOPNOTSUPP;
- port_no = a[OVS_VPORT_ATTR_PORT_NO]
- ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
+ port_no = nla_get_u32_default(a[OVS_VPORT_ATTR_PORT_NO], 0);
if (port_no >= DP_MAX_PORTS)
return -EFBIG;
@@ -2304,8 +2302,8 @@ restart:
parms.dp = dp;
parms.port_no = port_no;
parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
- parms.desired_ifindex = a[OVS_VPORT_ATTR_IFINDEX]
- ? nla_get_s32(a[OVS_VPORT_ATTR_IFINDEX]) : 0;
+ parms.desired_ifindex = nla_get_s32_default(a[OVS_VPORT_ATTR_IFINDEX],
+ 0);
vport = new_vport(&parms);
err = PTR_ERR(vport);
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 729ef582a3a8..881ddd3696d5 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -1938,7 +1938,7 @@ int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
{
- return attr ? nla_get_u32(attr) : 0;
+ return nla_get_u32_default(attr, 0);
}
/**
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 5858d65ea1a9..2412d7813d24 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -195,7 +195,6 @@ static int internal_dev_recv(struct sk_buff *skb)
skb_dst_drop(skb);
nf_reset_ct(skb);
- secpath_reset(skb);
skb->pkt_type = PACKET_HOST;
skb->protocol = eth_type_trans(skb, netdev);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index a705ec214254..886c0dd47b66 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1846,21 +1846,22 @@ static int fanout_add(struct sock *sk, struct fanout_args *args)
err = -EINVAL;
spin_lock(&po->bind_lock);
- if (packet_sock_flag(po, PACKET_SOCK_RUNNING) &&
+ if (po->num &&
match->type == type &&
match->prot_hook.type == po->prot_hook.type &&
match->prot_hook.dev == po->prot_hook.dev) {
err = -ENOSPC;
if (refcount_read(&match->sk_ref) < match->max_num_members) {
- __dev_remove_pack(&po->prot_hook);
-
/* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */
WRITE_ONCE(po->fanout, match);
po->rollover = rollover;
rollover = NULL;
refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1);
- __fanout_link(sk, po);
+ if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) {
+ __dev_remove_pack(&po->prot_hook);
+ __fanout_link(sk, po);
+ }
err = 0;
}
}
@@ -2118,7 +2119,7 @@ retry:
skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = READ_ONCE(sk->sk_mark);
skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid);
- skb_setup_tx_timestamp(skb, sockc.tsflags);
+ skb_setup_tx_timestamp(skb, &sockc);
if (unlikely(extra_len == 4))
skb->no_fcs = 1;
@@ -2650,7 +2651,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
skb->priority = READ_ONCE(po->sk.sk_priority);
skb->mark = READ_ONCE(po->sk.sk_mark);
skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, po->sk.sk_clockid);
- skb_setup_tx_timestamp(skb, sockc->tsflags);
+ skb_setup_tx_timestamp(skb, sockc);
skb_zcopy_set_nouarg(skb, ph.raw);
skb_reserve(skb, hlen);
@@ -3115,7 +3116,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
goto out_free;
}
- skb_setup_tx_timestamp(skb, sockc.tsflags);
+ skb_setup_tx_timestamp(skb, &sockc);
if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) &&
!packet_extra_vlan_len_allowed(dev, skb)) {
@@ -3421,17 +3422,17 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
if (sock->type == SOCK_PACKET)
sock->ops = &packet_ops_spkt;
+ po = pkt_sk(sk);
+ err = packet_alloc_pending(po);
+ if (err)
+ goto out_sk_free;
+
sock_init_data(sock, sk);
- po = pkt_sk(sk);
init_completion(&po->skb_completion);
sk->sk_family = PF_PACKET;
po->num = proto;
- err = packet_alloc_pending(po);
- if (err)
- goto out2;
-
packet_cached_dev_reset(po);
sk->sk_destruct = packet_sock_destruct;
@@ -3463,7 +3464,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
sock_prot_inuse_add(net, &packet_proto, 1);
return 0;
-out2:
+out_sk_free:
sk_free(sk);
out:
return err;
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index cde671d29d5d..5c36bae37b8f 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -22,7 +22,7 @@
#include <net/phonet/pn_dev.h>
struct phonet_routes {
- struct mutex lock;
+ spinlock_t lock;
struct net_device __rcu *table[64];
};
@@ -54,7 +54,7 @@ static struct phonet_device *__phonet_device_alloc(struct net_device *dev)
pnd->netdev = dev;
bitmap_zero(pnd->addrs, 64);
- BUG_ON(!mutex_is_locked(&pndevs->lock));
+ lockdep_assert_held(&pndevs->lock);
list_add_rcu(&pnd->list, &pndevs->list);
return pnd;
}
@@ -64,7 +64,8 @@ static struct phonet_device *__phonet_get(struct net_device *dev)
struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev));
struct phonet_device *pnd;
- BUG_ON(!mutex_is_locked(&pndevs->lock));
+ lockdep_assert_held(&pndevs->lock);
+
list_for_each_entry(pnd, &pndevs->list, list) {
if (pnd->netdev == dev)
return pnd;
@@ -91,17 +92,22 @@ static void phonet_device_destroy(struct net_device *dev)
ASSERT_RTNL();
- mutex_lock(&pndevs->lock);
+ spin_lock(&pndevs->lock);
+
pnd = __phonet_get(dev);
if (pnd)
list_del_rcu(&pnd->list);
- mutex_unlock(&pndevs->lock);
+
+ spin_unlock(&pndevs->lock);
if (pnd) {
+ struct net *net = dev_net(dev);
+ u32 ifindex = dev->ifindex;
u8 addr;
for_each_set_bit(addr, pnd->addrs, 64)
- phonet_address_notify(RTM_DELADDR, dev, addr);
+ phonet_address_notify(net, RTM_DELADDR, ifindex, addr);
+
kfree(pnd);
}
}
@@ -133,7 +139,8 @@ int phonet_address_add(struct net_device *dev, u8 addr)
struct phonet_device *pnd;
int err = 0;
- mutex_lock(&pndevs->lock);
+ spin_lock(&pndevs->lock);
+
/* Find or create Phonet-specific device data */
pnd = __phonet_get(dev);
if (pnd == NULL)
@@ -142,7 +149,9 @@ int phonet_address_add(struct net_device *dev, u8 addr)
err = -ENOMEM;
else if (test_and_set_bit(addr >> 2, pnd->addrs))
err = -EEXIST;
- mutex_unlock(&pndevs->lock);
+
+ spin_unlock(&pndevs->lock);
+
return err;
}
@@ -152,7 +161,8 @@ int phonet_address_del(struct net_device *dev, u8 addr)
struct phonet_device *pnd;
int err = 0;
- mutex_lock(&pndevs->lock);
+ spin_lock(&pndevs->lock);
+
pnd = __phonet_get(dev);
if (!pnd || !test_and_clear_bit(addr >> 2, pnd->addrs)) {
err = -EADDRNOTAVAIL;
@@ -161,7 +171,8 @@ int phonet_address_del(struct net_device *dev, u8 addr)
list_del_rcu(&pnd->list);
else
pnd = NULL;
- mutex_unlock(&pndevs->lock);
+
+ spin_unlock(&pndevs->lock);
if (pnd)
kfree_rcu(pnd, rcu);
@@ -244,32 +255,39 @@ static int phonet_device_autoconf(struct net_device *dev)
ret = phonet_address_add(dev, req.ifr_phonet_autoconf.device);
if (ret)
return ret;
- phonet_address_notify(RTM_NEWADDR, dev,
- req.ifr_phonet_autoconf.device);
+
+ phonet_address_notify(dev_net(dev), RTM_NEWADDR, dev->ifindex,
+ req.ifr_phonet_autoconf.device);
return 0;
}
static void phonet_route_autodel(struct net_device *dev)
{
- struct phonet_net *pnn = phonet_pernet(dev_net(dev));
- unsigned int i;
+ struct net *net = dev_net(dev);
DECLARE_BITMAP(deleted, 64);
+ u32 ifindex = dev->ifindex;
+ struct phonet_net *pnn;
+ unsigned int i;
+
+ pnn = phonet_pernet(net);
/* Remove left-over Phonet routes */
bitmap_zero(deleted, 64);
- mutex_lock(&pnn->routes.lock);
- for (i = 0; i < 64; i++)
+
+ spin_lock(&pnn->routes.lock);
+ for (i = 0; i < 64; i++) {
if (rcu_access_pointer(pnn->routes.table[i]) == dev) {
RCU_INIT_POINTER(pnn->routes.table[i], NULL);
set_bit(i, deleted);
}
- mutex_unlock(&pnn->routes.lock);
+ }
+ spin_unlock(&pnn->routes.lock);
if (bitmap_empty(deleted, 64))
return; /* short-circuit RCU */
synchronize_rcu();
for_each_set_bit(i, deleted, 64) {
- rtm_phonet_notify(RTM_DELROUTE, dev, i);
+ rtm_phonet_notify(net, RTM_DELROUTE, ifindex, i);
dev_put(dev);
}
}
@@ -309,8 +327,8 @@ static int __net_init phonet_init_net(struct net *net)
return -ENOMEM;
INIT_LIST_HEAD(&pnn->pndevs.list);
- mutex_init(&pnn->pndevs.lock);
- mutex_init(&pnn->routes.lock);
+ spin_lock_init(&pnn->pndevs.lock);
+ spin_lock_init(&pnn->routes.lock);
return 0;
}
@@ -360,13 +378,15 @@ int phonet_route_add(struct net_device *dev, u8 daddr)
int err = -EEXIST;
daddr = daddr >> 2;
- mutex_lock(&routes->lock);
+
+ spin_lock(&routes->lock);
if (routes->table[daddr] == NULL) {
rcu_assign_pointer(routes->table[daddr], dev);
dev_hold(dev);
err = 0;
}
- mutex_unlock(&routes->lock);
+ spin_unlock(&routes->lock);
+
return err;
}
@@ -376,17 +396,19 @@ int phonet_route_del(struct net_device *dev, u8 daddr)
struct phonet_routes *routes = &pnn->routes;
daddr = daddr >> 2;
- mutex_lock(&routes->lock);
+
+ spin_lock(&routes->lock);
if (rcu_access_pointer(routes->table[daddr]) == dev)
RCU_INIT_POINTER(routes->table[daddr], NULL);
else
dev = NULL;
- mutex_unlock(&routes->lock);
+ spin_unlock(&routes->lock);
if (!dev)
return -ENOENT;
- synchronize_rcu();
- dev_put(dev);
+
+ /* Note : our caller must call synchronize_rcu() and dev_put(dev) */
+
return 0;
}
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index 894e5c72d6bf..b9043c92dc24 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -19,10 +19,10 @@
/* Device address handling */
-static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr,
+static int fill_addr(struct sk_buff *skb, u32 ifindex, u8 addr,
u32 portid, u32 seq, int event);
-void phonet_address_notify(int event, struct net_device *dev, u8 addr)
+void phonet_address_notify(struct net *net, int event, u32 ifindex, u8 addr)
{
struct sk_buff *skb;
int err = -ENOBUFS;
@@ -31,17 +31,18 @@ void phonet_address_notify(int event, struct net_device *dev, u8 addr)
nla_total_size(1), GFP_KERNEL);
if (skb == NULL)
goto errout;
- err = fill_addr(skb, dev, addr, 0, 0, event);
+
+ err = fill_addr(skb, ifindex, addr, 0, 0, event);
if (err < 0) {
WARN_ON(err == -EMSGSIZE);
kfree_skb(skb);
goto errout;
}
- rtnl_notify(skb, dev_net(dev), 0,
- RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL);
+
+ rtnl_notify(skb, net, 0, RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL);
return;
errout:
- rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_IFADDR, err);
+ rtnl_set_sk_err(net, RTNLGRP_PHONET_IFADDR, err);
}
static const struct nla_policy ifa_phonet_policy[IFA_MAX+1] = {
@@ -64,8 +65,6 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!netlink_capable(skb, CAP_SYS_ADMIN))
return -EPERM;
- ASSERT_RTNL();
-
err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
ifa_phonet_policy, extack);
if (err < 0)
@@ -79,21 +78,29 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
/* Phonet addresses only have 6 high-order bits */
return -EINVAL;
- dev = __dev_get_by_index(net, ifm->ifa_index);
- if (dev == NULL)
+ rcu_read_lock();
+
+ dev = dev_get_by_index_rcu(net, ifm->ifa_index);
+ if (!dev) {
+ rcu_read_unlock();
return -ENODEV;
+ }
if (nlh->nlmsg_type == RTM_NEWADDR)
err = phonet_address_add(dev, pnaddr);
else
err = phonet_address_del(dev, pnaddr);
+
+ rcu_read_unlock();
+
if (!err)
- phonet_address_notify(nlh->nlmsg_type, dev, pnaddr);
+ phonet_address_notify(net, nlh->nlmsg_type, ifm->ifa_index, pnaddr);
+
return err;
}
-static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr,
- u32 portid, u32 seq, int event)
+static int fill_addr(struct sk_buff *skb, u32 ifindex, u8 addr,
+ u32 portid, u32 seq, int event)
{
struct ifaddrmsg *ifm;
struct nlmsghdr *nlh;
@@ -107,7 +114,7 @@ static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr,
ifm->ifa_prefixlen = 0;
ifm->ifa_flags = IFA_F_PERMANENT;
ifm->ifa_scope = RT_SCOPE_LINK;
- ifm->ifa_index = dev->ifindex;
+ ifm->ifa_index = ifindex;
if (nla_put_u8(skb, IFA_LOCAL, addr))
goto nla_put_failure;
nlmsg_end(skb, nlh);
@@ -120,14 +127,17 @@ nla_put_failure:
static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
+ int addr_idx = 0, addr_start_idx = cb->args[1];
+ int dev_idx = 0, dev_start_idx = cb->args[0];
struct phonet_device_list *pndevs;
struct phonet_device *pnd;
- int dev_idx = 0, dev_start_idx = cb->args[0];
- int addr_idx = 0, addr_start_idx = cb->args[1];
+ int err = 0;
pndevs = phonet_device_list(sock_net(skb->sk));
+
rcu_read_lock();
list_for_each_entry_rcu(pnd, &pndevs->list, list) {
+ DECLARE_BITMAP(addrs, 64);
u8 addr;
if (dev_idx > dev_start_idx)
@@ -136,29 +146,32 @@ static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
continue;
addr_idx = 0;
- for_each_set_bit(addr, pnd->addrs, 64) {
+ memcpy(addrs, pnd->addrs, sizeof(pnd->addrs));
+
+ for_each_set_bit(addr, addrs, 64) {
if (addr_idx++ < addr_start_idx)
continue;
- if (fill_addr(skb, pnd->netdev, addr << 2,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, RTM_NEWADDR) < 0)
+ err = fill_addr(skb, READ_ONCE(pnd->netdev->ifindex),
+ addr << 2, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, RTM_NEWADDR);
+ if (err < 0)
goto out;
}
}
-
out:
rcu_read_unlock();
+
cb->args[0] = dev_idx;
cb->args[1] = addr_idx;
- return skb->len;
+ return err;
}
/* Routes handling */
-static int fill_route(struct sk_buff *skb, struct net_device *dev, u8 dst,
- u32 portid, u32 seq, int event)
+static int fill_route(struct sk_buff *skb, u32 ifindex, u8 dst,
+ u32 portid, u32 seq, int event)
{
struct rtmsg *rtm;
struct nlmsghdr *nlh;
@@ -177,8 +190,7 @@ static int fill_route(struct sk_buff *skb, struct net_device *dev, u8 dst,
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
rtm->rtm_type = RTN_UNICAST;
rtm->rtm_flags = 0;
- if (nla_put_u8(skb, RTA_DST, dst) ||
- nla_put_u32(skb, RTA_OIF, READ_ONCE(dev->ifindex)))
+ if (nla_put_u8(skb, RTA_DST, dst) || nla_put_u32(skb, RTA_OIF, ifindex))
goto nla_put_failure;
nlmsg_end(skb, nlh);
return 0;
@@ -188,7 +200,7 @@ nla_put_failure:
return -EMSGSIZE;
}
-void rtm_phonet_notify(int event, struct net_device *dev, u8 dst)
+void rtm_phonet_notify(struct net *net, int event, u32 ifindex, u8 dst)
{
struct sk_buff *skb;
int err = -ENOBUFS;
@@ -197,17 +209,18 @@ void rtm_phonet_notify(int event, struct net_device *dev, u8 dst)
nla_total_size(1) + nla_total_size(4), GFP_KERNEL);
if (skb == NULL)
goto errout;
- err = fill_route(skb, dev, dst, 0, 0, event);
+
+ err = fill_route(skb, ifindex, dst, 0, 0, event);
if (err < 0) {
WARN_ON(err == -EMSGSIZE);
kfree_skb(skb);
goto errout;
}
- rtnl_notify(skb, dev_net(dev), 0,
- RTNLGRP_PHONET_ROUTE, NULL, GFP_KERNEL);
+
+ rtnl_notify(skb, net, 0, RTNLGRP_PHONET_ROUTE, NULL, GFP_KERNEL);
return;
errout:
- rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_ROUTE, err);
+ rtnl_set_sk_err(net, RTNLGRP_PHONET_ROUTE, err);
}
static const struct nla_policy rtm_phonet_policy[RTA_MAX+1] = {
@@ -220,8 +233,10 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[RTA_MAX+1];
+ bool sync_needed = false;
struct net_device *dev;
struct rtmsg *rtm;
+ u32 ifindex;
int err;
u8 dst;
@@ -231,8 +246,6 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!netlink_capable(skb, CAP_SYS_ADMIN))
return -EPERM;
- ASSERT_RTNL();
-
err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
rtm_phonet_policy, extack);
if (err < 0)
@@ -247,16 +260,33 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
if (dst & 3) /* Phonet addresses only have 6 high-order bits */
return -EINVAL;
- dev = __dev_get_by_index(net, nla_get_u32(tb[RTA_OIF]));
- if (dev == NULL)
+ ifindex = nla_get_u32(tb[RTA_OIF]);
+
+ rcu_read_lock();
+
+ dev = dev_get_by_index_rcu(net, ifindex);
+ if (!dev) {
+ rcu_read_unlock();
return -ENODEV;
+ }
- if (nlh->nlmsg_type == RTM_NEWROUTE)
+ if (nlh->nlmsg_type == RTM_NEWROUTE) {
err = phonet_route_add(dev, dst);
- else
+ } else {
err = phonet_route_del(dev, dst);
+ if (!err)
+ sync_needed = true;
+ }
+
+ rcu_read_unlock();
+
+ if (sync_needed) {
+ synchronize_rcu();
+ dev_put(dev);
+ }
if (!err)
- rtm_phonet_notify(nlh->nlmsg_type, dev, dst);
+ rtm_phonet_notify(net, nlh->nlmsg_type, ifindex, dst);
+
return err;
}
@@ -273,7 +303,7 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
if (!dev)
continue;
- err = fill_route(skb, dev, addr << 2,
+ err = fill_route(skb, READ_ONCE(dev->ifindex), addr << 2,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, RTM_NEWROUTE);
if (err < 0)
@@ -286,13 +316,18 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
}
static const struct rtnl_msg_handler phonet_rtnl_msg_handlers[] __initdata_or_module = {
- {THIS_MODULE, PF_PHONET, RTM_NEWADDR, addr_doit, NULL, 0},
- {THIS_MODULE, PF_PHONET, RTM_DELADDR, addr_doit, NULL, 0},
- {THIS_MODULE, PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit, 0},
- {THIS_MODULE, PF_PHONET, RTM_NEWROUTE, route_doit, NULL, 0},
- {THIS_MODULE, PF_PHONET, RTM_DELROUTE, route_doit, NULL, 0},
- {THIS_MODULE, PF_PHONET, RTM_GETROUTE, NULL, route_dumpit,
- RTNL_FLAG_DUMP_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_PHONET, .msgtype = RTM_NEWADDR,
+ .doit = addr_doit, .flags = RTNL_FLAG_DOIT_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_PHONET, .msgtype = RTM_DELADDR,
+ .doit = addr_doit, .flags = RTNL_FLAG_DOIT_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_PHONET, .msgtype = RTM_GETADDR,
+ .dumpit = getaddr_dumpit, .flags = RTNL_FLAG_DUMP_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_PHONET, .msgtype = RTM_NEWROUTE,
+ .doit = route_doit, .flags = RTNL_FLAG_DOIT_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_PHONET, .msgtype = RTM_DELROUTE,
+ .doit = route_doit, .flags = RTNL_FLAG_DOIT_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_PHONET, .msgtype = RTM_GETROUTE,
+ .dumpit = route_dumpit, .flags = RTNL_FLAG_DUMP_UNLOCKED},
};
int __init phonet_netlink_register(void)
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 8f070ee7e742..d1cfceeff133 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -40,10 +40,6 @@
#include "rds.h"
struct workqueue_struct *rds_ib_mr_wq;
-struct rds_ib_dereg_odp_mr {
- struct work_struct work;
- struct ib_mr *mr;
-};
static void rds_ib_odp_mr_worker(struct work_struct *work);
diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index c268c2b011f4..a8e21060112f 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -32,8 +32,12 @@ static int rfkill_gpio_set_power(void *data, bool blocked)
{
struct rfkill_gpio_data *rfkill = data;
- if (!blocked && !IS_ERR(rfkill->clk) && !rfkill->clk_enabled)
- clk_enable(rfkill->clk);
+ if (!blocked && !IS_ERR(rfkill->clk) && !rfkill->clk_enabled) {
+ int ret = clk_enable(rfkill->clk);
+
+ if (ret)
+ return ret;
+ }
gpiod_set_value_cansleep(rfkill->shutdown_gpio, !blocked);
gpiod_set_value_cansleep(rfkill->reset_gpio, !blocked);
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index f4844683e120..9d8bd0b37e41 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -707,9 +707,10 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname,
ret = -EISCONN;
if (rx->sk.sk_state != RXRPC_UNBOUND)
goto error;
- ret = copy_from_sockptr(&min_sec_level, optval,
- sizeof(unsigned int));
- if (ret < 0)
+ ret = copy_safe_from_sockptr(&min_sec_level,
+ sizeof(min_sec_level),
+ optval, optlen);
+ if (ret)
goto error;
ret = -EINVAL;
if (min_sec_level > RXRPC_SECURITY_MAX)
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 1539d315afe7..694c4df7a1a3 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -337,9 +337,7 @@ static void rxrpc_clean_up_connection(struct work_struct *work)
*/
rxrpc_purge_queue(&conn->rx_queue);
- if (conn->tx_data_alloc.va)
- __page_frag_cache_drain(virt_to_page(conn->tx_data_alloc.va),
- conn->tx_data_alloc.pagecnt_bias);
+ page_frag_cache_drain(&conn->tx_data_alloc);
call_rcu(&conn->rcu, rxrpc_rcu_free_connection);
}
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index f9623ace2201..2792d2304605 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -452,9 +452,7 @@ void rxrpc_destroy_local(struct rxrpc_local *local)
#endif
rxrpc_purge_queue(&local->rx_queue);
rxrpc_purge_client_connections(local);
- if (local->tx_alloc.va)
- __page_frag_cache_drain(virt_to_page(local->tx_alloc.va),
- local->tx_alloc.pagecnt_bias);
+ page_frag_cache_drain(&local->tx_alloc);
}
/*
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 23d18fe5de9f..6abb8eec1b2b 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -29,6 +29,7 @@ bool rxrpc_propose_abort(struct rxrpc_call *call, s32 abort_code, int error,
call->send_abort_why = why;
call->send_abort_err = error;
call->send_abort_seq = 0;
+ trace_rxrpc_abort_call(call, abort_code);
/* Request abort locklessly vs rxrpc_input_call_event(). */
smp_store_release(&call->send_abort, abort_code);
rxrpc_poke_call(call, rxrpc_call_poke_abort);
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index eecad65fec92..839790043256 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -504,6 +504,50 @@ nla_put_failure:
return -1;
}
+static int
+tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+{
+ unsigned char *b = skb_tail_pointer(skb);
+ struct nlattr *nest;
+ int err = -EINVAL;
+ u32 flags;
+
+ if (tcf_action_dump_terse(skb, a, false))
+ goto nla_put_failure;
+
+ if (a->hw_stats != TCA_ACT_HW_STATS_ANY &&
+ nla_put_bitfield32(skb, TCA_ACT_HW_STATS,
+ a->hw_stats, TCA_ACT_HW_STATS_ANY))
+ goto nla_put_failure;
+
+ if (a->used_hw_stats_valid &&
+ nla_put_bitfield32(skb, TCA_ACT_USED_HW_STATS,
+ a->used_hw_stats, TCA_ACT_HW_STATS_ANY))
+ goto nla_put_failure;
+
+ flags = a->tcfa_flags & TCA_ACT_FLAGS_USER_MASK;
+ if (flags &&
+ nla_put_bitfield32(skb, TCA_ACT_FLAGS,
+ flags, flags))
+ goto nla_put_failure;
+
+ if (nla_put_u32(skb, TCA_ACT_IN_HW_COUNT, a->in_hw_count))
+ goto nla_put_failure;
+
+ nest = nla_nest_start_noflag(skb, TCA_ACT_OPTIONS);
+ if (nest == NULL)
+ goto nla_put_failure;
+ err = tcf_action_dump_old(skb, a, bind, ref);
+ if (err > 0) {
+ nla_nest_end(skb, nest);
+ return err;
+ }
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
struct netlink_callback *cb)
{
@@ -1190,51 +1234,6 @@ tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
return a->ops->dump(skb, a, bind, ref);
}
-int
-tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
-{
- int err = -EINVAL;
- unsigned char *b = skb_tail_pointer(skb);
- struct nlattr *nest;
- u32 flags;
-
- if (tcf_action_dump_terse(skb, a, false))
- goto nla_put_failure;
-
- if (a->hw_stats != TCA_ACT_HW_STATS_ANY &&
- nla_put_bitfield32(skb, TCA_ACT_HW_STATS,
- a->hw_stats, TCA_ACT_HW_STATS_ANY))
- goto nla_put_failure;
-
- if (a->used_hw_stats_valid &&
- nla_put_bitfield32(skb, TCA_ACT_USED_HW_STATS,
- a->used_hw_stats, TCA_ACT_HW_STATS_ANY))
- goto nla_put_failure;
-
- flags = a->tcfa_flags & TCA_ACT_FLAGS_USER_MASK;
- if (flags &&
- nla_put_bitfield32(skb, TCA_ACT_FLAGS,
- flags, flags))
- goto nla_put_failure;
-
- if (nla_put_u32(skb, TCA_ACT_IN_HW_COUNT, a->in_hw_count))
- goto nla_put_failure;
-
- nest = nla_nest_start_noflag(skb, TCA_ACT_OPTIONS);
- if (nest == NULL)
- goto nla_put_failure;
- err = tcf_action_dump_old(skb, a, bind, ref);
- if (err > 0) {
- nla_nest_end(skb, nest);
- return err;
- }
-
-nla_put_failure:
- nlmsg_trim(skb, b);
- return -1;
-}
-EXPORT_SYMBOL(tcf_action_dump_1);
-
int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[],
int bind, int ref, bool terse)
{
@@ -2264,13 +2263,16 @@ out_module_put:
return skb->len;
}
+static const struct rtnl_msg_handler tc_action_rtnl_msg_handlers[] __initconst = {
+ {.msgtype = RTM_NEWACTION, .doit = tc_ctl_action},
+ {.msgtype = RTM_DELACTION, .doit = tc_ctl_action},
+ {.msgtype = RTM_GETACTION, .doit = tc_ctl_action,
+ .dumpit = tc_dump_action},
+};
+
static int __init tc_action_init(void)
{
- rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action,
- 0);
-
+ rtnl_register_many(tc_action_rtnl_msg_handlers);
return 0;
}
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 2197eb625658..c02f39efc6ef 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -1183,9 +1183,8 @@ static int tcf_ct_fill_params_nat(struct tcf_ct_params *p,
range->min_addr.ip =
nla_get_in_addr(tb[TCA_CT_NAT_IPV4_MIN]);
- range->max_addr.ip = max_attr ?
- nla_get_in_addr(max_attr) :
- range->min_addr.ip;
+ range->max_addr.ip =
+ nla_get_in_addr_default(max_attr, range->min_addr.ip);
} else if (tb[TCA_CT_NAT_IPV6_MIN]) {
struct nlattr *max_attr = tb[TCA_CT_NAT_IPV6_MAX];
@@ -1314,8 +1313,9 @@ static int tcf_ct_fill_params(struct net *net,
err = -EINVAL;
goto err;
}
- family = tb[TCA_CT_HELPER_FAMILY] ? nla_get_u8(tb[TCA_CT_HELPER_FAMILY]) : AF_INET;
- proto = tb[TCA_CT_HELPER_PROTO] ? nla_get_u8(tb[TCA_CT_HELPER_PROTO]) : IPPROTO_TCP;
+ family = nla_get_u8_default(tb[TCA_CT_HELPER_FAMILY], AF_INET);
+ proto = nla_get_u8_default(tb[TCA_CT_HELPER_PROTO],
+ IPPROTO_TCP);
err = nf_ct_add_helper(tmpl, name, family, proto,
p->ct_action & TCA_CT_ACT_NAT, &p->helper);
if (err) {
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
index 5dd41a012110..5b1241ddc758 100644
--- a/net/sched/act_ctinfo.c
+++ b/net/sched/act_ctinfo.c
@@ -197,8 +197,9 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
"dscp mask must be 6 contiguous bits");
return -EINVAL;
}
- dscpstatemask = tb[TCA_CTINFO_PARMS_DSCP_STATEMASK] ?
- nla_get_u32(tb[TCA_CTINFO_PARMS_DSCP_STATEMASK]) : 0;
+ dscpstatemask =
+ nla_get_u32_default(tb[TCA_CTINFO_PARMS_DSCP_STATEMASK],
+ 0);
/* mask & statemask must not overlap */
if (dscpmask & dscpstatemask) {
NL_SET_ERR_MSG_ATTR(extack,
@@ -243,8 +244,7 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
}
cp_new->net = net;
- cp_new->zone = tb[TCA_CTINFO_ZONE] ?
- nla_get_u16(tb[TCA_CTINFO_ZONE]) : 0;
+ cp_new->zone = nla_get_u16_default(tb[TCA_CTINFO_ZONE], 0);
if (dscpmask) {
cp_new->dscpmask = dscpmask;
cp_new->dscpmaskshift = dscpmaskshift;
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
index 1dd74125398a..91c0ec729823 100644
--- a/net/sched/act_gate.c
+++ b/net/sched/act_gate.c
@@ -190,15 +190,10 @@ static int fill_gate_entry(struct nlattr **tb, struct tcfg_gate_entry *entry,
entry->interval = interval;
- if (tb[TCA_GATE_ENTRY_IPV])
- entry->ipv = nla_get_s32(tb[TCA_GATE_ENTRY_IPV]);
- else
- entry->ipv = -1;
+ entry->ipv = nla_get_s32_default(tb[TCA_GATE_ENTRY_IPV], -1);
- if (tb[TCA_GATE_ENTRY_MAX_OCTETS])
- entry->maxoctets = nla_get_s32(tb[TCA_GATE_ENTRY_MAX_OCTETS]);
- else
- entry->maxoctets = -1;
+ entry->maxoctets = nla_get_s32_default(tb[TCA_GATE_ENTRY_MAX_OCTETS],
+ -1);
return 0;
}
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index 44a37a71ae92..9f86f4e666d3 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -288,16 +288,14 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla,
}
p->tcfm_action = parm->m_action;
- p->tcfm_label = tb[TCA_MPLS_LABEL] ? nla_get_u32(tb[TCA_MPLS_LABEL]) :
- ACT_MPLS_LABEL_NOT_SET;
- p->tcfm_tc = tb[TCA_MPLS_TC] ? nla_get_u8(tb[TCA_MPLS_TC]) :
- ACT_MPLS_TC_NOT_SET;
- p->tcfm_ttl = tb[TCA_MPLS_TTL] ? nla_get_u8(tb[TCA_MPLS_TTL]) :
- mpls_ttl;
- p->tcfm_bos = tb[TCA_MPLS_BOS] ? nla_get_u8(tb[TCA_MPLS_BOS]) :
- ACT_MPLS_BOS_NOT_SET;
- p->tcfm_proto = tb[TCA_MPLS_PROTO] ? nla_get_be16(tb[TCA_MPLS_PROTO]) :
- htons(ETH_P_MPLS_UC);
+ p->tcfm_label = nla_get_u32_default(tb[TCA_MPLS_LABEL],
+ ACT_MPLS_LABEL_NOT_SET);
+ p->tcfm_tc = nla_get_u8_default(tb[TCA_MPLS_TC], ACT_MPLS_TC_NOT_SET);
+ p->tcfm_ttl = nla_get_u8_default(tb[TCA_MPLS_TTL], mpls_ttl);
+ p->tcfm_bos = nla_get_u8_default(tb[TCA_MPLS_BOS],
+ ACT_MPLS_BOS_NOT_SET);
+ p->tcfm_proto = nla_get_be16_default(tb[TCA_MPLS_PROTO],
+ htons(ETH_P_MPLS_UC));
spin_lock_bh(&m->tcf_lock);
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 8555125ed34d..a214ed681142 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -167,8 +167,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
}
if (R_tab) {
new->rate_present = true;
- rate64 = tb[TCA_POLICE_RATE64] ?
- nla_get_u64(tb[TCA_POLICE_RATE64]) : 0;
+ rate64 = nla_get_u64_default(tb[TCA_POLICE_RATE64], 0);
psched_ratecfg_precompute(&new->rate, &R_tab->rate, rate64);
qdisc_put_rtab(R_tab);
} else {
@@ -176,8 +175,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
}
if (P_tab) {
new->peak_present = true;
- prate64 = tb[TCA_POLICE_PEAKRATE64] ?
- nla_get_u64(tb[TCA_POLICE_PEAKRATE64]) : 0;
+ prate64 = nla_get_u64_default(tb[TCA_POLICE_PEAKRATE64], 0);
psched_ratecfg_precompute(&new->peak, &P_tab->rate, prate64);
qdisc_put_rtab(P_tab);
} else {
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index bbc778c233c8..7578e27260c9 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1933,7 +1933,8 @@ static void tcf_chain_tp_remove(struct tcf_chain *chain,
static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
struct tcf_chain_info *chain_info,
u32 protocol, u32 prio,
- bool prio_allocate);
+ bool prio_allocate,
+ struct netlink_ext_ack *extack);
/* Try to insert new proto.
* If proto with specified priority already exists, free new proto
@@ -1957,8 +1958,7 @@ static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
return ERR_PTR(-EAGAIN);
}
- tp = tcf_chain_tp_find(chain, &chain_info,
- protocol, prio, false);
+ tp = tcf_chain_tp_find(chain, &chain_info, protocol, prio, false, NULL);
if (!tp)
err = tcf_chain_tp_insert(chain, &chain_info, tp_new);
mutex_unlock(&chain->filter_chain_lock);
@@ -2018,7 +2018,8 @@ static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
struct tcf_chain_info *chain_info,
u32 protocol, u32 prio,
- bool prio_allocate)
+ bool prio_allocate,
+ struct netlink_ext_ack *extack)
{
struct tcf_proto **pprev;
struct tcf_proto *tp;
@@ -2029,9 +2030,14 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
pprev = &tp->next) {
if (tp->prio >= prio) {
if (tp->prio == prio) {
- if (prio_allocate ||
- (tp->protocol != protocol && protocol))
+ if (prio_allocate) {
+ NL_SET_ERR_MSG(extack, "Lowest ID from auto-alloc range already in use");
+ return ERR_PTR(-ENOSPC);
+ }
+ if (tp->protocol != protocol && protocol) {
+ NL_SET_ERR_MSG(extack, "Protocol mismatch for filter with specified priority");
return ERR_PTR(-EINVAL);
+ }
} else {
tp = NULL;
}
@@ -2297,7 +2303,7 @@ replay:
}
block->classid = parent;
- chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
+ chain_index = nla_get_u32_default(tca[TCA_CHAIN], 0);
if (chain_index > TC_ACT_EXT_VAL_MASK) {
NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
err = -EINVAL;
@@ -2312,9 +2318,8 @@ replay:
mutex_lock(&chain->filter_chain_lock);
tp = tcf_chain_tp_find(chain, &chain_info, protocol,
- prio, prio_allocate);
+ prio, prio_allocate, extack);
if (IS_ERR(tp)) {
- NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
err = PTR_ERR(tp);
goto errout_locked;
}
@@ -2509,7 +2514,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
goto errout;
}
- chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
+ chain_index = nla_get_u32_default(tca[TCA_CHAIN], 0);
if (chain_index > TC_ACT_EXT_VAL_MASK) {
NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
err = -EINVAL;
@@ -2539,10 +2544,13 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
mutex_lock(&chain->filter_chain_lock);
tp = tcf_chain_tp_find(chain, &chain_info, protocol,
- prio, false);
- if (!tp || IS_ERR(tp)) {
+ prio, false, extack);
+ if (!tp) {
+ err = -ENOENT;
NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
- err = tp ? PTR_ERR(tp) : -ENOENT;
+ goto errout_locked;
+ } else if (IS_ERR(tp)) {
+ err = PTR_ERR(tp);
goto errout_locked;
} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
@@ -2664,7 +2672,7 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
goto errout;
}
- chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
+ chain_index = nla_get_u32_default(tca[TCA_CHAIN], 0);
if (chain_index > TC_ACT_EXT_VAL_MASK) {
NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
err = -EINVAL;
@@ -2679,11 +2687,14 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
mutex_lock(&chain->filter_chain_lock);
tp = tcf_chain_tp_find(chain, &chain_info, protocol,
- prio, false);
+ prio, false, extack);
mutex_unlock(&chain->filter_chain_lock);
- if (!tp || IS_ERR(tp)) {
+ if (!tp) {
+ err = -ENOENT;
NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
- err = tp ? PTR_ERR(tp) : -ENOENT;
+ goto errout;
+ } else if (IS_ERR(tp)) {
+ err = PTR_ERR(tp);
goto errout;
} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
@@ -3104,7 +3115,7 @@ replay:
if (IS_ERR(block))
return PTR_ERR(block);
- chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
+ chain_index = nla_get_u32_default(tca[TCA_CHAIN], 0);
if (chain_index > TC_ACT_EXT_VAL_MASK) {
NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
err = -EINVAL;
@@ -4056,6 +4067,19 @@ static struct pernet_operations tcf_net_ops = {
.size = sizeof(struct tcf_net),
};
+static const struct rtnl_msg_handler tc_filter_rtnl_msg_handlers[] __initconst = {
+ {.msgtype = RTM_NEWTFILTER, .doit = tc_new_tfilter,
+ .flags = RTNL_FLAG_DOIT_UNLOCKED},
+ {.msgtype = RTM_DELTFILTER, .doit = tc_del_tfilter,
+ .flags = RTNL_FLAG_DOIT_UNLOCKED},
+ {.msgtype = RTM_GETTFILTER, .doit = tc_get_tfilter,
+ .dumpit = tc_dump_tfilter, .flags = RTNL_FLAG_DOIT_UNLOCKED},
+ {.msgtype = RTM_NEWCHAIN, .doit = tc_ctl_chain},
+ {.msgtype = RTM_DELCHAIN, .doit = tc_ctl_chain},
+ {.msgtype = RTM_GETCHAIN, .doit = tc_ctl_chain,
+ .dumpit = tc_dump_chain},
+};
+
static int __init tc_filter_init(void)
{
int err;
@@ -4069,17 +4093,7 @@ static int __init tc_filter_init(void)
goto err_register_pernet_subsys;
xa_init_flags(&tcf_exts_miss_cookies_xa, XA_FLAGS_ALLOC1);
-
- rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
- RTNL_FLAG_DOIT_UNLOCKED);
- rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
- RTNL_FLAG_DOIT_UNLOCKED);
- rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
- tc_dump_tfilter, RTNL_FLAG_DOIT_UNLOCKED);
- rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain,
- tc_dump_chain, 0);
+ rtnl_register_many(tc_filter_rtnl_msg_handlers);
return 0;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index a1d27bc039a3..300430b8c4d2 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -2420,6 +2420,17 @@ static struct pernet_operations psched_net_ops = {
DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper);
#endif
+static const struct rtnl_msg_handler psched_rtnl_msg_handlers[] __initconst = {
+ {.msgtype = RTM_NEWQDISC, .doit = tc_modify_qdisc},
+ {.msgtype = RTM_DELQDISC, .doit = tc_get_qdisc},
+ {.msgtype = RTM_GETQDISC, .doit = tc_get_qdisc,
+ .dumpit = tc_dump_qdisc},
+ {.msgtype = RTM_NEWTCLASS, .doit = tc_ctl_tclass},
+ {.msgtype = RTM_DELTCLASS, .doit = tc_ctl_tclass},
+ {.msgtype = RTM_GETTCLASS, .doit = tc_ctl_tclass,
+ .dumpit = tc_dump_tclass},
+};
+
static int __init pktsched_init(void)
{
int err;
@@ -2438,14 +2449,7 @@ static int __init pktsched_init(void)
register_qdisc(&mq_qdisc_ops);
register_qdisc(&noqueue_qdisc_ops);
- rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
- 0);
- rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
- 0);
+ rtnl_register_many(psched_rtnl_msg_handlers);
tc_wrapper_init();
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index 939425da1895..8c9a0400c862 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -310,7 +310,7 @@ static void cbs_set_port_rate(struct net_device *dev, struct cbs_sched_data *q)
{
struct ethtool_link_ksettings ecmd;
int speed = SPEED_10;
- int port_rate;
+ s64 port_rate;
int err;
err = __ethtool_get_link_ksettings(dev, &ecmd);
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 91072010923d..1e940ad0d2fa 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -356,7 +356,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt,
tb[TCA_CHOKE_STAB] == NULL)
return -EINVAL;
- max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;
+ max_P = nla_get_u32_default(tb[TCA_CHOKE_MAX_P], 0);
ctl = nla_data(tb[TCA_CHOKE_PARMS]);
stab = nla_data(tb[TCA_CHOKE_STAB]);
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 19a49af5a9e5..a5e87f9ea986 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -111,6 +111,7 @@ struct fq_perband_flows {
struct fq_sched_data {
/* Read mostly cache line */
+ u64 offload_horizon;
u32 quantum;
u32 initial_quantum;
u32 flow_refill_delay;
@@ -299,7 +300,7 @@ static void fq_gc(struct fq_sched_data *q,
}
/* Fast path can be used if :
- * 1) Packet tstamp is in the past.
+ * 1) Packet tstamp is in the past, or within the pacing offload horizon.
* 2) FQ qlen == 0 OR
* (no flow is currently eligible for transmit,
* AND fast path queue has less than 8 packets)
@@ -314,7 +315,7 @@ static bool fq_fastpath_check(const struct Qdisc *sch, struct sk_buff *skb,
const struct fq_sched_data *q = qdisc_priv(sch);
const struct sock *sk;
- if (fq_skb_cb(skb)->time_to_send > now)
+ if (fq_skb_cb(skb)->time_to_send > now + q->offload_horizon)
return false;
if (sch->q.qlen != 0) {
@@ -331,6 +332,12 @@ static bool fq_fastpath_check(const struct Qdisc *sch, struct sk_buff *skb,
*/
if (q->internal.qlen >= 8)
return false;
+
+ /* Ordering invariants fall apart if some delayed flows
+ * are ready but we haven't serviced them, yet.
+ */
+ if (q->time_next_delayed_flow <= now + q->offload_horizon)
+ return false;
}
sk = skb->sk;
@@ -361,8 +368,9 @@ static struct fq_flow *fq_classify(struct Qdisc *sch, struct sk_buff *skb,
* 3) We do not want to rate limit them (eg SYNFLOOD attack),
* especially if the listener set SO_MAX_PACING_RATE
* 4) We pretend they are orphaned
+ * TCP can also associate TIME_WAIT sockets with RST or ACK packets.
*/
- if (!sk || sk_listener(sk)) {
+ if (!sk || sk_listener_or_tw(sk)) {
unsigned long hash = skb_get_hash(skb) & q->orphan_mask;
/* By forcing low order bit to 1, we make sure to not
@@ -595,15 +603,18 @@ static void fq_check_throttled(struct fq_sched_data *q, u64 now)
unsigned long sample;
struct rb_node *p;
- if (q->time_next_delayed_flow > now)
+ if (q->time_next_delayed_flow > now + q->offload_horizon)
return;
/* Update unthrottle latency EWMA.
* This is cheap and can help diagnosing timer/latency problems.
*/
sample = (unsigned long)(now - q->time_next_delayed_flow);
- q->unthrottle_latency_ns -= q->unthrottle_latency_ns >> 3;
- q->unthrottle_latency_ns += sample >> 3;
+ if ((long)sample > 0) {
+ q->unthrottle_latency_ns -= q->unthrottle_latency_ns >> 3;
+ q->unthrottle_latency_ns += sample >> 3;
+ }
+ now += q->offload_horizon;
q->time_next_delayed_flow = ~0ULL;
while ((p = rb_first(&q->delayed)) != NULL) {
@@ -687,7 +698,7 @@ begin:
u64 time_next_packet = max_t(u64, fq_skb_cb(skb)->time_to_send,
f->time_next_packet);
- if (now < time_next_packet) {
+ if (now + q->offload_horizon < time_next_packet) {
head->first = f->next;
f->time_next_packet = time_next_packet;
fq_flow_set_throttled(q, f);
@@ -925,6 +936,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
[TCA_FQ_HORIZON_DROP] = { .type = NLA_U8 },
[TCA_FQ_PRIOMAP] = NLA_POLICY_EXACT_LEN(sizeof(struct tc_prio_qopt)),
[TCA_FQ_WEIGHTS] = NLA_POLICY_EXACT_LEN(FQ_BANDS * sizeof(s32)),
+ [TCA_FQ_OFFLOAD_HORIZON] = { .type = NLA_U32 },
};
/* compress a u8 array with all elems <= 3 to an array of 2-bit fields */
@@ -1100,6 +1112,17 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
WRITE_ONCE(q->horizon_drop,
nla_get_u8(tb[TCA_FQ_HORIZON_DROP]));
+ if (tb[TCA_FQ_OFFLOAD_HORIZON]) {
+ u64 offload_horizon = (u64)NSEC_PER_USEC *
+ nla_get_u32(tb[TCA_FQ_OFFLOAD_HORIZON]);
+
+ if (offload_horizon <= qdisc_dev(sch)->max_pacing_offload_horizon) {
+ WRITE_ONCE(q->offload_horizon, offload_horizon);
+ } else {
+ NL_SET_ERR_MSG_MOD(extack, "invalid offload_horizon");
+ err = -EINVAL;
+ }
+ }
if (!err) {
sch_tree_unlock(sch);
@@ -1183,6 +1206,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
.bands = FQ_BANDS,
};
struct nlattr *opts;
+ u64 offload_horizon;
u64 ce_threshold;
s32 weights[3];
u64 horizon;
@@ -1199,6 +1223,9 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
horizon = READ_ONCE(q->horizon);
do_div(horizon, NSEC_PER_USEC);
+ offload_horizon = READ_ONCE(q->offload_horizon);
+ do_div(offload_horizon, NSEC_PER_USEC);
+
if (nla_put_u32(skb, TCA_FQ_PLIMIT,
READ_ONCE(sch->limit)) ||
nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT,
@@ -1224,6 +1251,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
nla_put_u32(skb, TCA_FQ_TIMER_SLACK,
READ_ONCE(q->timer_slack)) ||
nla_put_u32(skb, TCA_FQ_HORIZON, (u32)horizon) ||
+ nla_put_u32(skb, TCA_FQ_OFFLOAD_HORIZON, (u32)offload_horizon) ||
nla_put_u8(skb, TCA_FQ_HORIZON_DROP,
READ_ONCE(q->horizon_drop)))
goto nla_put_failure;
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 79ba9dc70254..7d2151c62c4a 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -668,7 +668,7 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt,
return -EINVAL;
}
- max_P = tb[TCA_GRED_MAX_P] ? nla_get_u32(tb[TCA_GRED_MAX_P]) : 0;
+ max_P = nla_get_u32_default(tb[TCA_GRED_MAX_P], 0);
ctl = nla_data(tb[TCA_GRED_PARMS]);
stab = nla_data(tb[TCA_GRED_STAB]);
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index ff3de37874e4..c31bc5489bdd 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1810,8 +1810,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
NULL));
- rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
- ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
+ rate64 = nla_get_u64_default(tb[TCA_HTB_RATE64], 0);
+ ceil64 = nla_get_u64_default(tb[TCA_HTB_CEIL64], 0);
if (!cl) { /* new class */
struct net_device *dev = qdisc_dev(sch);
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 39382ee1e331..fe6fed291a7b 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -17,6 +17,7 @@
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
+#include <linux/prandom.h>
#include <linux/rtnetlink.h>
#include <linux/reciprocal_div.h>
#include <linux/rbtree.h>
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index d584c0c25899..6a07cdbdb9e1 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -421,10 +421,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (err < 0)
return err;
- if (tb[TCA_QFQ_WEIGHT])
- weight = nla_get_u32(tb[TCA_QFQ_WEIGHT]);
- else
- weight = 1;
+ weight = nla_get_u32_default(tb[TCA_QFQ_WEIGHT], 1);
if (tb[TCA_QFQ_LMAX]) {
lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index b5f096588fae..6029bc29b51e 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -248,7 +248,7 @@ static int __red_change(struct Qdisc *sch, struct nlattr **tb,
tb[TCA_RED_STAB] == NULL)
return -EINVAL;
- max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
+ max_P = nla_get_u32_default(tb[TCA_RED_MAX_P], 0);
ctl = nla_data(tb[TCA_RED_PARMS]);
stab = nla_data(tb[TCA_RED_STAB]);
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 3b9245a3c767..a4b8296a2fa1 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -77,12 +77,6 @@
#define SFQ_EMPTY_SLOT 0xffff
#define SFQ_DEFAULT_HASH_DIVISOR 1024
-/* We use 16 bits to store allot, and want to handle packets up to 64K
- * Scale allot by 8 (1<<3) so that no overflow occurs.
- */
-#define SFQ_ALLOT_SHIFT 3
-#define SFQ_ALLOT_SIZE(X) DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)
-
/* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */
typedef u16 sfq_index;
@@ -104,7 +98,7 @@ struct sfq_slot {
sfq_index next; /* next slot in sfq RR chain */
struct sfq_head dep; /* anchor in dep[] chains */
unsigned short hash; /* hash value (index in ht[]) */
- short allot; /* credit for this slot */
+ int allot; /* credit for this slot */
unsigned int backlog;
struct red_vars vars;
@@ -120,7 +114,6 @@ struct sfq_sched_data {
siphash_key_t perturbation;
u8 cur_depth; /* depth of longest slot */
u8 flags;
- unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
struct tcf_proto __rcu *filter_list;
struct tcf_block *block;
sfq_index *ht; /* Hash table ('divisor' slots) */
@@ -456,7 +449,7 @@ enqueue:
*/
q->tail = slot;
/* We could use a bigger initial quantum for new flows */
- slot->allot = q->scaled_quantum;
+ slot->allot = q->quantum;
}
if (++sch->q.qlen <= q->limit)
return NET_XMIT_SUCCESS;
@@ -493,7 +486,7 @@ next_slot:
slot = &q->slots[a];
if (slot->allot <= 0) {
q->tail = slot;
- slot->allot += q->scaled_quantum;
+ slot->allot += q->quantum;
goto next_slot;
}
skb = slot_dequeue_head(slot);
@@ -512,7 +505,7 @@ next_slot:
}
q->tail->next = next_a;
} else {
- slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb));
+ slot->allot -= qdisc_pkt_len(skb);
}
return skb;
}
@@ -595,7 +588,7 @@ drop:
q->tail->next = x;
}
q->tail = slot;
- slot->allot = q->scaled_quantum;
+ slot->allot = q->quantum;
}
}
sch->q.qlen -= dropped;
@@ -628,7 +621,8 @@ static void sfq_perturbation(struct timer_list *t)
rcu_read_unlock();
}
-static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
+static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct sfq_sched_data *q = qdisc_priv(sch);
struct tc_sfq_qopt *ctl = nla_data(opt);
@@ -646,14 +640,10 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
(!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
return -EINVAL;
- /* slot->allot is a short, make sure quantum is not too big. */
- if (ctl->quantum) {
- unsigned int scaled = SFQ_ALLOT_SIZE(ctl->quantum);
-
- if (scaled <= 0 || scaled > SHRT_MAX)
- return -EINVAL;
+ if ((int)ctl->quantum < 0) {
+ NL_SET_ERR_MSG_MOD(extack, "invalid quantum");
+ return -EINVAL;
}
-
if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
ctl_v1->Wlog, ctl_v1->Scell_log, NULL))
return -EINVAL;
@@ -663,10 +653,8 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
return -ENOMEM;
}
sch_tree_lock(sch);
- if (ctl->quantum) {
+ if (ctl->quantum)
q->quantum = ctl->quantum;
- q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
- }
WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ);
if (ctl->flows)
q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
@@ -762,12 +750,11 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt,
q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
q->maxflows = SFQ_DEFAULT_FLOWS;
q->quantum = psched_mtu(qdisc_dev(sch));
- q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
q->perturb_period = 0;
get_random_bytes(&q->perturbation, sizeof(q->perturbation));
if (opt) {
- int err = sfq_change(sch, opt);
+ int err = sfq_change(sch, opt, extack);
if (err)
return err;
}
@@ -878,7 +865,7 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
if (idx != SFQ_EMPTY_SLOT) {
const struct sfq_slot *slot = &q->slots[idx];
- xstats.allot = slot->allot << SFQ_ALLOT_SHIFT;
+ xstats.allot = slot->allot;
qs.qlen = slot->qlen;
qs.backlog = slot->backlog;
}
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 8623dc0bafc0..a68e17891b0b 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -1828,7 +1828,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
* zero; (2) the 'flags' of a "running" taprio instance cannot be
* changed.
*/
- taprio_flags = tb[TCA_TAPRIO_ATTR_FLAGS] ? nla_get_u32(tb[TCA_TAPRIO_ATTR_FLAGS]) : 0;
+ taprio_flags = nla_get_u32_default(tb[TCA_TAPRIO_ATTR_FLAGS], 0);
/* txtime-assist and full offload are mutually exclusive */
if ((taprio_flags & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) &&
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 38e2fbdcbeac..a9ed2ccab1bd 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -103,10 +103,10 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
ipv6_addr_equal(&addr->a.v6.sin6_addr,
&ifa->addr) &&
addr->a.v6.sin6_scope_id == ifa->idev->dev->ifindex) {
- sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL);
found = 1;
addr->valid = 0;
list_del_rcu(&addr->list);
+ sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL);
break;
}
}
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 39ca5403d4d7..8b9a1b96695e 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -738,6 +738,20 @@ void sctp_addr_wq_mgmt(struct net *net, struct sctp_sockaddr_entry *addr, int cm
*/
spin_lock_bh(&net->sctp.addr_wq_lock);
+
+ /* Avoid searching the queue or modifying it if there are no consumers,
+ * as it can lead to performance degradation if addresses are modified
+ * en-masse.
+ *
+ * If the queue already contains some events, update it anyway to avoid
+ * ugly races between new sessions and new address events.
+ */
+ if (list_empty(&net->sctp.auto_asconf_splist) &&
+ list_empty(&net->sctp.addr_waitq)) {
+ spin_unlock_bh(&net->sctp.addr_wq_lock);
+ return;
+ }
+
/* Offsets existing events in addr_wq */
addrw = sctp_addr_wq_lookup(net, addr);
if (addrw) {
@@ -808,10 +822,10 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
if (addr->a.sa.sa_family == AF_INET &&
addr->a.v4.sin_addr.s_addr ==
ifa->ifa_local) {
- sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL);
found = 1;
addr->valid = 0;
list_del_rcu(&addr->list);
+ sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL);
break;
}
}
diff --git a/net/shaper/Makefile b/net/shaper/Makefile
new file mode 100644
index 000000000000..54af7169a331
--- /dev/null
+++ b/net/shaper/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for the net shaper infrastructure.
+#
+# Copyright (c) 2024, Red Hat, Inc.
+#
+
+obj-y += shaper.o shaper_nl_gen.o
diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c
new file mode 100644
index 000000000000..15463062fe7b
--- /dev/null
+++ b/net/shaper/shaper.c
@@ -0,0 +1,1438 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/bits.h>
+#include <linux/bitfield.h>
+#include <linux/idr.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/netlink.h>
+#include <linux/skbuff.h>
+#include <linux/xarray.h>
+#include <net/devlink.h>
+#include <net/net_shaper.h>
+
+#include "shaper_nl_gen.h"
+
+#include "../core/dev.h"
+
+#define NET_SHAPER_SCOPE_SHIFT 26
+#define NET_SHAPER_ID_MASK GENMASK(NET_SHAPER_SCOPE_SHIFT - 1, 0)
+#define NET_SHAPER_SCOPE_MASK GENMASK(31, NET_SHAPER_SCOPE_SHIFT)
+
+#define NET_SHAPER_ID_UNSPEC NET_SHAPER_ID_MASK
+
+/* Per-binding container of the configured shapers. Entries are stored in an
+ * xarray at the index built by net_shaper_handle_to_index().
+ */
+struct net_shaper_hierarchy {
+ struct xarray shapers;
+};
+
+/* Per-request netlink context, overlaid on the genl_info / netlink_callback
+ * ctx scratch area.
+ */
+struct net_shaper_nl_ctx {
+ /* Object targeted by the current request. */
+ struct net_shaper_binding binding;
+ /* Tracker for the reference held on binding.netdev. */
+ netdevice_tracker dev_tracker;
+ /* xarray index from which the next dump pass resumes. */
+ unsigned long start_index;
+};
+
+/* Return the binding embedded in the opaque netlink context area. */
+static struct net_shaper_binding *net_shaper_binding_from_ctx(void *ctx)
+{
+	struct net_shaper_nl_ctx *nl_ctx = ctx;
+
+	return &nl_ctx->binding;
+}
+
+/* Take the per-binding lock; for a netdev binding that is the device mutex. */
+static void net_shaper_lock(struct net_shaper_binding *binding)
+{
+	if (binding->type == NET_SHAPER_BINDING_TYPE_NETDEV)
+		mutex_lock(&binding->netdev->lock);
+}
+
+/* Release the lock taken by net_shaper_lock(). */
+static void net_shaper_unlock(struct net_shaper_binding *binding)
+{
+	if (binding->type == NET_SHAPER_BINDING_TYPE_NETDEV)
+		mutex_unlock(&binding->netdev->lock);
+}
+
+/* Return the hierarchy attached to the binding, or NULL when none has been
+ * set up yet or the binding type is not supported.
+ */
+static struct net_shaper_hierarchy *
+net_shaper_hierarchy(struct net_shaper_binding *binding)
+{
+	/* No other type supported yet. */
+	if (binding->type != NET_SHAPER_BINDING_TYPE_NETDEV)
+		return NULL;
+
+	/* Pairs with WRITE_ONCE() in net_shaper_hierarchy_setup. */
+	return READ_ONCE(binding->netdev->net_shaper_hierarchy);
+}
+
+/* Return the shaper operations of the bound device, or NULL for binding
+ * types other than NETDEV.
+ */
+static const struct net_shaper_ops *
+net_shaper_ops(struct net_shaper_binding *binding)
+{
+	/* No other type supported yet. */
+	if (binding->type != NET_SHAPER_BINDING_TYPE_NETDEV)
+		return NULL;
+
+	return binding->netdev->netdev_ops->net_shaper_ops;
+}
+
+/* Return how many top-level attributes of the given type the request
+ * carries; used to size arrays for [multi] attributes.
+ */
+static int net_shaper_list_len(struct genl_info *info, int type)
+{
+	struct nlattr *pos;
+	int remaining;
+	int count = 0;
+
+	nla_for_each_attr_type(pos, type, genlmsg_data(info->genlhdr),
+			       genlmsg_len(info->genlhdr), remaining) {
+		count++;
+	}
+
+	return count;
+}
+
+/* Size of a nested handle attribute carrying two u32 attributes. */
+static int net_shaper_handle_size(void)
+{
+	int payload = nla_total_size(sizeof(u32)) +
+		      nla_total_size(sizeof(u32));
+
+	return nla_total_size(payload);
+}
+
+/* Put the ifindex of the bound device into @msg as a u32 attribute of the
+ * given type. Returns 0, -EINVAL for a non-NETDEV binding or -EMSGSIZE.
+ */
+static int net_shaper_fill_binding(struct sk_buff *msg,
+				   const struct net_shaper_binding *binding,
+				   u32 type)
+{
+	bool is_netdev = binding->type == NET_SHAPER_BINDING_TYPE_NETDEV;
+
+	/* Should never happen, as currently only NETDEV is supported. */
+	if (WARN_ON_ONCE(!is_netdev))
+		return -EINVAL;
+
+	return nla_put_u32(msg, type, binding->netdev->ifindex) ? -EMSGSIZE : 0;
+}
+
+/* Put @handle into @msg as a nested attribute of the given type.
+ * Returns 0 on success, -EMSGSIZE when @msg has no room left.
+ */
+static int net_shaper_fill_handle(struct sk_buff *msg,
+				  const struct net_shaper_handle *handle,
+				  u32 type)
+{
+	struct nlattr *nest;
+	bool failed;
+
+	/* An unspecified handle is simply omitted from the message. */
+	if (handle->scope == NET_SHAPER_SCOPE_UNSPEC)
+		return 0;
+
+	nest = nla_nest_start(msg, type);
+	if (!nest)
+		return -EMSGSIZE;
+
+	failed = nla_put_u32(msg, NET_SHAPER_A_HANDLE_SCOPE, handle->scope);
+
+	/* The id is emitted only for scopes from QUEUE upwards. */
+	if (!failed && handle->scope >= NET_SHAPER_SCOPE_QUEUE)
+		failed = nla_put_u32(msg, NET_SHAPER_A_HANDLE_ID, handle->id);
+
+	if (failed) {
+		nla_nest_cancel(msg, nest);
+		return -EMSGSIZE;
+	}
+
+	nla_nest_end(msg, nest);
+	return 0;
+}
+
+/* Build in @msg one netlink message describing @shaper: the binding ifindex,
+ * the parent and own handles, then only the attributes whose value is
+ * non-zero. Returns 0 on success and -EMSGSIZE on any failure; once the
+ * header has been put, a failure also cancels the partial message.
+ */
+static int
+net_shaper_fill_one(struct sk_buff *msg,
+ const struct net_shaper_binding *binding,
+ const struct net_shaper *shaper,
+ const struct genl_info *info)
+{
+ void *hdr;
+
+ hdr = genlmsg_iput(msg, info);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ /* The metric is reported only together with a rate-related value. */
+ if (net_shaper_fill_binding(msg, binding, NET_SHAPER_A_IFINDEX) ||
+ net_shaper_fill_handle(msg, &shaper->parent,
+ NET_SHAPER_A_PARENT) ||
+ net_shaper_fill_handle(msg, &shaper->handle,
+ NET_SHAPER_A_HANDLE) ||
+ ((shaper->bw_min || shaper->bw_max || shaper->burst) &&
+ nla_put_u32(msg, NET_SHAPER_A_METRIC, shaper->metric)) ||
+ (shaper->bw_min &&
+ nla_put_uint(msg, NET_SHAPER_A_BW_MIN, shaper->bw_min)) ||
+ (shaper->bw_max &&
+ nla_put_uint(msg, NET_SHAPER_A_BW_MAX, shaper->bw_max)) ||
+ (shaper->burst &&
+ nla_put_uint(msg, NET_SHAPER_A_BURST, shaper->burst)) ||
+ (shaper->priority &&
+ nla_put_u32(msg, NET_SHAPER_A_PRIORITY, shaper->priority)) ||
+ (shaper->weight &&
+ nla_put_u32(msg, NET_SHAPER_A_WEIGHT, shaper->weight)))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+/* Resolve the device named by the ifindex attribute @type, take a tracked
+ * reference to it and record it as the context binding.
+ *
+ * Returns 0 on success, -EINVAL when the attribute is missing, -ENOENT when
+ * no such device exists and -EOPNOTSUPP when the device has no shaper ops;
+ * no reference is held on failure.
+ */
+static int net_shaper_ctx_setup(const struct genl_info *info, int type,
+				struct net_shaper_nl_ctx *ctx)
+{
+	struct net_device *dev;
+	int err;
+
+	if (GENL_REQ_ATTR_CHECK(info, type))
+		return -EINVAL;
+
+	dev = netdev_get_by_index(genl_info_net(info),
+				  nla_get_u32(info->attrs[type]),
+				  &ctx->dev_tracker, GFP_KERNEL);
+	if (!dev) {
+		err = -ENOENT;
+		goto bad_attr;
+	}
+
+	if (!dev->netdev_ops->net_shaper_ops) {
+		netdev_put(dev, &ctx->dev_tracker);
+		err = -EOPNOTSUPP;
+		goto bad_attr;
+	}
+
+	ctx->binding.netdev = dev;
+	ctx->binding.type = NET_SHAPER_BINDING_TYPE_NETDEV;
+	return 0;
+
+bad_attr:
+	NL_SET_BAD_ATTR(info->extack, info->attrs[type]);
+	return err;
+}
+
+/* Drop the device reference acquired by net_shaper_ctx_setup(). */
+static void net_shaper_ctx_cleanup(struct net_shaper_nl_ctx *ctx)
+{
+	if (ctx->binding.type != NET_SHAPER_BINDING_TYPE_NETDEV)
+		return;
+
+	netdev_put(ctx->binding.netdev, &ctx->dev_tracker);
+}
+
+/* Pack the handle scope and id into a single xarray index: the scope lands
+ * in the NET_SHAPER_SCOPE_MASK bits, the id in the NET_SHAPER_ID_MASK bits.
+ */
+static u32 net_shaper_handle_to_index(const struct net_shaper_handle *handle)
+{
+	u32 scope_bits = FIELD_PREP(NET_SHAPER_SCOPE_MASK, handle->scope);
+	u32 id_bits = FIELD_PREP(NET_SHAPER_ID_MASK, handle->id);
+
+	return scope_bits | id_bits;
+}
+
+/* Inverse of net_shaper_handle_to_index(): unpack @index into @handle. */
+static void net_shaper_index_to_handle(u32 index,
+				       struct net_shaper_handle *handle)
+{
+	handle->id = FIELD_GET(NET_SHAPER_ID_MASK, index);
+	handle->scope = FIELD_GET(NET_SHAPER_SCOPE_MASK, index);
+}
+
+/* Fill @parent with the default parent for @handle: QUEUE and NODE scope
+ * shapers default to the NETDEV scope shaper, any other scope gets an
+ * unspecified parent. The parent id is always set to 0.
+ */
+static void net_shaper_default_parent(const struct net_shaper_handle *handle,
+ struct net_shaper_handle *parent)
+{
+ switch (handle->scope) {
+ case NET_SHAPER_SCOPE_UNSPEC:
+ case NET_SHAPER_SCOPE_NETDEV:
+ case __NET_SHAPER_SCOPE_MAX:
+ parent->scope = NET_SHAPER_SCOPE_UNSPEC;
+ break;
+
+ case NET_SHAPER_SCOPE_QUEUE:
+ case NET_SHAPER_SCOPE_NODE:
+ parent->scope = NET_SHAPER_SCOPE_NETDEV;
+ break;
+ }
+ parent->id = 0;
+}
+
+/*
+ * MARK_0 is already in use due to XA_FLAGS_ALLOC, can't reuse such flag as
+ * it's cleared by xa_store().
+ */
+#define NET_SHAPER_NOT_VALID XA_MARK_1
+
+/* Return the shaper stored for @handle, or NULL when there is no hierarchy,
+ * no such entry, or the entry is still flagged NET_SHAPER_NOT_VALID.
+ */
+static struct net_shaper *
+net_shaper_lookup(struct net_shaper_binding *binding,
+		  const struct net_shaper_handle *handle)
+{
+	struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding);
+	u32 index = net_shaper_handle_to_index(handle);
+
+	if (!hierarchy)
+		return NULL;
+
+	if (xa_get_mark(&hierarchy->shapers, index, NET_SHAPER_NOT_VALID))
+		return NULL;
+
+	return xa_load(&hierarchy->shapers, index);
+}
+
+/* Allocate on demand the per device shaper's hierarchy container.
+ * Called under the net shaper lock
+ *
+ * Returns the already existing hierarchy or the newly allocated one, NULL
+ * on allocation failure.
+ */
+static struct net_shaper_hierarchy *
+net_shaper_hierarchy_setup(struct net_shaper_binding *binding)
+{
+ struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding);
+
+ if (hierarchy)
+ return hierarchy;
+
+ hierarchy = kmalloc(sizeof(*hierarchy), GFP_KERNEL);
+ if (!hierarchy)
+ return NULL;
+
+ /* The flag is required for ID allocation */
+ xa_init_flags(&hierarchy->shapers, XA_FLAGS_ALLOC);
+
+ /* Publish the fully initialized container on the bound object. */
+ switch (binding->type) {
+ case NET_SHAPER_BINDING_TYPE_NETDEV:
+ /* Pairs with READ_ONCE in net_shaper_hierarchy. */
+ WRITE_ONCE(binding->netdev->net_shaper_hierarchy, hierarchy);
+ break;
+ }
+ return hierarchy;
+}
+
+/* Prepare the hierarchy container to actually insert the given shaper, doing
+ * in advance the needed allocations.
+ *
+ * On success the slot for @handle holds either the pre-existing shaper or a
+ * new zeroed one flagged NET_SHAPER_NOT_VALID. For a NODE scope handle with
+ * id NET_SHAPER_ID_UNSPEC, a free id is allocated and written back into
+ * @handle. Returns 0 on success or a negative error code.
+ */
+static int net_shaper_pre_insert(struct net_shaper_binding *binding,
+ struct net_shaper_handle *handle,
+ struct netlink_ext_ack *extack)
+{
+ struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding);
+ struct net_shaper *prev, *cur;
+ bool id_allocated = false;
+ int ret, index;
+
+ if (!hierarchy)
+ return -ENOMEM;
+
+ /* Nothing to allocate when the slot is already populated. */
+ index = net_shaper_handle_to_index(handle);
+ cur = xa_load(&hierarchy->shapers, index);
+ if (cur)
+ return 0;
+
+ /* Allocate a new id, if needed. */
+ if (handle->scope == NET_SHAPER_SCOPE_NODE &&
+ handle->id == NET_SHAPER_ID_UNSPEC) {
+ u32 min, max;
+
+ /* Index range of the NODE scope, excluding the ID_UNSPEC value. */
+ handle->id = NET_SHAPER_ID_MASK - 1;
+ max = net_shaper_handle_to_index(handle);
+ handle->id = 0;
+ min = net_shaper_handle_to_index(handle);
+
+ ret = xa_alloc(&hierarchy->shapers, &index, NULL,
+ XA_LIMIT(min, max), GFP_KERNEL);
+ if (ret < 0) {
+ NL_SET_ERR_MSG(extack, "Can't allocate new id for NODE shaper");
+ return ret;
+ }
+
+ net_shaper_index_to_handle(index, handle);
+ id_allocated = true;
+ }
+
+ cur = kzalloc(sizeof(*cur), GFP_KERNEL);
+ if (!cur) {
+ ret = -ENOMEM;
+ goto free_id;
+ }
+
+ /* Mark 'tentative' shaper inside the hierarchy container.
+ * xa_set_mark is a no-op if the previous store fails.
+ */
+ xa_lock(&hierarchy->shapers);
+ prev = __xa_store(&hierarchy->shapers, index, cur, GFP_KERNEL);
+ __xa_set_mark(&hierarchy->shapers, index, NET_SHAPER_NOT_VALID);
+ xa_unlock(&hierarchy->shapers);
+ if (xa_err(prev)) {
+ NL_SET_ERR_MSG(extack, "Can't insert shaper into device store");
+ kfree_rcu(cur, rcu);
+ ret = xa_err(prev);
+ goto free_id;
+ }
+ return 0;
+
+free_id:
+ /* Release the index reserved by xa_alloc() above, if any. */
+ if (id_allocated)
+ xa_erase(&hierarchy->shapers, index);
+ return ret;
+}
+
+/* Commit the tentative insert with the actual values.
+ * Must be called only after a successful net_shaper_pre_insert().
+ *
+ * For each of the @nr_shapers entries in @shapers, the stored shaper found
+ * at the handle's index is overwritten with the new values and its
+ * NET_SHAPER_NOT_VALID mark is cleared. Handles with no stored entry trigger
+ * a one-time warning and are skipped.
+ */
+static void net_shaper_commit(struct net_shaper_binding *binding,
+ int nr_shapers, const struct net_shaper *shapers)
+{
+ struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding);
+ struct net_shaper *cur;
+ int index;
+ int i;
+
+ xa_lock(&hierarchy->shapers);
+ for (i = 0; i < nr_shapers; ++i) {
+ index = net_shaper_handle_to_index(&shapers[i].handle);
+
+ cur = xa_load(&hierarchy->shapers, index);
+ if (WARN_ON_ONCE(!cur))
+ continue;
+
+ /* Successful update: drop the tentative mark
+ * and update the hierarchy container.
+ */
+ __xa_clear_mark(&hierarchy->shapers, index,
+ NET_SHAPER_NOT_VALID);
+ *cur = shapers[i];
+ }
+ xa_unlock(&hierarchy->shapers);
+}
+
+/* Rollback all the tentative inserts from the hierarchy.
+ *
+ * Every entry still flagged NET_SHAPER_NOT_VALID is erased and released.
+ * No-op when the binding has no hierarchy.
+ */
+static void net_shaper_rollback(struct net_shaper_binding *binding)
+{
+	struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding);
+	struct net_shaper *cur;
+	unsigned long index;
+
+	if (!hierarchy)
+		return;
+
+	xa_lock(&hierarchy->shapers);
+	xa_for_each_marked(&hierarchy->shapers, index, cur,
+			   NET_SHAPER_NOT_VALID) {
+		__xa_erase(&hierarchy->shapers, index);
+		/* The tentative entry was published in the xarray and the
+		 * get/dump paths walk it under RCU without taking the shaper
+		 * lock: defer the release past a grace period, as done for
+		 * every other shaper removal in this file.
+		 */
+		kfree_rcu(cur, rcu);
+	}
+	xa_unlock(&hierarchy->shapers);
+}
+
+/* Parse the nested handle attribute @attr into @handle.
+ *
+ * The scope is mandatory. A missing id defaults to 0, except for NODE scope
+ * handles where it defaults to NET_SHAPER_ID_UNSPEC. Returns 0 on success or
+ * a negative error code.
+ */
+static int net_shaper_parse_handle(const struct nlattr *attr,
+				   const struct genl_info *info,
+				   struct net_shaper_handle *handle)
+{
+	struct nlattr *tb[NET_SHAPER_A_HANDLE_MAX + 1];
+	int err;
+
+	err = nla_parse_nested(tb, NET_SHAPER_A_HANDLE_MAX, attr,
+			       net_shaper_handle_nl_policy, info->extack);
+	if (err < 0)
+		return err;
+
+	if (NL_REQ_ATTR_CHECK(info->extack, attr, tb,
+			      NET_SHAPER_A_HANDLE_SCOPE))
+		return -EINVAL;
+
+	handle->scope = nla_get_u32(tb[NET_SHAPER_A_HANDLE_SCOPE]);
+
+	/* The default id for NODE scope shapers is an invalid one
+	 * to help the 'group' operation discriminate between new
+	 * NODE shaper creation (ID_UNSPEC) and reuse of existing
+	 * shaper (any other value).
+	 */
+	if (tb[NET_SHAPER_A_HANDLE_ID])
+		handle->id = nla_get_u32(tb[NET_SHAPER_A_HANDLE_ID]);
+	else if (handle->scope == NET_SHAPER_SCOPE_NODE)
+		handle->id = NET_SHAPER_ID_UNSPEC;
+	else
+		handle->id = 0;
+
+	return 0;
+}
+
+/* Check the user-provided attributes in @tb and the resulting @shaper
+ * against the capabilities the driver reports for the shaper's scope.
+ *
+ * Returns 0 when everything is supported, -EOPNOTSUPP (with extack pointing
+ * at the offending attribute or carrying a message) for an unsupported
+ * attribute, scope or metric, and -ENOENT for a QUEUE scope id that is not
+ * below the device real_num_tx_queues.
+ */
+static int net_shaper_validate_caps(struct net_shaper_binding *binding,
+ struct nlattr **tb,
+ const struct genl_info *info,
+ struct net_shaper *shaper)
+{
+ const struct net_shaper_ops *ops = net_shaper_ops(binding);
+ struct nlattr *bad = NULL;
+ unsigned long caps = 0;
+
+ ops->capabilities(binding, shaper->handle.scope, &caps);
+
+ /* When several attributes are unsupported, the last one checked wins. */
+ if (tb[NET_SHAPER_A_PRIORITY] &&
+ !(caps & BIT(NET_SHAPER_A_CAPS_SUPPORT_PRIORITY)))
+ bad = tb[NET_SHAPER_A_PRIORITY];
+ if (tb[NET_SHAPER_A_WEIGHT] &&
+ !(caps & BIT(NET_SHAPER_A_CAPS_SUPPORT_WEIGHT)))
+ bad = tb[NET_SHAPER_A_WEIGHT];
+ if (tb[NET_SHAPER_A_BW_MIN] &&
+ !(caps & BIT(NET_SHAPER_A_CAPS_SUPPORT_BW_MIN)))
+ bad = tb[NET_SHAPER_A_BW_MIN];
+ if (tb[NET_SHAPER_A_BW_MAX] &&
+ !(caps & BIT(NET_SHAPER_A_CAPS_SUPPORT_BW_MAX)))
+ bad = tb[NET_SHAPER_A_BW_MAX];
+ if (tb[NET_SHAPER_A_BURST] &&
+ !(caps & BIT(NET_SHAPER_A_CAPS_SUPPORT_BURST)))
+ bad = tb[NET_SHAPER_A_BURST];
+
+ /* No capability reported at all: blame the handle attribute. */
+ if (!caps)
+ bad = tb[NET_SHAPER_A_HANDLE];
+
+ if (bad) {
+ NL_SET_BAD_ATTR(info->extack, bad);
+ return -EOPNOTSUPP;
+ }
+
+ if (shaper->handle.scope == NET_SHAPER_SCOPE_QUEUE &&
+ binding->type == NET_SHAPER_BINDING_TYPE_NETDEV &&
+ shaper->handle.id >= binding->netdev->real_num_tx_queues) {
+ NL_SET_ERR_MSG_FMT(info->extack,
+ "Not existing queue id %d max %d",
+ shaper->handle.id,
+ binding->netdev->real_num_tx_queues);
+ return -ENOENT;
+ }
+
+ /* The metric is really used only if there is *any* rate-related
+ * setting, either in current attributes set or in pre-existing
+ * values.
+ */
+ if (shaper->burst || shaper->bw_min || shaper->bw_max) {
+ u32 metric_cap = NET_SHAPER_A_CAPS_SUPPORT_METRIC_BPS +
+ shaper->metric;
+
+ /* The metric test can fail even when the user did not
+ * specify the METRIC attribute. Pointing to rate related
+ * attribute will be confusing, as the attribute itself
+ * could be indeed supported, with a different metric.
+ * Be more specific.
+ */
+ if (!(caps & BIT(metric_cap))) {
+ NL_SET_ERR_MSG_FMT(info->extack, "Bad metric %d",
+ shaper->metric);
+ return -EOPNOTSUPP;
+ }
+ }
+ return 0;
+}
+
+/* Build @shaper from the attributes in @tb.
+ *
+ * The handle is parsed first; if a valid shaper with that handle already
+ * exists, its current configuration is used as the starting point and
+ * *@exists is set to true. Each attribute present in @tb then overrides the
+ * corresponding field, and the result is validated against the device
+ * capabilities. Returns 0 on success or a negative error code.
+ */
+static int net_shaper_parse_info(struct net_shaper_binding *binding,
+ struct nlattr **tb,
+ const struct genl_info *info,
+ struct net_shaper *shaper,
+ bool *exists)
+{
+ struct net_shaper *old;
+ int ret;
+
+ /* The shaper handle is the only mandatory attribute. */
+ if (NL_REQ_ATTR_CHECK(info->extack, NULL, tb, NET_SHAPER_A_HANDLE))
+ return -EINVAL;
+
+ ret = net_shaper_parse_handle(tb[NET_SHAPER_A_HANDLE], info,
+ &shaper->handle);
+ if (ret)
+ return ret;
+
+ if (shaper->handle.scope == NET_SHAPER_SCOPE_UNSPEC) {
+ NL_SET_BAD_ATTR(info->extack, tb[NET_SHAPER_A_HANDLE]);
+ return -EINVAL;
+ }
+
+ /* Fetch the existing shaper, if any, so that the user-provided info
+ * incrementally updates the existing shaper configuration.
+ */
+ old = net_shaper_lookup(binding, &shaper->handle);
+ if (old)
+ *shaper = *old;
+ *exists = !!old;
+
+ if (tb[NET_SHAPER_A_METRIC])
+ shaper->metric = nla_get_u32(tb[NET_SHAPER_A_METRIC]);
+
+ if (tb[NET_SHAPER_A_BW_MIN])
+ shaper->bw_min = nla_get_uint(tb[NET_SHAPER_A_BW_MIN]);
+
+ if (tb[NET_SHAPER_A_BW_MAX])
+ shaper->bw_max = nla_get_uint(tb[NET_SHAPER_A_BW_MAX]);
+
+ if (tb[NET_SHAPER_A_BURST])
+ shaper->burst = nla_get_uint(tb[NET_SHAPER_A_BURST]);
+
+ if (tb[NET_SHAPER_A_PRIORITY])
+ shaper->priority = nla_get_u32(tb[NET_SHAPER_A_PRIORITY]);
+
+ if (tb[NET_SHAPER_A_WEIGHT])
+ shaper->weight = nla_get_u32(tb[NET_SHAPER_A_WEIGHT]);
+
+ ret = net_shaper_validate_caps(binding, tb, info, shaper);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+/* Return 0 when the driver reports the NESTING capability for the scope of
+ * @shaper, -EOPNOTSUPP with an extack message otherwise.
+ */
+static int net_shaper_validate_nesting(struct net_shaper_binding *binding,
+				       const struct net_shaper *shaper,
+				       struct netlink_ext_ack *extack)
+{
+	unsigned long caps = 0;
+
+	net_shaper_ops(binding)->capabilities(binding, shaper->handle.scope,
+					      &caps);
+	if (caps & BIT(NET_SHAPER_A_CAPS_SUPPORT_NESTING))
+		return 0;
+
+	NL_SET_ERR_MSG_FMT(extack,
+			   "Nesting not supported for scope %d",
+			   shaper->handle.scope);
+	return -EOPNOTSUPP;
+}
+
+/* Fetch the existing leaf and update it with the user-provided
+ * attributes.
+ *
+ * Only QUEUE scope shapers are accepted as leaves. When the grouping @node
+ * has NODE scope, the leaf scope must support nesting. A leaf that does not
+ * exist yet gets the default parent. Returns 0 on success or a negative
+ * error code.
+ */
+static int net_shaper_parse_leaf(struct net_shaper_binding *binding,
+ const struct nlattr *attr,
+ const struct genl_info *info,
+ const struct net_shaper *node,
+ struct net_shaper *shaper)
+{
+ struct nlattr *tb[NET_SHAPER_A_WEIGHT + 1];
+ bool exists;
+ int ret;
+
+ ret = nla_parse_nested(tb, NET_SHAPER_A_WEIGHT, attr,
+ net_shaper_leaf_info_nl_policy, info->extack);
+ if (ret < 0)
+ return ret;
+
+ ret = net_shaper_parse_info(binding, tb, info, shaper, &exists);
+ if (ret < 0)
+ return ret;
+
+ if (shaper->handle.scope != NET_SHAPER_SCOPE_QUEUE) {
+ NL_SET_BAD_ATTR(info->extack, tb[NET_SHAPER_A_HANDLE]);
+ return -EINVAL;
+ }
+
+ if (node->handle.scope == NET_SHAPER_SCOPE_NODE) {
+ ret = net_shaper_validate_nesting(binding, shaper,
+ info->extack);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (!exists)
+ net_shaper_default_parent(&shaper->handle, &shaper->parent);
+ return 0;
+}
+
+/* Like net_shaper_parse_info(), but additionally allows the user to specify
+ * the shaper's parent handle.
+ *
+ * Both the shaper and, when given, its parent must have NODE or NETDEV
+ * scope. Returns 0 on success or a negative error code.
+ */
+static int net_shaper_parse_node(struct net_shaper_binding *binding,
+ struct nlattr **tb,
+ const struct genl_info *info,
+ struct net_shaper *shaper)
+{
+ bool exists;
+ int ret;
+
+ ret = net_shaper_parse_info(binding, tb, info, shaper, &exists);
+ if (ret)
+ return ret;
+
+ if (shaper->handle.scope != NET_SHAPER_SCOPE_NODE &&
+ shaper->handle.scope != NET_SHAPER_SCOPE_NETDEV) {
+ NL_SET_BAD_ATTR(info->extack, tb[NET_SHAPER_A_HANDLE]);
+ return -EINVAL;
+ }
+
+ if (tb[NET_SHAPER_A_PARENT]) {
+ ret = net_shaper_parse_handle(tb[NET_SHAPER_A_PARENT], info,
+ &shaper->parent);
+ if (ret)
+ return ret;
+
+ if (shaper->parent.scope != NET_SHAPER_SCOPE_NODE &&
+ shaper->parent.scope != NET_SHAPER_SCOPE_NETDEV) {
+ NL_SET_BAD_ATTR(info->extack, tb[NET_SHAPER_A_PARENT]);
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+/* Common pre-doit step: set up the shaper context kept in the genl_info
+ * scratch area from the ifindex attribute @type.
+ */
+static int net_shaper_generic_pre(struct genl_info *info, int type)
+{
+	struct net_shaper_nl_ctx *nl_ctx;
+
+	/* The context must fit the scratch area it is overlaid on. */
+	BUILD_BUG_ON(sizeof(*nl_ctx) > sizeof(info->ctx));
+
+	nl_ctx = (struct net_shaper_nl_ctx *)info->ctx;
+	return net_shaper_ctx_setup(info, type, nl_ctx);
+}
+
+/* pre_doit hook for shaper operations: bind the request to the device
+ * named by NET_SHAPER_A_IFINDEX.
+ */
+int net_shaper_nl_pre_doit(const struct genl_split_ops *ops,
+			   struct sk_buff *skb, struct genl_info *info)
+{
+	int err = net_shaper_generic_pre(info, NET_SHAPER_A_IFINDEX);
+
+	return err;
+}
+
+/* Common post-doit step: release what net_shaper_generic_pre() acquired. */
+static void net_shaper_generic_post(struct genl_info *info)
+{
+	struct net_shaper_nl_ctx *nl_ctx = (struct net_shaper_nl_ctx *)info->ctx;
+
+	net_shaper_ctx_cleanup(nl_ctx);
+}
+
+/* post_doit hook for shaper operations: drop the request context. */
+void net_shaper_nl_post_doit(const struct genl_split_ops *ops,
+			     struct sk_buff *skb, struct genl_info *info)
+{
+	net_shaper_generic_post(info);
+}
+
+/* Dump start hook for shaper operations: bind the dump to the device named
+ * by NET_SHAPER_A_IFINDEX, using the callback scratch area as context.
+ */
+int net_shaper_nl_pre_dumpit(struct netlink_callback *cb)
+{
+	struct net_shaper_nl_ctx *nl_ctx = (struct net_shaper_nl_ctx *)cb->ctx;
+
+	return net_shaper_ctx_setup(genl_info_dump(cb), NET_SHAPER_A_IFINDEX,
+				    nl_ctx);
+}
+
+/* Dump done hook for shaper operations: drop the dump context. Always
+ * returns 0.
+ */
+int net_shaper_nl_post_dumpit(struct netlink_callback *cb)
+{
+	struct net_shaper_nl_ctx *nl_ctx = (struct net_shaper_nl_ctx *)cb->ctx;
+
+	net_shaper_ctx_cleanup(nl_ctx);
+	return 0;
+}
+
+/* pre_doit hook for capability operations: bind the request to the device
+ * named by NET_SHAPER_A_CAPS_IFINDEX.
+ */
+int net_shaper_nl_cap_pre_doit(const struct genl_split_ops *ops,
+			       struct sk_buff *skb, struct genl_info *info)
+{
+	int err = net_shaper_generic_pre(info, NET_SHAPER_A_CAPS_IFINDEX);
+
+	return err;
+}
+
+/* post_doit hook for capability operations: drop the request context. */
+void net_shaper_nl_cap_post_doit(const struct genl_split_ops *ops,
+				 struct sk_buff *skb, struct genl_info *info)
+{
+	net_shaper_generic_post(info);
+}
+
+/* Dump start hook for capability operations: bind the dump to the device
+ * named by NET_SHAPER_A_CAPS_IFINDEX.
+ */
+int net_shaper_nl_cap_pre_dumpit(struct netlink_callback *cb)
+{
+	struct net_shaper_nl_ctx *nl_ctx = (struct net_shaper_nl_ctx *)cb->ctx;
+	const struct genl_info *info = genl_info_dump(cb);
+
+	return net_shaper_ctx_setup(info, NET_SHAPER_A_CAPS_IFINDEX, nl_ctx);
+}
+
+/* Dump done hook for capability operations: drop the dump context. Always
+ * returns 0.
+ */
+int net_shaper_nl_cap_post_dumpit(struct netlink_callback *cb)
+{
+	net_shaper_ctx_cleanup((struct net_shaper_nl_ctx *)cb->ctx);
+	return 0;
+}
+
+/* Handle the 'get' operation for a single shaper.
+ *
+ * Looks up the shaper identified by NET_SHAPER_A_HANDLE on the bound device
+ * and replies with its description. Returns 0 on success, -EINVAL when the
+ * handle is missing, -ENOENT when no valid shaper matches, -ENOMEM or
+ * -EMSGSIZE when the reply can't be built, or the error reported while
+ * sending the reply.
+ */
+int net_shaper_nl_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct net_shaper_binding *binding;
+	struct net_shaper_handle handle;
+	struct net_shaper *shaper;
+	struct sk_buff *msg;
+	int ret;
+
+	if (GENL_REQ_ATTR_CHECK(info, NET_SHAPER_A_HANDLE))
+		return -EINVAL;
+
+	binding = net_shaper_binding_from_ctx(info->ctx);
+	ret = net_shaper_parse_handle(info->attrs[NET_SHAPER_A_HANDLE], info,
+				      &handle);
+	if (ret < 0)
+		return ret;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	/* The shaper is accessed without the shaper lock: keep the RCU read
+	 * section open until the reply is fully built.
+	 */
+	rcu_read_lock();
+	shaper = net_shaper_lookup(binding, &handle);
+	if (!shaper) {
+		NL_SET_BAD_ATTR(info->extack,
+				info->attrs[NET_SHAPER_A_HANDLE]);
+		rcu_read_unlock();
+		ret = -ENOENT;
+		goto free_msg;
+	}
+
+	ret = net_shaper_fill_one(msg, binding, shaper, info);
+	rcu_read_unlock();
+	if (ret)
+		goto free_msg;
+
+	/* genlmsg_reply() consumes the message on both success and failure:
+	 * freeing it again on error would be a double free.
+	 */
+	return genlmsg_reply(msg, info);
+
+free_msg:
+	nlmsg_free(msg);
+	return ret;
+}
+
+/* Handle the 'get' dump operation: emit one message for each entry present
+ * in the hierarchy of the bound device, starting at ctx->start_index.
+ *
+ * The position is kept in the callback context, so a later call resumes
+ * from the entry that did not fit into @skb. Returns 0 when the walk is
+ * complete (or no hierarchy exists yet), or the error reported by
+ * net_shaper_fill_one().
+ */
+int net_shaper_nl_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct net_shaper_nl_ctx *ctx = (struct net_shaper_nl_ctx *)cb->ctx;
+ const struct genl_info *info = genl_info_dump(cb);
+ struct net_shaper_hierarchy *hierarchy;
+ struct net_shaper_binding *binding;
+ struct net_shaper *shaper;
+ int ret = 0;
+
+ /* Don't error out dumps performed before any set operation. */
+ binding = net_shaper_binding_from_ctx(ctx);
+ hierarchy = net_shaper_hierarchy(binding);
+ if (!hierarchy)
+ return 0;
+
+ rcu_read_lock();
+ /* xa_find() updates start_index to the index of the entry found. */
+ for (; (shaper = xa_find(&hierarchy->shapers, &ctx->start_index,
+ U32_MAX, XA_PRESENT)); ctx->start_index++) {
+ ret = net_shaper_fill_one(skb, binding, shaper, info);
+ if (ret)
+ break;
+ }
+ rcu_read_unlock();
+
+ return ret;
+}
+
+/* Handle the 'set' operation: create or update a single shaper.
+ *
+ * Under the shaper lock: parse and validate the request, reserve the slot
+ * in the hierarchy, invoke the driver's set() op and then either commit the
+ * new configuration or roll back the tentative insert. Returns 0 on success
+ * or a negative error code.
+ */
+int net_shaper_nl_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net_shaper_hierarchy *hierarchy;
+ struct net_shaper_binding *binding;
+ const struct net_shaper_ops *ops;
+ struct net_shaper_handle handle;
+ struct net_shaper shaper = {};
+ bool exists;
+ int ret;
+
+ binding = net_shaper_binding_from_ctx(info->ctx);
+
+ net_shaper_lock(binding);
+ ret = net_shaper_parse_info(binding, info->attrs, info, &shaper,
+ &exists);
+ if (ret)
+ goto unlock;
+
+ if (!exists)
+ net_shaper_default_parent(&shaper.handle, &shaper.parent);
+
+ hierarchy = net_shaper_hierarchy_setup(binding);
+ if (!hierarchy) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ /* The 'set' operation can't create node-scope shapers. */
+ handle = shaper.handle;
+ if (handle.scope == NET_SHAPER_SCOPE_NODE &&
+ !net_shaper_lookup(binding, &handle)) {
+ ret = -ENOENT;
+ goto unlock;
+ }
+
+ ret = net_shaper_pre_insert(binding, &handle, info->extack);
+ if (ret)
+ goto unlock;
+
+ ops = net_shaper_ops(binding);
+ ret = ops->set(binding, &shaper, info->extack);
+ if (ret) {
+ net_shaper_rollback(binding);
+ goto unlock;
+ }
+
+ net_shaper_commit(binding, 1, &shaper);
+
+unlock:
+ net_shaper_unlock(binding);
+ return ret;
+}
+
+/* Ask the driver to delete @shaper and remove it from the hierarchy, then
+ * walk up: each NODE scope parent whose leaves counter drops to zero is
+ * deleted the same way.
+ *
+ * Returns 0 on success or the negative value returned by the driver's
+ * delete() op, in which case the walk stops at the failing shaper.
+ */
+static int __net_shaper_delete(struct net_shaper_binding *binding,
+ struct net_shaper *shaper,
+ struct netlink_ext_ack *extack)
+{
+ struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding);
+ struct net_shaper_handle parent_handle, handle = shaper->handle;
+ const struct net_shaper_ops *ops = net_shaper_ops(binding);
+ int ret;
+
+again:
+ /* Save the parent handle before the shaper is released below. */
+ parent_handle = shaper->parent;
+
+ ret = ops->delete(binding, &handle, extack);
+ if (ret < 0)
+ return ret;
+
+ xa_erase(&hierarchy->shapers, net_shaper_handle_to_index(&handle));
+ kfree_rcu(shaper, rcu);
+
+ /* Eventually delete the parent, if it is left over with no leaves. */
+ if (parent_handle.scope == NET_SHAPER_SCOPE_NODE) {
+ shaper = net_shaper_lookup(binding, &parent_handle);
+ if (shaper && !--shaper->leaves) {
+ handle = parent_handle;
+ goto again;
+ }
+ }
+ return 0;
+}
+
+/* Byte-wise comparison of two handles: returns 0 when they are identical,
+ * a non-zero value otherwise.
+ */
+static int net_shaper_handle_cmp(const struct net_shaper_handle *a,
+ const struct net_shaper_handle *b)
+{
+ /* Must avoid holes in struct net_shaper_handle. */
+ BUILD_BUG_ON(sizeof(*a) != 8);
+
+ return memcmp(a, b, sizeof(*a));
+}
+
+/* Make @node inherit the parent shared by all the given leaves.
+ *
+ * Returns 0 after copying the common parent handle into @node, or -EINVAL
+ * with an extack message when the leaves do not all have the same parent.
+ */
+static int net_shaper_parent_from_leaves(int leaves_count,
+					 const struct net_shaper *leaves,
+					 struct net_shaper *node,
+					 struct netlink_ext_ack *extack)
+{
+	struct net_shaper_handle common = leaves[0].parent;
+	int idx = 1;
+
+	while (idx < leaves_count) {
+		if (net_shaper_handle_cmp(&leaves[idx].parent, &common) != 0) {
+			NL_SET_ERR_MSG_FMT(extack, "All the leaves shapers must have the same old parent");
+			return -EINVAL;
+		}
+		idx++;
+	}
+
+	node->parent = common;
+	return 0;
+}
+
+/* Nest the given @leaves under @node, creating or updating the latter when
+ * @update_node is set.
+ *
+ * The node's parent is resolved (inherited from the leaves or defaulted)
+ * and validated, the needed hierarchy slots are reserved, then the driver's
+ * group() op is invoked. On success the new configuration is committed; on
+ * failure every tentative insert is rolled back. Returns 0 on success or a
+ * negative error code.
+ */
+static int __net_shaper_group(struct net_shaper_binding *binding,
+ bool update_node, int leaves_count,
+ struct net_shaper *leaves,
+ struct net_shaper *node,
+ struct netlink_ext_ack *extack)
+{
+ const struct net_shaper_ops *ops = net_shaper_ops(binding);
+ struct net_shaper_handle leaf_handle;
+ struct net_shaper *parent = NULL;
+ bool new_node = false;
+ int i, ret;
+
+ if (node->handle.scope == NET_SHAPER_SCOPE_NODE) {
+ new_node = node->handle.id == NET_SHAPER_ID_UNSPEC;
+
+ if (!new_node && !net_shaper_lookup(binding, &node->handle)) {
+ /* The related attribute is not available when
+ * reaching here from the delete() op.
+ */
+ NL_SET_ERR_MSG_FMT(extack, "Node shaper %d:%d does not exists",
+ node->handle.scope, node->handle.id);
+ return -ENOENT;
+ }
+
+ /* When unspecified, the node parent scope is inherited from
+ * the leaves.
+ */
+ if (node->parent.scope == NET_SHAPER_SCOPE_UNSPEC) {
+ ret = net_shaper_parent_from_leaves(leaves_count,
+ leaves, node,
+ extack);
+ if (ret)
+ return ret;
+ }
+
+ } else {
+ net_shaper_default_parent(&node->handle, &node->parent);
+ }
+
+ /* A NODE scope parent must already exist and support nesting. */
+ if (node->parent.scope == NET_SHAPER_SCOPE_NODE) {
+ parent = net_shaper_lookup(binding, &node->parent);
+ if (!parent) {
+ NL_SET_ERR_MSG_FMT(extack, "Node parent shaper %d:%d does not exists",
+ node->parent.scope, node->parent.id);
+ return -ENOENT;
+ }
+
+ ret = net_shaper_validate_nesting(binding, node, extack);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (update_node) {
+ /* For newly created node scope shaper, the following will
+ * update the handle, due to id allocation.
+ */
+ ret = net_shaper_pre_insert(binding, &node->handle, extack);
+ if (ret)
+ return ret;
+ }
+
+ for (i = 0; i < leaves_count; ++i) {
+ /* Work on a copy: pre_insert may modify the handle it is given. */
+ leaf_handle = leaves[i].handle;
+
+ ret = net_shaper_pre_insert(binding, &leaf_handle, extack);
+ if (ret)
+ goto rollback;
+
+ /* Leaves already nested to this node need no re-linking. */
+ if (!net_shaper_handle_cmp(&leaves[i].parent, &node->handle))
+ continue;
+
+ /* The leaves shapers will be nested to the node, update the
+ * linking accordingly.
+ */
+ leaves[i].parent = node->handle;
+ node->leaves++;
+ }
+
+ ret = ops->group(binding, leaves_count, leaves, node, extack);
+ if (ret < 0)
+ goto rollback;
+
+ /* The node's parent gains a new leaf only when the node itself
+ * is created by this group operation
+ */
+ if (new_node && parent)
+ parent->leaves++;
+ if (update_node)
+ net_shaper_commit(binding, 1, node);
+ net_shaper_commit(binding, leaves_count, leaves);
+ return 0;
+
+rollback:
+ net_shaper_rollback(binding);
+ return ret;
+}
+
+/* Before deleting the NODE scope @shaper, re-link all its leaves to the
+ * shaper's own parent via a group operation.
+ *
+ * No-op when the shaper has no leaves. Returns 0 on success or a negative
+ * error code.
+ */
+static int net_shaper_pre_del_node(struct net_shaper_binding *binding,
+ const struct net_shaper *shaper,
+ struct netlink_ext_ack *extack)
+{
+ struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding);
+ struct net_shaper *cur, *leaves, node = {};
+ int ret, leaves_count = 0;
+ unsigned long index;
+ bool update_node;
+
+ if (!shaper->leaves)
+ return 0;
+
+ /* Fetch the new node information. */
+ node.handle = shaper->parent;
+ cur = net_shaper_lookup(binding, &node.handle);
+ if (cur) {
+ node = *cur;
+ } else {
+ /* A scope NODE shaper can be nested only to the NETDEV scope
+ * shaper without creating the latter, this check may fail only
+ * if the data is in inconsistent status.
+ */
+ if (WARN_ON_ONCE(node.handle.scope != NET_SHAPER_SCOPE_NETDEV))
+ return -EINVAL;
+ }
+
+ leaves = kcalloc(shaper->leaves, sizeof(struct net_shaper),
+ GFP_KERNEL);
+ if (!leaves)
+ return -ENOMEM;
+
+ /* Build the leaves arrays. */
+ xa_for_each(&hierarchy->shapers, index, cur) {
+ if (net_shaper_handle_cmp(&cur->parent, &shaper->handle))
+ continue;
+
+ /* More children than accounted for: refuse to overflow the array. */
+ if (WARN_ON_ONCE(leaves_count == shaper->leaves)) {
+ ret = -EINVAL;
+ goto free;
+ }
+
+ leaves[leaves_count++] = *cur;
+ }
+
+ /* When re-linking to the netdev shaper, avoid the eventual, implicit,
+ * creation of the new node, would be surprising since the user is
+ * doing a delete operation.
+ */
+ update_node = node.handle.scope != NET_SHAPER_SCOPE_NETDEV;
+ ret = __net_shaper_group(binding, update_node, leaves_count,
+ leaves, &node, extack);
+
+free:
+ kfree(leaves);
+ return ret;
+}
+
+/* Handle the 'delete' operation for the shaper identified by
+ * NET_SHAPER_A_HANDLE.
+ *
+ * Under the shaper lock: look up the shaper, re-link its leaves first when
+ * it has NODE scope, then delete it. Returns 0 on success, -EINVAL when the
+ * handle is missing, -ENOENT when there is no hierarchy or no such shaper,
+ * or another negative error code.
+ */
+int net_shaper_nl_delete_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net_shaper_hierarchy *hierarchy;
+ struct net_shaper_binding *binding;
+ struct net_shaper_handle handle;
+ struct net_shaper *shaper;
+ int ret;
+
+ if (GENL_REQ_ATTR_CHECK(info, NET_SHAPER_A_HANDLE))
+ return -EINVAL;
+
+ binding = net_shaper_binding_from_ctx(info->ctx);
+
+ net_shaper_lock(binding);
+ ret = net_shaper_parse_handle(info->attrs[NET_SHAPER_A_HANDLE], info,
+ &handle);
+ if (ret)
+ goto unlock;
+
+ hierarchy = net_shaper_hierarchy(binding);
+ if (!hierarchy) {
+ ret = -ENOENT;
+ goto unlock;
+ }
+
+ shaper = net_shaper_lookup(binding, &handle);
+ if (!shaper) {
+ ret = -ENOENT;
+ goto unlock;
+ }
+
+ if (handle.scope == NET_SHAPER_SCOPE_NODE) {
+ ret = net_shaper_pre_del_node(binding, shaper, info->extack);
+ if (ret)
+ goto unlock;
+ }
+
+ ret = __net_shaper_delete(binding, shaper, info->extack);
+
+unlock:
+ net_shaper_unlock(binding);
+ return ret;
+}
+
+/* Fill the pre-allocated @msg with the binding ifindex and the given
+ * @handle, then send it as the reply to the 'group' request.
+ *
+ * @msg is not owned by the caller anymore after this call: it is either
+ * handed to genlmsg_reply() or freed here on failure. Returns the
+ * genlmsg_reply() result, or -EMSGSIZE when the message can't be built.
+ */
+static int net_shaper_group_send_reply(struct net_shaper_binding *binding,
+ const struct net_shaper_handle *handle,
+ struct genl_info *info,
+ struct sk_buff *msg)
+{
+ void *hdr;
+
+ hdr = genlmsg_iput(msg, info);
+ if (!hdr)
+ goto free_msg;
+
+ if (net_shaper_fill_binding(msg, binding, NET_SHAPER_A_IFINDEX) ||
+ net_shaper_fill_handle(msg, handle, NET_SHAPER_A_HANDLE))
+ goto free_msg;
+
+ genlmsg_end(msg, hdr);
+
+ return genlmsg_reply(msg, info);
+
+free_msg:
+ /* Should never happen as msg is pre-allocated with enough space. */
+ WARN_ONCE(true, "calculated message payload length (%d)",
+ net_shaper_handle_size());
+ nlmsg_free(msg);
+ return -EMSGSIZE;
+}
+
+/* Handle the 'group' operation: nest the NET_SHAPER_A_LEAVES shapers under
+ * the node described by the top-level attributes, creating the node when
+ * needed, and reply with the node handle.
+ *
+ * NODE scope shapers left without leaves by the new linkage are deleted
+ * afterwards on a best-effort basis. Returns 0 on success, -EOPNOTSUPP when
+ * the driver has no group() op, or another negative error code.
+ */
+int net_shaper_nl_group_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct net_shaper **old_nodes, *leaves, node = {};
+	struct net_shaper_hierarchy *hierarchy;
+	struct net_shaper_binding *binding;
+	int i, ret, rem, leaves_count;
+	int old_nodes_count = 0;
+	struct sk_buff *msg;
+	struct nlattr *attr;
+
+	if (GENL_REQ_ATTR_CHECK(info, NET_SHAPER_A_LEAVES))
+		return -EINVAL;
+
+	binding = net_shaper_binding_from_ctx(info->ctx);
+
+	/* The group operation is optional. */
+	if (!net_shaper_ops(binding)->group)
+		return -EOPNOTSUPP;
+
+	net_shaper_lock(binding);
+	leaves_count = net_shaper_list_len(info, NET_SHAPER_A_LEAVES);
+	if (!leaves_count) {
+		NL_SET_BAD_ATTR(info->extack,
+				info->attrs[NET_SHAPER_A_LEAVES]);
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	/* Single allocation: the leaves array is followed by the array of
+	 * pointers to the old parent nodes.
+	 */
+	leaves = kcalloc(leaves_count, sizeof(struct net_shaper) +
+			 sizeof(struct net_shaper *), GFP_KERNEL);
+	if (!leaves) {
+		ret = -ENOMEM;
+		goto unlock;
+	}
+	old_nodes = (void *)&leaves[leaves_count];
+
+	ret = net_shaper_parse_node(binding, info->attrs, info, &node);
+	if (ret)
+		goto free_leaves;
+
+	i = 0;
+	nla_for_each_attr_type(attr, NET_SHAPER_A_LEAVES,
+			       genlmsg_data(info->genlhdr),
+			       genlmsg_len(info->genlhdr), rem) {
+		/* Don't report success if the attribute count is inconsistent
+		 * with the one computed above.
+		 */
+		if (WARN_ON_ONCE(i >= leaves_count)) {
+			ret = -EINVAL;
+			goto free_leaves;
+		}
+
+		ret = net_shaper_parse_leaf(binding, attr, info,
+					    &node, &leaves[i]);
+		if (ret)
+			goto free_leaves;
+		i++;
+	}
+
+	/* Prepare the msg reply in advance, to avoid device operation
+	 * rollback on allocation failure.
+	 */
+	msg = genlmsg_new(net_shaper_handle_size(), GFP_KERNEL);
+	if (!msg) {
+		/* ret is still 0 from the parsing above: set the error. */
+		ret = -ENOMEM;
+		goto free_leaves;
+	}
+
+	hierarchy = net_shaper_hierarchy_setup(binding);
+	if (!hierarchy) {
+		ret = -ENOMEM;
+		goto free_msg;
+	}
+
+	/* Record the node shapers that this group() operation can make
+	 * childless for later cleanup.
+	 */
+	for (i = 0; i < leaves_count; i++) {
+		if (leaves[i].parent.scope == NET_SHAPER_SCOPE_NODE &&
+		    net_shaper_handle_cmp(&leaves[i].parent, &node.handle)) {
+			struct net_shaper *tmp;
+
+			tmp = net_shaper_lookup(binding, &leaves[i].parent);
+			if (!tmp)
+				continue;
+
+			old_nodes[old_nodes_count++] = tmp;
+		}
+	}
+
+	ret = __net_shaper_group(binding, true, leaves_count, leaves, &node,
+				 info->extack);
+	if (ret)
+		goto free_msg;
+
+	/* Check if we need to delete any node left alone by the new leaves
+	 * linkage.
+	 */
+	for (i = 0; i < old_nodes_count; ++i) {
+		struct net_shaper *tmp = old_nodes[i];
+
+		if (--tmp->leaves > 0)
+			continue;
+
+		/* Errors here are not fatal: the grouping operation is
+		 * completed, and user-space can still explicitly clean-up
+		 * left-over nodes.
+		 */
+		__net_shaper_delete(binding, tmp, info->extack);
+	}
+
+	/* The reply helper takes over the message on every path. */
+	ret = net_shaper_group_send_reply(binding, &node.handle, info, msg);
+	if (ret)
+		GENL_SET_ERR_MSG_FMT(info, "Can't send reply");
+
+free_leaves:
+	kfree(leaves);
+
+unlock:
+	net_shaper_unlock(binding);
+	return ret;
+
+free_msg:
+	kfree_skb(msg);
+	goto free_leaves;
+}
+
+/* Append one capabilities message for @scope to @msg.
+ *
+ * The message holds the binding attribute written by
+ * net_shaper_fill_binding(), the scope as a u32 and one flag attribute for
+ * every bit of @flags in the NET_SHAPER_A_CAPS_SUPPORT_METRIC_BPS ..
+ * NET_SHAPER_A_CAPS_MAX range.
+ *
+ * Returns 0 on success, -EMSGSIZE when the header or any attribute could not
+ * be added; on attribute failure the partially built message is cancelled.
+ */
+static int
+net_shaper_cap_fill_one(struct sk_buff *msg,
+			struct net_shaper_binding *binding,
+			enum net_shaper_scope scope, unsigned long flags,
+			const struct genl_info *info)
+{
+	unsigned long attr = NET_SHAPER_A_CAPS_SUPPORT_METRIC_BPS;
+	void *header = genlmsg_iput(msg, info);
+
+	if (!header)
+		return -EMSGSIZE;
+
+	if (net_shaper_fill_binding(msg, binding, NET_SHAPER_A_CAPS_IFINDEX))
+		goto cancel;
+
+	if (nla_put_u32(msg, NET_SHAPER_A_CAPS_SCOPE, scope))
+		goto cancel;
+
+	/* Bit N of @flags is reported as the flag attribute with id N. */
+	while (attr <= NET_SHAPER_A_CAPS_MAX) {
+		if ((flags & BIT(attr)) && nla_put_flag(msg, attr))
+			goto cancel;
+		attr++;
+	}
+
+	genlmsg_end(msg, header);
+	return 0;
+
+cancel:
+	genlmsg_cancel(msg, header);
+	return -EMSGSIZE;
+}
+
+/* NET_SHAPER_CMD_CAP_GET "do" handler.
+ *
+ * Queries the capabilities callback of the binding stored in @info->ctx for
+ * the scope given in NET_SHAPER_A_CAPS_SCOPE and replies with a single
+ * capabilities message.
+ *
+ * Returns 0 on success, -EINVAL when the scope attribute is missing,
+ * -EOPNOTSUPP when the callback reports no capability for the scope,
+ * -ENOMEM when the reply cannot be allocated, or the error returned while
+ * filling or sending the reply.
+ */
+int net_shaper_nl_cap_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct net_shaper_binding *binding;
+	const struct net_shaper_ops *ops;
+	enum net_shaper_scope scope;
+	unsigned long flags = 0;
+	struct sk_buff *msg;
+	int ret;
+
+	if (GENL_REQ_ATTR_CHECK(info, NET_SHAPER_A_CAPS_SCOPE))
+		return -EINVAL;
+
+	binding = net_shaper_binding_from_ctx(info->ctx);
+	scope = nla_get_u32(info->attrs[NET_SHAPER_A_CAPS_SCOPE]);
+	ops = net_shaper_ops(binding);
+	ops->capabilities(binding, scope, &flags);
+	if (!flags)
+		return -EOPNOTSUPP;
+
+	msg = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	ret = net_shaper_cap_fill_one(msg, binding, scope, flags, info);
+	if (ret) {
+		/* The reply was never handed to netlink: still ours to free. */
+		nlmsg_free(msg);
+		return ret;
+	}
+
+	/* genlmsg_reply() passes @msg to the netlink unicast path, which
+	 * consumes the skb on failure as well as on success. Freeing it
+	 * again here on error would be a double free, so just propagate
+	 * the status.
+	 */
+	return genlmsg_reply(msg, info);
+}
+
+/* NET_SHAPER_CMD_CAP_GET "dump" handler.
+ *
+ * Walks every scope from 0 to NET_SHAPER_SCOPE_MAX, asks the binding's
+ * capabilities callback about it and appends one message to @skb for each
+ * scope that reports at least one capability bit.
+ *
+ * Returns 0 when all scopes were processed, or the first error returned by
+ * net_shaper_cap_fill_one().
+ */
+int net_shaper_nl_cap_get_dumpit(struct sk_buff *skb,
+				 struct netlink_callback *cb)
+{
+	const struct genl_info *info = genl_info_dump(cb);
+	struct net_shaper_binding *binding;
+	const struct net_shaper_ops *ops;
+	enum net_shaper_scope scope = 0;
+
+	binding = net_shaper_binding_from_ctx(cb->ctx);
+	ops = net_shaper_ops(binding);
+
+	while (scope <= NET_SHAPER_SCOPE_MAX) {
+		unsigned long caps = 0;
+
+		ops->capabilities(binding, scope, &caps);
+		if (caps) {
+			int err = net_shaper_cap_fill_one(skb, binding, scope,
+							  caps, info);
+
+			if (err)
+				return err;
+		}
+		++scope;
+	}
+
+	return 0;
+}
+/* Release every shaper stored in the hierarchy attached to @binding, then
+ * free the hierarchy itself. Does nothing when no hierarchy is attached.
+ */
+static void net_shaper_flush(struct net_shaper_binding *binding)
+{
+ struct net_shaper_hierarchy *hierarchy = net_shaper_hierarchy(binding);
+ struct net_shaper *cur;
+ unsigned long index;
+
+ if (!hierarchy)
+ return;
+ /* The xarray lock is taken explicitly around the walk because the loop
+ * uses the __xa_erase() variant; the binding lock is held for the whole
+ * teardown of the entries.
+ */
+ net_shaper_lock(binding);
+ xa_lock(&hierarchy->shapers);
+ xa_for_each(&hierarchy->shapers, index, cur) {
+ __xa_erase(&hierarchy->shapers, index);
+ kfree(cur); /* freed immediately, unlike the kfree_rcu() used on queue shrink */
+ }
+ xa_unlock(&hierarchy->shapers);
+ net_shaper_unlock(binding);
+
+ kfree(hierarchy); /* done after both locks have been dropped */
+}
+/* Flush all shapers of @dev: wraps the device in a temporary
+ * NET_SHAPER_BINDING_TYPE_NETDEV binding and hands it to net_shaper_flush().
+ */
+void net_shaper_flush_netdev(struct net_device *dev)
+{
+ struct net_shaper_binding binding = {
+ .type = NET_SHAPER_BINDING_TYPE_NETDEV,
+ .netdev = dev,
+ };
+
+ net_shaper_flush(&binding);
+}
+/* Drop the queue-scope shapers of the tx queues with index in
+ * [@txq, dev->real_num_tx_queues). When removing a shaper leaves its
+ * NODE-scope parent without leaves, the parent is deleted through
+ * __net_shaper_delete(). Does nothing for devices without a hierarchy.
+ */
+void net_shaper_set_real_num_tx_queues(struct net_device *dev,
+ unsigned int txq)
+{
+ struct net_shaper_hierarchy *hierarchy;
+ struct net_shaper_binding binding;
+ int i;
+
+ binding.type = NET_SHAPER_BINDING_TYPE_NETDEV;
+ binding.netdev = dev;
+ hierarchy = net_shaper_hierarchy(&binding);
+ if (!hierarchy)
+ return;
+
+ /* Drivers that implement shaper support are expected to acquire
+ * dev->lock before getting here. The assertion sits after the
+ * hierarchy check, so it only fires for devices that have one.
+ */
+ lockdep_assert_held(&dev->lock);
+
+ /* Take action only when decreasing the tx queue number: the loop body
+ * never runs when @txq is not below the current queue count.
+ */
+ for (i = txq; i < dev->real_num_tx_queues; ++i) {
+ struct net_shaper_handle handle, parent_handle;
+ struct net_shaper *shaper;
+ u32 index;
+
+ handle.scope = NET_SHAPER_SCOPE_QUEUE;
+ handle.id = i;
+ shaper = net_shaper_lookup(&binding, &handle);
+ if (!shaper)
+ continue;
+
+ /* Don't touch the H/W for the queue shaper, the drivers already
+ * deleted the queue and related resources.
+ */
+ parent_handle = shaper->parent; /* copied before the shaper is released */
+ index = net_shaper_handle_to_index(&handle);
+ xa_erase(&hierarchy->shapers, index);
+ kfree_rcu(shaper, rcu);
+
+ /* Only NODE-scope parents keep a leaves count to update; for those,
+ * the recursion on parent does the full job.
+ */
+ if (parent_handle.scope != NET_SHAPER_SCOPE_NODE)
+ continue;
+
+ shaper = net_shaper_lookup(&binding, &parent_handle);
+ if (shaper && !--shaper->leaves)
+ __net_shaper_delete(&binding, shaper, NULL);
+ }
+}
+/* Register the net_shaper generic netlink family; returns the result of
+ * genl_register_family(). Hooked up through subsys_initcall() below.
+ */
+static int __init shaper_init(void)
+{
+ return genl_register_family(&net_shaper_nl_family);
+}
+
+subsys_initcall(shaper_init);
diff --git a/net/shaper/shaper_nl_gen.c b/net/shaper/shaper_nl_gen.c
new file mode 100644
index 000000000000..204c8ae8c7b1
--- /dev/null
+++ b/net/shaper/shaper_nl_gen.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/net_shaper.yaml */
+/* YNL-GEN kernel source */
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include "shaper_nl_gen.h"
+
+#include <uapi/linux/net_shaper.h>
+
+/* Common nested types.
+ * net_shaper_handle_nl_policy validates a nested shaper handle: a u32 scope
+ * with a maximum value of 3 and a u32 id.
+ */
+const struct nla_policy net_shaper_handle_nl_policy[NET_SHAPER_A_HANDLE_ID + 1] = {
+ [NET_SHAPER_A_HANDLE_SCOPE] = NLA_POLICY_MAX(NLA_U32, 3),
+ [NET_SHAPER_A_HANDLE_ID] = { .type = NLA_U32, },
+};
+/* Policy for a nested leaf description: a nested handle (validated by
+ * net_shaper_handle_nl_policy) plus u32 priority and u32 weight.
+ */
+const struct nla_policy net_shaper_leaf_info_nl_policy[NET_SHAPER_A_WEIGHT + 1] = {
+ [NET_SHAPER_A_HANDLE] = NLA_POLICY_NESTED(net_shaper_handle_nl_policy),
+ [NET_SHAPER_A_PRIORITY] = { .type = NLA_U32, },
+ [NET_SHAPER_A_WEIGHT] = { .type = NLA_U32, },
+};
+
+/* NET_SHAPER_CMD_GET - do
+ * Accepts a u32 ifindex and a nested handle selecting the shaper.
+ */
+static const struct nla_policy net_shaper_get_do_nl_policy[NET_SHAPER_A_IFINDEX + 1] = {
+ [NET_SHAPER_A_IFINDEX] = { .type = NLA_U32, },
+ [NET_SHAPER_A_HANDLE] = NLA_POLICY_NESTED(net_shaper_handle_nl_policy),
+};
+
+/* NET_SHAPER_CMD_GET - dump
+ * Accepts only a u32 ifindex.
+ */
+static const struct nla_policy net_shaper_get_dump_nl_policy[NET_SHAPER_A_IFINDEX + 1] = {
+ [NET_SHAPER_A_IFINDEX] = { .type = NLA_U32, },
+};
+
+/* NET_SHAPER_CMD_SET - do
+ * Accepts a u32 ifindex, a nested handle, a u32 metric with a maximum value
+ * of 1, variable-width unsigned bw-min/bw-max/burst, and u32 priority and
+ * weight.
+ */
+static const struct nla_policy net_shaper_set_nl_policy[NET_SHAPER_A_IFINDEX + 1] = {
+ [NET_SHAPER_A_IFINDEX] = { .type = NLA_U32, },
+ [NET_SHAPER_A_HANDLE] = NLA_POLICY_NESTED(net_shaper_handle_nl_policy),
+ [NET_SHAPER_A_METRIC] = NLA_POLICY_MAX(NLA_U32, 1),
+ [NET_SHAPER_A_BW_MIN] = { .type = NLA_UINT, },
+ [NET_SHAPER_A_BW_MAX] = { .type = NLA_UINT, },
+ [NET_SHAPER_A_BURST] = { .type = NLA_UINT, },
+ [NET_SHAPER_A_PRIORITY] = { .type = NLA_U32, },
+ [NET_SHAPER_A_WEIGHT] = { .type = NLA_U32, },
+};
+
+/* NET_SHAPER_CMD_DELETE - do
+ * Accepts a u32 ifindex and a nested handle selecting the shaper.
+ */
+static const struct nla_policy net_shaper_delete_nl_policy[NET_SHAPER_A_IFINDEX + 1] = {
+ [NET_SHAPER_A_IFINDEX] = { .type = NLA_U32, },
+ [NET_SHAPER_A_HANDLE] = NLA_POLICY_NESTED(net_shaper_handle_nl_policy),
+};
+
+/* NET_SHAPER_CMD_GROUP - do
+ * Accepts the same attributes as the SET policy plus a nested parent handle
+ * and nested leaves entries validated by net_shaper_leaf_info_nl_policy.
+ */
+static const struct nla_policy net_shaper_group_nl_policy[NET_SHAPER_A_LEAVES + 1] = {
+ [NET_SHAPER_A_IFINDEX] = { .type = NLA_U32, },
+ [NET_SHAPER_A_PARENT] = NLA_POLICY_NESTED(net_shaper_handle_nl_policy),
+ [NET_SHAPER_A_HANDLE] = NLA_POLICY_NESTED(net_shaper_handle_nl_policy),
+ [NET_SHAPER_A_METRIC] = NLA_POLICY_MAX(NLA_U32, 1),
+ [NET_SHAPER_A_BW_MIN] = { .type = NLA_UINT, },
+ [NET_SHAPER_A_BW_MAX] = { .type = NLA_UINT, },
+ [NET_SHAPER_A_BURST] = { .type = NLA_UINT, },
+ [NET_SHAPER_A_PRIORITY] = { .type = NLA_U32, },
+ [NET_SHAPER_A_WEIGHT] = { .type = NLA_U32, },
+ [NET_SHAPER_A_LEAVES] = NLA_POLICY_NESTED(net_shaper_leaf_info_nl_policy),
+};
+
+/* NET_SHAPER_CMD_CAP_GET - do
+ * Accepts a u32 ifindex and a u32 scope with a maximum value of 3.
+ */
+static const struct nla_policy net_shaper_cap_get_do_nl_policy[NET_SHAPER_A_CAPS_SCOPE + 1] = {
+ [NET_SHAPER_A_CAPS_IFINDEX] = { .type = NLA_U32, },
+ [NET_SHAPER_A_CAPS_SCOPE] = NLA_POLICY_MAX(NLA_U32, 3),
+};
+
+/* NET_SHAPER_CMD_CAP_GET - dump
+ * Accepts only a u32 ifindex.
+ */
+static const struct nla_policy net_shaper_cap_get_dump_nl_policy[NET_SHAPER_A_CAPS_IFINDEX + 1] = {
+ [NET_SHAPER_A_CAPS_IFINDEX] = { .type = NLA_U32, },
+};
+
+/* Ops table for net_shaper.
+ * One entry per command and flavour: GET and CAP_GET each have a "do" and a
+ * "dump" entry, while SET, DELETE and GROUP are "do" only and additionally
+ * carry GENL_ADMIN_PERM. "do" entries are bracketed by pre_doit/post_doit
+ * hooks, "dump" entries by start/done hooks; the CAP_GET entries use their
+ * own cap_* variants of those hooks.
+ */
+static const struct genl_split_ops net_shaper_nl_ops[] = {
+ {
+ .cmd = NET_SHAPER_CMD_GET,
+ .pre_doit = net_shaper_nl_pre_doit,
+ .doit = net_shaper_nl_get_doit,
+ .post_doit = net_shaper_nl_post_doit,
+ .policy = net_shaper_get_do_nl_policy,
+ .maxattr = NET_SHAPER_A_IFINDEX,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NET_SHAPER_CMD_GET,
+ .start = net_shaper_nl_pre_dumpit,
+ .dumpit = net_shaper_nl_get_dumpit,
+ .done = net_shaper_nl_post_dumpit,
+ .policy = net_shaper_get_dump_nl_policy,
+ .maxattr = NET_SHAPER_A_IFINDEX,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = NET_SHAPER_CMD_SET,
+ .pre_doit = net_shaper_nl_pre_doit,
+ .doit = net_shaper_nl_set_doit,
+ .post_doit = net_shaper_nl_post_doit,
+ .policy = net_shaper_set_nl_policy,
+ .maxattr = NET_SHAPER_A_IFINDEX,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NET_SHAPER_CMD_DELETE,
+ .pre_doit = net_shaper_nl_pre_doit,
+ .doit = net_shaper_nl_delete_doit,
+ .post_doit = net_shaper_nl_post_doit,
+ .policy = net_shaper_delete_nl_policy,
+ .maxattr = NET_SHAPER_A_IFINDEX,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NET_SHAPER_CMD_GROUP,
+ .pre_doit = net_shaper_nl_pre_doit,
+ .doit = net_shaper_nl_group_doit,
+ .post_doit = net_shaper_nl_post_doit,
+ .policy = net_shaper_group_nl_policy,
+ .maxattr = NET_SHAPER_A_LEAVES,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NET_SHAPER_CMD_CAP_GET,
+ .pre_doit = net_shaper_nl_cap_pre_doit,
+ .doit = net_shaper_nl_cap_get_doit,
+ .post_doit = net_shaper_nl_cap_post_doit,
+ .policy = net_shaper_cap_get_do_nl_policy,
+ .maxattr = NET_SHAPER_A_CAPS_SCOPE,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NET_SHAPER_CMD_CAP_GET,
+ .start = net_shaper_nl_cap_pre_dumpit,
+ .dumpit = net_shaper_nl_cap_get_dumpit,
+ .done = net_shaper_nl_cap_post_dumpit,
+ .policy = net_shaper_cap_get_dump_nl_policy,
+ .maxattr = NET_SHAPER_A_CAPS_IFINDEX,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+};
+/* Generic netlink family descriptor for net_shaper: ties the family name and
+ * version to the split ops table above, with netns support and parallel ops
+ * enabled. Passed to genl_register_family() by shaper_init().
+ */
+struct genl_family net_shaper_nl_family __ro_after_init = {
+ .name = NET_SHAPER_FAMILY_NAME,
+ .version = NET_SHAPER_FAMILY_VERSION,
+ .netnsok = true,
+ .parallel_ops = true,
+ .module = THIS_MODULE,
+ .split_ops = net_shaper_nl_ops,
+ .n_split_ops = ARRAY_SIZE(net_shaper_nl_ops),
+};
diff --git a/net/shaper/shaper_nl_gen.h b/net/shaper/shaper_nl_gen.h
new file mode 100644
index 000000000000..cb7f9026fc23
--- /dev/null
+++ b/net/shaper/shaper_nl_gen.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/net_shaper.yaml */
+/* YNL-GEN kernel header */
+
+#ifndef _LINUX_NET_SHAPER_GEN_H
+#define _LINUX_NET_SHAPER_GEN_H
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include <uapi/linux/net_shaper.h>
+
+/* Common nested types */
+extern const struct nla_policy net_shaper_handle_nl_policy[NET_SHAPER_A_HANDLE_ID + 1];
+extern const struct nla_policy net_shaper_leaf_info_nl_policy[NET_SHAPER_A_WEIGHT + 1];
+
+int net_shaper_nl_pre_doit(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info);
+int net_shaper_nl_cap_pre_doit(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info);
+void
+net_shaper_nl_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+ struct genl_info *info);
+void
+net_shaper_nl_cap_post_doit(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info);
+int net_shaper_nl_pre_dumpit(struct netlink_callback *cb);
+int net_shaper_nl_cap_pre_dumpit(struct netlink_callback *cb);
+int net_shaper_nl_post_dumpit(struct netlink_callback *cb);
+int net_shaper_nl_cap_post_dumpit(struct netlink_callback *cb);
+
+int net_shaper_nl_get_doit(struct sk_buff *skb, struct genl_info *info);
+int net_shaper_nl_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int net_shaper_nl_set_doit(struct sk_buff *skb, struct genl_info *info);
+int net_shaper_nl_delete_doit(struct sk_buff *skb, struct genl_info *info);
+int net_shaper_nl_group_doit(struct sk_buff *skb, struct genl_info *info);
+int net_shaper_nl_cap_get_doit(struct sk_buff *skb, struct genl_info *info);
+int net_shaper_nl_cap_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+
+extern struct genl_family net_shaper_nl_family;
+
+#endif /* _LINUX_NET_SHAPER_GEN_H */
diff --git a/net/smc/smc.h b/net/smc/smc.h
index ad77d6b6b8d3..78ae10d06ed2 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -278,7 +278,7 @@ struct smc_connection {
*/
u64 peer_token; /* SMC-D token of peer */
u8 killed : 1; /* abnormal termination */
- u8 freed : 1; /* normal termiation */
+ u8 freed : 1; /* normal termination */
u8 out_of_sync : 1; /* out of sync with peer */
};
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 5625fda2960b..5fd6f5b8ef03 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -156,7 +156,7 @@ struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/
} __aligned(4);
struct smc_clc_msg_smcd { /* SMC-D GID information */
- struct smc_clc_smcd_gid_chid ism; /* ISM native GID+CHID of requestor */
+ struct smc_clc_smcd_gid_chid ism; /* ISM native GID+CHID of requester */
__be16 v2_ext_offset; /* SMC Version 2 Extension Offset */
u8 vendor_oui[3]; /* vendor organizationally unique identifier */
u8 vendor_exp_options[5];
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 4e694860ece4..500952c2e67b 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -2321,7 +2321,7 @@ static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
}
if (lgr->buf_type == SMCR_PHYS_CONT_BUFS)
goto out;
- fallthrough; // try virtually continguous buf
+ fallthrough; // try virtually contiguous buf
case SMCR_VIRT_CONT_BUFS:
buf_desc->order = get_order(bufsize);
buf_desc->cpu_addr = vzalloc(PAGE_SIZE << buf_desc->order);
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 0db4e5f79ac4..69b54ecd6503 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -30,7 +30,7 @@
*/
#define SMC_CONN_PER_LGR_PREFER 255 /* Preferred connections per link group used for
* SMC-R v2.1 and later negotiation, vendors or
- * distrubutions may modify it to a value between
+ * distributions may modify it to a value between
* 16-255 as needed.
*/
@@ -181,7 +181,7 @@ struct smc_link {
*/
#define SMC_LINKS_PER_LGR_MAX_PREFER 2 /* Preferred max links per link group used for
* SMC-R v2.1 and later negotiation, vendors or
- * distrubutions may modify it to a value between
+ * distributions may modify it to a value between
* 1-2 as needed.
*/
diff --git a/net/socket.c b/net/socket.c
index 042451f01c65..9a117248f18f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -509,7 +509,7 @@ static int sock_map_fd(struct socket *sock, int flags)
struct socket *sock_from_file(struct file *file)
{
- if (file->f_op == &socket_file_ops)
+ if (likely(file->f_op == &socket_file_ops))
return file->private_data; /* set in sock_alloc_file */
return NULL;
@@ -549,24 +549,6 @@ struct socket *sockfd_lookup(int fd, int *err)
}
EXPORT_SYMBOL(sockfd_lookup);
-static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
-{
- struct fd f = fdget(fd);
- struct socket *sock;
-
- *err = -EBADF;
- if (fd_file(f)) {
- sock = sock_from_file(fd_file(f));
- if (likely(sock)) {
- *fput_needed = f.word & FDPUT_FPUT;
- return sock;
- }
- *err = -ENOTSOCK;
- fdput(f);
- }
- return NULL;
-}
-
static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
size_t size)
{
@@ -687,7 +669,7 @@ void sock_release(struct socket *sock)
}
EXPORT_SYMBOL(sock_release);
-void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
+void __sock_tx_timestamp(__u32 tsflags, __u8 *tx_flags)
{
u8 flags = *tx_flags;
@@ -1576,9 +1558,11 @@ int __sock_create(struct net *net, int family, int type, int protocol,
err = pf->create(net, sock, protocol, kern);
if (err < 0) {
/* ->create should release the allocated sock->sk object on error
- * but it may leave the dangling pointer
+ * and make sure sock->sk is set to NULL to avoid use-after-free
*/
- sock->sk = NULL;
+ DEBUG_NET_WARN_ONCE(sock->sk,
+ "%ps must clear sock->sk on failure, family: %d, type: %d, protocol: %d\n",
+ pf->create, family, type, protocol);
goto out_module_put;
}
@@ -1858,16 +1842,20 @@ int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
{
struct socket *sock;
struct sockaddr_storage address;
- int err, fput_needed;
-
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if (sock) {
- err = move_addr_to_kernel(umyaddr, addrlen, &address);
- if (!err)
- err = __sys_bind_socket(sock, &address, addrlen);
- fput_light(sock->file, fput_needed);
- }
- return err;
+ CLASS(fd, f)(fd);
+ int err;
+
+ if (fd_empty(f))
+ return -EBADF;
+ sock = sock_from_file(fd_file(f));
+ if (unlikely(!sock))
+ return -ENOTSOCK;
+
+ err = move_addr_to_kernel(umyaddr, addrlen, &address);
+ if (unlikely(err))
+ return err;
+
+ return __sys_bind_socket(sock, &address, addrlen);
}
SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
@@ -1896,15 +1884,16 @@ int __sys_listen_socket(struct socket *sock, int backlog)
int __sys_listen(int fd, int backlog)
{
+ CLASS(fd, f)(fd);
struct socket *sock;
- int err, fput_needed;
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if (sock) {
- err = __sys_listen_socket(sock, backlog);
- fput_light(sock->file, fput_needed);
- }
- return err;
+ if (fd_empty(f))
+ return -EBADF;
+ sock = sock_from_file(fd_file(f));
+ if (unlikely(!sock))
+ return -ENOTSOCK;
+
+ return __sys_listen_socket(sock, backlog);
}
SYSCALL_DEFINE2(listen, int, fd, int, backlog)
@@ -2014,17 +2003,12 @@ static int __sys_accept4_file(struct file *file, struct sockaddr __user *upeer_s
int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags)
{
- int ret = -EBADF;
- struct fd f;
+ CLASS(fd, f)(fd);
- f = fdget(fd);
- if (fd_file(f)) {
- ret = __sys_accept4_file(fd_file(f), upeer_sockaddr,
+ if (fd_empty(f))
+ return -EBADF;
+ return __sys_accept4_file(fd_file(f), upeer_sockaddr,
upeer_addrlen, flags);
- fdput(f);
- }
-
- return ret;
}
SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
@@ -2076,20 +2060,18 @@ out:
int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
{
- int ret = -EBADF;
- struct fd f;
+ struct sockaddr_storage address;
+ CLASS(fd, f)(fd);
+ int ret;
- f = fdget(fd);
- if (fd_file(f)) {
- struct sockaddr_storage address;
+ if (fd_empty(f))
+ return -EBADF;
- ret = move_addr_to_kernel(uservaddr, addrlen, &address);
- if (!ret)
- ret = __sys_connect_file(fd_file(f), &address, addrlen, 0);
- fdput(f);
- }
+ ret = move_addr_to_kernel(uservaddr, addrlen, &address);
+ if (ret)
+ return ret;
- return ret;
+ return __sys_connect_file(fd_file(f), &address, addrlen, 0);
}
SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
@@ -2108,26 +2090,25 @@ int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
{
struct socket *sock;
struct sockaddr_storage address;
- int err, fput_needed;
+ CLASS(fd, f)(fd);
+ int err;
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if (!sock)
- goto out;
+ if (fd_empty(f))
+ return -EBADF;
+ sock = sock_from_file(fd_file(f));
+ if (unlikely(!sock))
+ return -ENOTSOCK;
err = security_socket_getsockname(sock);
if (err)
- goto out_put;
+ return err;
err = READ_ONCE(sock->ops)->getname(sock, (struct sockaddr *)&address, 0);
if (err < 0)
- goto out_put;
- /* "err" is actually length in this case */
- err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
+ return err;
-out_put:
- fput_light(sock->file, fput_needed);
-out:
- return err;
+ /* "err" is actually length in this case */
+ return move_addr_to_user(&address, err, usockaddr, usockaddr_len);
}
SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
@@ -2146,26 +2127,25 @@ int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
{
struct socket *sock;
struct sockaddr_storage address;
- int err, fput_needed;
+ CLASS(fd, f)(fd);
+ int err;
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if (sock != NULL) {
- const struct proto_ops *ops = READ_ONCE(sock->ops);
+ if (fd_empty(f))
+ return -EBADF;
+ sock = sock_from_file(fd_file(f));
+ if (unlikely(!sock))
+ return -ENOTSOCK;
- err = security_socket_getpeername(sock);
- if (err) {
- fput_light(sock->file, fput_needed);
- return err;
- }
+ err = security_socket_getpeername(sock);
+ if (err)
+ return err;
- err = ops->getname(sock, (struct sockaddr *)&address, 1);
- if (err >= 0)
- /* "err" is actually length in this case */
- err = move_addr_to_user(&address, err, usockaddr,
- usockaddr_len);
- fput_light(sock->file, fput_needed);
- }
- return err;
+ err = READ_ONCE(sock->ops)->getname(sock, (struct sockaddr *)&address, 1);
+ if (err < 0)
+ return err;
+
+ /* "err" is actually length in this case */
+ return move_addr_to_user(&address, err, usockaddr, usockaddr_len);
}
SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
@@ -2186,14 +2166,17 @@ int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
struct sockaddr_storage address;
int err;
struct msghdr msg;
- int fput_needed;
err = import_ubuf(ITER_SOURCE, buff, len, &msg.msg_iter);
if (unlikely(err))
return err;
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if (!sock)
- goto out;
+
+ CLASS(fd, f)(fd);
+ if (fd_empty(f))
+ return -EBADF;
+ sock = sock_from_file(fd_file(f));
+ if (unlikely(!sock))
+ return -ENOTSOCK;
msg.msg_name = NULL;
msg.msg_control = NULL;
@@ -2203,7 +2186,7 @@ int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
if (addr) {
err = move_addr_to_kernel(addr, addr_len, &address);
if (err < 0)
- goto out_put;
+ return err;
msg.msg_name = (struct sockaddr *)&address;
msg.msg_namelen = addr_len;
}
@@ -2211,12 +2194,7 @@ int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
if (sock->file->f_flags & O_NONBLOCK)
flags |= MSG_DONTWAIT;
msg.msg_flags = flags;
- err = __sock_sendmsg(sock, &msg);
-
-out_put:
- fput_light(sock->file, fput_needed);
-out:
- return err;
+ return __sock_sendmsg(sock, &msg);
}
SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
@@ -2251,14 +2229,18 @@ int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
};
struct socket *sock;
int err, err2;
- int fput_needed;
err = import_ubuf(ITER_DEST, ubuf, size, &msg.msg_iter);
if (unlikely(err))
return err;
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if (!sock)
- goto out;
+
+ CLASS(fd, f)(fd);
+
+ if (fd_empty(f))
+ return -EBADF;
+ sock = sock_from_file(fd_file(f));
+ if (unlikely(!sock))
+ return -ENOTSOCK;
if (sock->file->f_flags & O_NONBLOCK)
flags |= MSG_DONTWAIT;
@@ -2270,9 +2252,6 @@ int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
if (err2 < 0)
err = err2;
}
-
- fput_light(sock->file, fput_needed);
-out:
return err;
}
@@ -2347,17 +2326,16 @@ int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
{
sockptr_t optval = USER_SOCKPTR(user_optval);
bool compat = in_compat_syscall();
- int err, fput_needed;
struct socket *sock;
+ CLASS(fd, f)(fd);
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if (!sock)
- return err;
-
- err = do_sock_setsockopt(sock, compat, level, optname, optval, optlen);
+ if (fd_empty(f))
+ return -EBADF;
+ sock = sock_from_file(fd_file(f));
+ if (unlikely(!sock))
+ return -ENOTSOCK;
- fput_light(sock->file, fput_needed);
- return err;
+ return do_sock_setsockopt(sock, compat, level, optname, optval, optlen);
}
SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
@@ -2413,20 +2391,17 @@ EXPORT_SYMBOL(do_sock_getsockopt);
int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
int __user *optlen)
{
- int err, fput_needed;
struct socket *sock;
- bool compat;
+ CLASS(fd, f)(fd);
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if (!sock)
- return err;
+ if (fd_empty(f))
+ return -EBADF;
+ sock = sock_from_file(fd_file(f));
+ if (unlikely(!sock))
+ return -ENOTSOCK;
- compat = in_compat_syscall();
- err = do_sock_getsockopt(sock, compat, level, optname,
+ return do_sock_getsockopt(sock, in_compat_syscall(), level, optname,
USER_SOCKPTR(optval), USER_SOCKPTR(optlen));
-
- fput_light(sock->file, fput_needed);
- return err;
}
SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
@@ -2452,15 +2427,16 @@ int __sys_shutdown_sock(struct socket *sock, int how)
int __sys_shutdown(int fd, int how)
{
- int err, fput_needed;
struct socket *sock;
+ CLASS(fd, f)(fd);
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if (sock != NULL) {
- err = __sys_shutdown_sock(sock, how);
- fput_light(sock->file, fput_needed);
- }
- return err;
+ if (fd_empty(f))
+ return -EBADF;
+ sock = sock_from_file(fd_file(f));
+ if (unlikely(!sock))
+ return -ENOTSOCK;
+
+ return __sys_shutdown_sock(sock, how);
}
SYSCALL_DEFINE2(shutdown, int, fd, int, how)
@@ -2676,22 +2652,21 @@ long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
bool forbid_cmsg_compat)
{
- int fput_needed, err;
struct msghdr msg_sys;
struct socket *sock;
if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
return -EINVAL;
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if (!sock)
- goto out;
+ CLASS(fd, f)(fd);
- err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
+ if (fd_empty(f))
+ return -EBADF;
+ sock = sock_from_file(fd_file(f));
+ if (unlikely(!sock))
+ return -ENOTSOCK;
- fput_light(sock->file, fput_needed);
-out:
- return err;
+ return ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
}
SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
@@ -2706,7 +2681,7 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int
int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
unsigned int flags, bool forbid_cmsg_compat)
{
- int fput_needed, err, datagrams;
+ int err, datagrams;
struct socket *sock;
struct mmsghdr __user *entry;
struct compat_mmsghdr __user *compat_entry;
@@ -2722,9 +2697,13 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
datagrams = 0;
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if (!sock)
- return err;
+ CLASS(fd, f)(fd);
+
+ if (fd_empty(f))
+ return -EBADF;
+ sock = sock_from_file(fd_file(f));
+ if (unlikely(!sock))
+ return -ENOTSOCK;
used_address.name_len = UINT_MAX;
entry = mmsg;
@@ -2761,8 +2740,6 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
cond_resched();
}
- fput_light(sock->file, fput_needed);
-
/* We only return an error if no datagrams were able to be sent */
if (datagrams != 0)
return datagrams;
@@ -2884,22 +2861,21 @@ long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
bool forbid_cmsg_compat)
{
- int fput_needed, err;
struct msghdr msg_sys;
struct socket *sock;
if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
return -EINVAL;
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if (!sock)
- goto out;
+ CLASS(fd, f)(fd);
- err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
+ if (fd_empty(f))
+ return -EBADF;
+ sock = sock_from_file(fd_file(f));
+ if (unlikely(!sock))
+ return -ENOTSOCK;
- fput_light(sock->file, fput_needed);
-out:
- return err;
+ return ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
}
SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
@@ -2916,7 +2892,7 @@ static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
unsigned int vlen, unsigned int flags,
struct timespec64 *timeout)
{
- int fput_needed, err, datagrams;
+ int err = 0, datagrams;
struct socket *sock;
struct mmsghdr __user *entry;
struct compat_mmsghdr __user *compat_entry;
@@ -2931,16 +2907,18 @@ static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
datagrams = 0;
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if (!sock)
- return err;
+ CLASS(fd, f)(fd);
+
+ if (fd_empty(f))
+ return -EBADF;
+ sock = sock_from_file(fd_file(f));
+ if (unlikely(!sock))
+ return -ENOTSOCK;
if (likely(!(flags & MSG_ERRQUEUE))) {
err = sock_error(sock->sk);
- if (err) {
- datagrams = err;
- goto out_put;
- }
+ if (err)
+ return err;
}
entry = mmsg;
@@ -2997,12 +2975,10 @@ static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
}
if (err == 0)
- goto out_put;
+ return datagrams;
- if (datagrams == 0) {
- datagrams = err;
- goto out_put;
- }
+ if (datagrams == 0)
+ return err;
/*
* We may return less entries than requested (vlen) if the
@@ -3017,9 +2993,6 @@ static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
*/
WRITE_ONCE(sock->sk->sk_err, -err);
}
-out_put:
- fput_light(sock->file, fput_needed);
-
return datagrams;
}
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 1bd3e531b0e0..059f6ef1ad18 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1427,7 +1427,9 @@ static int c_show(struct seq_file *m, void *p)
seq_printf(m, "# expiry=%lld refcnt=%d flags=%lx\n",
convert_to_wallclock(cp->expiry_time),
kref_read(&cp->ref), cp->flags);
- cache_get(cp);
+ if (!cache_get_rcu(cp))
+ return 0;
+
if (cache_check(cd, cp, NULL))
/* cache_check does a cache_put on failure */
seq_puts(m, "# ");
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 825ec5357691..95397677673b 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1551,6 +1551,10 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
newlen = error;
if (protocol == IPPROTO_TCP) {
+ __netns_tracker_free(net, &sock->sk->ns_tracker, false);
+ sock->sk->sk_net_refcnt = 1;
+ get_net_track(net, &sock->sk->ns_tracker, GFP_KERNEL);
+ sock_inuse_add(net, 1);
if ((error = kernel_listen(sock, 64)) < 0)
goto bummer;
}
@@ -1608,7 +1612,6 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)
static void svc_sock_free(struct svc_xprt *xprt)
{
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
- struct page_frag_cache *pfc = &svsk->sk_frag_cache;
struct socket *sock = svsk->sk_sock;
trace_svcsock_free(svsk, sock);
@@ -1618,8 +1621,7 @@ static void svc_sock_free(struct svc_xprt *xprt)
sockfd_put(sock);
else
sock_release(sock);
- if (pfc->va)
- __page_frag_cache_drain(virt_to_head_page(pfc->va),
- pfc->pagecnt_bias);
+
+ page_frag_cache_drain(&svsk->sk_frag_cache);
kfree(svsk);
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index 58ae6ec4f25b..415c0310101f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -233,25 +233,34 @@ static int svc_rdma_proc_init(void)
rc = percpu_counter_init(&svcrdma_stat_read, 0, GFP_KERNEL);
if (rc)
- goto out_err;
+ goto err;
rc = percpu_counter_init(&svcrdma_stat_recv, 0, GFP_KERNEL);
if (rc)
- goto out_err;
+ goto err_read;
rc = percpu_counter_init(&svcrdma_stat_sq_starve, 0, GFP_KERNEL);
if (rc)
- goto out_err;
+ goto err_recv;
rc = percpu_counter_init(&svcrdma_stat_write, 0, GFP_KERNEL);
if (rc)
- goto out_err;
+ goto err_sq;
svcrdma_table_header = register_sysctl("sunrpc/svc_rdma",
svcrdma_parm_table);
+ if (!svcrdma_table_header)
+ goto err_write;
+
return 0;
-out_err:
+err_write:
+ rc = -ENOMEM;
+ percpu_counter_destroy(&svcrdma_stat_write);
+err_sq:
percpu_counter_destroy(&svcrdma_stat_sq_starve);
+err_recv:
percpu_counter_destroy(&svcrdma_stat_recv);
+err_read:
percpu_counter_destroy(&svcrdma_stat_read);
+err:
return rc;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index ae3fb9bc8a21..292022f0976e 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -493,7 +493,13 @@ static bool xdr_check_write_chunk(struct svc_rdma_recv_ctxt *rctxt)
if (xdr_stream_decode_u32(&rctxt->rc_stream, &segcount))
return false;
- /* A bogus segcount causes this buffer overflow check to fail. */
+ /* Before trusting the segcount value enough to use it in
+ * a computation, perform a simple range check. This is an
+ * arbitrary but sensible limit (ie, not architectural).
+ */
+ if (unlikely(segcount > RPCSVC_MAXPAGES))
+ return false;
+
p = xdr_inline_decode(&rctxt->rc_stream,
segcount * rpcrdma_segment_maxsz * sizeof(*p));
return p != NULL;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 1326fbf45a34..c60936d8cef7 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1198,6 +1198,7 @@ static void xs_sock_reset_state_flags(struct rpc_xprt *xprt)
clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state);
clear_bit(XPRT_SOCK_WAKE_DISCONNECT, &transport->sock_state);
clear_bit(XPRT_SOCK_NOSPACE, &transport->sock_state);
+ clear_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state);
}
static void xs_run_error_worker(struct sock_xprt *transport, unsigned int nr)
@@ -1278,6 +1279,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
transport->file = NULL;
sk->sk_user_data = NULL;
+ sk->sk_sndtimeo = 0;
xs_restore_old_callbacks(transport, sk);
xprt_clear_connected(xprt);
@@ -1939,6 +1941,13 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
goto out;
}
+ if (protocol == IPPROTO_TCP) {
+ __netns_tracker_free(xprt->xprt_net, &sock->sk->ns_tracker, false);
+ sock->sk->sk_net_refcnt = 1;
+ get_net_track(xprt->xprt_net, &sock->sk->ns_tracker, GFP_KERNEL);
+ sock_inuse_add(xprt->xprt_net, 1);
+ }
+
filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
if (IS_ERR(filp))
return ERR_CAST(filp);
@@ -2614,11 +2623,10 @@ static int xs_tls_handshake_sync(struct rpc_xprt *lower_xprt, struct xprtsec_par
rc = wait_for_completion_interruptible_timeout(&lower_transport->handshake_done,
XS_TLS_HANDSHAKE_TO);
if (rc <= 0) {
- if (!tls_handshake_cancel(sk)) {
- if (rc == 0)
- rc = -ETIMEDOUT;
- goto out_put_xprt;
- }
+ tls_handshake_cancel(sk);
+ if (rc == 0)
+ rc = -ETIMEDOUT;
+ goto out_put_xprt;
}
rc = lower_transport->xprt_err;
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index dfd29160fe11..25b28b1434f5 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -2420,6 +2420,7 @@ static int vsock_create(struct net *net, struct socket *sock,
if (sock->type == SOCK_DGRAM) {
ret = vsock_assign_transport(vsk, NULL);
if (ret < 0) {
+ sock->sk = NULL;
sock_put(sk);
return ret;
}
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index e2157e387217..56c232cf5b0f 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -549,6 +549,7 @@ static void hvs_destruct(struct vsock_sock *vsk)
vmbus_hvsock_device_unregister(chan);
kfree(hvs);
+ vsk->trans = NULL;
}
static int hvs_dgram_bind(struct vsock_sock *vsk, struct sockaddr_vm *addr)
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 10345388ad13..2d67b5f2010e 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -11,9 +11,6 @@ config WEXT_PROC
depends on PROC_FS
depends on WEXT_CORE
-config WEXT_SPY
- bool
-
config WEXT_PRIV
bool
@@ -188,19 +185,12 @@ config CFG80211_CRDA_SUPPORT
If unsure, say Y.
config CFG80211_WEXT
- bool "cfg80211 wireless extensions compatibility" if !CFG80211_WEXT_EXPORT
+ bool "cfg80211 wireless extensions compatibility"
select WEXT_CORE
- default y if CFG80211_WEXT_EXPORT
help
Enable this option if you need old userspace for wireless
extensions with cfg80211-based drivers.
-config CFG80211_WEXT_EXPORT
- bool
- help
- Drivers should select this option if they require cfg80211's
- wext compatibility symbols to be exported.
-
config CFG80211_KUNIT_TEST
tristate "KUnit tests for cfg80211" if !KUNIT_ALL_TESTS
depends on KUNIT
@@ -212,36 +202,3 @@ config CFG80211_KUNIT_TEST
If unsure, say N.
endif # CFG80211
-
-config LIB80211
- tristate
- default n
- help
- This options enables a library of common routines used
- by IEEE802.11 wireless LAN drivers.
-
- Drivers should select this themselves if needed.
-
-config LIB80211_CRYPT_WEP
- tristate
- select CRYPTO_LIB_ARC4
-
-config LIB80211_CRYPT_CCMP
- tristate
- select CRYPTO
- select CRYPTO_AES
- select CRYPTO_CCM
-
-config LIB80211_CRYPT_TKIP
- tristate
- select CRYPTO_LIB_ARC4
-
-config LIB80211_DEBUG
- bool "lib80211 debugging messages"
- depends on LIB80211
- default n
- help
- You can enable this if you want verbose debugging messages
- from lib80211.
-
- If unsure, say N.
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index 1d49cc8b6da1..62a83faf0e07 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -1,14 +1,9 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_CFG80211) += cfg80211.o
-obj-$(CONFIG_LIB80211) += lib80211.o
-obj-$(CONFIG_LIB80211_CRYPT_WEP) += lib80211_crypt_wep.o
-obj-$(CONFIG_LIB80211_CRYPT_CCMP) += lib80211_crypt_ccmp.o
-obj-$(CONFIG_LIB80211_CRYPT_TKIP) += lib80211_crypt_tkip.o
obj-y += tests/
obj-$(CONFIG_WEXT_CORE) += wext-core.o
obj-$(CONFIG_WEXT_PROC) += wext-proc.o
-obj-$(CONFIG_WEXT_SPY) += wext-spy.o
obj-$(CONFIG_WEXT_PRIV) += wext-priv.o
cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o nl80211.o
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index e579d7e1425f..40b6375a5de4 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -289,7 +289,7 @@ static bool cfg80211_valid_center_freq(u32 center,
/*
* Valid channels are packed from lowest frequency towards higher ones.
- * So test that the lower frequency alignes with one of these steps.
+ * So test that the lower frequency aligns with one of these steps.
*/
return (center - bw / 2 - 5945) % step == 0;
}
@@ -1628,6 +1628,7 @@ bool cfg80211_reg_check_beaconing(struct wiphy *wiphy,
EXPORT_SYMBOL(cfg80211_reg_check_beaconing);
int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
struct cfg80211_chan_def *chandef)
{
if (!rdev->ops->set_monitor_channel)
@@ -1635,7 +1636,7 @@ int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev,
if (!cfg80211_has_monitors_only(rdev))
return -EBUSY;
- return rdev_set_monitor_channel(rdev, chandef);
+ return rdev_set_monitor_channel(rdev, dev, chandef);
}
bool cfg80211_any_usable_channels(struct wiphy *wiphy,
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 74ca18833df1..afbdc549fb4a 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -603,16 +603,20 @@ use_default_name:
}
EXPORT_SYMBOL(wiphy_new_nm);
-static int wiphy_verify_combinations(struct wiphy *wiphy)
+static
+int wiphy_verify_iface_combinations(struct wiphy *wiphy,
+ const struct ieee80211_iface_combination *iface_comb,
+ int n_iface_comb,
+ bool combined_radio)
{
const struct ieee80211_iface_combination *c;
int i, j;
- for (i = 0; i < wiphy->n_iface_combinations; i++) {
+ for (i = 0; i < n_iface_comb; i++) {
u32 cnt = 0;
u16 all_iftypes = 0;
- c = &wiphy->iface_combinations[i];
+ c = &iface_comb[i];
/*
* Combinations with just one interface aren't real,
@@ -625,9 +629,13 @@ static int wiphy_verify_combinations(struct wiphy *wiphy)
if (WARN_ON(!c->num_different_channels))
return -EINVAL;
- /* DFS only works on one channel. */
- if (WARN_ON(c->radar_detect_widths &&
- (c->num_different_channels > 1)))
+ /* DFS only works on one channel. Avoid this check
+ * for multi-radio global combination, since it holds
+ * the capabilities of all radio combinations.
+ */
+ if (!combined_radio &&
+ WARN_ON(c->radar_detect_widths &&
+ c->num_different_channels > 1))
return -EINVAL;
if (WARN_ON(!c->n_limits))
@@ -648,13 +656,21 @@ static int wiphy_verify_combinations(struct wiphy *wiphy)
if (WARN_ON(wiphy->software_iftypes & types))
return -EINVAL;
- /* Only a single P2P_DEVICE can be allowed */
- if (WARN_ON(types & BIT(NL80211_IFTYPE_P2P_DEVICE) &&
+ /* Only a single P2P_DEVICE can be allowed, avoid this
+ * check for multi-radio global combination, since it
+ * holds the capabilities of all radio combinations.
+ */
+ if (!combined_radio &&
+ WARN_ON(types & BIT(NL80211_IFTYPE_P2P_DEVICE) &&
c->limits[j].max > 1))
return -EINVAL;
- /* Only a single NAN can be allowed */
- if (WARN_ON(types & BIT(NL80211_IFTYPE_NAN) &&
+ /* Only a single NAN can be allowed, avoid this
+ * check for multi-radio global combination, since it
+ * holds the capabilities of all radio combinations.
+ */
+ if (!combined_radio &&
+ WARN_ON(types & BIT(NL80211_IFTYPE_NAN) &&
c->limits[j].max > 1))
return -EINVAL;
@@ -693,6 +709,34 @@ static int wiphy_verify_combinations(struct wiphy *wiphy)
return 0;
}
+static int wiphy_verify_combinations(struct wiphy *wiphy)
+{
+ int i, ret;
+ bool combined_radio = false;
+
+ if (wiphy->n_radio) {
+ for (i = 0; i < wiphy->n_radio; i++) {
+ const struct wiphy_radio *radio = &wiphy->radio[i];
+
+ ret = wiphy_verify_iface_combinations(wiphy,
+ radio->iface_combinations,
+ radio->n_iface_combinations,
+ false);
+ if (ret)
+ return ret;
+ }
+
+ combined_radio = true;
+ }
+
+ ret = wiphy_verify_iface_combinations(wiphy,
+ wiphy->iface_combinations,
+ wiphy->n_iface_combinations,
+ combined_radio);
+
+ return ret;
+}
+
int wiphy_register(struct wiphy *wiphy)
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
@@ -1387,6 +1431,8 @@ void cfg80211_init_wdev(struct wireless_dev *wdev)
/* allow mac80211 to determine the timeout */
wdev->ps_timeout = -1;
+ wdev->radio_mask = BIT(wdev->wiphy->n_radio) - 1;
+
if ((wdev->iftype == NL80211_IFTYPE_STATION ||
wdev->iftype == NL80211_IFTYPE_P2P_CLIENT ||
wdev->iftype == NL80211_IFTYPE_ADHOC) && !wdev->use_4addr)
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 3b3e3cd7027a..4c45f994a8c0 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -516,6 +516,7 @@ static inline unsigned int elapsed_jiffies_msecs(unsigned long start)
}
int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
struct cfg80211_chan_def *chandef);
int ieee80211_get_ratemask(struct ieee80211_supported_band *sband,
diff --git a/net/wireless/lib80211.c b/net/wireless/lib80211.c
deleted file mode 100644
index 64c447040786..000000000000
--- a/net/wireless/lib80211.c
+++ /dev/null
@@ -1,257 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * lib80211 -- common bits for IEEE802.11 drivers
- *
- * Copyright(c) 2008 John W. Linville <linville@tuxdriver.com>
- *
- * Portions copied from old ieee80211 component, w/ original copyright
- * notices below:
- *
- * Host AP crypto routines
- *
- * Copyright (c) 2002-2003, Jouni Malinen <j@w1.fi>
- * Portions Copyright (C) 2004, Intel Corporation <jketreno@linux.intel.com>
- *
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/module.h>
-#include <linux/ctype.h>
-#include <linux/ieee80211.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-
-#include <net/lib80211.h>
-
-#define DRV_DESCRIPTION "common routines for IEEE802.11 drivers"
-
-MODULE_DESCRIPTION(DRV_DESCRIPTION);
-MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>");
-MODULE_LICENSE("GPL");
-
-struct lib80211_crypto_alg {
- struct list_head list;
- const struct lib80211_crypto_ops *ops;
-};
-
-static LIST_HEAD(lib80211_crypto_algs);
-static DEFINE_SPINLOCK(lib80211_crypto_lock);
-
-static void lib80211_crypt_deinit_entries(struct lib80211_crypt_info *info,
- int force);
-static void lib80211_crypt_quiescing(struct lib80211_crypt_info *info);
-static void lib80211_crypt_deinit_handler(struct timer_list *t);
-
-int lib80211_crypt_info_init(struct lib80211_crypt_info *info, char *name,
- spinlock_t *lock)
-{
- memset(info, 0, sizeof(*info));
-
- info->name = name;
- info->lock = lock;
-
- INIT_LIST_HEAD(&info->crypt_deinit_list);
- timer_setup(&info->crypt_deinit_timer, lib80211_crypt_deinit_handler,
- 0);
-
- return 0;
-}
-EXPORT_SYMBOL(lib80211_crypt_info_init);
-
-void lib80211_crypt_info_free(struct lib80211_crypt_info *info)
-{
- int i;
-
- lib80211_crypt_quiescing(info);
- del_timer_sync(&info->crypt_deinit_timer);
- lib80211_crypt_deinit_entries(info, 1);
-
- for (i = 0; i < NUM_WEP_KEYS; i++) {
- struct lib80211_crypt_data *crypt = info->crypt[i];
- if (crypt) {
- if (crypt->ops) {
- crypt->ops->deinit(crypt->priv);
- module_put(crypt->ops->owner);
- }
- kfree(crypt);
- info->crypt[i] = NULL;
- }
- }
-}
-EXPORT_SYMBOL(lib80211_crypt_info_free);
-
-static void lib80211_crypt_deinit_entries(struct lib80211_crypt_info *info,
- int force)
-{
- struct lib80211_crypt_data *entry, *next;
- unsigned long flags;
-
- spin_lock_irqsave(info->lock, flags);
- list_for_each_entry_safe(entry, next, &info->crypt_deinit_list, list) {
- if (atomic_read(&entry->refcnt) != 0 && !force)
- continue;
-
- list_del(&entry->list);
-
- if (entry->ops) {
- entry->ops->deinit(entry->priv);
- module_put(entry->ops->owner);
- }
- kfree(entry);
- }
- spin_unlock_irqrestore(info->lock, flags);
-}
-
-/* After this, crypt_deinit_list won't accept new members */
-static void lib80211_crypt_quiescing(struct lib80211_crypt_info *info)
-{
- unsigned long flags;
-
- spin_lock_irqsave(info->lock, flags);
- info->crypt_quiesced = 1;
- spin_unlock_irqrestore(info->lock, flags);
-}
-
-static void lib80211_crypt_deinit_handler(struct timer_list *t)
-{
- struct lib80211_crypt_info *info = from_timer(info, t,
- crypt_deinit_timer);
- unsigned long flags;
-
- lib80211_crypt_deinit_entries(info, 0);
-
- spin_lock_irqsave(info->lock, flags);
- if (!list_empty(&info->crypt_deinit_list) && !info->crypt_quiesced) {
- printk(KERN_DEBUG "%s: entries remaining in delayed crypt "
- "deletion list\n", info->name);
- info->crypt_deinit_timer.expires = jiffies + HZ;
- add_timer(&info->crypt_deinit_timer);
- }
- spin_unlock_irqrestore(info->lock, flags);
-}
-
-void lib80211_crypt_delayed_deinit(struct lib80211_crypt_info *info,
- struct lib80211_crypt_data **crypt)
-{
- struct lib80211_crypt_data *tmp;
- unsigned long flags;
-
- if (*crypt == NULL)
- return;
-
- tmp = *crypt;
- *crypt = NULL;
-
- /* must not run ops->deinit() while there may be pending encrypt or
- * decrypt operations. Use a list of delayed deinits to avoid needing
- * locking. */
-
- spin_lock_irqsave(info->lock, flags);
- if (!info->crypt_quiesced) {
- list_add(&tmp->list, &info->crypt_deinit_list);
- if (!timer_pending(&info->crypt_deinit_timer)) {
- info->crypt_deinit_timer.expires = jiffies + HZ;
- add_timer(&info->crypt_deinit_timer);
- }
- }
- spin_unlock_irqrestore(info->lock, flags);
-}
-EXPORT_SYMBOL(lib80211_crypt_delayed_deinit);
-
-int lib80211_register_crypto_ops(const struct lib80211_crypto_ops *ops)
-{
- unsigned long flags;
- struct lib80211_crypto_alg *alg;
-
- alg = kzalloc(sizeof(*alg), GFP_KERNEL);
- if (alg == NULL)
- return -ENOMEM;
-
- alg->ops = ops;
-
- spin_lock_irqsave(&lib80211_crypto_lock, flags);
- list_add(&alg->list, &lib80211_crypto_algs);
- spin_unlock_irqrestore(&lib80211_crypto_lock, flags);
-
- printk(KERN_DEBUG "lib80211_crypt: registered algorithm '%s'\n",
- ops->name);
-
- return 0;
-}
-EXPORT_SYMBOL(lib80211_register_crypto_ops);
-
-int lib80211_unregister_crypto_ops(const struct lib80211_crypto_ops *ops)
-{
- struct lib80211_crypto_alg *alg;
- unsigned long flags;
-
- spin_lock_irqsave(&lib80211_crypto_lock, flags);
- list_for_each_entry(alg, &lib80211_crypto_algs, list) {
- if (alg->ops == ops)
- goto found;
- }
- spin_unlock_irqrestore(&lib80211_crypto_lock, flags);
- return -EINVAL;
-
- found:
- printk(KERN_DEBUG "lib80211_crypt: unregistered algorithm '%s'\n",
- ops->name);
- list_del(&alg->list);
- spin_unlock_irqrestore(&lib80211_crypto_lock, flags);
- kfree(alg);
- return 0;
-}
-EXPORT_SYMBOL(lib80211_unregister_crypto_ops);
-
-const struct lib80211_crypto_ops *lib80211_get_crypto_ops(const char *name)
-{
- struct lib80211_crypto_alg *alg;
- unsigned long flags;
-
- spin_lock_irqsave(&lib80211_crypto_lock, flags);
- list_for_each_entry(alg, &lib80211_crypto_algs, list) {
- if (strcmp(alg->ops->name, name) == 0)
- goto found;
- }
- spin_unlock_irqrestore(&lib80211_crypto_lock, flags);
- return NULL;
-
- found:
- spin_unlock_irqrestore(&lib80211_crypto_lock, flags);
- return alg->ops;
-}
-EXPORT_SYMBOL(lib80211_get_crypto_ops);
-
-static void *lib80211_crypt_null_init(int keyidx)
-{
- return (void *)1;
-}
-
-static void lib80211_crypt_null_deinit(void *priv)
-{
-}
-
-static const struct lib80211_crypto_ops lib80211_crypt_null = {
- .name = "NULL",
- .init = lib80211_crypt_null_init,
- .deinit = lib80211_crypt_null_deinit,
- .owner = THIS_MODULE,
-};
-
-static int __init lib80211_init(void)
-{
- pr_info(DRV_DESCRIPTION "\n");
- return lib80211_register_crypto_ops(&lib80211_crypt_null);
-}
-
-static void __exit lib80211_exit(void)
-{
- lib80211_unregister_crypto_ops(&lib80211_crypt_null);
- BUG_ON(!list_empty(&lib80211_crypto_algs));
-}
-
-module_init(lib80211_init);
-module_exit(lib80211_exit);
diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c
deleted file mode 100644
index 5aad139130e1..000000000000
--- a/net/wireless/lib80211_crypt_ccmp.c
+++ /dev/null
@@ -1,448 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * lib80211 crypt: host-based CCMP encryption implementation for lib80211
- *
- * Copyright (c) 2003-2004, Jouni Malinen <j@w1.fi>
- * Copyright (c) 2008, John W. Linville <linville@tuxdriver.com>
- */
-
-#include <linux/kernel.h>
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/if_ether.h>
-#include <linux/if_arp.h>
-#include <asm/string.h>
-#include <linux/wireless.h>
-
-#include <linux/ieee80211.h>
-
-#include <linux/crypto.h>
-#include <crypto/aead.h>
-
-#include <net/lib80211.h>
-
-MODULE_AUTHOR("Jouni Malinen");
-MODULE_DESCRIPTION("Host AP crypt: CCMP");
-MODULE_LICENSE("GPL");
-
-#define AES_BLOCK_LEN 16
-#define CCMP_HDR_LEN 8
-#define CCMP_MIC_LEN 8
-#define CCMP_TK_LEN 16
-#define CCMP_PN_LEN 6
-
-struct lib80211_ccmp_data {
- u8 key[CCMP_TK_LEN];
- int key_set;
-
- u8 tx_pn[CCMP_PN_LEN];
- u8 rx_pn[CCMP_PN_LEN];
-
- u32 dot11RSNAStatsCCMPFormatErrors;
- u32 dot11RSNAStatsCCMPReplays;
- u32 dot11RSNAStatsCCMPDecryptErrors;
-
- int key_idx;
-
- struct crypto_aead *tfm;
-
- /* scratch buffers for virt_to_page() (crypto API) */
- u8 tx_aad[2 * AES_BLOCK_LEN];
- u8 rx_aad[2 * AES_BLOCK_LEN];
-};
-
-static void *lib80211_ccmp_init(int key_idx)
-{
- struct lib80211_ccmp_data *priv;
-
- priv = kzalloc(sizeof(*priv), GFP_ATOMIC);
- if (priv == NULL)
- goto fail;
- priv->key_idx = key_idx;
-
- priv->tfm = crypto_alloc_aead("ccm(aes)", 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR(priv->tfm)) {
- priv->tfm = NULL;
- goto fail;
- }
-
- return priv;
-
- fail:
- if (priv) {
- if (priv->tfm)
- crypto_free_aead(priv->tfm);
- kfree(priv);
- }
-
- return NULL;
-}
-
-static void lib80211_ccmp_deinit(void *priv)
-{
- struct lib80211_ccmp_data *_priv = priv;
- if (_priv && _priv->tfm)
- crypto_free_aead(_priv->tfm);
- kfree(priv);
-}
-
-static int ccmp_init_iv_and_aad(const struct ieee80211_hdr *hdr,
- const u8 *pn, u8 *iv, u8 *aad)
-{
- u8 *pos, qc = 0;
- size_t aad_len;
- int a4_included, qc_included;
-
- a4_included = ieee80211_has_a4(hdr->frame_control);
- qc_included = ieee80211_is_data_qos(hdr->frame_control);
-
- aad_len = 22;
- if (a4_included)
- aad_len += 6;
- if (qc_included) {
- pos = (u8 *) & hdr->addr4;
- if (a4_included)
- pos += 6;
- qc = *pos & 0x0f;
- aad_len += 2;
- }
-
- /* In CCM, the initial vectors (IV) used for CTR mode encryption and CBC
- * mode authentication are not allowed to collide, yet both are derived
- * from the same vector. We only set L := 1 here to indicate that the
- * data size can be represented in (L+1) bytes. The CCM layer will take
- * care of storing the data length in the top (L+1) bytes and setting
- * and clearing the other bits as is required to derive the two IVs.
- */
- iv[0] = 0x1;
-
- /* Nonce: QC | A2 | PN */
- iv[1] = qc;
- memcpy(iv + 2, hdr->addr2, ETH_ALEN);
- memcpy(iv + 8, pn, CCMP_PN_LEN);
-
- /* AAD:
- * FC with bits 4..6 and 11..13 masked to zero; 14 is always one
- * A1 | A2 | A3
- * SC with bits 4..15 (seq#) masked to zero
- * A4 (if present)
- * QC (if present)
- */
- pos = (u8 *) hdr;
- aad[0] = pos[0] & 0x8f;
- aad[1] = pos[1] & 0xc7;
- memcpy(aad + 2, &hdr->addrs, 3 * ETH_ALEN);
- pos = (u8 *) & hdr->seq_ctrl;
- aad[20] = pos[0] & 0x0f;
- aad[21] = 0; /* all bits masked */
- memset(aad + 22, 0, 8);
- if (a4_included)
- memcpy(aad + 22, hdr->addr4, ETH_ALEN);
- if (qc_included) {
- aad[a4_included ? 28 : 22] = qc;
- /* rest of QC masked */
- }
- return aad_len;
-}
-
-static int lib80211_ccmp_hdr(struct sk_buff *skb, int hdr_len,
- u8 *aeskey, int keylen, void *priv)
-{
- struct lib80211_ccmp_data *key = priv;
- int i;
- u8 *pos;
-
- if (skb_headroom(skb) < CCMP_HDR_LEN || skb->len < hdr_len)
- return -1;
-
- if (aeskey != NULL && keylen >= CCMP_TK_LEN)
- memcpy(aeskey, key->key, CCMP_TK_LEN);
-
- pos = skb_push(skb, CCMP_HDR_LEN);
- memmove(pos, pos + CCMP_HDR_LEN, hdr_len);
- pos += hdr_len;
-
- i = CCMP_PN_LEN - 1;
- while (i >= 0) {
- key->tx_pn[i]++;
- if (key->tx_pn[i] != 0)
- break;
- i--;
- }
-
- *pos++ = key->tx_pn[5];
- *pos++ = key->tx_pn[4];
- *pos++ = 0;
- *pos++ = (key->key_idx << 6) | (1 << 5) /* Ext IV included */ ;
- *pos++ = key->tx_pn[3];
- *pos++ = key->tx_pn[2];
- *pos++ = key->tx_pn[1];
- *pos++ = key->tx_pn[0];
-
- return CCMP_HDR_LEN;
-}
-
-static int lib80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
-{
- struct lib80211_ccmp_data *key = priv;
- struct ieee80211_hdr *hdr;
- struct aead_request *req;
- struct scatterlist sg[2];
- u8 *aad = key->tx_aad;
- u8 iv[AES_BLOCK_LEN];
- int len, data_len, aad_len;
- int ret;
-
- if (skb_tailroom(skb) < CCMP_MIC_LEN || skb->len < hdr_len)
- return -1;
-
- data_len = skb->len - hdr_len;
- len = lib80211_ccmp_hdr(skb, hdr_len, NULL, 0, priv);
- if (len < 0)
- return -1;
-
- req = aead_request_alloc(key->tfm, GFP_ATOMIC);
- if (!req)
- return -ENOMEM;
-
- hdr = (struct ieee80211_hdr *)skb->data;
- aad_len = ccmp_init_iv_and_aad(hdr, key->tx_pn, iv, aad);
-
- skb_put(skb, CCMP_MIC_LEN);
-
- sg_init_table(sg, 2);
- sg_set_buf(&sg[0], aad, aad_len);
- sg_set_buf(&sg[1], skb->data + hdr_len + CCMP_HDR_LEN,
- data_len + CCMP_MIC_LEN);
-
- aead_request_set_callback(req, 0, NULL, NULL);
- aead_request_set_ad(req, aad_len);
- aead_request_set_crypt(req, sg, sg, data_len, iv);
-
- ret = crypto_aead_encrypt(req);
- aead_request_free(req);
-
- return ret;
-}
-
-/*
- * deal with seq counter wrapping correctly.
- * refer to timer_after() for jiffies wrapping handling
- */
-static inline int ccmp_replay_check(u8 *pn_n, u8 *pn_o)
-{
- u32 iv32_n, iv16_n;
- u32 iv32_o, iv16_o;
-
- iv32_n = (pn_n[0] << 24) | (pn_n[1] << 16) | (pn_n[2] << 8) | pn_n[3];
- iv16_n = (pn_n[4] << 8) | pn_n[5];
-
- iv32_o = (pn_o[0] << 24) | (pn_o[1] << 16) | (pn_o[2] << 8) | pn_o[3];
- iv16_o = (pn_o[4] << 8) | pn_o[5];
-
- if ((s32)iv32_n - (s32)iv32_o < 0 ||
- (iv32_n == iv32_o && iv16_n <= iv16_o))
- return 1;
- return 0;
-}
-
-static int lib80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
-{
- struct lib80211_ccmp_data *key = priv;
- u8 keyidx, *pos;
- struct ieee80211_hdr *hdr;
- struct aead_request *req;
- struct scatterlist sg[2];
- u8 *aad = key->rx_aad;
- u8 iv[AES_BLOCK_LEN];
- u8 pn[6];
- int aad_len, ret;
- size_t data_len = skb->len - hdr_len - CCMP_HDR_LEN;
-
- if (skb->len < hdr_len + CCMP_HDR_LEN + CCMP_MIC_LEN) {
- key->dot11RSNAStatsCCMPFormatErrors++;
- return -1;
- }
-
- hdr = (struct ieee80211_hdr *)skb->data;
- pos = skb->data + hdr_len;
- keyidx = pos[3];
- if (!(keyidx & (1 << 5))) {
- net_dbg_ratelimited("CCMP: received packet without ExtIV flag from %pM\n",
- hdr->addr2);
- key->dot11RSNAStatsCCMPFormatErrors++;
- return -2;
- }
- keyidx >>= 6;
- if (key->key_idx != keyidx) {
- net_dbg_ratelimited("CCMP: RX tkey->key_idx=%d frame keyidx=%d\n",
- key->key_idx, keyidx);
- return -6;
- }
- if (!key->key_set) {
- net_dbg_ratelimited("CCMP: received packet from %pM with keyid=%d that does not have a configured key\n",
- hdr->addr2, keyidx);
- return -3;
- }
-
- pn[0] = pos[7];
- pn[1] = pos[6];
- pn[2] = pos[5];
- pn[3] = pos[4];
- pn[4] = pos[1];
- pn[5] = pos[0];
- pos += 8;
-
- if (ccmp_replay_check(pn, key->rx_pn)) {
-#ifdef CONFIG_LIB80211_DEBUG
- net_dbg_ratelimited("CCMP: replay detected: STA=%pM previous PN %02x%02x%02x%02x%02x%02x received PN %02x%02x%02x%02x%02x%02x\n",
- hdr->addr2,
- key->rx_pn[0], key->rx_pn[1], key->rx_pn[2],
- key->rx_pn[3], key->rx_pn[4], key->rx_pn[5],
- pn[0], pn[1], pn[2], pn[3], pn[4], pn[5]);
-#endif
- key->dot11RSNAStatsCCMPReplays++;
- return -4;
- }
-
- req = aead_request_alloc(key->tfm, GFP_ATOMIC);
- if (!req)
- return -ENOMEM;
-
- aad_len = ccmp_init_iv_and_aad(hdr, pn, iv, aad);
-
- sg_init_table(sg, 2);
- sg_set_buf(&sg[0], aad, aad_len);
- sg_set_buf(&sg[1], pos, data_len);
-
- aead_request_set_callback(req, 0, NULL, NULL);
- aead_request_set_ad(req, aad_len);
- aead_request_set_crypt(req, sg, sg, data_len, iv);
-
- ret = crypto_aead_decrypt(req);
- aead_request_free(req);
-
- if (ret) {
- net_dbg_ratelimited("CCMP: decrypt failed: STA=%pM (%d)\n",
- hdr->addr2, ret);
- key->dot11RSNAStatsCCMPDecryptErrors++;
- return -5;
- }
-
- memcpy(key->rx_pn, pn, CCMP_PN_LEN);
-
- /* Remove hdr and MIC */
- memmove(skb->data + CCMP_HDR_LEN, skb->data, hdr_len);
- skb_pull(skb, CCMP_HDR_LEN);
- skb_trim(skb, skb->len - CCMP_MIC_LEN);
-
- return keyidx;
-}
-
-static int lib80211_ccmp_set_key(void *key, int len, u8 * seq, void *priv)
-{
- struct lib80211_ccmp_data *data = priv;
- int keyidx;
- struct crypto_aead *tfm = data->tfm;
-
- keyidx = data->key_idx;
- memset(data, 0, sizeof(*data));
- data->key_idx = keyidx;
- data->tfm = tfm;
- if (len == CCMP_TK_LEN) {
- memcpy(data->key, key, CCMP_TK_LEN);
- data->key_set = 1;
- if (seq) {
- data->rx_pn[0] = seq[5];
- data->rx_pn[1] = seq[4];
- data->rx_pn[2] = seq[3];
- data->rx_pn[3] = seq[2];
- data->rx_pn[4] = seq[1];
- data->rx_pn[5] = seq[0];
- }
- if (crypto_aead_setauthsize(data->tfm, CCMP_MIC_LEN) ||
- crypto_aead_setkey(data->tfm, data->key, CCMP_TK_LEN))
- return -1;
- } else if (len == 0)
- data->key_set = 0;
- else
- return -1;
-
- return 0;
-}
-
-static int lib80211_ccmp_get_key(void *key, int len, u8 * seq, void *priv)
-{
- struct lib80211_ccmp_data *data = priv;
-
- if (len < CCMP_TK_LEN)
- return -1;
-
- if (!data->key_set)
- return 0;
- memcpy(key, data->key, CCMP_TK_LEN);
-
- if (seq) {
- seq[0] = data->tx_pn[5];
- seq[1] = data->tx_pn[4];
- seq[2] = data->tx_pn[3];
- seq[3] = data->tx_pn[2];
- seq[4] = data->tx_pn[1];
- seq[5] = data->tx_pn[0];
- }
-
- return CCMP_TK_LEN;
-}
-
-static void lib80211_ccmp_print_stats(struct seq_file *m, void *priv)
-{
- struct lib80211_ccmp_data *ccmp = priv;
-
- seq_printf(m,
- "key[%d] alg=CCMP key_set=%d "
- "tx_pn=%02x%02x%02x%02x%02x%02x "
- "rx_pn=%02x%02x%02x%02x%02x%02x "
- "format_errors=%d replays=%d decrypt_errors=%d\n",
- ccmp->key_idx, ccmp->key_set,
- ccmp->tx_pn[0], ccmp->tx_pn[1], ccmp->tx_pn[2],
- ccmp->tx_pn[3], ccmp->tx_pn[4], ccmp->tx_pn[5],
- ccmp->rx_pn[0], ccmp->rx_pn[1], ccmp->rx_pn[2],
- ccmp->rx_pn[3], ccmp->rx_pn[4], ccmp->rx_pn[5],
- ccmp->dot11RSNAStatsCCMPFormatErrors,
- ccmp->dot11RSNAStatsCCMPReplays,
- ccmp->dot11RSNAStatsCCMPDecryptErrors);
-}
-
-static const struct lib80211_crypto_ops lib80211_crypt_ccmp = {
- .name = "CCMP",
- .init = lib80211_ccmp_init,
- .deinit = lib80211_ccmp_deinit,
- .encrypt_mpdu = lib80211_ccmp_encrypt,
- .decrypt_mpdu = lib80211_ccmp_decrypt,
- .encrypt_msdu = NULL,
- .decrypt_msdu = NULL,
- .set_key = lib80211_ccmp_set_key,
- .get_key = lib80211_ccmp_get_key,
- .print_stats = lib80211_ccmp_print_stats,
- .extra_mpdu_prefix_len = CCMP_HDR_LEN,
- .extra_mpdu_postfix_len = CCMP_MIC_LEN,
- .owner = THIS_MODULE,
-};
-
-static int __init lib80211_crypto_ccmp_init(void)
-{
- return lib80211_register_crypto_ops(&lib80211_crypt_ccmp);
-}
-
-static void __exit lib80211_crypto_ccmp_exit(void)
-{
- lib80211_unregister_crypto_ops(&lib80211_crypt_ccmp);
-}
-
-module_init(lib80211_crypto_ccmp_init);
-module_exit(lib80211_crypto_ccmp_exit);
diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c
deleted file mode 100644
index 63e68e5e121e..000000000000
--- a/net/wireless/lib80211_crypt_tkip.c
+++ /dev/null
@@ -1,738 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * lib80211 crypt: host-based TKIP encryption implementation for lib80211
- *
- * Copyright (c) 2003-2004, Jouni Malinen <j@w1.fi>
- * Copyright (c) 2008, John W. Linville <linville@tuxdriver.com>
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/err.h>
-#include <linux/fips.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/scatterlist.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/mm.h>
-#include <linux/if_ether.h>
-#include <linux/if_arp.h>
-#include <asm/string.h>
-
-#include <linux/wireless.h>
-#include <linux/ieee80211.h>
-#include <net/iw_handler.h>
-
-#include <crypto/arc4.h>
-#include <crypto/hash.h>
-#include <linux/crypto.h>
-#include <linux/crc32.h>
-
-#include <net/lib80211.h>
-
-MODULE_AUTHOR("Jouni Malinen");
-MODULE_DESCRIPTION("lib80211 crypt: TKIP");
-MODULE_LICENSE("GPL");
-
-#define TKIP_HDR_LEN 8
-
-struct lib80211_tkip_data {
-#define TKIP_KEY_LEN 32
- u8 key[TKIP_KEY_LEN];
- int key_set;
-
- u32 tx_iv32;
- u16 tx_iv16;
- u16 tx_ttak[5];
- int tx_phase1_done;
-
- u32 rx_iv32;
- u16 rx_iv16;
- u16 rx_ttak[5];
- int rx_phase1_done;
- u32 rx_iv32_new;
- u16 rx_iv16_new;
-
- u32 dot11RSNAStatsTKIPReplays;
- u32 dot11RSNAStatsTKIPICVErrors;
- u32 dot11RSNAStatsTKIPLocalMICFailures;
-
- int key_idx;
-
- struct arc4_ctx rx_ctx_arc4;
- struct arc4_ctx tx_ctx_arc4;
- struct crypto_shash *rx_tfm_michael;
- struct crypto_shash *tx_tfm_michael;
-
- /* scratch buffers for virt_to_page() (crypto API) */
- u8 rx_hdr[16], tx_hdr[16];
-
- unsigned long flags;
-};
-
-static unsigned long lib80211_tkip_set_flags(unsigned long flags, void *priv)
-{
- struct lib80211_tkip_data *_priv = priv;
- unsigned long old_flags = _priv->flags;
- _priv->flags = flags;
- return old_flags;
-}
-
-static unsigned long lib80211_tkip_get_flags(void *priv)
-{
- struct lib80211_tkip_data *_priv = priv;
- return _priv->flags;
-}
-
-static void *lib80211_tkip_init(int key_idx)
-{
- struct lib80211_tkip_data *priv;
-
- if (fips_enabled)
- return NULL;
-
- priv = kzalloc(sizeof(*priv), GFP_ATOMIC);
- if (priv == NULL)
- goto fail;
-
- priv->key_idx = key_idx;
-
- priv->tx_tfm_michael = crypto_alloc_shash("michael_mic", 0, 0);
- if (IS_ERR(priv->tx_tfm_michael)) {
- priv->tx_tfm_michael = NULL;
- goto fail;
- }
-
- priv->rx_tfm_michael = crypto_alloc_shash("michael_mic", 0, 0);
- if (IS_ERR(priv->rx_tfm_michael)) {
- priv->rx_tfm_michael = NULL;
- goto fail;
- }
-
- return priv;
-
- fail:
- if (priv) {
- crypto_free_shash(priv->tx_tfm_michael);
- crypto_free_shash(priv->rx_tfm_michael);
- kfree(priv);
- }
-
- return NULL;
-}
-
-static void lib80211_tkip_deinit(void *priv)
-{
- struct lib80211_tkip_data *_priv = priv;
- if (_priv) {
- crypto_free_shash(_priv->tx_tfm_michael);
- crypto_free_shash(_priv->rx_tfm_michael);
- }
- kfree_sensitive(priv);
-}
-
-static inline u16 RotR1(u16 val)
-{
- return (val >> 1) | (val << 15);
-}
-
-static inline u8 Lo8(u16 val)
-{
- return val & 0xff;
-}
-
-static inline u8 Hi8(u16 val)
-{
- return val >> 8;
-}
-
-static inline u16 Lo16(u32 val)
-{
- return val & 0xffff;
-}
-
-static inline u16 Hi16(u32 val)
-{
- return val >> 16;
-}
-
-static inline u16 Mk16(u8 hi, u8 lo)
-{
- return lo | (((u16) hi) << 8);
-}
-
-static inline u16 Mk16_le(__le16 * v)
-{
- return le16_to_cpu(*v);
-}
-
-static const u16 Sbox[256] = {
- 0xC6A5, 0xF884, 0xEE99, 0xF68D, 0xFF0D, 0xD6BD, 0xDEB1, 0x9154,
- 0x6050, 0x0203, 0xCEA9, 0x567D, 0xE719, 0xB562, 0x4DE6, 0xEC9A,
- 0x8F45, 0x1F9D, 0x8940, 0xFA87, 0xEF15, 0xB2EB, 0x8EC9, 0xFB0B,
- 0x41EC, 0xB367, 0x5FFD, 0x45EA, 0x23BF, 0x53F7, 0xE496, 0x9B5B,
- 0x75C2, 0xE11C, 0x3DAE, 0x4C6A, 0x6C5A, 0x7E41, 0xF502, 0x834F,
- 0x685C, 0x51F4, 0xD134, 0xF908, 0xE293, 0xAB73, 0x6253, 0x2A3F,
- 0x080C, 0x9552, 0x4665, 0x9D5E, 0x3028, 0x37A1, 0x0A0F, 0x2FB5,
- 0x0E09, 0x2436, 0x1B9B, 0xDF3D, 0xCD26, 0x4E69, 0x7FCD, 0xEA9F,
- 0x121B, 0x1D9E, 0x5874, 0x342E, 0x362D, 0xDCB2, 0xB4EE, 0x5BFB,
- 0xA4F6, 0x764D, 0xB761, 0x7DCE, 0x527B, 0xDD3E, 0x5E71, 0x1397,
- 0xA6F5, 0xB968, 0x0000, 0xC12C, 0x4060, 0xE31F, 0x79C8, 0xB6ED,
- 0xD4BE, 0x8D46, 0x67D9, 0x724B, 0x94DE, 0x98D4, 0xB0E8, 0x854A,
- 0xBB6B, 0xC52A, 0x4FE5, 0xED16, 0x86C5, 0x9AD7, 0x6655, 0x1194,
- 0x8ACF, 0xE910, 0x0406, 0xFE81, 0xA0F0, 0x7844, 0x25BA, 0x4BE3,
- 0xA2F3, 0x5DFE, 0x80C0, 0x058A, 0x3FAD, 0x21BC, 0x7048, 0xF104,
- 0x63DF, 0x77C1, 0xAF75, 0x4263, 0x2030, 0xE51A, 0xFD0E, 0xBF6D,
- 0x814C, 0x1814, 0x2635, 0xC32F, 0xBEE1, 0x35A2, 0x88CC, 0x2E39,
- 0x9357, 0x55F2, 0xFC82, 0x7A47, 0xC8AC, 0xBAE7, 0x322B, 0xE695,
- 0xC0A0, 0x1998, 0x9ED1, 0xA37F, 0x4466, 0x547E, 0x3BAB, 0x0B83,
- 0x8CCA, 0xC729, 0x6BD3, 0x283C, 0xA779, 0xBCE2, 0x161D, 0xAD76,
- 0xDB3B, 0x6456, 0x744E, 0x141E, 0x92DB, 0x0C0A, 0x486C, 0xB8E4,
- 0x9F5D, 0xBD6E, 0x43EF, 0xC4A6, 0x39A8, 0x31A4, 0xD337, 0xF28B,
- 0xD532, 0x8B43, 0x6E59, 0xDAB7, 0x018C, 0xB164, 0x9CD2, 0x49E0,
- 0xD8B4, 0xACFA, 0xF307, 0xCF25, 0xCAAF, 0xF48E, 0x47E9, 0x1018,
- 0x6FD5, 0xF088, 0x4A6F, 0x5C72, 0x3824, 0x57F1, 0x73C7, 0x9751,
- 0xCB23, 0xA17C, 0xE89C, 0x3E21, 0x96DD, 0x61DC, 0x0D86, 0x0F85,
- 0xE090, 0x7C42, 0x71C4, 0xCCAA, 0x90D8, 0x0605, 0xF701, 0x1C12,
- 0xC2A3, 0x6A5F, 0xAEF9, 0x69D0, 0x1791, 0x9958, 0x3A27, 0x27B9,
- 0xD938, 0xEB13, 0x2BB3, 0x2233, 0xD2BB, 0xA970, 0x0789, 0x33A7,
- 0x2DB6, 0x3C22, 0x1592, 0xC920, 0x8749, 0xAAFF, 0x5078, 0xA57A,
- 0x038F, 0x59F8, 0x0980, 0x1A17, 0x65DA, 0xD731, 0x84C6, 0xD0B8,
- 0x82C3, 0x29B0, 0x5A77, 0x1E11, 0x7BCB, 0xA8FC, 0x6DD6, 0x2C3A,
-};
-
-static inline u16 _S_(u16 v)
-{
- u16 t = Sbox[Hi8(v)];
- return Sbox[Lo8(v)] ^ ((t << 8) | (t >> 8));
-}
-
-#define PHASE1_LOOP_COUNT 8
-
-static void tkip_mixing_phase1(u16 * TTAK, const u8 * TK, const u8 * TA,
- u32 IV32)
-{
- int i, j;
-
- /* Initialize the 80-bit TTAK from TSC (IV32) and TA[0..5] */
- TTAK[0] = Lo16(IV32);
- TTAK[1] = Hi16(IV32);
- TTAK[2] = Mk16(TA[1], TA[0]);
- TTAK[3] = Mk16(TA[3], TA[2]);
- TTAK[4] = Mk16(TA[5], TA[4]);
-
- for (i = 0; i < PHASE1_LOOP_COUNT; i++) {
- j = 2 * (i & 1);
- TTAK[0] += _S_(TTAK[4] ^ Mk16(TK[1 + j], TK[0 + j]));
- TTAK[1] += _S_(TTAK[0] ^ Mk16(TK[5 + j], TK[4 + j]));
- TTAK[2] += _S_(TTAK[1] ^ Mk16(TK[9 + j], TK[8 + j]));
- TTAK[3] += _S_(TTAK[2] ^ Mk16(TK[13 + j], TK[12 + j]));
- TTAK[4] += _S_(TTAK[3] ^ Mk16(TK[1 + j], TK[0 + j])) + i;
- }
-}
-
-static void tkip_mixing_phase2(u8 * WEPSeed, const u8 * TK, const u16 * TTAK,
- u16 IV16)
-{
- /* Make temporary area overlap WEP seed so that the final copy can be
- * avoided on little endian hosts. */
- u16 *PPK = (u16 *) & WEPSeed[4];
-
- /* Step 1 - make copy of TTAK and bring in TSC */
- PPK[0] = TTAK[0];
- PPK[1] = TTAK[1];
- PPK[2] = TTAK[2];
- PPK[3] = TTAK[3];
- PPK[4] = TTAK[4];
- PPK[5] = TTAK[4] + IV16;
-
- /* Step 2 - 96-bit bijective mixing using S-box */
- PPK[0] += _S_(PPK[5] ^ Mk16_le((__le16 *) & TK[0]));
- PPK[1] += _S_(PPK[0] ^ Mk16_le((__le16 *) & TK[2]));
- PPK[2] += _S_(PPK[1] ^ Mk16_le((__le16 *) & TK[4]));
- PPK[3] += _S_(PPK[2] ^ Mk16_le((__le16 *) & TK[6]));
- PPK[4] += _S_(PPK[3] ^ Mk16_le((__le16 *) & TK[8]));
- PPK[5] += _S_(PPK[4] ^ Mk16_le((__le16 *) & TK[10]));
-
- PPK[0] += RotR1(PPK[5] ^ Mk16_le((__le16 *) & TK[12]));
- PPK[1] += RotR1(PPK[0] ^ Mk16_le((__le16 *) & TK[14]));
- PPK[2] += RotR1(PPK[1]);
- PPK[3] += RotR1(PPK[2]);
- PPK[4] += RotR1(PPK[3]);
- PPK[5] += RotR1(PPK[4]);
-
- /* Step 3 - bring in last of TK bits, assign 24-bit WEP IV value
- * WEPSeed[0..2] is transmitted as WEP IV */
- WEPSeed[0] = Hi8(IV16);
- WEPSeed[1] = (Hi8(IV16) | 0x20) & 0x7F;
- WEPSeed[2] = Lo8(IV16);
- WEPSeed[3] = Lo8((PPK[5] ^ Mk16_le((__le16 *) & TK[0])) >> 1);
-
-#ifdef __BIG_ENDIAN
- {
- int i;
- for (i = 0; i < 6; i++)
- PPK[i] = (PPK[i] << 8) | (PPK[i] >> 8);
- }
-#endif
-}
-
-static int lib80211_tkip_hdr(struct sk_buff *skb, int hdr_len,
- u8 * rc4key, int keylen, void *priv)
-{
- struct lib80211_tkip_data *tkey = priv;
- u8 *pos;
- struct ieee80211_hdr *hdr;
-
- hdr = (struct ieee80211_hdr *)skb->data;
-
- if (skb_headroom(skb) < TKIP_HDR_LEN || skb->len < hdr_len)
- return -1;
-
- if (rc4key == NULL || keylen < 16)
- return -1;
-
- if (!tkey->tx_phase1_done) {
- tkip_mixing_phase1(tkey->tx_ttak, tkey->key, hdr->addr2,
- tkey->tx_iv32);
- tkey->tx_phase1_done = 1;
- }
- tkip_mixing_phase2(rc4key, tkey->key, tkey->tx_ttak, tkey->tx_iv16);
-
- pos = skb_push(skb, TKIP_HDR_LEN);
- memmove(pos, pos + TKIP_HDR_LEN, hdr_len);
- pos += hdr_len;
-
- *pos++ = *rc4key;
- *pos++ = *(rc4key + 1);
- *pos++ = *(rc4key + 2);
- *pos++ = (tkey->key_idx << 6) | (1 << 5) /* Ext IV included */ ;
- *pos++ = tkey->tx_iv32 & 0xff;
- *pos++ = (tkey->tx_iv32 >> 8) & 0xff;
- *pos++ = (tkey->tx_iv32 >> 16) & 0xff;
- *pos++ = (tkey->tx_iv32 >> 24) & 0xff;
-
- tkey->tx_iv16++;
- if (tkey->tx_iv16 == 0) {
- tkey->tx_phase1_done = 0;
- tkey->tx_iv32++;
- }
-
- return TKIP_HDR_LEN;
-}
-
-static int lib80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
-{
- struct lib80211_tkip_data *tkey = priv;
- int len;
- u8 rc4key[16], *pos, *icv;
- u32 crc;
-
- if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) {
- struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
- net_dbg_ratelimited("TKIP countermeasures: dropped TX packet to %pM\n",
- hdr->addr1);
- return -1;
- }
-
- if (skb_tailroom(skb) < 4 || skb->len < hdr_len)
- return -1;
-
- len = skb->len - hdr_len;
- pos = skb->data + hdr_len;
-
- if ((lib80211_tkip_hdr(skb, hdr_len, rc4key, 16, priv)) < 0)
- return -1;
-
- crc = ~crc32_le(~0, pos, len);
- icv = skb_put(skb, 4);
- icv[0] = crc;
- icv[1] = crc >> 8;
- icv[2] = crc >> 16;
- icv[3] = crc >> 24;
-
- arc4_setkey(&tkey->tx_ctx_arc4, rc4key, 16);
- arc4_crypt(&tkey->tx_ctx_arc4, pos, pos, len + 4);
-
- return 0;
-}
-
-/*
- * deal with seq counter wrapping correctly.
- * refer to timer_after() for jiffies wrapping handling
- */
-static inline int tkip_replay_check(u32 iv32_n, u16 iv16_n,
- u32 iv32_o, u16 iv16_o)
-{
- if ((s32)iv32_n - (s32)iv32_o < 0 ||
- (iv32_n == iv32_o && iv16_n <= iv16_o))
- return 1;
- return 0;
-}
-
-static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
-{
- struct lib80211_tkip_data *tkey = priv;
- u8 rc4key[16];
- u8 keyidx, *pos;
- u32 iv32;
- u16 iv16;
- struct ieee80211_hdr *hdr;
- u8 icv[4];
- u32 crc;
- int plen;
-
- hdr = (struct ieee80211_hdr *)skb->data;
-
- if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) {
- net_dbg_ratelimited("TKIP countermeasures: dropped received packet from %pM\n",
- hdr->addr2);
- return -1;
- }
-
- if (skb->len < hdr_len + TKIP_HDR_LEN + 4)
- return -1;
-
- pos = skb->data + hdr_len;
- keyidx = pos[3];
- if (!(keyidx & (1 << 5))) {
- net_dbg_ratelimited("TKIP: received packet without ExtIV flag from %pM\n",
- hdr->addr2);
- return -2;
- }
- keyidx >>= 6;
- if (tkey->key_idx != keyidx) {
- net_dbg_ratelimited("TKIP: RX tkey->key_idx=%d frame keyidx=%d\n",
- tkey->key_idx, keyidx);
- return -6;
- }
- if (!tkey->key_set) {
- net_dbg_ratelimited("TKIP: received packet from %pM with keyid=%d that does not have a configured key\n",
- hdr->addr2, keyidx);
- return -3;
- }
- iv16 = (pos[0] << 8) | pos[2];
- iv32 = pos[4] | (pos[5] << 8) | (pos[6] << 16) | (pos[7] << 24);
- pos += TKIP_HDR_LEN;
-
- if (tkip_replay_check(iv32, iv16, tkey->rx_iv32, tkey->rx_iv16)) {
-#ifdef CONFIG_LIB80211_DEBUG
- net_dbg_ratelimited("TKIP: replay detected: STA=%pM previous TSC %08x%04x received TSC %08x%04x\n",
- hdr->addr2, tkey->rx_iv32, tkey->rx_iv16,
- iv32, iv16);
-#endif
- tkey->dot11RSNAStatsTKIPReplays++;
- return -4;
- }
-
- if (iv32 != tkey->rx_iv32 || !tkey->rx_phase1_done) {
- tkip_mixing_phase1(tkey->rx_ttak, tkey->key, hdr->addr2, iv32);
- tkey->rx_phase1_done = 1;
- }
- tkip_mixing_phase2(rc4key, tkey->key, tkey->rx_ttak, iv16);
-
- plen = skb->len - hdr_len - 12;
-
- arc4_setkey(&tkey->rx_ctx_arc4, rc4key, 16);
- arc4_crypt(&tkey->rx_ctx_arc4, pos, pos, plen + 4);
-
- crc = ~crc32_le(~0, pos, plen);
- icv[0] = crc;
- icv[1] = crc >> 8;
- icv[2] = crc >> 16;
- icv[3] = crc >> 24;
- if (memcmp(icv, pos + plen, 4) != 0) {
- if (iv32 != tkey->rx_iv32) {
- /* Previously cached Phase1 result was already lost, so
- * it needs to be recalculated for the next packet. */
- tkey->rx_phase1_done = 0;
- }
-#ifdef CONFIG_LIB80211_DEBUG
- net_dbg_ratelimited("TKIP: ICV error detected: STA=%pM\n",
- hdr->addr2);
-#endif
- tkey->dot11RSNAStatsTKIPICVErrors++;
- return -5;
- }
-
- /* Update real counters only after Michael MIC verification has
- * completed */
- tkey->rx_iv32_new = iv32;
- tkey->rx_iv16_new = iv16;
-
- /* Remove IV and ICV */
- memmove(skb->data + TKIP_HDR_LEN, skb->data, hdr_len);
- skb_pull(skb, TKIP_HDR_LEN);
- skb_trim(skb, skb->len - 4);
-
- return keyidx;
-}
-
-static int michael_mic(struct crypto_shash *tfm_michael, u8 *key, u8 *hdr,
- u8 *data, size_t data_len, u8 *mic)
-{
- SHASH_DESC_ON_STACK(desc, tfm_michael);
- int err;
-
- if (tfm_michael == NULL) {
- pr_warn("%s(): tfm_michael == NULL\n", __func__);
- return -1;
- }
-
- desc->tfm = tfm_michael;
-
- if (crypto_shash_setkey(tfm_michael, key, 8))
- return -1;
-
- err = crypto_shash_init(desc);
- if (err)
- goto out;
- err = crypto_shash_update(desc, hdr, 16);
- if (err)
- goto out;
- err = crypto_shash_update(desc, data, data_len);
- if (err)
- goto out;
- err = crypto_shash_final(desc, mic);
-
-out:
- shash_desc_zero(desc);
- return err;
-}
-
-static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr)
-{
- struct ieee80211_hdr *hdr11;
-
- hdr11 = (struct ieee80211_hdr *)skb->data;
-
- switch (le16_to_cpu(hdr11->frame_control) &
- (IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS)) {
- case IEEE80211_FCTL_TODS:
- memcpy(hdr, hdr11->addr3, ETH_ALEN); /* DA */
- memcpy(hdr + ETH_ALEN, hdr11->addr2, ETH_ALEN); /* SA */
- break;
- case IEEE80211_FCTL_FROMDS:
- memcpy(hdr, hdr11->addr1, ETH_ALEN); /* DA */
- memcpy(hdr + ETH_ALEN, hdr11->addr3, ETH_ALEN); /* SA */
- break;
- case IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS:
- memcpy(hdr, hdr11->addr3, ETH_ALEN); /* DA */
- memcpy(hdr + ETH_ALEN, hdr11->addr4, ETH_ALEN); /* SA */
- break;
- default:
- memcpy(hdr, hdr11->addr1, ETH_ALEN); /* DA */
- memcpy(hdr + ETH_ALEN, hdr11->addr2, ETH_ALEN); /* SA */
- break;
- }
-
- if (ieee80211_is_data_qos(hdr11->frame_control)) {
- hdr[12] = le16_to_cpu(*((__le16 *)ieee80211_get_qos_ctl(hdr11)))
- & IEEE80211_QOS_CTL_TID_MASK;
- } else
- hdr[12] = 0; /* priority */
-
- hdr[13] = hdr[14] = hdr[15] = 0; /* reserved */
-}
-
-static int lib80211_michael_mic_add(struct sk_buff *skb, int hdr_len,
- void *priv)
-{
- struct lib80211_tkip_data *tkey = priv;
- u8 *pos;
-
- if (skb_tailroom(skb) < 8 || skb->len < hdr_len) {
- printk(KERN_DEBUG "Invalid packet for Michael MIC add "
- "(tailroom=%d hdr_len=%d skb->len=%d)\n",
- skb_tailroom(skb), hdr_len, skb->len);
- return -1;
- }
-
- michael_mic_hdr(skb, tkey->tx_hdr);
- pos = skb_put(skb, 8);
- if (michael_mic(tkey->tx_tfm_michael, &tkey->key[16], tkey->tx_hdr,
- skb->data + hdr_len, skb->len - 8 - hdr_len, pos))
- return -1;
-
- return 0;
-}
-
-static void lib80211_michael_mic_failure(struct net_device *dev,
- struct ieee80211_hdr *hdr,
- int keyidx)
-{
- union iwreq_data wrqu;
- struct iw_michaelmicfailure ev;
-
- /* TODO: needed parameters: count, keyid, key type, TSC */
- memset(&ev, 0, sizeof(ev));
- ev.flags = keyidx & IW_MICFAILURE_KEY_ID;
- if (hdr->addr1[0] & 0x01)
- ev.flags |= IW_MICFAILURE_GROUP;
- else
- ev.flags |= IW_MICFAILURE_PAIRWISE;
- ev.src_addr.sa_family = ARPHRD_ETHER;
- memcpy(ev.src_addr.sa_data, hdr->addr2, ETH_ALEN);
- memset(&wrqu, 0, sizeof(wrqu));
- wrqu.data.length = sizeof(ev);
- wireless_send_event(dev, IWEVMICHAELMICFAILURE, &wrqu, (char *)&ev);
-}
-
-static int lib80211_michael_mic_verify(struct sk_buff *skb, int keyidx,
- int hdr_len, void *priv)
-{
- struct lib80211_tkip_data *tkey = priv;
- u8 mic[8];
-
- if (!tkey->key_set)
- return -1;
-
- michael_mic_hdr(skb, tkey->rx_hdr);
- if (michael_mic(tkey->rx_tfm_michael, &tkey->key[24], tkey->rx_hdr,
- skb->data + hdr_len, skb->len - 8 - hdr_len, mic))
- return -1;
- if (memcmp(mic, skb->data + skb->len - 8, 8) != 0) {
- struct ieee80211_hdr *hdr;
- hdr = (struct ieee80211_hdr *)skb->data;
- printk(KERN_DEBUG "%s: Michael MIC verification failed for "
- "MSDU from %pM keyidx=%d\n",
- skb->dev ? skb->dev->name : "N/A", hdr->addr2,
- keyidx);
- if (skb->dev)
- lib80211_michael_mic_failure(skb->dev, hdr, keyidx);
- tkey->dot11RSNAStatsTKIPLocalMICFailures++;
- return -1;
- }
-
- /* Update TSC counters for RX now that the packet verification has
- * completed. */
- tkey->rx_iv32 = tkey->rx_iv32_new;
- tkey->rx_iv16 = tkey->rx_iv16_new;
-
- skb_trim(skb, skb->len - 8);
-
- return 0;
-}
-
-static int lib80211_tkip_set_key(void *key, int len, u8 * seq, void *priv)
-{
- struct lib80211_tkip_data *tkey = priv;
- int keyidx;
- struct crypto_shash *tfm = tkey->tx_tfm_michael;
- struct arc4_ctx *tfm2 = &tkey->tx_ctx_arc4;
- struct crypto_shash *tfm3 = tkey->rx_tfm_michael;
- struct arc4_ctx *tfm4 = &tkey->rx_ctx_arc4;
-
- keyidx = tkey->key_idx;
- memset(tkey, 0, sizeof(*tkey));
- tkey->key_idx = keyidx;
- tkey->tx_tfm_michael = tfm;
- tkey->tx_ctx_arc4 = *tfm2;
- tkey->rx_tfm_michael = tfm3;
- tkey->rx_ctx_arc4 = *tfm4;
- if (len == TKIP_KEY_LEN) {
- memcpy(tkey->key, key, TKIP_KEY_LEN);
- tkey->key_set = 1;
- tkey->tx_iv16 = 1; /* TSC is initialized to 1 */
- if (seq) {
- tkey->rx_iv32 = (seq[5] << 24) | (seq[4] << 16) |
- (seq[3] << 8) | seq[2];
- tkey->rx_iv16 = (seq[1] << 8) | seq[0];
- }
- } else if (len == 0)
- tkey->key_set = 0;
- else
- return -1;
-
- return 0;
-}
-
-static int lib80211_tkip_get_key(void *key, int len, u8 * seq, void *priv)
-{
- struct lib80211_tkip_data *tkey = priv;
-
- if (len < TKIP_KEY_LEN)
- return -1;
-
- if (!tkey->key_set)
- return 0;
- memcpy(key, tkey->key, TKIP_KEY_LEN);
-
- if (seq) {
- /*
- * Not clear if this should return the value as is
- * or - as the code previously seemed to partially
- * have been written as - subtract one from it. It
- * was working this way for a long time so leave it.
- */
- seq[0] = tkey->tx_iv16;
- seq[1] = tkey->tx_iv16 >> 8;
- seq[2] = tkey->tx_iv32;
- seq[3] = tkey->tx_iv32 >> 8;
- seq[4] = tkey->tx_iv32 >> 16;
- seq[5] = tkey->tx_iv32 >> 24;
- }
-
- return TKIP_KEY_LEN;
-}
-
-static void lib80211_tkip_print_stats(struct seq_file *m, void *priv)
-{
- struct lib80211_tkip_data *tkip = priv;
- seq_printf(m,
- "key[%d] alg=TKIP key_set=%d "
- "tx_pn=%02x%02x%02x%02x%02x%02x "
- "rx_pn=%02x%02x%02x%02x%02x%02x "
- "replays=%d icv_errors=%d local_mic_failures=%d\n",
- tkip->key_idx, tkip->key_set,
- (tkip->tx_iv32 >> 24) & 0xff,
- (tkip->tx_iv32 >> 16) & 0xff,
- (tkip->tx_iv32 >> 8) & 0xff,
- tkip->tx_iv32 & 0xff,
- (tkip->tx_iv16 >> 8) & 0xff,
- tkip->tx_iv16 & 0xff,
- (tkip->rx_iv32 >> 24) & 0xff,
- (tkip->rx_iv32 >> 16) & 0xff,
- (tkip->rx_iv32 >> 8) & 0xff,
- tkip->rx_iv32 & 0xff,
- (tkip->rx_iv16 >> 8) & 0xff,
- tkip->rx_iv16 & 0xff,
- tkip->dot11RSNAStatsTKIPReplays,
- tkip->dot11RSNAStatsTKIPICVErrors,
- tkip->dot11RSNAStatsTKIPLocalMICFailures);
-}
-
-static const struct lib80211_crypto_ops lib80211_crypt_tkip = {
- .name = "TKIP",
- .init = lib80211_tkip_init,
- .deinit = lib80211_tkip_deinit,
- .encrypt_mpdu = lib80211_tkip_encrypt,
- .decrypt_mpdu = lib80211_tkip_decrypt,
- .encrypt_msdu = lib80211_michael_mic_add,
- .decrypt_msdu = lib80211_michael_mic_verify,
- .set_key = lib80211_tkip_set_key,
- .get_key = lib80211_tkip_get_key,
- .print_stats = lib80211_tkip_print_stats,
- .extra_mpdu_prefix_len = 4 + 4, /* IV + ExtIV */
- .extra_mpdu_postfix_len = 4, /* ICV */
- .extra_msdu_postfix_len = 8, /* MIC */
- .get_flags = lib80211_tkip_get_flags,
- .set_flags = lib80211_tkip_set_flags,
- .owner = THIS_MODULE,
-};
-
-static int __init lib80211_crypto_tkip_init(void)
-{
- return lib80211_register_crypto_ops(&lib80211_crypt_tkip);
-}
-
-static void __exit lib80211_crypto_tkip_exit(void)
-{
- lib80211_unregister_crypto_ops(&lib80211_crypt_tkip);
-}
-
-module_init(lib80211_crypto_tkip_init);
-module_exit(lib80211_crypto_tkip_exit);
diff --git a/net/wireless/lib80211_crypt_wep.c b/net/wireless/lib80211_crypt_wep.c
deleted file mode 100644
index 3b148c7bef85..000000000000
--- a/net/wireless/lib80211_crypt_wep.c
+++ /dev/null
@@ -1,256 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * lib80211 crypt: host-based WEP encryption implementation for lib80211
- *
- * Copyright (c) 2002-2004, Jouni Malinen <j@w1.fi>
- * Copyright (c) 2008, John W. Linville <linville@tuxdriver.com>
- */
-
-#include <linux/err.h>
-#include <linux/fips.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/scatterlist.h>
-#include <linux/skbuff.h>
-#include <linux/mm.h>
-#include <asm/string.h>
-
-#include <net/lib80211.h>
-
-#include <crypto/arc4.h>
-#include <linux/crc32.h>
-
-MODULE_AUTHOR("Jouni Malinen");
-MODULE_DESCRIPTION("lib80211 crypt: WEP");
-MODULE_LICENSE("GPL");
-
-struct lib80211_wep_data {
- u32 iv;
-#define WEP_KEY_LEN 13
- u8 key[WEP_KEY_LEN + 1];
- u8 key_len;
- u8 key_idx;
- struct arc4_ctx tx_ctx;
- struct arc4_ctx rx_ctx;
-};
-
-static void *lib80211_wep_init(int keyidx)
-{
- struct lib80211_wep_data *priv;
-
- if (fips_enabled)
- return NULL;
-
- priv = kzalloc(sizeof(*priv), GFP_ATOMIC);
- if (priv == NULL)
- return NULL;
- priv->key_idx = keyidx;
-
- /* start WEP IV from a random value */
- get_random_bytes(&priv->iv, 4);
-
- return priv;
-}
-
-static void lib80211_wep_deinit(void *priv)
-{
- kfree_sensitive(priv);
-}
-
-/* Add WEP IV/key info to a frame that has at least 4 bytes of headroom */
-static int lib80211_wep_build_iv(struct sk_buff *skb, int hdr_len,
- u8 *key, int keylen, void *priv)
-{
- struct lib80211_wep_data *wep = priv;
- u32 klen;
- u8 *pos;
-
- if (skb_headroom(skb) < 4 || skb->len < hdr_len)
- return -1;
-
- pos = skb_push(skb, 4);
- memmove(pos, pos + 4, hdr_len);
- pos += hdr_len;
-
- klen = 3 + wep->key_len;
-
- wep->iv++;
-
- /* Fluhrer, Mantin, and Shamir have reported weaknesses in the key
- * scheduling algorithm of RC4. At least IVs (KeyByte + 3, 0xff, N)
- * can be used to speedup attacks, so avoid using them. */
- if ((wep->iv & 0xff00) == 0xff00) {
- u8 B = (wep->iv >> 16) & 0xff;
- if (B >= 3 && B < klen)
- wep->iv += 0x0100;
- }
-
- /* Prepend 24-bit IV to RC4 key and TX frame */
- *pos++ = (wep->iv >> 16) & 0xff;
- *pos++ = (wep->iv >> 8) & 0xff;
- *pos++ = wep->iv & 0xff;
- *pos++ = wep->key_idx << 6;
-
- return 0;
-}
-
-/* Perform WEP encryption on given skb that has at least 4 bytes of headroom
- * for IV and 4 bytes of tailroom for ICV. Both IV and ICV will be transmitted,
- * so the payload length increases with 8 bytes.
- *
- * WEP frame payload: IV + TX key idx, RC4(data), ICV = RC4(CRC32(data))
- */
-static int lib80211_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
-{
- struct lib80211_wep_data *wep = priv;
- u32 crc, klen, len;
- u8 *pos, *icv;
- u8 key[WEP_KEY_LEN + 3];
-
- /* other checks are in lib80211_wep_build_iv */
- if (skb_tailroom(skb) < 4)
- return -1;
-
- /* add the IV to the frame */
- if (lib80211_wep_build_iv(skb, hdr_len, NULL, 0, priv))
- return -1;
-
- /* Copy the IV into the first 3 bytes of the key */
- skb_copy_from_linear_data_offset(skb, hdr_len, key, 3);
-
- /* Copy rest of the WEP key (the secret part) */
- memcpy(key + 3, wep->key, wep->key_len);
-
- len = skb->len - hdr_len - 4;
- pos = skb->data + hdr_len + 4;
- klen = 3 + wep->key_len;
-
- /* Append little-endian CRC32 over only the data and encrypt it to produce ICV */
- crc = ~crc32_le(~0, pos, len);
- icv = skb_put(skb, 4);
- icv[0] = crc;
- icv[1] = crc >> 8;
- icv[2] = crc >> 16;
- icv[3] = crc >> 24;
-
- arc4_setkey(&wep->tx_ctx, key, klen);
- arc4_crypt(&wep->tx_ctx, pos, pos, len + 4);
-
- return 0;
-}
-
-/* Perform WEP decryption on given buffer. Buffer includes whole WEP part of
- * the frame: IV (4 bytes), encrypted payload (including SNAP header),
- * ICV (4 bytes). len includes both IV and ICV.
- *
- * Returns 0 if frame was decrypted successfully and ICV was correct and -1 on
- * failure. If frame is OK, IV and ICV will be removed.
- */
-static int lib80211_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
-{
- struct lib80211_wep_data *wep = priv;
- u32 crc, klen, plen;
- u8 key[WEP_KEY_LEN + 3];
- u8 keyidx, *pos, icv[4];
-
- if (skb->len < hdr_len + 8)
- return -1;
-
- pos = skb->data + hdr_len;
- key[0] = *pos++;
- key[1] = *pos++;
- key[2] = *pos++;
- keyidx = *pos++ >> 6;
- if (keyidx != wep->key_idx)
- return -1;
-
- klen = 3 + wep->key_len;
-
- /* Copy rest of the WEP key (the secret part) */
- memcpy(key + 3, wep->key, wep->key_len);
-
- /* Apply RC4 to data and compute CRC32 over decrypted data */
- plen = skb->len - hdr_len - 8;
-
- arc4_setkey(&wep->rx_ctx, key, klen);
- arc4_crypt(&wep->rx_ctx, pos, pos, plen + 4);
-
- crc = ~crc32_le(~0, pos, plen);
- icv[0] = crc;
- icv[1] = crc >> 8;
- icv[2] = crc >> 16;
- icv[3] = crc >> 24;
- if (memcmp(icv, pos + plen, 4) != 0) {
- /* ICV mismatch - drop frame */
- return -2;
- }
-
- /* Remove IV and ICV */
- memmove(skb->data + 4, skb->data, hdr_len);
- skb_pull(skb, 4);
- skb_trim(skb, skb->len - 4);
-
- return 0;
-}
-
-static int lib80211_wep_set_key(void *key, int len, u8 * seq, void *priv)
-{
- struct lib80211_wep_data *wep = priv;
-
- if (len < 0 || len > WEP_KEY_LEN)
- return -1;
-
- memcpy(wep->key, key, len);
- wep->key_len = len;
-
- return 0;
-}
-
-static int lib80211_wep_get_key(void *key, int len, u8 * seq, void *priv)
-{
- struct lib80211_wep_data *wep = priv;
-
- if (len < wep->key_len)
- return -1;
-
- memcpy(key, wep->key, wep->key_len);
-
- return wep->key_len;
-}
-
-static void lib80211_wep_print_stats(struct seq_file *m, void *priv)
-{
- struct lib80211_wep_data *wep = priv;
- seq_printf(m, "key[%d] alg=WEP len=%d\n", wep->key_idx, wep->key_len);
-}
-
-static const struct lib80211_crypto_ops lib80211_crypt_wep = {
- .name = "WEP",
- .init = lib80211_wep_init,
- .deinit = lib80211_wep_deinit,
- .encrypt_mpdu = lib80211_wep_encrypt,
- .decrypt_mpdu = lib80211_wep_decrypt,
- .encrypt_msdu = NULL,
- .decrypt_msdu = NULL,
- .set_key = lib80211_wep_set_key,
- .get_key = lib80211_wep_get_key,
- .print_stats = lib80211_wep_print_stats,
- .extra_mpdu_prefix_len = 4, /* IV */
- .extra_mpdu_postfix_len = 4, /* ICV */
- .owner = THIS_MODULE,
-};
-
-static int __init lib80211_crypto_wep_init(void)
-{
- return lib80211_register_crypto_ops(&lib80211_crypt_wep);
-}
-
-static void __exit lib80211_crypto_wep_exit(void)
-{
- lib80211_unregister_crypto_ops(&lib80211_crypt_wep);
-}
-
-module_init(lib80211_crypto_wep_init);
-module_exit(lib80211_crypto_wep_exit);
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 4dac81854721..a5eb92d93074 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -340,12 +340,6 @@ cfg80211_mlme_check_mlo_compat(const struct ieee80211_multi_link_elem *mle_a,
return -EINVAL;
}
- if (ieee80211_mle_get_eml_med_sync_delay((const u8 *)mle_a) !=
- ieee80211_mle_get_eml_med_sync_delay((const u8 *)mle_b)) {
- NL_SET_ERR_MSG(extack, "link EML medium sync delay mismatch");
- return -EINVAL;
- }
-
if (ieee80211_mle_get_eml_cap((const u8 *)mle_a) !=
ieee80211_mle_get_eml_cap((const u8 *)mle_b)) {
NL_SET_ERR_MSG(extack, "link EML capabilities mismatch");
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index d7d099f7118a..9d2edb71f981 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -829,6 +829,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_MLO_TTLM_DLINK] = NLA_POLICY_EXACT_LEN(sizeof(u16) * 8),
[NL80211_ATTR_MLO_TTLM_ULINK] = NLA_POLICY_EXACT_LEN(sizeof(u16) * 8),
[NL80211_ATTR_ASSOC_SPP_AMSDU] = { .type = NLA_FLAG },
+ [NL80211_ATTR_VIF_RADIO_MASK] = { .type = NLA_U32 },
};
/* policy for the key attributes */
@@ -1285,10 +1286,7 @@ static unsigned int nl80211_link_id(struct nlattr **attrs)
{
struct nlattr *linkid = attrs[NL80211_ATTR_MLO_LINK_ID];
- if (!linkid)
- return 0;
-
- return nla_get_u8(linkid);
+ return nla_get_u8_default(linkid, 0);
}
static int nl80211_link_id_or_invalid(struct nlattr **attrs)
@@ -2430,6 +2428,11 @@ static int nl80211_put_radio(struct wiphy *wiphy, struct sk_buff *msg, int idx)
if (nla_put_u32(msg, NL80211_WIPHY_RADIO_ATTR_INDEX, idx))
goto nla_put_failure;
+ if (r->antenna_mask &&
+ nla_put_u32(msg, NL80211_WIPHY_RADIO_ATTR_ANTENNA_MASK,
+ r->antenna_mask))
+ goto nla_put_failure;
+
for (i = 0; i < r->n_freq_range; i++) {
const struct wiphy_radio_freq_range *range = &r->freq_range[i];
@@ -3408,11 +3411,9 @@ static int _nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
if (attrs[NL80211_ATTR_CENTER_FREQ1]) {
chandef->center_freq1 =
nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ1]);
- if (attrs[NL80211_ATTR_CENTER_FREQ1_OFFSET])
- chandef->freq1_offset = nla_get_u32(
- attrs[NL80211_ATTR_CENTER_FREQ1_OFFSET]);
- else
- chandef->freq1_offset = 0;
+ chandef->freq1_offset =
+ nla_get_u32_default(attrs[NL80211_ATTR_CENTER_FREQ1_OFFSET],
+ 0);
}
if (attrs[NL80211_ATTR_CENTER_FREQ2])
chandef->center_freq2 =
@@ -3561,7 +3562,7 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev,
case NL80211_IFTYPE_MESH_POINT:
return cfg80211_set_mesh_channel(rdev, wdev, &chandef);
case NL80211_IFTYPE_MONITOR:
- return cfg80211_set_monitor_channel(rdev, &chandef);
+ return cfg80211_set_monitor_channel(rdev, dev, &chandef);
default:
break;
}
@@ -3996,7 +3997,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
nla_put_u32(msg, NL80211_ATTR_GENERATION,
rdev->devlist_generation ^
(cfg80211_rdev_list_generation << 2)) ||
- nla_put_u8(msg, NL80211_ATTR_4ADDR, wdev->use_4addr))
+ nla_put_u8(msg, NL80211_ATTR_4ADDR, wdev->use_4addr) ||
+ nla_put_u32(msg, NL80211_ATTR_VIF_RADIO_MASK, wdev->radio_mask))
goto nla_put_failure;
if (rdev->ops->get_channel && !wdev->valid_links) {
@@ -4199,6 +4201,7 @@ static const struct nla_policy mntr_flags_policy[NL80211_MNTR_FLAG_MAX + 1] = {
[NL80211_MNTR_FLAG_OTHER_BSS] = { .type = NLA_FLAG },
[NL80211_MNTR_FLAG_COOK_FRAMES] = { .type = NLA_FLAG },
[NL80211_MNTR_FLAG_ACTIVE] = { .type = NLA_FLAG },
+ [NL80211_MNTR_FLAG_SKIP_TX] = { .type = NLA_FLAG },
};
static int parse_monitor_flags(struct nlattr *nla, u32 *mntrflags)
@@ -4312,6 +4315,29 @@ static int nl80211_valid_4addr(struct cfg80211_registered_device *rdev,
return -EOPNOTSUPP;
}
+static int nl80211_parse_vif_radio_mask(struct genl_info *info,
+ u32 *radio_mask)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct nlattr *attr = info->attrs[NL80211_ATTR_VIF_RADIO_MASK];
+ u32 mask, allowed;
+
+ if (!attr) {
+ *radio_mask = 0;
+ return 0;
+ }
+
+ allowed = BIT(rdev->wiphy.n_radio) - 1;
+ mask = nla_get_u32(attr);
+ if (mask & ~allowed)
+ return -EINVAL;
+ if (!mask)
+ mask = allowed;
+ *radio_mask = mask;
+
+ return 1;
+}
+
static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -4319,6 +4345,8 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
int err;
enum nl80211_iftype otype, ntype;
struct net_device *dev = info->user_ptr[1];
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ u32 radio_mask = 0;
bool change = false;
memset(&params, 0, sizeof(params));
@@ -4332,8 +4360,6 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
}
if (info->attrs[NL80211_ATTR_MESH_ID]) {
- struct wireless_dev *wdev = dev->ieee80211_ptr;
-
if (ntype != NL80211_IFTYPE_MESH_POINT)
return -EINVAL;
if (otype != NL80211_IFTYPE_MESH_POINT)
@@ -4364,6 +4390,12 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
if (err > 0)
change = true;
+ err = nl80211_parse_vif_radio_mask(info, &radio_mask);
+ if (err < 0)
+ return err;
+ if (err && netif_running(dev))
+ return -EBUSY;
+
if (change)
err = cfg80211_change_iface(rdev, dev, ntype, &params);
else
@@ -4372,11 +4404,11 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
if (!err && params.use_4addr != -1)
dev->ieee80211_ptr->use_4addr = params.use_4addr;
- if (change && !err) {
- struct wireless_dev *wdev = dev->ieee80211_ptr;
+ if (radio_mask)
+ wdev->radio_mask = radio_mask;
+ if (change && !err)
nl80211_notify_iface(rdev, wdev, NL80211_CMD_SET_INTERFACE);
- }
return err;
}
@@ -4387,6 +4419,7 @@ static int _nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
struct vif_params params;
struct wireless_dev *wdev;
struct sk_buff *msg;
+ u32 radio_mask;
int err;
enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED;
@@ -4424,6 +4457,10 @@ static int _nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
if (err < 0)
return err;
+ err = nl80211_parse_vif_radio_mask(info, &radio_mask);
+ if (err < 0)
+ return err;
+
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return -ENOMEM;
@@ -4465,6 +4502,9 @@ static int _nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
break;
}
+ if (radio_mask)
+ wdev->radio_mask = radio_mask;
+
if (nl80211_send_iface(msg, info->snd_portid, info->snd_seq, 0,
rdev, wdev, NL80211_CMD_NEW_INTERFACE) < 0) {
nlmsg_free(msg);
@@ -6078,6 +6118,10 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
!info->attrs[NL80211_ATTR_BEACON_HEAD])
return -EINVAL;
+ if (info->attrs[NL80211_ATTR_SMPS_MODE] &&
+ nla_get_u8(info->attrs[NL80211_ATTR_SMPS_MODE]) != NL80211_SMPS_OFF)
+ return -EOPNOTSUPP;
+
params = kzalloc(sizeof(*params), GFP_KERNEL);
if (!params)
return -ENOMEM;
@@ -6227,34 +6271,6 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
goto out;
}
- if (info->attrs[NL80211_ATTR_SMPS_MODE]) {
- params->smps_mode =
- nla_get_u8(info->attrs[NL80211_ATTR_SMPS_MODE]);
- switch (params->smps_mode) {
- case NL80211_SMPS_OFF:
- break;
- case NL80211_SMPS_STATIC:
- if (!(rdev->wiphy.features &
- NL80211_FEATURE_STATIC_SMPS)) {
- err = -EINVAL;
- goto out;
- }
- break;
- case NL80211_SMPS_DYNAMIC:
- if (!(rdev->wiphy.features &
- NL80211_FEATURE_DYNAMIC_SMPS)) {
- err = -EINVAL;
- goto out;
- }
- break;
- default:
- err = -EINVAL;
- goto out;
- }
- } else {
- params->smps_mode = NL80211_SMPS_OFF;
- }
-
params->pbss = nla_get_flag(info->attrs[NL80211_ATTR_PBSS]);
if (params->pbss && !rdev->wiphy.bands[NL80211_BAND_60GHZ]) {
err = -EOPNOTSUPP;
@@ -8244,11 +8260,9 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
if (unlikely(!rcu_access_pointer(cfg80211_regdomain)))
return -EINPROGRESS;
- if (info->attrs[NL80211_ATTR_USER_REG_HINT_TYPE])
- user_reg_hint_type =
- nla_get_u32(info->attrs[NL80211_ATTR_USER_REG_HINT_TYPE]);
- else
- user_reg_hint_type = NL80211_USER_REG_HINT_USER;
+ user_reg_hint_type =
+ nla_get_u32_default(info->attrs[NL80211_ATTR_USER_REG_HINT_TYPE],
+ NL80211_USER_REG_HINT_USER);
switch (user_reg_hint_type) {
case NL80211_USER_REG_HINT_USER:
@@ -9180,6 +9194,9 @@ static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev,
lockdep_assert_wiphy(wdev->wiphy);
+ if (!cfg80211_wdev_channel_allowed(wdev, chan))
+ return false;
+
if (!cfg80211_beaconing_iface_active(wdev))
return true;
@@ -9392,7 +9409,8 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
}
/* ignore disabled channels */
- if (chan->flags & IEEE80211_CHAN_DISABLED)
+ if (chan->flags & IEEE80211_CHAN_DISABLED ||
+ !cfg80211_wdev_channel_allowed(wdev, chan))
continue;
request->channels[i] = chan;
@@ -9412,7 +9430,8 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
chan = &wiphy->bands[band]->channels[j];
- if (chan->flags & IEEE80211_CHAN_DISABLED)
+ if (chan->flags & IEEE80211_CHAN_DISABLED ||
+ !cfg80211_wdev_channel_allowed(wdev, chan))
continue;
request->channels[i] = chan;
@@ -9776,6 +9795,7 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
request = kzalloc(size, GFP_KERNEL);
if (!request)
return ERR_PTR(-ENOMEM);
+ request->n_channels = n_channels;
if (n_ssids)
request->ssids = (void *)request +
@@ -11061,11 +11081,9 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
nla_len(info->attrs[NL80211_ATTR_SAE_PASSWORD]);
}
- if (info->attrs[NL80211_ATTR_SAE_PWE])
- settings->sae_pwe =
- nla_get_u8(info->attrs[NL80211_ATTR_SAE_PWE]);
- else
- settings->sae_pwe = NL80211_SAE_PWE_UNSPECIFIED;
+ settings->sae_pwe =
+ nla_get_u8_default(info->attrs[NL80211_ATTR_SAE_PWE],
+ NL80211_SAE_PWE_UNSPECIFIED);
return 0;
}
@@ -12321,10 +12339,8 @@ static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info)
dev->ieee80211_ptr->conn_owner_nlportid != info->snd_portid)
return -EPERM;
- if (!info->attrs[NL80211_ATTR_REASON_CODE])
- reason = WLAN_REASON_DEAUTH_LEAVING;
- else
- reason = nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]);
+ reason = nla_get_u16_default(info->attrs[NL80211_ATTR_REASON_CODE],
+ WLAN_REASON_DEAUTH_LEAVING);
if (reason == 0)
return -EINVAL;
@@ -12446,7 +12462,7 @@ static int nl80211_del_pmksa(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_MAC]) {
pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
} else if (info->attrs[NL80211_ATTR_SSID]) {
- /* SSID based pmksa flush suppported only for FILS,
+ /* SSID based pmksa flush supported only for FILS,
* OWE/SAE OFFLOAD cases
*/
if (info->attrs[NL80211_ATTR_FILS_CACHE_ID] &&
@@ -13670,10 +13686,7 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev,
cfg->dst = nla_get_in_addr(tb[NL80211_WOWLAN_TCP_DST_IPV4]);
memcpy(cfg->dst_mac, nla_data(tb[NL80211_WOWLAN_TCP_DST_MAC]),
ETH_ALEN);
- if (tb[NL80211_WOWLAN_TCP_SRC_PORT])
- port = nla_get_u16(tb[NL80211_WOWLAN_TCP_SRC_PORT]);
- else
- port = 0;
+ port = nla_get_u16_default(tb[NL80211_WOWLAN_TCP_SRC_PORT], 0);
#ifdef CONFIG_INET
/* allocate a socket and port for it and use it */
err = __sock_create(wiphy_net(&rdev->wiphy), PF_INET, SOCK_STREAM,
@@ -13884,11 +13897,9 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
pat_len < wowlan->pattern_min_len)
goto error;
- if (!pat_tb[NL80211_PKTPAT_OFFSET])
- pkt_offset = 0;
- else
- pkt_offset = nla_get_u32(
- pat_tb[NL80211_PKTPAT_OFFSET]);
+ pkt_offset =
+ nla_get_u32_default(pat_tb[NL80211_PKTPAT_OFFSET],
+ 0);
if (pkt_offset > wowlan->max_pkt_offset)
goto error;
new_triggers.patterns[i].pkt_offset = pkt_offset;
@@ -14061,8 +14072,6 @@ void cfg80211_free_coalesce(struct cfg80211_coalesce *coalesce)
for (i = 0; i < coalesce->n_rules; i++) {
rule = &coalesce->rules[i];
- if (!rule)
- continue;
for (j = 0; j < rule->n_patterns; j++)
kfree(rule->patterns[j].mask);
kfree(rule->patterns);
@@ -14134,10 +14143,8 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
pat_len < coalesce->pattern_min_len)
return -EINVAL;
- if (!pat_tb[NL80211_PKTPAT_OFFSET])
- pkt_offset = 0;
- else
- pkt_offset = nla_get_u32(pat_tb[NL80211_PKTPAT_OFFSET]);
+ pkt_offset = nla_get_u32_default(pat_tb[NL80211_PKTPAT_OFFSET],
+ 0);
if (pkt_offset > coalesce->max_pkt_offset)
return -EINVAL;
new_rule->patterns[i].pkt_offset = pkt_offset;
@@ -15498,7 +15505,7 @@ static int nl80211_add_tx_ts(struct sk_buff *skb, struct genl_info *info)
if (tsid >= IEEE80211_FIRST_TSPEC_TSID) {
/* TODO: handle 802.11 TSPEC/admission control
* need more attributes for that (e.g. BA session requirement);
- * change the WMM adminssion test above to allow both then
+ * change the WMM admission test above to allow both then
*/
return -EINVAL;
}
diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c
index b7e3e46ec16d..326faea38ca3 100644
--- a/net/wireless/radiotap.c
+++ b/net/wireless/radiotap.c
@@ -200,7 +200,7 @@ static void find_ns(struct ieee80211_radiotap_iterator *iterator,
* present fields. @this_arg can be changed by the caller (eg,
* incremented to move inside a compound argument like
* IEEE80211_RADIOTAP_CHANNEL). The args pointed to are in
- * little-endian format whatever the endianess of your CPU.
+ * little-endian format whatever the endianness of your CPU.
*
* Alignment Gotcha:
* You must take care when dereferencing iterator.this_arg
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index f5adbf6b5c84..adb6105bbb7d 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -445,11 +445,12 @@ rdev_libertas_set_mesh_channel(struct cfg80211_registered_device *rdev,
static inline int
rdev_set_monitor_channel(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
struct cfg80211_chan_def *chandef)
{
int ret;
- trace_rdev_set_monitor_channel(&rdev->wiphy, chandef);
- ret = rdev->ops->set_monitor_channel(&rdev->wiphy, chandef);
+ trace_rdev_set_monitor_channel(&rdev->wiphy, dev, chandef);
+ ret = rdev->ops->set_monitor_channel(&rdev->wiphy, dev, chandef);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 6489ba943a63..1df65a5a44f7 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1147,7 +1147,7 @@ static const struct ieee80211_regdomain *reg_get_regdomain(struct wiphy *wiphy)
/*
* Follow the driver's regulatory domain, if present, unless a country
- * IE has been processed or a user wants to help complaince further
+ * IE has been processed or a user wants to help compliance further
*/
if (lr->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE &&
lr->initiator != NL80211_REGDOM_SET_BY_USER &&
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index d0aed41ded2f..1c6fd45aa809 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -956,7 +956,8 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
struct ieee80211_channel *chan =
ieee80211_get_channel(&rdev->wiphy, ap->center_freq);
- if (!chan || chan->flags & IEEE80211_CHAN_DISABLED)
+ if (!chan || chan->flags & IEEE80211_CHAN_DISABLED ||
+ !cfg80211_wdev_channel_allowed(rdev_req->wdev, chan))
continue;
for (i = 0; i < rdev_req->n_channels; i++) {
@@ -3519,9 +3520,12 @@ int cfg80211_wext_siwscan(struct net_device *dev,
continue;
for (j = 0; j < wiphy->bands[band]->n_channels; j++) {
+ struct ieee80211_channel *chan;
+
/* ignore disabled channels */
- if (wiphy->bands[band]->channels[j].flags &
- IEEE80211_CHAN_DISABLED)
+ chan = &wiphy->bands[band]->channels[j];
+ if (chan->flags & IEEE80211_CHAN_DISABLED ||
+ !cfg80211_wdev_channel_allowed(creq->wdev, chan))
continue;
/* If we have a wireless request structure and the
@@ -3598,7 +3602,6 @@ int cfg80211_wext_siwscan(struct net_device *dev,
kfree(creq);
return err;
}
-EXPORT_WEXT_HANDLER(cfg80211_wext_siwscan);
static char *ieee80211_scan_add_ies(struct iw_request_info *info,
const struct cfg80211_bss_ies *ies,
@@ -3970,5 +3973,4 @@ int cfg80211_wext_giwscan(struct net_device *dev,
return res;
}
-EXPORT_WEXT_HANDLER(cfg80211_wext_giwscan);
#endif
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 97c21b627791..d5c9bb614fa6 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1318,19 +1318,21 @@ TRACE_EVENT(rdev_libertas_set_mesh_channel,
);
TRACE_EVENT(rdev_set_monitor_channel,
- TP_PROTO(struct wiphy *wiphy,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
struct cfg80211_chan_def *chandef),
- TP_ARGS(wiphy, chandef),
+ TP_ARGS(wiphy, netdev, chandef),
TP_STRUCT__entry(
WIPHY_ENTRY
+ NETDEV_ENTRY
CHAN_DEF_ENTRY
),
TP_fast_assign(
WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
CHAN_DEF_ASSIGN(chandef);
),
- TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT,
- WIPHY_PR_ARG, CHAN_DEF_PR_ARG)
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT,
+ WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG)
);
TRACE_EVENT(rdev_auth,
diff --git a/net/wireless/util.c b/net/wireless/util.c
index f49b55724f83..040d62051eb9 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -743,7 +743,7 @@ __ieee80211_amsdu_copy(struct sk_buff *skb, unsigned int hlen,
return NULL;
/*
- * When reusing framents, copy some data to the head to simplify
+ * When reusing fragments, copy some data to the head to simplify
* ethernet header handling and speed up protocol header processing
* in the stack later.
*/
@@ -2923,3 +2923,32 @@ bool cfg80211_radio_chandef_valid(const struct wiphy_radio *radio,
return true;
}
EXPORT_SYMBOL(cfg80211_radio_chandef_valid);
+
+bool cfg80211_wdev_channel_allowed(struct wireless_dev *wdev,
+ struct ieee80211_channel *chan)
+{
+ struct wiphy *wiphy = wdev->wiphy;
+ const struct wiphy_radio *radio;
+ struct cfg80211_chan_def chandef;
+ u32 radio_mask;
+ int i;
+
+ radio_mask = wdev->radio_mask;
+ if (!wiphy->n_radio || radio_mask == BIT(wiphy->n_radio) - 1)
+ return true;
+
+ cfg80211_chandef_create(&chandef, chan, NL80211_CHAN_HT20);
+ for (i = 0; i < wiphy->n_radio; i++) {
+ if (!(radio_mask & BIT(i)))
+ continue;
+
+ radio = &wiphy->radio[i];
+ if (!cfg80211_radio_chandef_valid(radio, &chandef))
+ continue;
+
+ return true;
+ }
+
+ return false;
+}
+EXPORT_SYMBOL(cfg80211_wdev_channel_allowed);
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 2371069f3c43..90d5c0592667 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -30,7 +30,6 @@ int cfg80211_wext_giwname(struct net_device *dev,
strcpy(wrqu->name, "IEEE 802.11");
return 0;
}
-EXPORT_WEXT_HANDLER(cfg80211_wext_giwname);
int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info,
union iwreq_data *wrqu, char *extra)
@@ -69,7 +68,6 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info,
return ret;
}
-EXPORT_WEXT_HANDLER(cfg80211_wext_siwmode);
int cfg80211_wext_giwmode(struct net_device *dev, struct iw_request_info *info,
union iwreq_data *wrqu, char *extra)
@@ -105,7 +103,6 @@ int cfg80211_wext_giwmode(struct net_device *dev, struct iw_request_info *info,
}
return 0;
}
-EXPORT_WEXT_HANDLER(cfg80211_wext_giwmode);
int cfg80211_wext_giwrange(struct net_device *dev,
@@ -220,7 +217,6 @@ int cfg80211_wext_giwrange(struct net_device *dev,
return 0;
}
-EXPORT_WEXT_HANDLER(cfg80211_wext_giwrange);
/**
@@ -281,7 +277,6 @@ out:
wiphy_unlock(&rdev->wiphy);
return err;
}
-EXPORT_WEXT_HANDLER(cfg80211_wext_siwrts);
int cfg80211_wext_giwrts(struct net_device *dev,
struct iw_request_info *info,
@@ -296,7 +291,6 @@ int cfg80211_wext_giwrts(struct net_device *dev,
return 0;
}
-EXPORT_WEXT_HANDLER(cfg80211_wext_giwrts);
int cfg80211_wext_siwfrag(struct net_device *dev,
struct iw_request_info *info,
@@ -327,7 +321,6 @@ out:
return err;
}
-EXPORT_WEXT_HANDLER(cfg80211_wext_siwfrag);
int cfg80211_wext_giwfrag(struct net_device *dev,
struct iw_request_info *info,
@@ -342,7 +335,6 @@ int cfg80211_wext_giwfrag(struct net_device *dev,
return 0;
}
-EXPORT_WEXT_HANDLER(cfg80211_wext_giwfrag);
static int cfg80211_wext_siwretry(struct net_device *dev,
struct iw_request_info *info,
@@ -413,7 +405,6 @@ int cfg80211_wext_giwretry(struct net_device *dev,
return 0;
}
-EXPORT_WEXT_HANDLER(cfg80211_wext_giwretry);
static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
struct net_device *dev, bool pairwise,
@@ -830,7 +821,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev,
ret = -EINVAL;
break;
}
- ret = cfg80211_set_monitor_channel(rdev, &chandef);
+ ret = cfg80211_set_monitor_channel(rdev, dev, &chandef);
break;
case NL80211_IFTYPE_MESH_POINT:
freq = cfg80211_wext_freq(wextfreq);
@@ -1204,7 +1195,7 @@ static int cfg80211_wext_siwpower(struct net_device *dev,
switch (wrq->flags & IW_POWER_MODE) {
case IW_POWER_ON: /* If not specified */
case IW_POWER_MODE: /* If set all mask */
- case IW_POWER_ALL_R: /* If explicitely state all */
+ case IW_POWER_ALL_R: /* If explicitly state all */
ps = true;
break;
default: /* Otherwise we ignore */
diff --git a/net/wireless/wext-compat.h b/net/wireless/wext-compat.h
index c02eb789e676..8251ca5df8ae 100644
--- a/net/wireless/wext-compat.h
+++ b/net/wireless/wext-compat.h
@@ -5,12 +5,6 @@
#include <net/iw_handler.h>
#include <linux/wireless.h>
-#ifdef CONFIG_CFG80211_WEXT_EXPORT
-#define EXPORT_WEXT_HANDLER(h) EXPORT_SYMBOL_GPL(h)
-#else
-#define EXPORT_WEXT_HANDLER(h)
-#endif /* CONFIG_CFG80211_WEXT_EXPORT */
-
int cfg80211_ibss_wext_siwfreq(struct net_device *dev,
struct iw_request_info *info,
struct iw_freq *wextfreq, char *extra);
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index 838ad6541a17..3bb04b05c5ce 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -1159,7 +1159,7 @@ char *iwe_stream_add_event(struct iw_request_info *info, char *stream,
/* Check if it's possible */
if (likely((stream + event_len) < ends)) {
iwe->len = event_len;
- /* Beware of alignement issues on 64 bits */
+ /* Beware of alignment issues on 64 bits */
memcpy(stream, (char *) iwe, IW_EV_LCP_PK_LEN);
memcpy(stream + lcp_len, &iwe->u,
event_len - lcp_len);
diff --git a/net/wireless/wext-spy.c b/net/wireless/wext-spy.c
deleted file mode 100644
index b379a0371653..000000000000
--- a/net/wireless/wext-spy.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * This file implement the Wireless Extensions spy API.
- *
- * Authors : Jean Tourrilhes - HPL - <jt@hpl.hp.com>
- * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved.
- *
- * (As all part of the Linux kernel, this file is GPL)
- */
-
-#include <linux/wireless.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/export.h>
-#include <net/iw_handler.h>
-#include <net/arp.h>
-#include <net/wext.h>
-
-static inline struct iw_spy_data *get_spydata(struct net_device *dev)
-{
- /* This is the new way */
- if (dev->wireless_data)
- return dev->wireless_data->spy_data;
- return NULL;
-}
-
-int iw_handler_set_spy(struct net_device * dev,
- struct iw_request_info * info,
- union iwreq_data * wrqu,
- char * extra)
-{
- struct iw_spy_data * spydata = get_spydata(dev);
- struct sockaddr * address = (struct sockaddr *) extra;
-
- /* Make sure driver is not buggy or using the old API */
- if (!spydata)
- return -EOPNOTSUPP;
-
- /* Disable spy collection while we copy the addresses.
- * While we copy addresses, any call to wireless_spy_update()
- * will NOP. This is OK, as anyway the addresses are changing. */
- spydata->spy_number = 0;
-
- /* We want to operate without locking, because wireless_spy_update()
- * most likely will happen in the interrupt handler, and therefore
- * have its own locking constraints and needs performance.
- * The rtnl_lock() make sure we don't race with the other iw_handlers.
- * This make sure wireless_spy_update() "see" that the spy list
- * is temporarily disabled. */
- smp_wmb();
-
- /* Are there are addresses to copy? */
- if (wrqu->data.length > 0) {
- int i;
-
- /* Copy addresses */
- for (i = 0; i < wrqu->data.length; i++)
- memcpy(spydata->spy_address[i], address[i].sa_data,
- ETH_ALEN);
- /* Reset stats */
- memset(spydata->spy_stat, 0,
- sizeof(struct iw_quality) * IW_MAX_SPY);
- }
-
- /* Make sure above is updated before re-enabling */
- smp_wmb();
-
- /* Enable addresses */
- spydata->spy_number = wrqu->data.length;
-
- return 0;
-}
-EXPORT_SYMBOL(iw_handler_set_spy);
-
-int iw_handler_get_spy(struct net_device * dev,
- struct iw_request_info * info,
- union iwreq_data * wrqu,
- char * extra)
-{
- struct iw_spy_data * spydata = get_spydata(dev);
- struct sockaddr * address = (struct sockaddr *) extra;
- int i;
-
- /* Make sure driver is not buggy or using the old API */
- if (!spydata)
- return -EOPNOTSUPP;
-
- wrqu->data.length = spydata->spy_number;
-
- /* Copy addresses. */
- for (i = 0; i < spydata->spy_number; i++) {
- memcpy(address[i].sa_data, spydata->spy_address[i], ETH_ALEN);
- address[i].sa_family = AF_UNIX;
- }
- /* Copy stats to the user buffer (just after). */
- if (spydata->spy_number > 0)
- memcpy(extra + (sizeof(struct sockaddr) *spydata->spy_number),
- spydata->spy_stat,
- sizeof(struct iw_quality) * spydata->spy_number);
- /* Reset updated flags. */
- for (i = 0; i < spydata->spy_number; i++)
- spydata->spy_stat[i].updated &= ~IW_QUAL_ALL_UPDATED;
- return 0;
-}
-EXPORT_SYMBOL(iw_handler_get_spy);
-
-/*------------------------------------------------------------------*/
-/*
- * Standard Wireless Handler : set spy threshold
- */
-int iw_handler_set_thrspy(struct net_device * dev,
- struct iw_request_info *info,
- union iwreq_data * wrqu,
- char * extra)
-{
- struct iw_spy_data * spydata = get_spydata(dev);
- struct iw_thrspy * threshold = (struct iw_thrspy *) extra;
-
- /* Make sure driver is not buggy or using the old API */
- if (!spydata)
- return -EOPNOTSUPP;
-
- /* Just do it */
- spydata->spy_thr_low = threshold->low;
- spydata->spy_thr_high = threshold->high;
-
- /* Clear flag */
- memset(spydata->spy_thr_under, '\0', sizeof(spydata->spy_thr_under));
-
- return 0;
-}
-EXPORT_SYMBOL(iw_handler_set_thrspy);
-
-/*------------------------------------------------------------------*/
-/*
- * Standard Wireless Handler : get spy threshold
- */
-int iw_handler_get_thrspy(struct net_device * dev,
- struct iw_request_info *info,
- union iwreq_data * wrqu,
- char * extra)
-{
- struct iw_spy_data * spydata = get_spydata(dev);
- struct iw_thrspy * threshold = (struct iw_thrspy *) extra;
-
- /* Make sure driver is not buggy or using the old API */
- if (!spydata)
- return -EOPNOTSUPP;
-
- /* Just do it */
- threshold->low = spydata->spy_thr_low;
- threshold->high = spydata->spy_thr_high;
-
- return 0;
-}
-EXPORT_SYMBOL(iw_handler_get_thrspy);
-
-/*------------------------------------------------------------------*/
-/*
- * Prepare and send a Spy Threshold event
- */
-static void iw_send_thrspy_event(struct net_device * dev,
- struct iw_spy_data * spydata,
- unsigned char * address,
- struct iw_quality * wstats)
-{
- union iwreq_data wrqu;
- struct iw_thrspy threshold;
-
- /* Init */
- wrqu.data.length = 1;
- wrqu.data.flags = 0;
- /* Copy address */
- memcpy(threshold.addr.sa_data, address, ETH_ALEN);
- threshold.addr.sa_family = ARPHRD_ETHER;
- /* Copy stats */
- threshold.qual = *wstats;
- /* Copy also thresholds */
- threshold.low = spydata->spy_thr_low;
- threshold.high = spydata->spy_thr_high;
-
- /* Send event to user space */
- wireless_send_event(dev, SIOCGIWTHRSPY, &wrqu, (char *) &threshold);
-}
-
-/* ---------------------------------------------------------------- */
-/*
- * Call for the driver to update the spy data.
- * For now, the spy data is a simple array. As the size of the array is
- * small, this is good enough. If we wanted to support larger number of
- * spy addresses, we should use something more efficient...
- */
-void wireless_spy_update(struct net_device * dev,
- unsigned char * address,
- struct iw_quality * wstats)
-{
- struct iw_spy_data * spydata = get_spydata(dev);
- int i;
- int match = -1;
-
- /* Make sure driver is not buggy or using the old API */
- if (!spydata)
- return;
-
- /* Update all records that match */
- for (i = 0; i < spydata->spy_number; i++)
- if (ether_addr_equal(address, spydata->spy_address[i])) {
- memcpy(&(spydata->spy_stat[i]), wstats,
- sizeof(struct iw_quality));
- match = i;
- }
-
- /* Generate an event if we cross the spy threshold.
- * To avoid event storms, we have a simple hysteresis : we generate
- * event only when we go under the low threshold or above the
- * high threshold. */
- if (match >= 0) {
- if (spydata->spy_thr_under[match]) {
- if (wstats->level > spydata->spy_thr_high.level) {
- spydata->spy_thr_under[match] = 0;
- iw_send_thrspy_event(dev, spydata,
- address, wstats);
- }
- } else {
- if (wstats->level < spydata->spy_thr_low.level) {
- spydata->spy_thr_under[match] = 1;
- iw_send_thrspy_event(dev, spydata,
- address, wstats);
- }
- }
- }
-}
-EXPORT_SYMBOL(wireless_spy_update);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 1140b2a120ca..3fa70286c846 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -141,7 +141,7 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff_xsk *xskb, u32 len,
u64 addr;
int err;
- addr = xp_get_handle(xskb);
+ addr = xp_get_handle(xskb, xskb->pool);
err = xskq_prod_reserve_desc(xs->rx, addr, len, flags);
if (err) {
xs->rx_queue_full++;
@@ -171,14 +171,14 @@ static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
return 0;
xskb_list = &xskb->pool->xskb_list;
- list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) {
+ list_for_each_entry_safe(pos, tmp, xskb_list, list_node) {
if (list_is_singular(xskb_list))
contd = 0;
len = pos->xdp.data_end - pos->xdp.data;
err = __xsk_rcv_zc(xs, pos, len, contd);
if (err)
goto err;
- list_del(&pos->xskb_list_node);
+ list_del(&pos->list_node);
}
return 0;
@@ -527,34 +527,34 @@ static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
}
-static int xsk_cq_reserve_addr_locked(struct xdp_sock *xs, u64 addr)
+static int xsk_cq_reserve_addr_locked(struct xsk_buff_pool *pool, u64 addr)
{
unsigned long flags;
int ret;
- spin_lock_irqsave(&xs->pool->cq_lock, flags);
- ret = xskq_prod_reserve_addr(xs->pool->cq, addr);
- spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+ spin_lock_irqsave(&pool->cq_lock, flags);
+ ret = xskq_prod_reserve_addr(pool->cq, addr);
+ spin_unlock_irqrestore(&pool->cq_lock, flags);
return ret;
}
-static void xsk_cq_submit_locked(struct xdp_sock *xs, u32 n)
+static void xsk_cq_submit_locked(struct xsk_buff_pool *pool, u32 n)
{
unsigned long flags;
- spin_lock_irqsave(&xs->pool->cq_lock, flags);
- xskq_prod_submit_n(xs->pool->cq, n);
- spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+ spin_lock_irqsave(&pool->cq_lock, flags);
+ xskq_prod_submit_n(pool->cq, n);
+ spin_unlock_irqrestore(&pool->cq_lock, flags);
}
-static void xsk_cq_cancel_locked(struct xdp_sock *xs, u32 n)
+static void xsk_cq_cancel_locked(struct xsk_buff_pool *pool, u32 n)
{
unsigned long flags;
- spin_lock_irqsave(&xs->pool->cq_lock, flags);
- xskq_prod_cancel_n(xs->pool->cq, n);
- spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+ spin_lock_irqsave(&pool->cq_lock, flags);
+ xskq_prod_cancel_n(pool->cq, n);
+ spin_unlock_irqrestore(&pool->cq_lock, flags);
}
static u32 xsk_get_num_desc(struct sk_buff *skb)
@@ -571,7 +571,7 @@ static void xsk_destruct_skb(struct sk_buff *skb)
*compl->tx_timestamp = ktime_get_tai_fast_ns();
}
- xsk_cq_submit_locked(xdp_sk(skb->sk), xsk_get_num_desc(skb));
+ xsk_cq_submit_locked(xdp_sk(skb->sk)->pool, xsk_get_num_desc(skb));
sock_wfree(skb);
}
@@ -587,7 +587,7 @@ static void xsk_consume_skb(struct sk_buff *skb)
struct xdp_sock *xs = xdp_sk(skb->sk);
skb->destructor = sock_wfree;
- xsk_cq_cancel_locked(xs, xsk_get_num_desc(skb));
+ xsk_cq_cancel_locked(xs->pool, xsk_get_num_desc(skb));
/* Free skb without triggering the perf drop trace */
consume_skb(skb);
xs->skb = NULL;
@@ -675,6 +675,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
len = desc->len;
if (!skb) {
+ first_frag = true;
+
hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom));
tr = dev->needed_tailroom;
skb = sock_alloc_send_skb(&xs->sk, hr + len + tr, 1, &err);
@@ -685,12 +687,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
skb_put(skb, len);
err = skb_store_bits(skb, 0, buffer, len);
- if (unlikely(err)) {
- kfree_skb(skb);
+ if (unlikely(err))
goto free_err;
- }
-
- first_frag = true;
} else {
int nr_frags = skb_shinfo(skb)->nr_frags;
struct page *page;
@@ -758,6 +756,9 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
return skb;
free_err:
+ if (first_frag && skb)
+ kfree_skb(skb);
+
if (err == -EOVERFLOW) {
/* Drop the packet */
xsk_set_destructor_arg(xs->skb);
@@ -765,7 +766,7 @@ free_err:
xskq_cons_release(xs->tx);
} else {
/* Let application retry */
- xsk_cq_cancel_locked(xs, 1);
+ xsk_cq_cancel_locked(xs->pool, 1);
}
return ERR_PTR(err);
@@ -802,7 +803,7 @@ static int __xsk_generic_xmit(struct sock *sk)
* if there is space in it. This avoids having to implement
* any buffering in the Tx path.
*/
- if (xsk_cq_reserve_addr_locked(xs, desc.addr))
+ if (xsk_cq_reserve_addr_locked(xs->pool, desc.addr))
goto out;
skb = xsk_build_skb(xs, &desc);
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 521a2938e50a..ae71da7d2cd6 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -101,8 +101,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
xskb = &pool->heads[i];
xskb->pool = pool;
xskb->xdp.frame_sz = umem->chunk_size - umem->headroom;
- INIT_LIST_HEAD(&xskb->free_list_node);
- INIT_LIST_HEAD(&xskb->xskb_list_node);
+ INIT_LIST_HEAD(&xskb->list_node);
if (pool->unaligned)
pool->free_heads[i] = xskb;
else
@@ -230,6 +229,7 @@ int xp_assign_dev(struct xsk_buff_pool *pool,
goto err_unreg_xsk;
}
pool->umem->zc = true;
+ pool->xdp_zc_max_segs = netdev->xdp_zc_max_segs;
return 0;
err_unreg_xsk:
@@ -417,8 +417,10 @@ static int xp_init_dma_info(struct xsk_buff_pool *pool, struct xsk_dma_map *dma_
for (i = 0; i < pool->heads_cnt; i++) {
struct xdp_buff_xsk *xskb = &pool->heads[i];
+ u64 orig_addr;
- xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, xskb->orig_addr);
+ orig_addr = xskb->xdp.data_hard_start - pool->addrs - pool->headroom;
+ xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, orig_addr);
}
}
@@ -501,6 +503,22 @@ static bool xp_check_aligned(struct xsk_buff_pool *pool, u64 *addr)
return *addr < pool->addrs_cnt;
}
+static struct xdp_buff_xsk *xp_get_xskb(struct xsk_buff_pool *pool, u64 addr)
+{
+ struct xdp_buff_xsk *xskb;
+
+ if (pool->unaligned) {
+ xskb = pool->free_heads[--pool->free_heads_cnt];
+ xp_init_xskb_addr(xskb, pool, addr);
+ if (pool->dma_pages)
+ xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
+ } else {
+ xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
+ }
+
+ return xskb;
+}
+
static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool)
{
struct xdp_buff_xsk *xskb;
@@ -526,14 +544,7 @@ static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool)
break;
}
- if (pool->unaligned) {
- xskb = pool->free_heads[--pool->free_heads_cnt];
- xp_init_xskb_addr(xskb, pool, addr);
- if (pool->dma_pages)
- xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
- } else {
- xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
- }
+ xskb = xp_get_xskb(pool, addr);
xskq_cons_release(pool->fq);
return xskb;
@@ -550,8 +561,8 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
} else {
pool->free_list_cnt--;
xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk,
- free_list_node);
- list_del_init(&xskb->free_list_node);
+ list_node);
+ list_del_init(&xskb->list_node);
}
xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM;
@@ -591,14 +602,7 @@ static u32 xp_alloc_new_from_fq(struct xsk_buff_pool *pool, struct xdp_buff **xd
continue;
}
- if (pool->unaligned) {
- xskb = pool->free_heads[--pool->free_heads_cnt];
- xp_init_xskb_addr(xskb, pool, addr);
- if (pool->dma_pages)
- xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
- } else {
- xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
- }
+ xskb = xp_get_xskb(pool, addr);
*xdp = &xskb->xdp;
xdp++;
@@ -617,8 +621,8 @@ static u32 xp_alloc_reused(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u3
i = nb_entries;
while (i--) {
- xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, free_list_node);
- list_del_init(&xskb->free_list_node);
+ xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, list_node);
+ list_del_init(&xskb->list_node);
*xdp = &xskb->xdp;
xdp++;
@@ -688,11 +692,11 @@ EXPORT_SYMBOL(xp_can_alloc);
void xp_free(struct xdp_buff_xsk *xskb)
{
- if (!list_empty(&xskb->free_list_node))
+ if (!list_empty(&xskb->list_node))
return;
xskb->pool->free_list_cnt++;
- list_add(&xskb->free_list_node, &xskb->pool->free_list);
+ list_add(&xskb->list_node, &xskb->pool->free_list);
}
EXPORT_SYMBOL(xp_free);
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 406b20dfee8d..46d87e961ad6 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -260,7 +260,7 @@ u32 xskq_cons_read_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool,
nr_frags = 0;
} else {
nr_frags++;
- if (nr_frags == pool->netdev->xdp_zc_max_segs) {
+ if (nr_frags == pool->xdp_zc_max_segs) {
nr_frags = 0;
break;
}
diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c
index 91357ccaf4af..5b9ee63e30b6 100644
--- a/net/xfrm/xfrm_compat.c
+++ b/net/xfrm/xfrm_compat.c
@@ -132,6 +132,7 @@ static const struct nla_policy compat_policy[XFRMA_MAX+1] = {
[XFRMA_MTIMER_THRESH] = { .type = NLA_U32 },
[XFRMA_SA_DIR] = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT),
[XFRMA_NAT_KEEPALIVE_INTERVAL] = { .type = NLA_U32 },
+ [XFRMA_SA_PCPU] = { .type = NLA_U32 },
};
static struct nlmsghdr *xfrm_nlmsg_put_compat(struct sk_buff *skb,
@@ -282,9 +283,10 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src)
case XFRMA_MTIMER_THRESH:
case XFRMA_SA_DIR:
case XFRMA_NAT_KEEPALIVE_INTERVAL:
+ case XFRMA_SA_PCPU:
return xfrm_nla_cpy(dst, src, nla_len(src));
default:
- BUILD_BUG_ON(XFRMA_MAX != XFRMA_NAT_KEEPALIVE_INTERVAL);
+ BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_PCPU);
pr_warn_once("unsupported nla_type %d\n", src->nla_type);
return -EOPNOTSUPP;
}
@@ -439,7 +441,7 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla,
int err;
if (type > XFRMA_MAX) {
- BUILD_BUG_ON(XFRMA_MAX != XFRMA_NAT_KEEPALIVE_INTERVAL);
+ BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_PCPU);
NL_SET_ERR_MSG(extack, "Bad attribute");
return -EOPNOTSUPP;
}
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 749e7eea99e4..841a60a6fbfe 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -572,7 +572,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
goto drop;
}
- x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr, family);
+ x = xfrm_input_state_lookup(net, mark, daddr, spi, nexthdr, family);
if (x == NULL) {
secpath_reset(skb);
XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index a2ea9dbac90b..4408c11c0835 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -289,7 +289,7 @@ struct dst_entry *__xfrm_dst_lookup(int family,
EXPORT_SYMBOL(__xfrm_dst_lookup);
static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
- int tos, int oif,
+ dscp_t dscp, int oif,
xfrm_address_t *prev_saddr,
xfrm_address_t *prev_daddr,
int family, u32 mark)
@@ -312,7 +312,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
params.net = net;
params.saddr = saddr;
params.daddr = daddr;
- params.tos = tos;
+ params.dscp = dscp;
params.oif = oif;
params.mark = mark;
params.ipproto = x->id.proto;
@@ -434,6 +434,7 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
if (policy) {
write_pnet(&policy->xp_net, net);
INIT_LIST_HEAD(&policy->walk.all);
+ INIT_HLIST_HEAD(&policy->state_cache_list);
INIT_HLIST_NODE(&policy->bydst);
INIT_HLIST_NODE(&policy->byidx);
rwlock_init(&policy->lock);
@@ -475,6 +476,9 @@ EXPORT_SYMBOL(xfrm_policy_destroy);
static void xfrm_policy_kill(struct xfrm_policy *policy)
{
+ struct net *net = xp_net(policy);
+ struct xfrm_state *x;
+
xfrm_dev_policy_delete(policy);
write_lock_bh(&policy->lock);
@@ -490,6 +494,13 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
if (del_timer(&policy->timer))
xfrm_pol_put(policy);
+ /* XXX: Flush state cache */
+ spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ hlist_for_each_entry_rcu(x, &policy->state_cache_list, state_cache) {
+ hlist_del_init_rcu(&x->state_cache);
+ }
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+
xfrm_pol_put(policy);
}
@@ -2576,10 +2587,10 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
}
-static int xfrm_get_tos(const struct flowi *fl, int family)
+static dscp_t xfrm_get_dscp(const struct flowi *fl, int family)
{
if (family == AF_INET)
- return fl->u.ip4.flowi4_tos & INET_DSCP_MASK;
+ return inet_dsfield_to_dscp(fl->u.ip4.flowi4_tos);
return 0;
}
@@ -2667,13 +2678,13 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
int header_len = 0;
int nfheader_len = 0;
int trailer_len = 0;
- int tos;
int family = policy->selector.family;
xfrm_address_t saddr, daddr;
+ dscp_t dscp;
xfrm_flowi_addr_get(fl, &saddr, &daddr, family);
- tos = xfrm_get_tos(fl, family);
+ dscp = xfrm_get_dscp(fl, family);
dst_hold(dst);
@@ -2721,8 +2732,8 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
family = xfrm[i]->props.family;
oif = fl->flowi_oif ? : fl->flowi_l3mdev;
- dst = xfrm_dst_lookup(xfrm[i], tos, oif,
- &saddr, &daddr, family, mark);
+ dst = xfrm_dst_lookup(xfrm[i], dscp, oif, &saddr,
+ &daddr, family, mark);
err = PTR_ERR(dst);
if (IS_ERR(dst))
goto put_states;
@@ -3275,6 +3286,7 @@ no_transform:
dst_release(dst);
dst = dst_orig;
}
+
ok:
xfrm_pols_put(pols, drop_pols);
if (dst && dst->xfrm &&
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 37478d36a8df..67ca7ac955a3 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -665,6 +665,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
refcount_set(&x->refcnt, 1);
atomic_set(&x->tunnel_users, 0);
INIT_LIST_HEAD(&x->km.all);
+ INIT_HLIST_NODE(&x->state_cache);
INIT_HLIST_NODE(&x->bydst);
INIT_HLIST_NODE(&x->bysrc);
INIT_HLIST_NODE(&x->byspi);
@@ -679,6 +680,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
x->lft.hard_packet_limit = XFRM_INF;
x->replay_maxage = 0;
x->replay_maxdiff = 0;
+ x->pcpu_num = UINT_MAX;
spin_lock_init(&x->lock);
}
return x;
@@ -743,12 +745,18 @@ int __xfrm_state_delete(struct xfrm_state *x)
if (x->km.state != XFRM_STATE_DEAD) {
x->km.state = XFRM_STATE_DEAD;
+
spin_lock(&net->xfrm.xfrm_state_lock);
list_del(&x->km.all);
hlist_del_rcu(&x->bydst);
hlist_del_rcu(&x->bysrc);
if (x->km.seq)
hlist_del_rcu(&x->byseq);
+ if (!hlist_unhashed(&x->state_cache))
+ hlist_del_rcu(&x->state_cache);
+ if (!hlist_unhashed(&x->state_cache_input))
+ hlist_del_rcu(&x->state_cache_input);
+
if (x->id.spi)
hlist_del_rcu(&x->byspi);
net->xfrm.state_num--;
@@ -1101,6 +1109,52 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
return NULL;
}
+struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark,
+ const xfrm_address_t *daddr,
+ __be32 spi, u8 proto,
+ unsigned short family)
+{
+ struct hlist_head *state_cache_input;
+ struct xfrm_state *x = NULL;
+ int cpu = get_cpu();
+
+ state_cache_input = per_cpu_ptr(net->xfrm.state_cache_input, cpu);
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(x, state_cache_input, state_cache_input) {
+ if (x->props.family != family ||
+ x->id.spi != spi ||
+ x->id.proto != proto ||
+ !xfrm_addr_equal(&x->id.daddr, daddr, family))
+ continue;
+
+ if ((mark & x->mark.m) != x->mark.v)
+ continue;
+ if (!xfrm_state_hold_rcu(x))
+ continue;
+ goto out;
+ }
+
+ x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family);
+
+ if (x && x->km.state == XFRM_STATE_VALID) {
+ spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ if (hlist_unhashed(&x->state_cache_input)) {
+ hlist_add_head_rcu(&x->state_cache_input, state_cache_input);
+ } else {
+ hlist_del_rcu(&x->state_cache_input);
+ hlist_add_head_rcu(&x->state_cache_input, state_cache_input);
+ }
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+ }
+
+out:
+ rcu_read_unlock();
+ put_cpu();
+ return x;
+}
+EXPORT_SYMBOL(xfrm_input_state_lookup);
+
static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
const xfrm_address_t *daddr,
const xfrm_address_t *saddr,
@@ -1155,6 +1209,12 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
struct xfrm_state **best, int *acq_in_progress,
int *error)
{
+ /* We need the cpu id just as a lookup key,
+ * we don't require it to be stable.
+ */
+ unsigned int pcpu_id = get_cpu();
+ put_cpu();
+
/* Resolution logic:
* 1. There is a valid state with matching selector. Done.
* 2. Valid state with inappropriate selector. Skip.
@@ -1174,13 +1234,18 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
&fl->u.__fl_common))
return;
+ if (x->pcpu_num != UINT_MAX && x->pcpu_num != pcpu_id)
+ return;
+
if (!*best ||
+ ((*best)->pcpu_num == UINT_MAX && x->pcpu_num == pcpu_id) ||
(*best)->km.dying > x->km.dying ||
((*best)->km.dying == x->km.dying &&
(*best)->curlft.add_time < x->curlft.add_time))
*best = x;
} else if (x->km.state == XFRM_STATE_ACQ) {
- *acq_in_progress = 1;
+ if (!*best || x->pcpu_num == pcpu_id)
+ *acq_in_progress = 1;
} else if (x->km.state == XFRM_STATE_ERROR ||
x->km.state == XFRM_STATE_EXPIRED) {
if ((!x->sel.family ||
@@ -1209,12 +1274,60 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
unsigned short encap_family = tmpl->encap_family;
unsigned int sequence;
struct km_event c;
+ unsigned int pcpu_id;
+ bool cached = false;
+
+ /* We need the cpu id just as a lookup key,
+ * we don't require it to be stable.
+ */
+ pcpu_id = get_cpu();
+ put_cpu();
to_put = NULL;
sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
rcu_read_lock();
+ hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) {
+ if (x->props.family == encap_family &&
+ x->props.reqid == tmpl->reqid &&
+ (mark & x->mark.m) == x->mark.v &&
+ x->if_id == if_id &&
+ !(x->props.flags & XFRM_STATE_WILDRECV) &&
+ xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
+ tmpl->mode == x->props.mode &&
+ tmpl->id.proto == x->id.proto &&
+ (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
+ xfrm_state_look_at(pol, x, fl, encap_family,
+ &best, &acquire_in_progress, &error);
+ }
+
+ if (best)
+ goto cached;
+
+ hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) {
+ if (x->props.family == encap_family &&
+ x->props.reqid == tmpl->reqid &&
+ (mark & x->mark.m) == x->mark.v &&
+ x->if_id == if_id &&
+ !(x->props.flags & XFRM_STATE_WILDRECV) &&
+ xfrm_addr_equal(&x->id.daddr, daddr, encap_family) &&
+ tmpl->mode == x->props.mode &&
+ tmpl->id.proto == x->id.proto &&
+ (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
+ xfrm_state_look_at(pol, x, fl, family,
+ &best, &acquire_in_progress, &error);
+ }
+
+cached:
+ cached = true;
+ if (best)
+ goto found;
+ else if (error)
+ best = NULL;
+ else if (acquire_in_progress) /* XXX: acquire_in_progress should not happen */
+ WARN_ON(1);
+
h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
#ifdef CONFIG_XFRM_OFFLOAD
@@ -1282,7 +1395,10 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
}
found:
- x = best;
+ if (!(pol->flags & XFRM_POLICY_CPU_ACQUIRE) ||
+ (best && (best->pcpu_num == pcpu_id)))
+ x = best;
+
if (!x && !error && !acquire_in_progress) {
if (tmpl->id.spi &&
(x0 = __xfrm_state_lookup_all(net, mark, daddr,
@@ -1314,6 +1430,8 @@ found:
xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family);
memcpy(&x->mark, &pol->mark, sizeof(x->mark));
x->if_id = if_id;
+ if ((pol->flags & XFRM_POLICY_CPU_ACQUIRE) && best)
+ x->pcpu_num = pcpu_id;
error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid);
if (error) {
@@ -1352,6 +1470,7 @@ found:
x->km.state = XFRM_STATE_ACQ;
x->dir = XFRM_SA_DIR_OUT;
list_add(&x->km.all, &net->xfrm.state_all);
+ h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
XFRM_STATE_INSERT(bydst, &x->bydst,
net->xfrm.state_bydst + h,
x->xso.type);
@@ -1359,6 +1478,7 @@ found:
XFRM_STATE_INSERT(bysrc, &x->bysrc,
net->xfrm.state_bysrc + h,
x->xso.type);
+ INIT_HLIST_NODE(&x->state_cache);
if (x->id.spi) {
h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
XFRM_STATE_INSERT(byspi, &x->byspi,
@@ -1392,6 +1512,11 @@ found:
x = NULL;
error = -ESRCH;
}
+
+ /* Use the already installed 'fallback' while the CPU-specific
+ * SA acquire is handled*/
+ if (best)
+ x = best;
}
out:
if (x) {
@@ -1402,6 +1527,15 @@ out:
} else {
*err = acquire_in_progress ? -EAGAIN : error;
}
+
+ if (x && x->km.state == XFRM_STATE_VALID && !cached &&
+ (!(pol->flags & XFRM_POLICY_CPU_ACQUIRE) || x->pcpu_num == pcpu_id)) {
+ spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ if (hlist_unhashed(&x->state_cache))
+ hlist_add_head_rcu(&x->state_cache, &pol->state_cache_list);
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+ }
+
rcu_read_unlock();
if (to_put)
xfrm_state_put(to_put);
@@ -1524,12 +1658,14 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
unsigned int h;
u32 mark = xnew->mark.v & xnew->mark.m;
u32 if_id = xnew->if_id;
+ u32 cpu_id = xnew->pcpu_num;
h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family);
hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
if (x->props.family == family &&
x->props.reqid == reqid &&
x->if_id == if_id &&
+ x->pcpu_num == cpu_id &&
(mark & x->mark.m) == x->mark.v &&
xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) &&
xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family))
@@ -1552,7 +1688,7 @@ EXPORT_SYMBOL(xfrm_state_insert);
static struct xfrm_state *__find_acq_core(struct net *net,
const struct xfrm_mark *m,
unsigned short family, u8 mode,
- u32 reqid, u32 if_id, u8 proto,
+ u32 reqid, u32 if_id, u32 pcpu_num, u8 proto,
const xfrm_address_t *daddr,
const xfrm_address_t *saddr,
int create)
@@ -1569,6 +1705,7 @@ static struct xfrm_state *__find_acq_core(struct net *net,
x->id.spi != 0 ||
x->id.proto != proto ||
(mark & x->mark.m) != x->mark.v ||
+ x->pcpu_num != pcpu_num ||
!xfrm_addr_equal(&x->id.daddr, daddr, family) ||
!xfrm_addr_equal(&x->props.saddr, saddr, family))
continue;
@@ -1602,6 +1739,7 @@ static struct xfrm_state *__find_acq_core(struct net *net,
break;
}
+ x->pcpu_num = pcpu_num;
x->km.state = XFRM_STATE_ACQ;
x->id.proto = proto;
x->props.family = family;
@@ -1630,7 +1768,7 @@ static struct xfrm_state *__find_acq_core(struct net *net,
return x;
}
-static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq);
+static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num);
int xfrm_state_add(struct xfrm_state *x)
{
@@ -1656,7 +1794,7 @@ int xfrm_state_add(struct xfrm_state *x)
}
if (use_spi && x->km.seq) {
- x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq);
+ x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq, x->pcpu_num);
if (x1 && ((x1->id.proto != x->id.proto) ||
!xfrm_addr_equal(&x1->id.daddr, &x->id.daddr, family))) {
to_put = x1;
@@ -1666,7 +1804,7 @@ int xfrm_state_add(struct xfrm_state *x)
if (use_spi && !x1)
x1 = __find_acq_core(net, &x->mark, family, x->props.mode,
- x->props.reqid, x->if_id, x->id.proto,
+ x->props.reqid, x->if_id, x->pcpu_num, x->id.proto,
&x->id.daddr, &x->props.saddr, 0);
__xfrm_state_bump_genids(x);
@@ -1791,6 +1929,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
x->props.flags = orig->props.flags;
x->props.extra_flags = orig->props.extra_flags;
+ x->pcpu_num = orig->pcpu_num;
x->if_id = orig->if_id;
x->tfcpad = orig->tfcpad;
x->replay_maxdiff = orig->replay_maxdiff;
@@ -2066,13 +2205,14 @@ EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
struct xfrm_state *
xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid,
- u32 if_id, u8 proto, const xfrm_address_t *daddr,
+ u32 if_id, u32 pcpu_num, u8 proto, const xfrm_address_t *daddr,
const xfrm_address_t *saddr, int create, unsigned short family)
{
struct xfrm_state *x;
spin_lock_bh(&net->xfrm.xfrm_state_lock);
- x = __find_acq_core(net, mark, family, mode, reqid, if_id, proto, daddr, saddr, create);
+ x = __find_acq_core(net, mark, family, mode, reqid, if_id, pcpu_num,
+ proto, daddr, saddr, create);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
return x;
@@ -2207,7 +2347,7 @@ xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
/* Silly enough, but I'm lazy to build resolution list */
-static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
+static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num)
{
unsigned int h = xfrm_seq_hash(net, seq);
struct xfrm_state *x;
@@ -2215,6 +2355,7 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 s
hlist_for_each_entry_rcu(x, net->xfrm.state_byseq + h, byseq) {
if (x->km.seq == seq &&
(mark & x->mark.m) == x->mark.v &&
+ x->pcpu_num == pcpu_num &&
x->km.state == XFRM_STATE_ACQ) {
xfrm_state_hold(x);
return x;
@@ -2224,12 +2365,12 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 s
return NULL;
}
-struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
+struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num)
{
struct xfrm_state *x;
spin_lock_bh(&net->xfrm.xfrm_state_lock);
- x = __xfrm_find_acq_byseq(net, mark, seq);
+ x = __xfrm_find_acq_byseq(net, mark, seq, pcpu_num);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
return x;
}
@@ -2988,6 +3129,11 @@ int __net_init xfrm_state_init(struct net *net)
net->xfrm.state_byseq = xfrm_hash_alloc(sz);
if (!net->xfrm.state_byseq)
goto out_byseq;
+
+ net->xfrm.state_cache_input = alloc_percpu(struct hlist_head);
+ if (!net->xfrm.state_cache_input)
+ goto out_state_cache_input;
+
net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
net->xfrm.state_num = 0;
@@ -2997,6 +3143,8 @@ int __net_init xfrm_state_init(struct net *net)
&net->xfrm.xfrm_state_lock);
return 0;
+out_state_cache_input:
+ xfrm_hash_free(net->xfrm.state_byseq, sz);
out_byseq:
xfrm_hash_free(net->xfrm.state_byspi, sz);
out_byspi:
@@ -3026,6 +3174,7 @@ void xfrm_state_fini(struct net *net)
xfrm_hash_free(net->xfrm.state_bysrc, sz);
WARN_ON(!hlist_empty(net->xfrm.state_bydst));
xfrm_hash_free(net->xfrm.state_bydst, sz);
+ free_percpu(net->xfrm.state_cache_input);
}
#ifdef CONFIG_AUDITSYSCALL
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index e3b8ce89831a..b2876e09328b 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -200,7 +200,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
struct netlink_ext_ack *extack)
{
int err;
- u8 sa_dir = attrs[XFRMA_SA_DIR] ? nla_get_u8(attrs[XFRMA_SA_DIR]) : 0;
+ u8 sa_dir = nla_get_u8_default(attrs[XFRMA_SA_DIR], 0);
u16 family = p->sel.family;
err = -EINVAL;
@@ -460,6 +460,12 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
}
}
+ if (!sa_dir && attrs[XFRMA_SA_PCPU]) {
+ NL_SET_ERR_MSG(extack, "SA_PCPU only supported with SA_DIR");
+ err = -EINVAL;
+ goto out;
+ }
+
out:
return err;
}
@@ -767,10 +773,8 @@ static void xfrm_smark_init(struct nlattr **attrs, struct xfrm_mark *m)
{
if (attrs[XFRMA_SET_MARK]) {
m->v = nla_get_u32(attrs[XFRMA_SET_MARK]);
- if (attrs[XFRMA_SET_MARK_MASK])
- m->m = nla_get_u32(attrs[XFRMA_SET_MARK_MASK]);
- else
- m->m = 0xffffffff;
+ m->m = nla_get_u32_default(attrs[XFRMA_SET_MARK_MASK],
+ 0xffffffff);
} else {
m->v = m->m = 0;
}
@@ -841,6 +845,12 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
x->nat_keepalive_interval =
nla_get_u32(attrs[XFRMA_NAT_KEEPALIVE_INTERVAL]);
+ if (attrs[XFRMA_SA_PCPU]) {
+ x->pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]);
+ if (x->pcpu_num >= num_possible_cpus())
+ goto error;
+ }
+
err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV], extack);
if (err)
goto error;
@@ -1089,7 +1099,7 @@ static int copy_to_user_auth(struct xfrm_algo_auth *auth, struct sk_buff *skb)
if (!nla)
return -EMSGSIZE;
algo = nla_data(nla);
- strncpy(algo->alg_name, auth->alg_name, sizeof(algo->alg_name));
+ strscpy_pad(algo->alg_name, auth->alg_name, sizeof(algo->alg_name));
if (redact_secret && auth->alg_key_len)
memset(algo->alg_key, 0, (auth->alg_key_len + 7) / 8);
@@ -1296,6 +1306,11 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
if (ret)
goto out;
}
+ if (x->pcpu_num != UINT_MAX) {
+ ret = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num);
+ if (ret)
+ goto out;
+ }
if (x->dir)
ret = nla_put_u8(skb, XFRMA_SA_DIR, x->dir);
@@ -1700,6 +1715,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
u32 mark;
struct xfrm_mark m;
u32 if_id = 0;
+ u32 pcpu_num = UINT_MAX;
p = nlmsg_data(nlh);
err = verify_spi_info(p->info.id.proto, p->min, p->max, extack);
@@ -1716,8 +1732,16 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
if (attrs[XFRMA_IF_ID])
if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
+ if (attrs[XFRMA_SA_PCPU]) {
+ pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]);
+ if (pcpu_num >= num_possible_cpus()) {
+ err = -EINVAL;
+ goto out_noput;
+ }
+ }
+
if (p->info.seq) {
- x = xfrm_find_acq_byseq(net, mark, p->info.seq);
+ x = xfrm_find_acq_byseq(net, mark, p->info.seq, pcpu_num);
if (x && !xfrm_addr_equal(&x->id.daddr, daddr, family)) {
xfrm_state_put(x);
x = NULL;
@@ -1726,7 +1750,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!x)
x = xfrm_find_acq(net, &m, p->info.mode, p->info.reqid,
- if_id, p->info.id.proto, daddr,
+ if_id, pcpu_num, p->info.id.proto, daddr,
&p->info.saddr, 1,
family);
err = -ENOENT;
@@ -2526,7 +2550,8 @@ static inline unsigned int xfrm_aevent_msgsize(struct xfrm_state *x)
+ nla_total_size(sizeof(struct xfrm_mark))
+ nla_total_size(4) /* XFRM_AE_RTHR */
+ nla_total_size(4) /* XFRM_AE_ETHR */
- + nla_total_size(sizeof(x->dir)); /* XFRMA_SA_DIR */
+ + nla_total_size(sizeof(x->dir)) /* XFRMA_SA_DIR */
+ + nla_total_size(4); /* XFRMA_SA_PCPU */
}
static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c)
@@ -2582,6 +2607,11 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct
err = xfrm_if_id_put(skb, x->if_id);
if (err)
goto out_cancel;
+ if (x->pcpu_num != UINT_MAX) {
+ err = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num);
+ if (err)
+ goto out_cancel;
+ }
if (x->dir) {
err = nla_put_u8(skb, XFRMA_SA_DIR, x->dir);
@@ -2852,6 +2882,13 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh,
xfrm_mark_get(attrs, &mark);
+ if (attrs[XFRMA_SA_PCPU]) {
+ x->pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]);
+ err = -EINVAL;
+ if (x->pcpu_num >= num_possible_cpus())
+ goto free_state;
+ }
+
err = verify_newpolicy_info(&ua->policy, extack);
if (err)
goto free_state;
@@ -3182,6 +3219,7 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_MTIMER_THRESH] = { .type = NLA_U32 },
[XFRMA_SA_DIR] = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT),
[XFRMA_NAT_KEEPALIVE_INTERVAL] = { .type = NLA_U32 },
+ [XFRMA_SA_PCPU] = { .type = NLA_U32 },
};
EXPORT_SYMBOL_GPL(xfrma_policy);
@@ -3245,6 +3283,20 @@ static int xfrm_reject_unused_attr(int type, struct nlattr **attrs,
}
}
+ if (attrs[XFRMA_SA_PCPU]) {
+ switch (type) {
+ case XFRM_MSG_NEWSA:
+ case XFRM_MSG_UPDSA:
+ case XFRM_MSG_ALLOCSPI:
+ case XFRM_MSG_ACQUIRE:
+
+ break;
+ default:
+ NL_SET_ERR_MSG(extack, "Invalid attribute SA_PCPU");
+ return -EINVAL;
+ }
+ }
+
return 0;
}
@@ -3348,7 +3400,8 @@ static inline unsigned int xfrm_expire_msgsize(void)
{
return NLMSG_ALIGN(sizeof(struct xfrm_user_expire)) +
nla_total_size(sizeof(struct xfrm_mark)) +
- nla_total_size(sizeof_field(struct xfrm_state, dir));
+ nla_total_size(sizeof_field(struct xfrm_state, dir)) +
+ nla_total_size(4); /* XFRMA_SA_PCPU */
}
static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c)
@@ -3374,6 +3427,11 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct
err = xfrm_if_id_put(skb, x->if_id);
if (err)
return err;
+ if (x->pcpu_num != UINT_MAX) {
+ err = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num);
+ if (err)
+ return err;
+ }
if (x->dir) {
err = nla_put_u8(skb, XFRMA_SA_DIR, x->dir);
@@ -3481,6 +3539,8 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x)
}
if (x->if_id)
l += nla_total_size(sizeof(x->if_id));
+ if (x->pcpu_num)
+ l += nla_total_size(sizeof(x->pcpu_num));
/* Must count x->lastused as it may become non-zero behind our back. */
l += nla_total_size_64bit(sizeof(u64));
@@ -3587,6 +3647,7 @@ static inline unsigned int xfrm_acquire_msgsize(struct xfrm_state *x,
+ nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr)
+ nla_total_size(sizeof(struct xfrm_mark))
+ nla_total_size(xfrm_user_sec_ctx_size(x->security))
+ + nla_total_size(4) /* XFRMA_SA_PCPU */
+ userpolicy_type_attrsize();
}
@@ -3623,6 +3684,8 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
err = xfrm_if_id_put(skb, xp->if_id);
if (!err && xp->xdo.dev)
err = copy_user_offload(&xp->xdo, skb);
+ if (!err && x->pcpu_num != UINT_MAX)
+ err = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num);
if (err) {
nlmsg_cancel(skb, nlh);
return err;