Diffstat (limited to 'net')
-rw-r--r-- net/Kconfig | 1
-rw-r--r-- net/appletalk/ddp.c | 4
-rw-r--r-- net/atm/mpoa_caches.c | 6
-rw-r--r-- net/atm/pppoatm.c | 4
-rw-r--r-- net/batman-adv/bat_v.c | 7
-rw-r--r-- net/batman-adv/bat_v_ogm.c | 179
-rw-r--r-- net/batman-adv/bat_v_ogm.h | 3
-rw-r--r-- net/batman-adv/main.h | 2
-rw-r--r-- net/batman-adv/soft-interface.c | 8
-rw-r--r-- net/batman-adv/sysfs.c | 2
-rw-r--r-- net/batman-adv/types.h | 12
-rw-r--r-- net/bluetooth/6lowpan.c | 2
-rw-r--r-- net/bluetooth/hci_request.c | 29
-rw-r--r-- net/bluetooth/hidp/core.c | 4
-rw-r--r-- net/bluetooth/mgmt.c | 8
-rw-r--r-- net/bpf/test_run.c | 39
-rw-r--r-- net/bridge/br_mdb.c | 177
-rw-r--r-- net/bridge/br_multicast.c | 32
-rw-r--r-- net/bridge/br_private.h | 3
-rw-r--r-- net/bridge/br_vlan.c | 2
-rw-r--r-- net/bridge/netfilter/ebt_802_3.c | 8
-rw-r--r-- net/bridge/netfilter/nf_conntrack_bridge.c | 15
-rw-r--r-- net/can/Kconfig | 13
-rw-r--r-- net/can/Makefile | 2
-rw-r--r-- net/can/af_can.c | 379
-rw-r--r-- net/can/af_can.h | 25
-rw-r--r-- net/can/bcm.c | 170
-rw-r--r-- net/can/gw.c | 511
-rw-r--r-- net/can/j1939/Kconfig | 15
-rw-r--r-- net/can/j1939/Makefile | 10
-rw-r--r-- net/can/j1939/address-claim.c | 230
-rw-r--r-- net/can/j1939/bus.c | 333
-rw-r--r-- net/can/j1939/j1939-priv.h | 338
-rw-r--r-- net/can/j1939/main.c | 403
-rw-r--r-- net/can/j1939/socket.c | 1160
-rw-r--r-- net/can/j1939/transport.c | 2027
-rw-r--r-- net/can/proc.c | 164
-rw-r--r-- net/can/raw.c | 44
-rw-r--r-- net/core/bpf_sk_storage.c | 104
-rw-r--r-- net/core/datagram.c | 6
-rw-r--r-- net/core/dev.c | 77
-rw-r--r-- net/core/devlink.c | 1201
-rw-r--r-- net/core/drop_monitor.c | 1345
-rw-r--r-- net/core/ethtool.c | 6
-rw-r--r-- net/core/fib_notifier.c | 29
-rw-r--r-- net/core/filter.c | 88
-rw-r--r-- net/core/flow_dissector.c | 21
-rw-r--r-- net/core/flow_offload.c | 241
-rw-r--r-- net/core/neighbour.c | 2
-rw-r--r-- net/core/page_pool.c | 41
-rw-r--r-- net/core/pktgen.c | 2
-rw-r--r-- net/core/skbuff.c | 84
-rw-r--r-- net/core/skmsg.c | 3
-rw-r--r-- net/core/sock.c | 9
-rw-r--r-- net/core/sock_map.c | 2
-rw-r--r-- net/core/sock_reuseport.c | 15
-rw-r--r-- net/core/sysctl_net_core.c | 8
-rw-r--r-- net/core/tso.c | 8
-rw-r--r-- net/dsa/Kconfig | 16
-rw-r--r-- net/dsa/Makefile | 2
-rw-r--r-- net/dsa/dsa2.c | 127
-rw-r--r-- net/dsa/master.c | 66
-rw-r--r-- net/dsa/port.c | 19
-rw-r--r-- net/dsa/slave.c | 102
-rw-r--r-- net/dsa/switch.c | 135
-rw-r--r-- net/dsa/tag_8021q.c | 102
-rw-r--r-- net/dsa/tag_ksz.c | 62
-rw-r--r-- net/dsa/tag_sja1105.c | 3
-rw-r--r-- net/ipv4/af_inet.c | 5
-rw-r--r-- net/ipv4/datagram.c | 2
-rw-r--r-- net/ipv4/ip_output.c | 3
-rw-r--r-- net/ipv4/ipmr.c | 4
-rw-r--r-- net/ipv4/netfilter/Kconfig | 8
-rw-r--r-- net/ipv4/netfilter/Makefile | 2
-rw-r--r-- net/ipv4/netfilter/ipt_SYNPROXY.c | 4
-rw-r--r-- net/ipv4/nexthop.c | 2
-rw-r--r-- net/ipv4/ping.c | 2
-rw-r--r-- net/ipv4/raw.c | 4
-rw-r--r-- net/ipv4/sysctl_net_ipv4.c | 9
-rw-r--r-- net/ipv4/tcp.c | 32
-rw-r--r-- net/ipv4/tcp_bbr.c | 6
-rw-r--r-- net/ipv4/tcp_diag.c | 52
-rw-r--r-- net/ipv4/tcp_input.c | 82
-rw-r--r-- net/ipv4/tcp_ipv4.c | 16
-rw-r--r-- net/ipv4/tcp_output.c | 17
-rw-r--r-- net/ipv4/tcp_timer.c | 2
-rw-r--r-- net/ipv4/udp.c | 7
-rw-r--r-- net/ipv6/datagram.c | 2
-rw-r--r-- net/ipv6/exthdrs_core.c | 4
-rw-r--r-- net/ipv6/ip6_gre.c | 2
-rw-r--r-- net/ipv6/ip6_input.c | 4
-rw-r--r-- net/ipv6/ip6_output.c | 3
-rw-r--r-- net/ipv6/ip6mr.c | 4
-rw-r--r-- net/ipv6/ndisc.c | 1
-rw-r--r-- net/ipv6/netfilter.c | 4
-rw-r--r-- net/ipv6/netfilter/ip6t_SYNPROXY.c | 4
-rw-r--r-- net/ipv6/netfilter/ip6t_ipv6header.c | 4
-rw-r--r-- net/ipv6/netfilter/nf_log_ipv6.c | 4
-rw-r--r-- net/ipv6/netfilter/nf_socket_ipv6.c | 1
-rw-r--r-- net/ipv6/raw.c | 4
-rw-r--r-- net/ipv6/route.c | 16
-rw-r--r-- net/ipv6/tcp_ipv6.c | 15
-rw-r--r-- net/ipv6/udp.c | 8
-rw-r--r-- net/kcm/kcmsock.c | 12
-rw-r--r-- net/mac80211/agg-rx.c | 72
-rw-r--r-- net/mac80211/cfg.c | 7
-rw-r--r-- net/mac80211/debugfs.c | 3
-rw-r--r-- net/mac80211/driver-ops.h | 8
-rw-r--r-- net/mac80211/he.c | 40
-rw-r--r-- net/mac80211/ht.c | 2
-rw-r--r-- net/mac80211/ibss.c | 8
-rw-r--r-- net/mac80211/ieee80211_i.h | 20
-rw-r--r-- net/mac80211/iface.c | 2
-rw-r--r-- net/mac80211/key.c | 64
-rw-r--r-- net/mac80211/key.h | 4
-rw-r--r-- net/mac80211/main.c | 19
-rw-r--r-- net/mac80211/mesh.c | 62
-rw-r--r-- net/mac80211/mesh.h | 4
-rw-r--r-- net/mac80211/mesh_plink.c | 12
-rw-r--r-- net/mac80211/mlme.c | 22
-rw-r--r-- net/mac80211/offchannel.c | 5
-rw-r--r-- net/mac80211/rate.h | 9
-rw-r--r-- net/mac80211/rc80211_minstrel.h | 1
-rw-r--r-- net/mac80211/rc80211_minstrel_ht.c | 277
-rw-r--r-- net/mac80211/rc80211_minstrel_ht.h | 12
-rw-r--r-- net/mac80211/sta_info.c | 4
-rw-r--r-- net/mac80211/sta_info.h | 2
-rw-r--r-- net/mac80211/status.c | 195
-rw-r--r-- net/mac80211/trace.h | 7
-rw-r--r-- net/mac80211/tx.c | 6
-rw-r--r-- net/mac80211/util.c | 71
-rw-r--r-- net/mac80211/vht.c | 10
-rw-r--r-- net/mac80211/wpa.c | 6
-rw-r--r-- net/ncsi/ncsi-pkt.h | 5
-rw-r--r-- net/ncsi/ncsi-rsp.c | 17
-rw-r--r-- net/netfilter/Kconfig | 8
-rw-r--r-- net/netfilter/Makefile | 2
-rw-r--r-- net/netfilter/ipset/ip_set_hash_gen.h | 4
-rw-r--r-- net/netfilter/ipset/ip_set_list_set.c | 2
-rw-r--r-- net/netfilter/ipvs/ip_vs_core.c | 2
-rw-r--r-- net/netfilter/ipvs/ip_vs_ctl.c | 73
-rw-r--r-- net/netfilter/ipvs/ip_vs_lblcr.c | 4
-rw-r--r-- net/netfilter/ipvs/ip_vs_mh.c | 4
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_tcp.c | 2
-rw-r--r-- net/netfilter/nf_conntrack_core.c | 3
-rw-r--r-- net/netfilter/nf_conntrack_ecache.c | 1
-rw-r--r-- net/netfilter/nf_conntrack_expect.c | 2
-rw-r--r-- net/netfilter/nf_conntrack_ftp.c | 2
-rw-r--r-- net/netfilter/nf_conntrack_helper.c | 5
-rw-r--r-- net/netfilter/nf_conntrack_labels.c | 3
-rw-r--r-- net/netfilter/nf_conntrack_proto_icmpv6.c | 1
-rw-r--r-- net/netfilter/nf_conntrack_proto_tcp.c | 2
-rw-r--r-- net/netfilter/nf_conntrack_standalone.c | 35
-rw-r--r-- net/netfilter/nf_conntrack_timeout.c | 1
-rw-r--r-- net/netfilter/nf_dup_netdev.c | 21
-rw-r--r-- net/netfilter/nf_flow_table_core.c | 1
-rw-r--r-- net/netfilter/nf_nat_core.c | 6
-rw-r--r-- net/netfilter/nf_nat_proto.c | 4
-rw-r--r-- net/netfilter/nf_synproxy_core.c | 8
-rw-r--r-- net/netfilter/nf_tables_api.c | 93
-rw-r--r-- net/netfilter/nf_tables_offload.c | 296
-rw-r--r-- net/netfilter/nfnetlink_log.c | 61
-rw-r--r-- net/netfilter/nfnetlink_queue.c | 4
-rw-r--r-- net/netfilter/nft_bitwise.c | 19
-rw-r--r-- net/netfilter/nft_byteorder.c | 9
-rw-r--r-- net/netfilter/nft_dup_netdev.c | 12
-rw-r--r-- net/netfilter/nft_dynset.c | 6
-rw-r--r-- net/netfilter/nft_flow_offload.c | 3
-rw-r--r-- net/netfilter/nft_fwd_netdev.c | 12
-rw-r--r-- net/netfilter/nft_immediate.c | 24
-rw-r--r-- net/netfilter/nft_meta.c | 46
-rw-r--r-- net/netfilter/nft_quota.c | 29
-rw-r--r-- net/netfilter/nft_set_bitmap.c | 2
-rw-r--r-- net/netfilter/nft_set_hash.c | 21
-rw-r--r-- net/netfilter/nft_set_rbtree.c | 2
-rw-r--r-- net/netfilter/nft_synproxy.c | 147
-rw-r--r-- net/netfilter/xt_IDLETIMER.c | 2
-rw-r--r-- net/netfilter/xt_connlimit.c | 2
-rw-r--r-- net/netfilter/xt_hashlimit.c | 7
-rw-r--r-- net/netfilter/xt_physdev.c | 5
-rw-r--r-- net/netfilter/xt_set.c | 1
-rw-r--r-- net/netlabel/netlabel_kapi.c | 2
-rw-r--r-- net/nfc/netlink.c | 6
-rw-r--r-- net/openvswitch/datapath.c | 45
-rw-r--r-- net/openvswitch/datapath.h | 2
-rw-r--r-- net/openvswitch/flow.c | 13
-rw-r--r-- net/psample/psample.c | 20
-rw-r--r-- net/rds/af_rds.c | 99
-rw-r--r-- net/rds/bind.c | 4
-rw-r--r-- net/rds/ib_recv.c | 23
-rw-r--r-- net/rds/ib_stats.c | 2
-rw-r--r-- net/rds/rds.h | 11
-rw-r--r-- net/rds/recv.c | 22
-rw-r--r-- net/rds/send.c | 16
-rw-r--r-- net/rds/stats.c | 3
-rw-r--r-- net/rxrpc/ar-internal.h | 4
-rw-r--r-- net/rxrpc/call_object.c | 4
-rw-r--r-- net/rxrpc/insecure.c | 5
-rw-r--r-- net/rxrpc/rxkad.c | 103
-rw-r--r-- net/sched/Kconfig | 13
-rw-r--r-- net/sched/act_ct.c | 2
-rw-r--r-- net/sched/act_mirred.c | 21
-rw-r--r-- net/sched/act_police.c | 27
-rw-r--r-- net/sched/act_sample.c | 27
-rw-r--r-- net/sched/act_vlan.c | 14
-rw-r--r-- net/sched/cls_api.c | 666
-rw-r--r-- net/sched/cls_bpf.c | 38
-rw-r--r-- net/sched/cls_flower.c | 124
-rw-r--r-- net/sched/cls_matchall.c | 33
-rw-r--r-- net/sched/cls_u32.c | 29
-rw-r--r-- net/sched/sch_cbs.c | 2
-rw-r--r-- net/sched/sch_fq_codel.c | 14
-rw-r--r-- net/sched/sch_generic.c | 3
-rw-r--r-- net/sched/sch_taprio.c | 413
-rw-r--r-- net/sctp/associola.c | 9
-rw-r--r-- net/sctp/auth.c | 101
-rw-r--r-- net/sctp/endpointola.c | 65
-rw-r--r-- net/sctp/protocol.c | 3
-rw-r--r-- net/sctp/sm_make_chunk.c | 34
-rw-r--r-- net/sctp/sm_statefuns.c | 6
-rw-r--r-- net/sctp/sm_statetable.c | 28
-rw-r--r-- net/sctp/socket.c | 679
-rw-r--r-- net/sctp/sysctl.c | 7
-rw-r--r-- net/sctp/transport.c | 2
-rw-r--r-- net/sunrpc/auth_gss/gss_krb5_keys.c | 9
-rw-r--r-- net/tipc/bcast.c | 14
-rw-r--r-- net/tipc/bearer.c | 64
-rw-r--r-- net/tipc/bearer.h | 10
-rw-r--r-- net/tipc/core.c | 5
-rw-r--r-- net/tipc/core.h | 3
-rw-r--r-- net/tipc/group.c | 4
-rw-r--r-- net/tipc/link.c | 160
-rw-r--r-- net/tipc/msg.c | 59
-rw-r--r-- net/tipc/msg.h | 28
-rw-r--r-- net/tipc/name_distr.c | 2
-rw-r--r-- net/tipc/node.c | 14
-rw-r--r-- net/tipc/node.h | 6
-rw-r--r-- net/tipc/socket.c | 14
-rw-r--r-- net/tipc/topsrv.c | 2
-rw-r--r-- net/tls/tls_device.c | 228
-rw-r--r-- net/tls/tls_device_fallback.c | 2
-rw-r--r-- net/tls/tls_main.c | 136
-rw-r--r-- net/tls/tls_sw.c | 6
-rw-r--r-- net/vmw_vsock/hyperv_transport.c | 24
-rw-r--r-- net/vmw_vsock/virtio_transport.c | 1
-rw-r--r-- net/vmw_vsock/virtio_transport_common.c | 104
-rw-r--r-- net/wimax/debugfs.c | 42
-rw-r--r-- net/wimax/stack.c | 11
-rw-r--r-- net/wimax/wimax-internal.h | 7
-rw-r--r-- net/wireless/Kconfig | 2
-rw-r--r-- net/wireless/chan.c | 162
-rw-r--r-- net/wireless/core.c | 30
-rw-r--r-- net/wireless/core.h | 4
-rw-r--r-- net/wireless/ibss.c | 16
-rw-r--r-- net/wireless/lib80211_crypt_ccmp.c | 197
-rw-r--r-- net/wireless/nl80211.c | 238
-rw-r--r-- net/wireless/reg.c | 21
-rw-r--r-- net/wireless/scan.c | 269
-rw-r--r-- net/wireless/trace.h | 3
-rw-r--r-- net/wireless/util.c | 57
-rw-r--r-- net/wireless/wext-compat.c | 5
-rw-r--r-- net/xdp/xdp_umem.c | 73
-rw-r--r-- net/xdp/xsk.c | 349
-rw-r--r-- net/xdp/xsk.h | 13
-rw-r--r-- net/xdp/xsk_diag.c | 5
-rw-r--r-- net/xdp/xsk_queue.h | 71
-rw-r--r-- net/xfrm/xfrm_ipcomp.c | 2
267 files changed, 14737 insertions, 3102 deletions
diff --git a/net/Kconfig b/net/Kconfig
index 57f51a279ad6..3101bfcbdd7a 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -430,6 +430,7 @@ config NET_SOCK_MSG
config NET_DEVLINK
bool
default n
+ imply NET_DROP_MONITOR
config PAGE_POOL
bool
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index a8cb6b2e20c1..4072e9d394d6 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -953,8 +953,8 @@ static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset,
if (copy > len)
copy = len;
vaddr = kmap_atomic(skb_frag_page(frag));
- sum = atalk_sum_partial(vaddr + frag->page_offset +
- offset - start, copy, sum);
+ sum = atalk_sum_partial(vaddr + skb_frag_off(frag) +
+ offset - start, copy, sum);
kunmap_atomic(vaddr);
if (!(len -= copy))
diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c
index 4bb418313720..3286f9d527d3 100644
--- a/net/atm/mpoa_caches.c
+++ b/net/atm/mpoa_caches.c
@@ -180,8 +180,7 @@ static int cache_hit(in_cache_entry *entry, struct mpoa_client *mpc)
static void in_cache_put(in_cache_entry *entry)
{
if (refcount_dec_and_test(&entry->use)) {
- memset(entry, 0, sizeof(in_cache_entry));
- kfree(entry);
+ kzfree(entry);
}
}
@@ -416,8 +415,7 @@ static eg_cache_entry *eg_cache_get_by_src_ip(__be32 ipaddr,
static void eg_cache_put(eg_cache_entry *entry)
{
if (refcount_dec_and_test(&entry->use)) {
- memset(entry, 0, sizeof(eg_cache_entry));
- kfree(entry);
+ kzfree(entry);
}
}
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index bd3da9af5ef6..45d8e1d5d033 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -216,9 +216,7 @@ static void pppoatm_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
pvcc->chan.mtu += LLC_LEN;
break;
}
- pr_debug("Couldn't autodetect yet (skb: %02X %02X %02X %02X %02X %02X)\n",
- skb->data[0], skb->data[1], skb->data[2],
- skb->data[3], skb->data[4], skb->data[5]);
+ pr_debug("Couldn't autodetect yet (skb: %6ph)\n", skb->data);
goto error;
case e_vc:
break;
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 22672cb3e25d..64054edc2e3c 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -79,6 +79,7 @@ static int batadv_v_iface_enable(struct batadv_hard_iface *hard_iface)
static void batadv_v_iface_disable(struct batadv_hard_iface *hard_iface)
{
+ batadv_v_ogm_iface_disable(hard_iface);
batadv_v_elp_iface_disable(hard_iface);
}
@@ -1081,6 +1082,12 @@ void batadv_v_hardif_init(struct batadv_hard_iface *hard_iface)
*/
atomic_set(&hard_iface->bat_v.throughput_override, 0);
atomic_set(&hard_iface->bat_v.elp_interval, 500);
+
+ hard_iface->bat_v.aggr_len = 0;
+ skb_queue_head_init(&hard_iface->bat_v.aggr_list);
+ spin_lock_init(&hard_iface->bat_v.aggr_list_lock);
+ INIT_DELAYED_WORK(&hard_iface->bat_v.aggr_wq,
+ batadv_v_ogm_aggr_work);
}
/**
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index bc06e3cdfa84..dc4f7430cb5a 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -17,12 +17,14 @@
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
+#include <linux/lockdep.h>
#include <linux/netdevice.h>
#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
+#include <linux/spinlock.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/types.h>
@@ -77,6 +79,20 @@ struct batadv_orig_node *batadv_v_ogm_orig_get(struct batadv_priv *bat_priv,
}
/**
+ * batadv_v_ogm_start_queue_timer() - restart the OGM aggregation timer
+ * @hard_iface: the interface to use to send the OGM
+ */
+static void batadv_v_ogm_start_queue_timer(struct batadv_hard_iface *hard_iface)
+{
+ unsigned int msecs = BATADV_MAX_AGGREGATION_MS * 1000;
+
+ /* msecs * [0.9, 1.1] */
+ msecs += prandom_u32() % (msecs / 5) - (msecs / 10);
+ queue_delayed_work(batadv_event_workqueue, &hard_iface->bat_v.aggr_wq,
+ msecs_to_jiffies(msecs / 1000));
+}
+
+/**
* batadv_v_ogm_start_timer() - restart the OGM sending timer
* @bat_priv: the bat priv with all the soft interface information
*/
@@ -116,6 +132,130 @@ static void batadv_v_ogm_send_to_if(struct sk_buff *skb,
}
/**
+ * batadv_v_ogm_len() - OGMv2 packet length
+ * @skb: the OGM to check
+ *
+ * Return: Length of the given OGMv2 packet, including tvlv length, excluding
+ * ethernet header length.
+ */
+static unsigned int batadv_v_ogm_len(struct sk_buff *skb)
+{
+ struct batadv_ogm2_packet *ogm_packet;
+
+ ogm_packet = (struct batadv_ogm2_packet *)skb->data;
+ return BATADV_OGM2_HLEN + ntohs(ogm_packet->tvlv_len);
+}
+
+/**
+ * batadv_v_ogm_queue_left() - check if given OGM still fits aggregation queue
+ * @skb: the OGM to check
+ * @hard_iface: the interface to use to send the OGM
+ *
+ * Caller needs to hold the hard_iface->bat_v.aggr_list_lock.
+ *
+ * Return: True, if the given OGMv2 packet still fits, false otherwise.
+ */
+static bool batadv_v_ogm_queue_left(struct sk_buff *skb,
+ struct batadv_hard_iface *hard_iface)
+{
+ unsigned int max = min_t(unsigned int, hard_iface->net_dev->mtu,
+ BATADV_MAX_AGGREGATION_BYTES);
+ unsigned int ogm_len = batadv_v_ogm_len(skb);
+
+ lockdep_assert_held(&hard_iface->bat_v.aggr_list_lock);
+
+ return hard_iface->bat_v.aggr_len + ogm_len <= max;
+}
+
+/**
+ * batadv_v_ogm_aggr_list_free - free all elements in an aggregation queue
+ * @hard_iface: the interface holding the aggregation queue
+ *
+ * Empties the OGMv2 aggregation queue and frees all the skbs it contained.
+ *
+ * Caller needs to hold the hard_iface->bat_v.aggr_list_lock.
+ */
+static void batadv_v_ogm_aggr_list_free(struct batadv_hard_iface *hard_iface)
+{
+ struct sk_buff *skb;
+
+ lockdep_assert_held(&hard_iface->bat_v.aggr_list_lock);
+
+ while ((skb = skb_dequeue(&hard_iface->bat_v.aggr_list)))
+ kfree_skb(skb);
+
+ hard_iface->bat_v.aggr_len = 0;
+}
+
+/**
+ * batadv_v_ogm_aggr_send() - flush & send aggregation queue
+ * @hard_iface: the interface with the aggregation queue to flush
+ *
+ * Aggregates all OGMv2 packets currently in the aggregation queue into a
+ * single OGMv2 packet and transmits this aggregate.
+ *
+ * The aggregation queue is empty after this call.
+ *
+ * Caller needs to hold the hard_iface->bat_v.aggr_list_lock.
+ */
+static void batadv_v_ogm_aggr_send(struct batadv_hard_iface *hard_iface)
+{
+ unsigned int aggr_len = hard_iface->bat_v.aggr_len;
+ struct sk_buff *skb_aggr;
+ unsigned int ogm_len;
+ struct sk_buff *skb;
+
+ lockdep_assert_held(&hard_iface->bat_v.aggr_list_lock);
+
+ if (!aggr_len)
+ return;
+
+ skb_aggr = dev_alloc_skb(aggr_len + ETH_HLEN + NET_IP_ALIGN);
+ if (!skb_aggr) {
+ batadv_v_ogm_aggr_list_free(hard_iface);
+ return;
+ }
+
+ skb_reserve(skb_aggr, ETH_HLEN + NET_IP_ALIGN);
+ skb_reset_network_header(skb_aggr);
+
+ while ((skb = skb_dequeue(&hard_iface->bat_v.aggr_list))) {
+ hard_iface->bat_v.aggr_len -= batadv_v_ogm_len(skb);
+
+ ogm_len = batadv_v_ogm_len(skb);
+ skb_put_data(skb_aggr, skb->data, ogm_len);
+
+ consume_skb(skb);
+ }
+
+ batadv_v_ogm_send_to_if(skb_aggr, hard_iface);
+}
+
+/**
+ * batadv_v_ogm_queue_on_if() - queue a batman ogm on a given interface
+ * @skb: the OGM to queue
+ * @hard_iface: the interface to queue the OGM on
+ */
+static void batadv_v_ogm_queue_on_if(struct sk_buff *skb,
+ struct batadv_hard_iface *hard_iface)
+{
+ struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
+
+ if (!atomic_read(&bat_priv->aggregated_ogms)) {
+ batadv_v_ogm_send_to_if(skb, hard_iface);
+ return;
+ }
+
+ spin_lock_bh(&hard_iface->bat_v.aggr_list_lock);
+ if (!batadv_v_ogm_queue_left(skb, hard_iface))
+ batadv_v_ogm_aggr_send(hard_iface);
+
+ hard_iface->bat_v.aggr_len += batadv_v_ogm_len(skb);
+ skb_queue_tail(&hard_iface->bat_v.aggr_list, skb);
+ spin_unlock_bh(&hard_iface->bat_v.aggr_list_lock);
+}
+
+/**
* batadv_v_ogm_send() - periodic worker broadcasting the own OGM
* @work: work queue item
*/
@@ -210,7 +350,7 @@ static void batadv_v_ogm_send(struct work_struct *work)
break;
}
- batadv_v_ogm_send_to_if(skb_tmp, hard_iface);
+ batadv_v_ogm_queue_on_if(skb_tmp, hard_iface);
batadv_hardif_put(hard_iface);
}
rcu_read_unlock();
@@ -224,6 +364,27 @@ out:
}
/**
+ * batadv_v_ogm_aggr_work() - OGM queue periodic task per interface
+ * @work: work queue item
+ *
+ * Emits aggregated OGM message in regular intervals.
+ */
+void batadv_v_ogm_aggr_work(struct work_struct *work)
+{
+ struct batadv_hard_iface_bat_v *batv;
+ struct batadv_hard_iface *hard_iface;
+
+ batv = container_of(work, struct batadv_hard_iface_bat_v, aggr_wq.work);
+ hard_iface = container_of(batv, struct batadv_hard_iface, bat_v);
+
+ spin_lock_bh(&hard_iface->bat_v.aggr_list_lock);
+ batadv_v_ogm_aggr_send(hard_iface);
+ spin_unlock_bh(&hard_iface->bat_v.aggr_list_lock);
+
+ batadv_v_ogm_start_queue_timer(hard_iface);
+}
+
+/**
* batadv_v_ogm_iface_enable() - prepare an interface for B.A.T.M.A.N. V
* @hard_iface: the interface to prepare
*
@@ -235,12 +396,26 @@ int batadv_v_ogm_iface_enable(struct batadv_hard_iface *hard_iface)
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
+ batadv_v_ogm_start_queue_timer(hard_iface);
batadv_v_ogm_start_timer(bat_priv);
return 0;
}
/**
+ * batadv_v_ogm_iface_disable() - release OGM interface private resources
+ * @hard_iface: interface for which the resources have to be released
+ */
+void batadv_v_ogm_iface_disable(struct batadv_hard_iface *hard_iface)
+{
+ cancel_delayed_work_sync(&hard_iface->bat_v.aggr_wq);
+
+ spin_lock_bh(&hard_iface->bat_v.aggr_list_lock);
+ batadv_v_ogm_aggr_list_free(hard_iface);
+ spin_unlock_bh(&hard_iface->bat_v.aggr_list_lock);
+}
+
+/**
* batadv_v_ogm_primary_iface_set() - set a new primary interface
* @primary_iface: the new primary interface
*/
@@ -382,7 +557,7 @@ static void batadv_v_ogm_forward(struct batadv_priv *bat_priv,
if_outgoing->net_dev->name, ntohl(ogm_forward->throughput),
ogm_forward->ttl, if_incoming->net_dev->name);
- batadv_v_ogm_send_to_if(skb, if_outgoing);
+ batadv_v_ogm_queue_on_if(skb, if_outgoing);
out:
if (orig_ifinfo)
diff --git a/net/batman-adv/bat_v_ogm.h b/net/batman-adv/bat_v_ogm.h
index 2a50df7fc2bf..bf16d040461d 100644
--- a/net/batman-adv/bat_v_ogm.h
+++ b/net/batman-adv/bat_v_ogm.h
@@ -11,10 +11,13 @@
#include <linux/skbuff.h>
#include <linux/types.h>
+#include <linux/workqueue.h>
int batadv_v_ogm_init(struct batadv_priv *bat_priv);
void batadv_v_ogm_free(struct batadv_priv *bat_priv);
+void batadv_v_ogm_aggr_work(struct work_struct *work);
int batadv_v_ogm_iface_enable(struct batadv_hard_iface *hard_iface);
+void batadv_v_ogm_iface_disable(struct batadv_hard_iface *hard_iface);
struct batadv_orig_node *batadv_v_ogm_orig_get(struct batadv_priv *bat_priv,
const u8 *addr);
void batadv_v_ogm_primary_iface_set(struct batadv_hard_iface *primary_iface);
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 3d4c04d87ff3..6967f2e4c3f4 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2019.3"
+#define BATADV_SOURCE_VERSION "2019.4"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index c7a2e77ca1da..a1146cb10919 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -943,10 +943,10 @@ static const struct net_device_ops batadv_netdev_ops = {
static void batadv_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *info)
{
- strlcpy(info->driver, "B.A.T.M.A.N. advanced", sizeof(info->driver));
- strlcpy(info->version, BATADV_SOURCE_VERSION, sizeof(info->version));
- strlcpy(info->fw_version, "N/A", sizeof(info->fw_version));
- strlcpy(info->bus_info, "batman", sizeof(info->bus_info));
+ strscpy(info->driver, "B.A.T.M.A.N. advanced", sizeof(info->driver));
+ strscpy(info->version, BATADV_SOURCE_VERSION, sizeof(info->version));
+ strscpy(info->fw_version, "N/A", sizeof(info->fw_version));
+ strscpy(info->bus_info, "batman", sizeof(info->bus_info));
}
/* Inspired by drivers/net/ethernet/dlink/sundance.c:1702
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index 1efcb97039cd..e5bbc28ed12c 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -1070,7 +1070,7 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj,
dev_hold(net_dev);
INIT_WORK(&store_work->work, batadv_store_mesh_iface_work);
store_work->net_dev = net_dev;
- strlcpy(store_work->soft_iface_name, buff,
+ strscpy(store_work->soft_iface_name, buff,
sizeof(store_work->soft_iface_name));
queue_work(batadv_event_workqueue, &store_work->work);
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 6ae139d74e0f..be7c02aa91e2 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -117,6 +117,18 @@ struct batadv_hard_iface_bat_v {
/** @elp_wq: workqueue used to schedule ELP transmissions */
struct delayed_work elp_wq;
+ /** @aggr_wq: workqueue used to transmit queued OGM packets */
+ struct delayed_work aggr_wq;
+
+ /** @aggr_list: queue for to be aggregated OGM packets */
+ struct sk_buff_head aggr_list;
+
+ /** @aggr_len: size of the OGM aggregate (excluding ethernet header) */
+ unsigned int aggr_len;
+
+ /** @aggr_list_lock: protects aggr_list */
+ spinlock_t aggr_list_lock;
+
/**
* @throughput_override: throughput override to disable link
* auto-detection
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 9d41de1ec90f..bb55d92691b0 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -583,7 +583,7 @@ static const struct net_device_ops netdev_ops = {
.ndo_start_xmit = bt_xmit,
};
-static struct header_ops header_ops = {
+static const struct header_ops header_ops = {
.create = header_create,
};
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 621f1a97d803..7f6a581b5b7e 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -1054,6 +1054,7 @@ void __hci_req_enable_advertising(struct hci_request *req)
struct hci_cp_le_set_adv_param cp;
u8 own_addr_type, enable = 0x01;
bool connectable;
+ u16 adv_min_interval, adv_max_interval;
u32 flags;
flags = get_adv_instance_flags(hdev, hdev->cur_adv_instance);
@@ -1087,16 +1088,30 @@ void __hci_req_enable_advertising(struct hci_request *req)
return;
memset(&cp, 0, sizeof(cp));
- cp.min_interval = cpu_to_le16(hdev->le_adv_min_interval);
- cp.max_interval = cpu_to_le16(hdev->le_adv_max_interval);
- if (connectable)
+ if (connectable) {
cp.type = LE_ADV_IND;
- else if (get_cur_adv_instance_scan_rsp_len(hdev))
- cp.type = LE_ADV_SCAN_IND;
- else
- cp.type = LE_ADV_NONCONN_IND;
+ adv_min_interval = hdev->le_adv_min_interval;
+ adv_max_interval = hdev->le_adv_max_interval;
+ } else {
+ if (get_cur_adv_instance_scan_rsp_len(hdev))
+ cp.type = LE_ADV_SCAN_IND;
+ else
+ cp.type = LE_ADV_NONCONN_IND;
+
+ if (!hci_dev_test_flag(hdev, HCI_DISCOVERABLE) ||
+ hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) {
+ adv_min_interval = DISCOV_LE_FAST_ADV_INT_MIN;
+ adv_max_interval = DISCOV_LE_FAST_ADV_INT_MAX;
+ } else {
+ adv_min_interval = hdev->le_adv_min_interval;
+ adv_max_interval = hdev->le_adv_max_interval;
+ }
+ }
+
+ cp.min_interval = cpu_to_le16(adv_min_interval);
+ cp.max_interval = cpu_to_le16(adv_max_interval);
cp.own_address_type = own_addr_type;
cp.channel_map = hdev->le_adv_channel_map;
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 8d889969ae7e..bef84b95e2c4 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -267,7 +267,7 @@ static int hidp_get_raw_report(struct hid_device *hid,
set_bit(HIDP_WAITING_FOR_RETURN, &session->flags);
data[0] = report_number;
ret = hidp_send_ctrl_message(session, report_type, data, 1);
- if (ret)
+ if (ret < 0)
goto err;
/* Wait for the return of the report. The returned report
@@ -343,7 +343,7 @@ static int hidp_set_raw_report(struct hid_device *hid, unsigned char reportnum,
data[0] = reportnum;
set_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags);
ret = hidp_send_ctrl_message(session, report_type, data, count);
- if (ret)
+ if (ret < 0)
goto err;
/* Wait for the ACK from the device. */
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 150114e33b20..acb7c6d5643f 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -2588,7 +2588,6 @@ static int get_connections(struct sock *sk, struct hci_dev *hdev, void *data,
{
struct mgmt_rp_get_connections *rp;
struct hci_conn *c;
- size_t rp_len;
int err;
u16 i;
@@ -2608,8 +2607,7 @@ static int get_connections(struct sock *sk, struct hci_dev *hdev, void *data,
i++;
}
- rp_len = sizeof(*rp) + (i * sizeof(struct mgmt_addr_info));
- rp = kmalloc(rp_len, GFP_KERNEL);
+ rp = kmalloc(struct_size(rp, addr, i), GFP_KERNEL);
if (!rp) {
err = -ENOMEM;
goto unlock;
@@ -2629,10 +2627,8 @@ static int get_connections(struct sock *sk, struct hci_dev *hdev, void *data,
rp->conn_count = cpu_to_le16(i);
/* Recalculate length in case of filtered SCO connections, etc */
- rp_len = sizeof(*rp) + (i * sizeof(struct mgmt_addr_info));
-
err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CONNECTIONS, 0, rp,
- rp_len);
+ struct_size(rp, addr, i));
kfree(rp);
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 80e6f3a6864d..1153bbcdff72 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -377,6 +377,22 @@ out:
return ret;
}
+static int verify_user_bpf_flow_keys(struct bpf_flow_keys *ctx)
+{
+ /* make sure the fields we don't use are zeroed */
+ if (!range_is_zero(ctx, 0, offsetof(struct bpf_flow_keys, flags)))
+ return -EINVAL;
+
+ /* flags is allowed */
+
+ if (!range_is_zero(ctx, offsetof(struct bpf_flow_keys, flags) +
+ FIELD_SIZEOF(struct bpf_flow_keys, flags),
+ sizeof(struct bpf_flow_keys)))
+ return -EINVAL;
+
+ return 0;
+}
+
int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr)
@@ -384,9 +400,11 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
u32 size = kattr->test.data_size_in;
struct bpf_flow_dissector ctx = {};
u32 repeat = kattr->test.repeat;
+ struct bpf_flow_keys *user_ctx;
struct bpf_flow_keys flow_keys;
u64 time_start, time_spent = 0;
const struct ethhdr *eth;
+ unsigned int flags = 0;
u32 retval, duration;
void *data;
int ret;
@@ -395,9 +413,6 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
return -EINVAL;
- if (kattr->test.ctx_in || kattr->test.ctx_out)
- return -EINVAL;
-
if (size < ETH_HLEN)
return -EINVAL;
@@ -410,6 +425,18 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
if (!repeat)
repeat = 1;
+ user_ctx = bpf_ctx_init(kattr, sizeof(struct bpf_flow_keys));
+ if (IS_ERR(user_ctx)) {
+ kfree(data);
+ return PTR_ERR(user_ctx);
+ }
+ if (user_ctx) {
+ ret = verify_user_bpf_flow_keys(user_ctx);
+ if (ret)
+ goto out;
+ flags = user_ctx->flags;
+ }
+
ctx.flow_keys = &flow_keys;
ctx.data = data;
ctx.data_end = (__u8 *)data + size;
@@ -419,7 +446,7 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
time_start = ktime_get_ns();
for (i = 0; i < repeat; i++) {
retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN,
- size);
+ size, flags);
if (signal_pending(current)) {
preempt_enable();
@@ -450,8 +477,12 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
retval, duration);
+ if (!ret)
+ ret = bpf_ctx_finish(kattr, uattr, user_ctx,
+ sizeof(struct bpf_flow_keys));
out:
+ kfree(user_ctx);
kfree(data);
return ret;
}
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 63f9c08625f0..da5ed4cf9233 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -60,6 +60,8 @@ static void __mdb_entry_fill_flags(struct br_mdb_entry *e, unsigned char flags)
e->flags = 0;
if (flags & MDB_PG_FLAGS_OFFLOAD)
e->flags |= MDB_FLAGS_OFFLOAD;
+ if (flags & MDB_PG_FLAGS_FAST_LEAVE)
+ e->flags |= MDB_FLAGS_FAST_LEAVE;
}
static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip)
@@ -75,6 +77,53 @@ static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip)
#endif
}
+static int __mdb_fill_info(struct sk_buff *skb,
+ struct net_bridge_mdb_entry *mp,
+ struct net_bridge_port_group *p)
+{
+ struct timer_list *mtimer;
+ struct nlattr *nest_ent;
+ struct br_mdb_entry e;
+ u8 flags = 0;
+ int ifindex;
+
+ memset(&e, 0, sizeof(e));
+ if (p) {
+ ifindex = p->port->dev->ifindex;
+ mtimer = &p->timer;
+ flags = p->flags;
+ } else {
+ ifindex = mp->br->dev->ifindex;
+ mtimer = &mp->timer;
+ }
+
+ __mdb_entry_fill_flags(&e, flags);
+ e.ifindex = ifindex;
+ e.vid = mp->addr.vid;
+ if (mp->addr.proto == htons(ETH_P_IP))
+ e.addr.u.ip4 = mp->addr.u.ip4;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (mp->addr.proto == htons(ETH_P_IPV6))
+ e.addr.u.ip6 = mp->addr.u.ip6;
+#endif
+ e.addr.proto = mp->addr.proto;
+ nest_ent = nla_nest_start_noflag(skb,
+ MDBA_MDB_ENTRY_INFO);
+ if (!nest_ent)
+ return -EMSGSIZE;
+
+ if (nla_put_nohdr(skb, sizeof(e), &e) ||
+ nla_put_u32(skb,
+ MDBA_MDB_EATTR_TIMER,
+ br_timer_value(mtimer))) {
+ nla_nest_cancel(skb, nest_ent);
+ return -EMSGSIZE;
+ }
+ nla_nest_end(skb, nest_ent);
+
+ return 0;
+}
+
static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *dev)
{
@@ -93,7 +142,6 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) {
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
- struct net_bridge_port *port;
if (idx < s_idx)
goto skip;
@@ -104,43 +152,24 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
break;
}
+ if (mp->host_joined) {
+ err = __mdb_fill_info(skb, mp, NULL);
+ if (err) {
+ nla_nest_cancel(skb, nest2);
+ break;
+ }
+ }
+
for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL;
pp = &p->next) {
- struct nlattr *nest_ent;
- struct br_mdb_entry e;
-
- port = p->port;
- if (!port)
+ if (!p->port)
continue;
- memset(&e, 0, sizeof(e));
- e.ifindex = port->dev->ifindex;
- e.vid = p->addr.vid;
- __mdb_entry_fill_flags(&e, p->flags);
- if (p->addr.proto == htons(ETH_P_IP))
- e.addr.u.ip4 = p->addr.u.ip4;
-#if IS_ENABLED(CONFIG_IPV6)
- if (p->addr.proto == htons(ETH_P_IPV6))
- e.addr.u.ip6 = p->addr.u.ip6;
-#endif
- e.addr.proto = p->addr.proto;
- nest_ent = nla_nest_start_noflag(skb,
- MDBA_MDB_ENTRY_INFO);
- if (!nest_ent) {
- nla_nest_cancel(skb, nest2);
- err = -EMSGSIZE;
- goto out;
- }
- if (nla_put_nohdr(skb, sizeof(e), &e) ||
- nla_put_u32(skb,
- MDBA_MDB_EATTR_TIMER,
- br_timer_value(&p->timer))) {
- nla_nest_cancel(skb, nest_ent);
+ err = __mdb_fill_info(skb, mp, p);
+ if (err) {
nla_nest_cancel(skb, nest2);
- err = -EMSGSIZE;
goto out;
}
- nla_nest_end(skb, nest_ent);
}
nla_nest_end(skb, nest2);
skip:
@@ -587,6 +616,19 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
return err;
}
+ /* host join */
+ if (!port) {
+ /* don't allow any flags for host-joined groups */
+ if (state)
+ return -EINVAL;
+ if (mp->host_joined)
+ return -EEXIST;
+
+ br_multicast_host_join(mp, false);
+
+ return 0;
+ }
+
for (pp = &mp->ports;
(p = mlock_dereference(*pp, br)) != NULL;
pp = &p->next) {
@@ -611,19 +653,21 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,
{
struct br_ip ip;
struct net_device *dev;
- struct net_bridge_port *p;
+ struct net_bridge_port *p = NULL;
int ret;
if (!netif_running(br->dev) || !br_opt_get(br, BROPT_MULTICAST_ENABLED))
return -EINVAL;
- dev = __dev_get_by_index(net, entry->ifindex);
- if (!dev)
- return -ENODEV;
+ if (entry->ifindex != br->dev->ifindex) {
+ dev = __dev_get_by_index(net, entry->ifindex);
+ if (!dev)
+ return -ENODEV;
- p = br_port_get_rtnl(dev);
- if (!p || p->br != br || p->state == BR_STATE_DISABLED)
- return -EINVAL;
+ p = br_port_get_rtnl(dev);
+ if (!p || p->br != br || p->state == BR_STATE_DISABLED)
+ return -EINVAL;
+ }
__mdb_entry_to_br_ip(entry, &ip);
@@ -638,9 +682,9 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
{
struct net *net = sock_net(skb->sk);
struct net_bridge_vlan_group *vg;
+ struct net_bridge_port *p = NULL;
struct net_device *dev, *pdev;
struct br_mdb_entry *entry;
- struct net_bridge_port *p;
struct net_bridge_vlan *v;
struct net_bridge *br;
int err;
@@ -651,18 +695,22 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
br = netdev_priv(dev);
+ if (entry->ifindex != br->dev->ifindex) {
+ pdev = __dev_get_by_index(net, entry->ifindex);
+ if (!pdev)
+ return -ENODEV;
+
+ p = br_port_get_rtnl(pdev);
+ if (!p || p->br != br || p->state == BR_STATE_DISABLED)
+ return -EINVAL;
+ vg = nbp_vlan_group(p);
+ } else {
+ vg = br_vlan_group(br);
+ }
+
/* If vlan filtering is enabled and VLAN is not specified
* install mdb entry on all vlans configured on the port.
*/
- pdev = __dev_get_by_index(net, entry->ifindex);
- if (!pdev)
- return -ENODEV;
-
- p = br_port_get_rtnl(pdev);
- if (!p || p->br != br || p->state == BR_STATE_DISABLED)
- return -EINVAL;
-
- vg = nbp_vlan_group(p);
if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) {
list_for_each_entry(v, &vg->vlan_list, vlist) {
entry->vid = v->vid;
@@ -698,6 +746,15 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
if (!mp)
goto unlock;
+ /* host leave */
+ if (entry->ifindex == mp->br->dev->ifindex && mp->host_joined) {
+ br_multicast_host_leave(mp, false);
+ err = 0;
+ if (!mp->ports && netif_running(br->dev))
+ mod_timer(&mp->timer, jiffies);
+ goto unlock;
+ }
+
for (pp = &mp->ports;
(p = mlock_dereference(*pp, br)) != NULL;
pp = &p->next) {
@@ -730,9 +787,9 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
{
struct net *net = sock_net(skb->sk);
struct net_bridge_vlan_group *vg;
+ struct net_bridge_port *p = NULL;
struct net_device *dev, *pdev;
struct br_mdb_entry *entry;
- struct net_bridge_port *p;
struct net_bridge_vlan *v;
struct net_bridge *br;
int err;
@@ -743,18 +800,22 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
br = netdev_priv(dev);
+ if (entry->ifindex != br->dev->ifindex) {
+ pdev = __dev_get_by_index(net, entry->ifindex);
+ if (!pdev)
+ return -ENODEV;
+
+ p = br_port_get_rtnl(pdev);
+ if (!p || p->br != br || p->state == BR_STATE_DISABLED)
+ return -EINVAL;
+ vg = nbp_vlan_group(p);
+ } else {
+ vg = br_vlan_group(br);
+ }
+
/* If vlan filtering is enabled and VLAN is not specified
* delete mdb entry on all vlans configured on the port.
*/
- pdev = __dev_get_by_index(net, entry->ifindex);
- if (!pdev)
- return -ENODEV;
-
- p = br_port_get_rtnl(pdev);
- if (!p || p->br != br || p->state == BR_STATE_DISABLED)
- return -EINVAL;
-
- vg = nbp_vlan_group(p);
if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) {
list_for_each_entry(v, &vg->vlan_list, vlist) {
entry->vid = v->vid;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index f8cac3702712..ad12fe3fca8c 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -148,8 +148,7 @@ static void br_multicast_group_expired(struct timer_list *t)
if (!netif_running(br->dev) || timer_pending(&mp->timer))
goto out;
- mp->host_joined = false;
- br_mdb_notify(br->dev, NULL, &mp->addr, RTM_DELMDB, 0);
+ br_multicast_host_leave(mp, true);
if (mp->ports)
goto out;
@@ -512,6 +511,27 @@ static bool br_port_group_equal(struct net_bridge_port_group *p,
return ether_addr_equal(src, p->eth_addr);
}
+void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify)
+{
+ if (!mp->host_joined) {
+ mp->host_joined = true;
+ if (notify)
+ br_mdb_notify(mp->br->dev, NULL, &mp->addr,
+ RTM_NEWMDB, 0);
+ }
+ mod_timer(&mp->timer, jiffies + mp->br->multicast_membership_interval);
+}
+
+void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify)
+{
+ if (!mp->host_joined)
+ return;
+
+ mp->host_joined = false;
+ if (notify)
+ br_mdb_notify(mp->br->dev, NULL, &mp->addr, RTM_DELMDB, 0);
+}
+
static int br_multicast_add_group(struct net_bridge *br,
struct net_bridge_port *port,
struct br_ip *group,
@@ -534,11 +554,7 @@ static int br_multicast_add_group(struct net_bridge *br,
goto err;
if (!port) {
- if (!mp->host_joined) {
- mp->host_joined = true;
- br_mdb_notify(br->dev, NULL, &mp->addr, RTM_NEWMDB, 0);
- }
- mod_timer(&mp->timer, now + br->multicast_membership_interval);
+ br_multicast_host_join(mp, true);
goto out;
}
@@ -1396,7 +1412,7 @@ br_multicast_leave_group(struct net_bridge *br,
del_timer(&p->timer);
kfree_rcu(p, rcu);
br_mdb_notify(br->dev, port, group, RTM_DELMDB,
- p->flags);
+ p->flags | MDB_PG_FLAGS_FAST_LEAVE);
if (!mp->ports && !mp->host_joined &&
netif_running(br->dev))
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 646504db0220..ce2ab14ee605 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -199,6 +199,7 @@ struct net_bridge_fdb_entry {
#define MDB_PG_FLAGS_PERMANENT BIT(0)
#define MDB_PG_FLAGS_OFFLOAD BIT(1)
+#define MDB_PG_FLAGS_FAST_LEAVE BIT(2)
struct net_bridge_port_group {
struct net_bridge_port *port;
@@ -701,6 +702,8 @@ void br_multicast_get_stats(const struct net_bridge *br,
struct br_mcast_stats *dest);
void br_mdb_init(void);
void br_mdb_uninit(void);
+void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify);
+void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify);
#define mlock_dereference(X, br) \
rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index f5b2aeebbfe9..bb98984cd27d 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -1281,6 +1281,8 @@ int br_vlan_get_info(const struct net_device *dev, u16 vid,
p_vinfo->vid = vid;
p_vinfo->flags = v->flags;
+ if (vid == br_get_pvid(vg))
+ p_vinfo->flags |= BRIDGE_VLAN_INFO_PVID;
return 0;
}
EXPORT_SYMBOL_GPL(br_vlan_get_info);
diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c
index 2c8fe24400e5..68c2519bdc52 100644
--- a/net/bridge/netfilter/ebt_802_3.c
+++ b/net/bridge/netfilter/ebt_802_3.c
@@ -11,7 +11,13 @@
#include <linux/module.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_bridge/ebtables.h>
-#include <linux/netfilter_bridge/ebt_802_3.h>
+#include <linux/skbuff.h>
+#include <uapi/linux/netfilter_bridge/ebt_802_3.h>
+
+static struct ebt_802_3_hdr *ebt_802_3_hdr(const struct sk_buff *skb)
+{
+ return (struct ebt_802_3_hdr *)skb_mac_header(skb);
+}
static bool
ebt_802_3_mt(const struct sk_buff *skb, struct xt_action_param *par)
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
index 4f5444d2a526..8842798c29e6 100644
--- a/net/bridge/netfilter/nf_conntrack_bridge.c
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -17,7 +17,6 @@
#include <net/netfilter/nf_conntrack_bridge.h>
#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#include <net/netfilter/nf_tables.h>
#include "../br_private.h"
@@ -27,9 +26,9 @@
*/
static int nf_br_ip_fragment(struct net *net, struct sock *sk,
struct sk_buff *skb,
- struct nf_ct_bridge_frag_data *data,
+ struct nf_bridge_frag_data *data,
int (*output)(struct net *, struct sock *sk,
- const struct nf_ct_bridge_frag_data *data,
+ const struct nf_bridge_frag_data *data,
struct sk_buff *))
{
int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
@@ -279,7 +278,7 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
}
static void nf_ct_bridge_frag_save(struct sk_buff *skb,
- struct nf_ct_bridge_frag_data *data)
+ struct nf_bridge_frag_data *data)
{
if (skb_vlan_tag_present(skb)) {
data->vlan_present = true;
@@ -294,10 +293,10 @@ static void nf_ct_bridge_frag_save(struct sk_buff *skb,
static unsigned int
nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state,
int (*output)(struct net *, struct sock *sk,
- const struct nf_ct_bridge_frag_data *data,
+ const struct nf_bridge_frag_data *data,
struct sk_buff *))
{
- struct nf_ct_bridge_frag_data data;
+ struct nf_bridge_frag_data data;
if (!BR_INPUT_SKB_CB(skb)->frag_max_size)
return NF_ACCEPT;
@@ -320,7 +319,7 @@ nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state,
/* Actually only slow path refragmentation needs this. */
static int nf_ct_bridge_frag_restore(struct sk_buff *skb,
- const struct nf_ct_bridge_frag_data *data)
+ const struct nf_bridge_frag_data *data)
{
int err;
@@ -341,7 +340,7 @@ static int nf_ct_bridge_frag_restore(struct sk_buff *skb,
}
static int nf_ct_bridge_refrag_post(struct net *net, struct sock *sk,
- const struct nf_ct_bridge_frag_data *data,
+ const struct nf_bridge_frag_data *data,
struct sk_buff *skb)
{
int err;
diff --git a/net/can/Kconfig b/net/can/Kconfig
index 0f9fe846ddef..d77042752457 100644
--- a/net/can/Kconfig
+++ b/net/can/Kconfig
@@ -8,11 +8,12 @@ menuconfig CAN
tristate "CAN bus subsystem support"
---help---
Controller Area Network (CAN) is a slow (up to 1Mbit/s) serial
- communications protocol which was developed by Bosch in
- 1991, mainly for automotive, but now widely used in marine
- (NMEA2000), industrial, and medical applications.
- More information on the CAN network protocol family PF_CAN
- is contained in <Documentation/networking/can.rst>.
+ communications protocol. Development of the CAN bus started in
+ 1983 at Robert Bosch GmbH, and the protocol was officially
+ released in 1986. The CAN bus was originally mainly for automotive,
+ but is now widely used in marine (NMEA2000), industrial, and medical
+ applications. More information on the CAN network protocol family
+ PF_CAN is contained in <Documentation/networking/can.rst>.
If you want CAN support you should say Y here and also to the
specific driver for your controller(s) below.
@@ -52,6 +53,8 @@ config CAN_GW
They can be modified with AND/OR/XOR/SET operations as configured
by the netlink configuration interface known e.g. from iptables.
+source "net/can/j1939/Kconfig"
+
source "drivers/net/can/Kconfig"
endif
diff --git a/net/can/Makefile b/net/can/Makefile
index 1242bbbfe57f..08bd217fc051 100644
--- a/net/can/Makefile
+++ b/net/can/Makefile
@@ -15,3 +15,5 @@ can-bcm-y := bcm.o
obj-$(CONFIG_CAN_GW) += can-gw.o
can-gw-y := gw.o
+
+obj-$(CONFIG_CAN_J1939) += j1939/
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 80281ef2ccbd..5518a7d9eed9 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -1,5 +1,5 @@
-/*
- * af_can.c - Protocol family CAN core module
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/* af_can.c - Protocol family CAN core module
* (used by different CAN protocol modules)
*
* Copyright (c) 2002-2017 Volkswagen Group Electronic Research
@@ -58,6 +58,7 @@
#include <linux/can.h>
#include <linux/can/core.h>
#include <linux/can/skb.h>
+#include <linux/can/can-ml.h>
#include <linux/ratelimit.h>
#include <net/net_namespace.h>
#include <net/sock.h>
@@ -83,18 +84,7 @@ static DEFINE_MUTEX(proto_tab_lock);
static atomic_t skbcounter = ATOMIC_INIT(0);
-/*
- * af_can socket functions
- */
-
-int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
-{
- switch (cmd) {
- default:
- return -ENOIOCTLCMD;
- }
-}
-EXPORT_SYMBOL(can_ioctl);
+/* af_can socket functions */
static void can_sock_destruct(struct sock *sk)
{
@@ -140,14 +130,13 @@ static int can_create(struct net *net, struct socket *sock, int protocol,
err = request_module("can-proto-%d", protocol);
- /*
- * In case of error we only print a message but don't
+ /* In case of error we only print a message but don't
* return the error code immediately. Below we will
* return -EPROTONOSUPPORT
*/
if (err)
- printk_ratelimited(KERN_ERR "can: request_module "
- "(can-proto-%d) failed.\n", protocol);
+ pr_err_ratelimited("can: request_module (can-proto-%d) failed.\n",
+ protocol);
cp = can_get_proto(protocol);
}
@@ -188,9 +177,7 @@ static int can_create(struct net *net, struct socket *sock, int protocol,
return err;
}
-/*
- * af_can tx path
- */
+/* af_can tx path */
/**
* can_send - transmit a CAN frame (optional with local loopback)
@@ -212,7 +199,7 @@ int can_send(struct sk_buff *skb, int loop)
{
struct sk_buff *newskb = NULL;
struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
- struct s_stats *can_stats = dev_net(skb->dev)->can.can_stats;
+ struct can_pkg_stats *pkg_stats = dev_net(skb->dev)->can.pkg_stats;
int err = -EINVAL;
if (skb->len == CAN_MTU) {
@@ -223,11 +210,11 @@ int can_send(struct sk_buff *skb, int loop)
skb->protocol = htons(ETH_P_CANFD);
if (unlikely(cfd->len > CANFD_MAX_DLEN))
goto inval_skb;
- } else
+ } else {
goto inval_skb;
+ }
- /*
- * Make sure the CAN frame can pass the selected CAN netdevice.
+ /* Make sure the CAN frame can pass the selected CAN netdevice.
* As structs can_frame and canfd_frame are similar, we can provide
* CAN FD frames to legacy CAN drivers as long as the length is <= 8
*/
@@ -258,8 +245,7 @@ int can_send(struct sk_buff *skb, int loop)
/* indication for the CAN driver: do loopback */
skb->pkt_type = PACKET_LOOPBACK;
- /*
- * The reference to the originating sock may be required
+ /* The reference to the originating sock may be required
* by the receiving socket to check whether the frame is
* its own. Example: can_raw sockopt CAN_RAW_RECV_OWN_MSGS
* Therefore we have to ensure that skb->sk remains the
@@ -268,8 +254,7 @@ int can_send(struct sk_buff *skb, int loop)
*/
if (!(skb->dev->flags & IFF_ECHO)) {
- /*
- * If the interface is not capable to do loopback
+ /* If the interface is not capable to do loopback
* itself, we do it here.
*/
newskb = skb_clone(skb, GFP_ATOMIC);
@@ -301,8 +286,8 @@ int can_send(struct sk_buff *skb, int loop)
netif_rx_ni(newskb);
/* update statistics */
- can_stats->tx_frames++;
- can_stats->tx_frames_delta++;
+ pkg_stats->tx_frames++;
+ pkg_stats->tx_frames_delta++;
return 0;
@@ -312,17 +297,17 @@ inval_skb:
}
EXPORT_SYMBOL(can_send);
-/*
- * af_can rx path
- */
+/* af_can rx path */
-static struct can_dev_rcv_lists *find_dev_rcv_lists(struct net *net,
- struct net_device *dev)
+static struct can_dev_rcv_lists *can_dev_rcv_lists_find(struct net *net,
+ struct net_device *dev)
{
- if (!dev)
- return net->can.can_rx_alldev_list;
- else
- return (struct can_dev_rcv_lists *)dev->ml_priv;
+ if (dev) {
+ struct can_ml_priv *ml_priv = dev->ml_priv;
+ return &ml_priv->dev_rcv_lists;
+ } else {
+ return net->can.rx_alldev_list;
+ }
}
/**
@@ -349,7 +334,7 @@ static unsigned int effhash(canid_t can_id)
}
/**
- * find_rcv_list - determine optimal filterlist inside device filter struct
+ * can_rcv_list_find - determine optimal filterlist inside device filter struct
* @can_id: pointer to CAN identifier of a given can_filter
* @mask: pointer to CAN mask of a given can_filter
* @d: pointer to the device filter struct
@@ -375,8 +360,8 @@ static unsigned int effhash(canid_t can_id)
* Constistency checked mask.
* Reduced can_id to have a preprocessed filter compare value.
*/
-static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask,
- struct can_dev_rcv_lists *d)
+static struct hlist_head *can_rcv_list_find(canid_t *can_id, canid_t *mask,
+ struct can_dev_rcv_lists *dev_rcv_lists)
{
canid_t inv = *can_id & CAN_INV_FILTER; /* save flag before masking */
@@ -384,7 +369,7 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask,
if (*mask & CAN_ERR_FLAG) {
/* clear CAN_ERR_FLAG in filter entry */
*mask &= CAN_ERR_MASK;
- return &d->rx[RX_ERR];
+ return &dev_rcv_lists->rx[RX_ERR];
}
/* with cleared CAN_ERR_FLAG we have a simple mask/value filterpair */
@@ -400,27 +385,26 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask,
/* inverse can_id/can_mask filter */
if (inv)
- return &d->rx[RX_INV];
+ return &dev_rcv_lists->rx[RX_INV];
/* mask == 0 => no condition testing at receive time */
if (!(*mask))
- return &d->rx[RX_ALL];
+ return &dev_rcv_lists->rx[RX_ALL];
/* extra filterlists for the subscription of a single non-RTR can_id */
if (((*mask & CAN_EFF_RTR_FLAGS) == CAN_EFF_RTR_FLAGS) &&
!(*can_id & CAN_RTR_FLAG)) {
-
if (*can_id & CAN_EFF_FLAG) {
if (*mask == (CAN_EFF_MASK | CAN_EFF_RTR_FLAGS))
- return &d->rx_eff[effhash(*can_id)];
+ return &dev_rcv_lists->rx_eff[effhash(*can_id)];
} else {
if (*mask == (CAN_SFF_MASK | CAN_EFF_RTR_FLAGS))
- return &d->rx_sff[*can_id];
+ return &dev_rcv_lists->rx_sff[*can_id];
}
}
/* default: filter via can_id/can_mask */
- return &d->rx[RX_FIL];
+ return &dev_rcv_lists->rx[RX_FIL];
}
/**
@@ -457,10 +441,10 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id,
canid_t mask, void (*func)(struct sk_buff *, void *),
void *data, char *ident, struct sock *sk)
{
- struct receiver *r;
- struct hlist_head *rl;
- struct can_dev_rcv_lists *d;
- struct s_pstats *can_pstats = net->can.can_pstats;
+ struct receiver *rcv;
+ struct hlist_head *rcv_list;
+ struct can_dev_rcv_lists *dev_rcv_lists;
+ struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats;
int err = 0;
/* insert new receiver (dev,canid,mask) -> (func,data) */
@@ -471,50 +455,42 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id,
if (dev && !net_eq(net, dev_net(dev)))
return -ENODEV;
- r = kmem_cache_alloc(rcv_cache, GFP_KERNEL);
- if (!r)
+ rcv = kmem_cache_alloc(rcv_cache, GFP_KERNEL);
+ if (!rcv)
return -ENOMEM;
- spin_lock(&net->can.can_rcvlists_lock);
+ spin_lock_bh(&net->can.rcvlists_lock);
- d = find_dev_rcv_lists(net, dev);
- if (d) {
- rl = find_rcv_list(&can_id, &mask, d);
+ dev_rcv_lists = can_dev_rcv_lists_find(net, dev);
+ rcv_list = can_rcv_list_find(&can_id, &mask, dev_rcv_lists);
- r->can_id = can_id;
- r->mask = mask;
- r->matches = 0;
- r->func = func;
- r->data = data;
- r->ident = ident;
- r->sk = sk;
+ rcv->can_id = can_id;
+ rcv->mask = mask;
+ rcv->matches = 0;
+ rcv->func = func;
+ rcv->data = data;
+ rcv->ident = ident;
+ rcv->sk = sk;
- hlist_add_head_rcu(&r->list, rl);
- d->entries++;
+ hlist_add_head_rcu(&rcv->list, rcv_list);
+ dev_rcv_lists->entries++;
- can_pstats->rcv_entries++;
- if (can_pstats->rcv_entries_max < can_pstats->rcv_entries)
- can_pstats->rcv_entries_max = can_pstats->rcv_entries;
- } else {
- kmem_cache_free(rcv_cache, r);
- err = -ENODEV;
- }
-
- spin_unlock(&net->can.can_rcvlists_lock);
+ rcv_lists_stats->rcv_entries++;
+ rcv_lists_stats->rcv_entries_max = max(rcv_lists_stats->rcv_entries_max,
+ rcv_lists_stats->rcv_entries);
+ spin_unlock_bh(&net->can.rcvlists_lock);
return err;
}
EXPORT_SYMBOL(can_rx_register);
-/*
- * can_rx_delete_receiver - rcu callback for single receiver entry removal
- */
+/* can_rx_delete_receiver - rcu callback for single receiver entry removal */
static void can_rx_delete_receiver(struct rcu_head *rp)
{
- struct receiver *r = container_of(rp, struct receiver, rcu);
- struct sock *sk = r->sk;
+ struct receiver *rcv = container_of(rp, struct receiver, rcu);
+ struct sock *sk = rcv->sk;
- kmem_cache_free(rcv_cache, r);
+ kmem_cache_free(rcv_cache, rcv);
if (sk)
sock_put(sk);
}
@@ -534,10 +510,10 @@ void can_rx_unregister(struct net *net, struct net_device *dev, canid_t can_id,
canid_t mask, void (*func)(struct sk_buff *, void *),
void *data)
{
- struct receiver *r = NULL;
- struct hlist_head *rl;
- struct s_pstats *can_pstats = net->can.can_pstats;
- struct can_dev_rcv_lists *d;
+ struct receiver *rcv = NULL;
+ struct hlist_head *rcv_list;
+ struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats;
+ struct can_dev_rcv_lists *dev_rcv_lists;
if (dev && dev->type != ARPHRD_CAN)
return;
@@ -545,86 +521,69 @@ void can_rx_unregister(struct net *net, struct net_device *dev, canid_t can_id,
if (dev && !net_eq(net, dev_net(dev)))
return;
- spin_lock(&net->can.can_rcvlists_lock);
+ spin_lock_bh(&net->can.rcvlists_lock);
- d = find_dev_rcv_lists(net, dev);
- if (!d) {
- pr_err("BUG: receive list not found for "
- "dev %s, id %03X, mask %03X\n",
- DNAME(dev), can_id, mask);
- goto out;
- }
+ dev_rcv_lists = can_dev_rcv_lists_find(net, dev);
+ rcv_list = can_rcv_list_find(&can_id, &mask, dev_rcv_lists);
- rl = find_rcv_list(&can_id, &mask, d);
-
- /*
- * Search the receiver list for the item to delete. This should
+ /* Search the receiver list for the item to delete. This should
* exist, since no receiver may be unregistered that hasn't
* been registered before.
*/
-
- hlist_for_each_entry_rcu(r, rl, list) {
- if (r->can_id == can_id && r->mask == mask &&
- r->func == func && r->data == data)
+ hlist_for_each_entry_rcu(rcv, rcv_list, list) {
+ if (rcv->can_id == can_id && rcv->mask == mask &&
+ rcv->func == func && rcv->data == data)
break;
}
- /*
- * Check for bugs in CAN protocol implementations using af_can.c:
- * 'r' will be NULL if no matching list item was found for removal.
+ /* Check for bugs in CAN protocol implementations using af_can.c:
+ * 'rcv' will be NULL if no matching list item was found for removal.
*/
-
- if (!r) {
- WARN(1, "BUG: receive list entry not found for dev %s, "
- "id %03X, mask %03X\n", DNAME(dev), can_id, mask);
+ if (!rcv) {
+ WARN(1, "BUG: receive list entry not found for dev %s, id %03X, mask %03X\n",
+ DNAME(dev), can_id, mask);
goto out;
}
- hlist_del_rcu(&r->list);
- d->entries--;
+ hlist_del_rcu(&rcv->list);
+ dev_rcv_lists->entries--;
- if (can_pstats->rcv_entries > 0)
- can_pstats->rcv_entries--;
-
- /* remove device structure requested by NETDEV_UNREGISTER */
- if (d->remove_on_zero_entries && !d->entries) {
- kfree(d);
- dev->ml_priv = NULL;
- }
+ if (rcv_lists_stats->rcv_entries > 0)
+ rcv_lists_stats->rcv_entries--;
out:
- spin_unlock(&net->can.can_rcvlists_lock);
+ spin_unlock_bh(&net->can.rcvlists_lock);
/* schedule the receiver item for deletion */
- if (r) {
- if (r->sk)
- sock_hold(r->sk);
- call_rcu(&r->rcu, can_rx_delete_receiver);
+ if (rcv) {
+ if (rcv->sk)
+ sock_hold(rcv->sk);
+ call_rcu(&rcv->rcu, can_rx_delete_receiver);
}
}
EXPORT_SYMBOL(can_rx_unregister);
-static inline void deliver(struct sk_buff *skb, struct receiver *r)
+static inline void deliver(struct sk_buff *skb, struct receiver *rcv)
{
- r->func(skb, r->data);
- r->matches++;
+ rcv->func(skb, rcv->data);
+ rcv->matches++;
}
-static int can_rcv_filter(struct can_dev_rcv_lists *d, struct sk_buff *skb)
+static int can_rcv_filter(struct can_dev_rcv_lists *dev_rcv_lists, struct sk_buff *skb)
{
- struct receiver *r;
+ struct receiver *rcv;
int matches = 0;
struct can_frame *cf = (struct can_frame *)skb->data;
canid_t can_id = cf->can_id;
- if (d->entries == 0)
+ if (dev_rcv_lists->entries == 0)
return 0;
if (can_id & CAN_ERR_FLAG) {
/* check for error message frame entries only */
- hlist_for_each_entry_rcu(r, &d->rx[RX_ERR], list) {
- if (can_id & r->mask) {
- deliver(skb, r);
+ hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx[RX_ERR], list) {
+ if (can_id & rcv->mask) {
+ deliver(skb, rcv);
matches++;
}
}
@@ -632,23 +591,23 @@ static int can_rcv_filter(struct can_dev_rcv_lists *d, struct sk_buff *skb)
}
/* check for unfiltered entries */
- hlist_for_each_entry_rcu(r, &d->rx[RX_ALL], list) {
- deliver(skb, r);
+ hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx[RX_ALL], list) {
+ deliver(skb, rcv);
matches++;
}
/* check for can_id/mask entries */
- hlist_for_each_entry_rcu(r, &d->rx[RX_FIL], list) {
- if ((can_id & r->mask) == r->can_id) {
- deliver(skb, r);
+ hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx[RX_FIL], list) {
+ if ((can_id & rcv->mask) == rcv->can_id) {
+ deliver(skb, rcv);
matches++;
}
}
/* check for inverted can_id/mask entries */
- hlist_for_each_entry_rcu(r, &d->rx[RX_INV], list) {
- if ((can_id & r->mask) != r->can_id) {
- deliver(skb, r);
+ hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx[RX_INV], list) {
+ if ((can_id & rcv->mask) != rcv->can_id) {
+ deliver(skb, rcv);
matches++;
}
}
@@ -658,16 +617,16 @@ static int can_rcv_filter(struct can_dev_rcv_lists *d, struct sk_buff *skb)
return matches;
if (can_id & CAN_EFF_FLAG) {
- hlist_for_each_entry_rcu(r, &d->rx_eff[effhash(can_id)], list) {
- if (r->can_id == can_id) {
- deliver(skb, r);
+ hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx_eff[effhash(can_id)], list) {
+ if (rcv->can_id == can_id) {
+ deliver(skb, rcv);
matches++;
}
}
} else {
can_id &= CAN_SFF_MASK;
- hlist_for_each_entry_rcu(r, &d->rx_sff[can_id], list) {
- deliver(skb, r);
+ hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx_sff[can_id], list) {
+ deliver(skb, rcv);
matches++;
}
}
@@ -677,14 +636,14 @@ static int can_rcv_filter(struct can_dev_rcv_lists *d, struct sk_buff *skb)
static void can_receive(struct sk_buff *skb, struct net_device *dev)
{
- struct can_dev_rcv_lists *d;
+ struct can_dev_rcv_lists *dev_rcv_lists;
struct net *net = dev_net(dev);
- struct s_stats *can_stats = net->can.can_stats;
+ struct can_pkg_stats *pkg_stats = net->can.pkg_stats;
int matches;
/* update statistics */
- can_stats->rx_frames++;
- can_stats->rx_frames_delta++;
+ pkg_stats->rx_frames++;
+ pkg_stats->rx_frames_delta++;
/* create non-zero unique skb identifier together with *skb */
while (!(can_skb_prv(skb)->skbcnt))
@@ -693,12 +652,11 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev)
rcu_read_lock();
/* deliver the packet to sockets listening on all devices */
- matches = can_rcv_filter(net->can.can_rx_alldev_list, skb);
+ matches = can_rcv_filter(net->can.rx_alldev_list, skb);
/* find receive list for this device */
- d = find_dev_rcv_lists(net, dev);
- if (d)
- matches += can_rcv_filter(d, skb);
+ dev_rcv_lists = can_dev_rcv_lists_find(net, dev);
+ matches += can_rcv_filter(dev_rcv_lists, skb);
rcu_read_unlock();
@@ -706,8 +664,8 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev)
consume_skb(skb);
if (matches > 0) {
- can_stats->matches++;
- can_stats->matches_delta++;
+ pkg_stats->matches++;
+ pkg_stats->matches_delta++;
}
}
@@ -729,7 +687,7 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev,
}
static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev)
+ struct packet_type *pt, struct net_device *orig_dev)
{
struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
@@ -745,9 +703,7 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
return NET_RX_SUCCESS;
}
-/*
- * af_can protocol functions
- */
+/* af_can protocol functions */
/**
* can_proto_register - register CAN transport protocol
@@ -778,8 +734,9 @@ int can_proto_register(const struct can_proto *cp)
if (rcu_access_pointer(proto_tab[proto])) {
pr_err("can: protocol %d already registered\n", proto);
err = -EBUSY;
- } else
+ } else {
RCU_INIT_POINTER(proto_tab[proto], cp);
+ }
mutex_unlock(&proto_tab_lock);
@@ -809,48 +766,19 @@ void can_proto_unregister(const struct can_proto *cp)
}
EXPORT_SYMBOL(can_proto_unregister);
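For reference, a transport protocol hooks into these two helpers roughly as sketched below; the my_* names are placeholders, and the real users (raw, bcm, j1939) each provide their own proto_ops and proto:

static const struct can_proto my_can_proto = {
	.type     = SOCK_DGRAM,
	.protocol = CAN_J1939,		/* one of the CAN_* protocol numbers */
	.ops      = &my_proto_ops,	/* struct proto_ops, placeholder */
	.prot     = &my_proto,		/* struct proto, placeholder */
};

static int __init my_module_init(void)
{
	return can_proto_register(&my_can_proto);
}

static void __exit my_module_exit(void)
{
	can_proto_unregister(&my_can_proto);
}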
-/*
- * af_can notifier to create/remove CAN netdevice specific structs
- */
+/* af_can notifier to create/remove CAN netdevice specific structs */
static int can_notifier(struct notifier_block *nb, unsigned long msg,
void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct can_dev_rcv_lists *d;
if (dev->type != ARPHRD_CAN)
return NOTIFY_DONE;
switch (msg) {
-
case NETDEV_REGISTER:
-
- /* create new dev_rcv_lists for this device */
- d = kzalloc(sizeof(*d), GFP_KERNEL);
- if (!d)
- return NOTIFY_DONE;
- BUG_ON(dev->ml_priv);
- dev->ml_priv = d;
-
- break;
-
- case NETDEV_UNREGISTER:
- spin_lock(&dev_net(dev)->can.can_rcvlists_lock);
-
- d = dev->ml_priv;
- if (d) {
- if (d->entries)
- d->remove_on_zero_entries = 1;
- else {
- kfree(d);
- dev->ml_priv = NULL;
- }
- } else
- pr_err("can: notifier: receive list not found for dev "
- "%s\n", dev->name);
-
- spin_unlock(&dev_net(dev)->can.can_rcvlists_lock);
-
+ WARN(!dev->ml_priv,
+ "No CAN mid layer private allocated, please fix your driver and use alloc_candev()!\n");
break;
}
@@ -859,71 +787,54 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
static int can_pernet_init(struct net *net)
{
- spin_lock_init(&net->can.can_rcvlists_lock);
- net->can.can_rx_alldev_list =
- kzalloc(sizeof(struct can_dev_rcv_lists), GFP_KERNEL);
- if (!net->can.can_rx_alldev_list)
+ spin_lock_init(&net->can.rcvlists_lock);
+ net->can.rx_alldev_list =
+ kzalloc(sizeof(*net->can.rx_alldev_list), GFP_KERNEL);
+ if (!net->can.rx_alldev_list)
goto out;
- net->can.can_stats = kzalloc(sizeof(struct s_stats), GFP_KERNEL);
- if (!net->can.can_stats)
- goto out_free_alldev_list;
- net->can.can_pstats = kzalloc(sizeof(struct s_pstats), GFP_KERNEL);
- if (!net->can.can_pstats)
- goto out_free_can_stats;
+ net->can.pkg_stats = kzalloc(sizeof(*net->can.pkg_stats), GFP_KERNEL);
+ if (!net->can.pkg_stats)
+ goto out_free_rx_alldev_list;
+ net->can.rcv_lists_stats = kzalloc(sizeof(*net->can.rcv_lists_stats), GFP_KERNEL);
+ if (!net->can.rcv_lists_stats)
+ goto out_free_pkg_stats;
if (IS_ENABLED(CONFIG_PROC_FS)) {
/* the statistics are updated every second (timer triggered) */
if (stats_timer) {
- timer_setup(&net->can.can_stattimer, can_stat_update,
+ timer_setup(&net->can.stattimer, can_stat_update,
0);
- mod_timer(&net->can.can_stattimer,
+ mod_timer(&net->can.stattimer,
round_jiffies(jiffies + HZ));
}
- net->can.can_stats->jiffies_init = jiffies;
+ net->can.pkg_stats->jiffies_init = jiffies;
can_init_proc(net);
}
return 0;
- out_free_can_stats:
- kfree(net->can.can_stats);
- out_free_alldev_list:
- kfree(net->can.can_rx_alldev_list);
+ out_free_pkg_stats:
+ kfree(net->can.pkg_stats);
+ out_free_rx_alldev_list:
+ kfree(net->can.rx_alldev_list);
out:
return -ENOMEM;
}
static void can_pernet_exit(struct net *net)
{
- struct net_device *dev;
-
if (IS_ENABLED(CONFIG_PROC_FS)) {
can_remove_proc(net);
if (stats_timer)
- del_timer_sync(&net->can.can_stattimer);
+ del_timer_sync(&net->can.stattimer);
}
- /* remove created dev_rcv_lists from still registered CAN devices */
- rcu_read_lock();
- for_each_netdev_rcu(net, dev) {
- if (dev->type == ARPHRD_CAN && dev->ml_priv) {
- struct can_dev_rcv_lists *d = dev->ml_priv;
-
- BUG_ON(d->entries);
- kfree(d);
- dev->ml_priv = NULL;
- }
- }
- rcu_read_unlock();
-
- kfree(net->can.can_rx_alldev_list);
- kfree(net->can.can_stats);
- kfree(net->can.can_pstats);
+ kfree(net->can.rx_alldev_list);
+ kfree(net->can.pkg_stats);
+ kfree(net->can.rcv_lists_stats);
}
-/*
- * af_can module init/exit functions
- */
+/* af_can module init/exit functions */
static struct packet_type can_packet __read_mostly = {
.type = cpu_to_be16(ETH_P_CAN),
diff --git a/net/can/af_can.h b/net/can/af_can.h
index 9cb3719632bd..7c2d9161e224 100644
--- a/net/can/af_can.h
+++ b/net/can/af_can.h
@@ -1,5 +1,5 @@
-/*
- * Copyright (c) 2002-2007 Volkswagen Group Electronic Research
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/* Copyright (c) 2002-2007 Volkswagen Group Electronic Research
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -53,32 +53,17 @@ struct receiver {
canid_t can_id;
canid_t mask;
unsigned long matches;
- void (*func)(struct sk_buff *, void *);
+ void (*func)(struct sk_buff *skb, void *data);
void *data;
char *ident;
struct sock *sk;
struct rcu_head rcu;
};
-#define CAN_SFF_RCV_ARRAY_SZ (1 << CAN_SFF_ID_BITS)
-#define CAN_EFF_RCV_HASH_BITS 10
-#define CAN_EFF_RCV_ARRAY_SZ (1 << CAN_EFF_RCV_HASH_BITS)
-
-enum { RX_ERR, RX_ALL, RX_FIL, RX_INV, RX_MAX };
-
-/* per device receive filters linked at dev->ml_priv */
-struct can_dev_rcv_lists {
- struct hlist_head rx[RX_MAX];
- struct hlist_head rx_sff[CAN_SFF_RCV_ARRAY_SZ];
- struct hlist_head rx_eff[CAN_EFF_RCV_ARRAY_SZ];
- int remove_on_zero_entries;
- int entries;
-};
-
/* statistic structures */
/* can be reset e.g. by can_init_stats() */
-struct s_stats {
+struct can_pkg_stats {
unsigned long jiffies_init;
unsigned long rx_frames;
@@ -103,7 +88,7 @@ struct s_stats {
};
/* persistent statistics */
-struct s_pstats {
+struct can_rcv_lists_stats {
unsigned long stats_reset;
unsigned long user_reset;
unsigned long rcv_entries;
diff --git a/net/can/bcm.c b/net/can/bcm.c
index a34ee52f19ea..c96fa0f33db3 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
/*
* bcm.c - Broadcast Manager to filter/send (cyclic) CAN content
*
@@ -105,7 +106,6 @@ struct bcm_op {
unsigned long frames_abs, frames_filtered;
struct bcm_timeval ival1, ival2;
struct hrtimer timer, thrtimer;
- struct tasklet_struct tsklet, thrtsklet;
ktime_t rx_stamp, kt_ival1, kt_ival2, kt_lastmsg;
int rx_ifindex;
int cfsiz;
@@ -370,25 +370,34 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
}
}
-static void bcm_tx_start_timer(struct bcm_op *op)
+static bool bcm_tx_set_expiry(struct bcm_op *op, struct hrtimer *hrt)
{
+ ktime_t ival;
+
if (op->kt_ival1 && op->count)
- hrtimer_start(&op->timer,
- ktime_add(ktime_get(), op->kt_ival1),
- HRTIMER_MODE_ABS);
+ ival = op->kt_ival1;
else if (op->kt_ival2)
- hrtimer_start(&op->timer,
- ktime_add(ktime_get(), op->kt_ival2),
- HRTIMER_MODE_ABS);
+ ival = op->kt_ival2;
+ else
+ return false;
+
+ hrtimer_set_expires(hrt, ktime_add(ktime_get(), ival));
+ return true;
}
-static void bcm_tx_timeout_tsklet(unsigned long data)
+static void bcm_tx_start_timer(struct bcm_op *op)
{
- struct bcm_op *op = (struct bcm_op *)data;
+ if (bcm_tx_set_expiry(op, &op->timer))
+ hrtimer_start_expires(&op->timer, HRTIMER_MODE_ABS_SOFT);
+}
+
+/* bcm_tx_timeout_handler - performs cyclic CAN frame transmissions */
+static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer)
+{
+ struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer);
struct bcm_msg_head msg_head;
if (op->kt_ival1 && (op->count > 0)) {
-
op->count--;
if (!op->count && (op->flags & TX_COUNTEVT)) {
@@ -405,22 +414,12 @@ static void bcm_tx_timeout_tsklet(unsigned long data)
}
bcm_can_tx(op);
- } else if (op->kt_ival2)
+ } else if (op->kt_ival2) {
bcm_can_tx(op);
+ }
- bcm_tx_start_timer(op);
-}
-
-/*
- * bcm_tx_timeout_handler - performs cyclic CAN frame transmissions
- */
-static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer)
-{
- struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer);
-
- tasklet_schedule(&op->tsklet);
-
- return HRTIMER_NORESTART;
+ return bcm_tx_set_expiry(op, &op->timer) ?
+ HRTIMER_RESTART : HRTIMER_NORESTART;
}
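The conversion above follows the usual pattern for replacing a tasklet with a softirq-context hrtimer: the work is done directly in the timer callback and the timer is re-armed from there. A generic sketch of that pattern (struct my_op, do_work() and period are placeholders):

static enum hrtimer_restart my_timeout_handler(struct hrtimer *hrtimer)
{
	struct my_op *op = container_of(hrtimer, struct my_op, timer);

	do_work(op);			/* runs directly in softirq context */

	hrtimer_forward_now(hrtimer, op->period);
	return HRTIMER_RESTART;		/* or HRTIMER_NORESTART when done */
}

static void my_op_start(struct my_op *op)
{
	hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT);
	op->timer.function = my_timeout_handler;
	hrtimer_start(&op->timer, op->period, HRTIMER_MODE_REL_SOFT);
}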
/*
@@ -486,7 +485,7 @@ static void bcm_rx_update_and_send(struct bcm_op *op,
/* do not send the saved data - only start throttle timer */
hrtimer_start(&op->thrtimer,
ktime_add(op->kt_lastmsg, op->kt_ival2),
- HRTIMER_MODE_ABS);
+ HRTIMER_MODE_ABS_SOFT);
return;
}
@@ -545,14 +544,21 @@ static void bcm_rx_starttimer(struct bcm_op *op)
return;
if (op->kt_ival1)
- hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL);
+ hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL_SOFT);
}
-static void bcm_rx_timeout_tsklet(unsigned long data)
+/* bcm_rx_timeout_handler - when the (cyclic) CAN frame reception timed out */
+static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer)
{
- struct bcm_op *op = (struct bcm_op *)data;
+ struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer);
struct bcm_msg_head msg_head;
+ /* if the user wants to be informed when cyclic CAN messages come back */
+ if ((op->flags & RX_ANNOUNCE_RESUME) && op->last_frames) {
+ /* clear received CAN frames to indicate 'nothing received' */
+ memset(op->last_frames, 0, op->nframes * op->cfsiz);
+ }
+
/* create notification to user */
msg_head.opcode = RX_TIMEOUT;
msg_head.flags = op->flags;
@@ -563,25 +569,6 @@ static void bcm_rx_timeout_tsklet(unsigned long data)
msg_head.nframes = 0;
bcm_send_to_user(op, &msg_head, NULL, 0);
-}
-
-/*
- * bcm_rx_timeout_handler - when the (cyclic) CAN frame reception timed out
- */
-static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer)
-{
- struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer);
-
- /* schedule before NET_RX_SOFTIRQ */
- tasklet_hi_schedule(&op->tsklet);
-
- /* no restart of the timer is done here! */
-
- /* if user wants to be informed, when cyclic CAN-Messages come back */
- if ((op->flags & RX_ANNOUNCE_RESUME) && op->last_frames) {
- /* clear received CAN frames to indicate 'nothing received' */
- memset(op->last_frames, 0, op->nframes * op->cfsiz);
- }
return HRTIMER_NORESTART;
}
@@ -589,14 +576,12 @@ static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer)
/*
* bcm_rx_do_flush - helper for bcm_rx_thr_flush
*/
-static inline int bcm_rx_do_flush(struct bcm_op *op, int update,
- unsigned int index)
+static inline int bcm_rx_do_flush(struct bcm_op *op, unsigned int index)
{
struct canfd_frame *lcf = op->last_frames + op->cfsiz * index;
if ((op->last_frames) && (lcf->flags & RX_THR)) {
- if (update)
- bcm_rx_changed(op, lcf);
+ bcm_rx_changed(op, lcf);
return 1;
}
return 0;
@@ -604,11 +589,8 @@ static inline int bcm_rx_do_flush(struct bcm_op *op, int update,
/*
* bcm_rx_thr_flush - Check for throttled data and send it to the userspace
- *
- * update == 0 : just check if throttled data is available (any irq context)
- * update == 1 : check and send throttled data to userspace (soft_irq context)
*/
-static int bcm_rx_thr_flush(struct bcm_op *op, int update)
+static int bcm_rx_thr_flush(struct bcm_op *op)
{
int updated = 0;
@@ -617,24 +599,16 @@ static int bcm_rx_thr_flush(struct bcm_op *op, int update)
/* for MUX filter we start at index 1 */
for (i = 1; i < op->nframes; i++)
- updated += bcm_rx_do_flush(op, update, i);
+ updated += bcm_rx_do_flush(op, i);
} else {
/* for RX_FILTER_ID and simple filter */
- updated += bcm_rx_do_flush(op, update, 0);
+ updated += bcm_rx_do_flush(op, 0);
}
return updated;
}
-static void bcm_rx_thr_tsklet(unsigned long data)
-{
- struct bcm_op *op = (struct bcm_op *)data;
-
- /* push the changed data to the userspace */
- bcm_rx_thr_flush(op, 1);
-}
-
/*
* bcm_rx_thr_handler - the time for blocked content updates is over now:
* Check for throttled data and send it to the userspace
@@ -643,9 +617,7 @@ static enum hrtimer_restart bcm_rx_thr_handler(struct hrtimer *hrtimer)
{
struct bcm_op *op = container_of(hrtimer, struct bcm_op, thrtimer);
- tasklet_schedule(&op->thrtsklet);
-
- if (bcm_rx_thr_flush(op, 0)) {
+ if (bcm_rx_thr_flush(op)) {
hrtimer_forward(hrtimer, ktime_get(), op->kt_ival2);
return HRTIMER_RESTART;
} else {
@@ -741,23 +713,8 @@ static struct bcm_op *bcm_find_op(struct list_head *ops,
static void bcm_remove_op(struct bcm_op *op)
{
- if (op->tsklet.func) {
- while (test_bit(TASKLET_STATE_SCHED, &op->tsklet.state) ||
- test_bit(TASKLET_STATE_RUN, &op->tsklet.state) ||
- hrtimer_active(&op->timer)) {
- hrtimer_cancel(&op->timer);
- tasklet_kill(&op->tsklet);
- }
- }
-
- if (op->thrtsklet.func) {
- while (test_bit(TASKLET_STATE_SCHED, &op->thrtsklet.state) ||
- test_bit(TASKLET_STATE_RUN, &op->thrtsklet.state) ||
- hrtimer_active(&op->thrtimer)) {
- hrtimer_cancel(&op->thrtimer);
- tasklet_kill(&op->thrtsklet);
- }
- }
+ hrtimer_cancel(&op->timer);
+ hrtimer_cancel(&op->thrtimer);
if ((op->frames) && (op->frames != &op->sframe))
kfree(op->frames);
@@ -990,15 +947,13 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
op->ifindex = ifindex;
/* initialize uninitialized (kzalloc) structure */
- hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&op->timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL_SOFT);
op->timer.function = bcm_tx_timeout_handler;
- /* initialize tasklet for tx countevent notification */
- tasklet_init(&op->tsklet, bcm_tx_timeout_tsklet,
- (unsigned long) op);
-
/* currently unused in tx_ops */
- hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL_SOFT);
/* add this bcm_op to the list of the tx_ops */
list_add(&op->list, &bo->tx_ops);
@@ -1167,20 +1122,14 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
op->rx_ifindex = ifindex;
/* initialize uninitialized (kzalloc) structure */
- hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&op->timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL_SOFT);
op->timer.function = bcm_rx_timeout_handler;
- /* initialize tasklet for rx timeout notification */
- tasklet_init(&op->tsklet, bcm_rx_timeout_tsklet,
- (unsigned long) op);
-
- hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL_SOFT);
op->thrtimer.function = bcm_rx_thr_handler;
- /* initialize tasklet for rx throttle handling */
- tasklet_init(&op->thrtsklet, bcm_rx_thr_tsklet,
- (unsigned long) op);
-
/* add this bcm_op to the list of the rx_ops */
list_add(&op->list, &bo->rx_ops);
@@ -1226,12 +1175,12 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
*/
op->kt_lastmsg = 0;
hrtimer_cancel(&op->thrtimer);
- bcm_rx_thr_flush(op, 1);
+ bcm_rx_thr_flush(op);
}
if ((op->flags & STARTTIMER) && op->kt_ival1)
hrtimer_start(&op->timer, op->kt_ival1,
- HRTIMER_MODE_REL);
+ HRTIMER_MODE_REL_SOFT);
}
/* now we can register for can_ids, if we added a new bcm_op */
@@ -1345,7 +1294,7 @@ static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
/* no bound device as default => check msg_name */
DECLARE_SOCKADDR(struct sockaddr_can *, addr, msg->msg_name);
- if (msg->msg_namelen < sizeof(*addr))
+ if (msg->msg_namelen < CAN_REQUIRED_SIZE(*addr, can_ifindex))
return -EINVAL;
if (addr->can_family != AF_CAN)
@@ -1587,7 +1536,7 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len,
struct net *net = sock_net(sk);
int ret = 0;
- if (len < sizeof(*addr))
+ if (len < CAN_REQUIRED_SIZE(*addr, can_ifindex))
return -EINVAL;
lock_sock(sk);
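CAN_REQUIRED_SIZE() accepts a sockaddr_can that is only long enough to cover the members actually needed (here everything up to and including can_ifindex), instead of demanding the full structure size. A sketch of what such a helper boils down to; the in-tree macro may differ in detail:

/* size of a struct up to and including the given member */
#define CAN_REQUIRED_SIZE(object, member) \
	(offsetof(typeof(object), member) + \
	 sizeof(((typeof(object) *)0)->member))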
@@ -1679,6 +1628,13 @@ static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
return size;
}
+static int bcm_sock_no_ioctlcmd(struct socket *sock, unsigned int cmd,
+ unsigned long arg)
+{
+ /* no ioctls for socket layer -> hand it down to NIC layer */
+ return -ENOIOCTLCMD;
+}
+
static const struct proto_ops bcm_ops = {
.family = PF_CAN,
.release = bcm_release,
@@ -1688,7 +1644,7 @@ static const struct proto_ops bcm_ops = {
.accept = sock_no_accept,
.getname = sock_no_getname,
.poll = datagram_poll,
- .ioctl = can_ioctl, /* use can_ioctl() from af_can.c */
+ .ioctl = bcm_sock_no_ioctlcmd,
.gettstamp = sock_gettstamp,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/can/gw.c b/net/can/gw.c
index 72711053ebe6..65d60c93af29 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -1,7 +1,7 @@
-/*
- * gw.c - CAN frame Gateway/Router/Bridge with netlink interface
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+/* gw.c - CAN frame Gateway/Router/Bridge with netlink interface
*
- * Copyright (c) 2017 Volkswagen Group Electronic Research
+ * Copyright (c) 2019 Volkswagen Group Electronic Research
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -59,7 +59,7 @@
#include <net/net_namespace.h>
#include <net/sock.h>
-#define CAN_GW_VERSION "20170425"
+#define CAN_GW_VERSION "20190810"
#define CAN_GW_NAME "can-gw"
MODULE_DESCRIPTION("PF_CAN netlink gateway");
@@ -85,10 +85,10 @@ static struct kmem_cache *cgw_cache __read_mostly;
/* structure that contains the (on-the-fly) CAN frame modifications */
struct cf_mod {
struct {
- struct can_frame and;
- struct can_frame or;
- struct can_frame xor;
- struct can_frame set;
+ struct canfd_frame and;
+ struct canfd_frame or;
+ struct canfd_frame xor;
+ struct canfd_frame set;
} modframe;
struct {
u8 and;
@@ -96,7 +96,7 @@ struct cf_mod {
u8 xor;
u8 set;
} modtype;
- void (*modfunc[MAX_MODFUNCTIONS])(struct can_frame *cf,
+ void (*modfunc[MAX_MODFUNCTIONS])(struct canfd_frame *cf,
struct cf_mod *mod);
/* CAN frame checksum calculation after CAN frame modifications */
@@ -105,15 +105,15 @@ struct cf_mod {
struct cgw_csum_crc8 crc8;
} csum;
struct {
- void (*xor)(struct can_frame *cf, struct cgw_csum_xor *xor);
- void (*crc8)(struct can_frame *cf, struct cgw_csum_crc8 *crc8);
+ void (*xor)(struct canfd_frame *cf,
+ struct cgw_csum_xor *xor);
+ void (*crc8)(struct canfd_frame *cf,
+ struct cgw_csum_crc8 *crc8);
} csumfunc;
u32 uid;
};
-
-/*
- * So far we just support CAN -> CAN routing and frame modifications.
+/* So far we just support CAN -> CAN routing and frame modifications.
*
* The internal can_can_gw structure contains data and attributes for
* a CAN -> CAN gateway job.
@@ -151,39 +151,88 @@ struct cgw_job {
/* modification functions that are invoked in the hot path in can_can_gw_rcv */
-#define MODFUNC(func, op) static void func(struct can_frame *cf, \
+#define MODFUNC(func, op) static void func(struct canfd_frame *cf, \
struct cf_mod *mod) { op ; }
MODFUNC(mod_and_id, cf->can_id &= mod->modframe.and.can_id)
-MODFUNC(mod_and_dlc, cf->can_dlc &= mod->modframe.and.can_dlc)
+MODFUNC(mod_and_len, cf->len &= mod->modframe.and.len)
+MODFUNC(mod_and_flags, cf->flags &= mod->modframe.and.flags)
MODFUNC(mod_and_data, *(u64 *)cf->data &= *(u64 *)mod->modframe.and.data)
MODFUNC(mod_or_id, cf->can_id |= mod->modframe.or.can_id)
-MODFUNC(mod_or_dlc, cf->can_dlc |= mod->modframe.or.can_dlc)
+MODFUNC(mod_or_len, cf->len |= mod->modframe.or.len)
+MODFUNC(mod_or_flags, cf->flags |= mod->modframe.or.flags)
MODFUNC(mod_or_data, *(u64 *)cf->data |= *(u64 *)mod->modframe.or.data)
MODFUNC(mod_xor_id, cf->can_id ^= mod->modframe.xor.can_id)
-MODFUNC(mod_xor_dlc, cf->can_dlc ^= mod->modframe.xor.can_dlc)
+MODFUNC(mod_xor_len, cf->len ^= mod->modframe.xor.len)
+MODFUNC(mod_xor_flags, cf->flags ^= mod->modframe.xor.flags)
MODFUNC(mod_xor_data, *(u64 *)cf->data ^= *(u64 *)mod->modframe.xor.data)
MODFUNC(mod_set_id, cf->can_id = mod->modframe.set.can_id)
-MODFUNC(mod_set_dlc, cf->can_dlc = mod->modframe.set.can_dlc)
+MODFUNC(mod_set_len, cf->len = mod->modframe.set.len)
+MODFUNC(mod_set_flags, cf->flags = mod->modframe.set.flags)
MODFUNC(mod_set_data, *(u64 *)cf->data = *(u64 *)mod->modframe.set.data)
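Each MODFUNC() line above expands to a small helper with a uniform signature, so the preselected function pointers can be walked in the hot path without re-checking modtype bits; e.g. the first entry expands roughly to:

static void mod_and_id(struct canfd_frame *cf, struct cf_mod *mod)
{
	cf->can_id &= mod->modframe.and.can_id;
}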
-static inline void canframecpy(struct can_frame *dst, struct can_frame *src)
+static void mod_and_fddata(struct canfd_frame *cf, struct cf_mod *mod)
+{
+ int i;
+
+ for (i = 0; i < CANFD_MAX_DLEN; i += 8)
+ *(u64 *)(cf->data + i) &= *(u64 *)(mod->modframe.and.data + i);
+}
+
+static void mod_or_fddata(struct canfd_frame *cf, struct cf_mod *mod)
+{
+ int i;
+
+ for (i = 0; i < CANFD_MAX_DLEN; i += 8)
+ *(u64 *)(cf->data + i) |= *(u64 *)(mod->modframe.or.data + i);
+}
+
+static void mod_xor_fddata(struct canfd_frame *cf, struct cf_mod *mod)
+{
+ int i;
+
+ for (i = 0; i < CANFD_MAX_DLEN; i += 8)
+ *(u64 *)(cf->data + i) ^= *(u64 *)(mod->modframe.xor.data + i);
+}
+
+static void mod_set_fddata(struct canfd_frame *cf, struct cf_mod *mod)
+{
+ memcpy(cf->data, mod->modframe.set.data, CANFD_MAX_DLEN);
+}
+
+static void canframecpy(struct canfd_frame *dst, struct can_frame *src)
{
- /*
- * Copy the struct members separately to ensure that no uninitialized
+ /* Copy the struct members separately to ensure that no uninitialized
* data are copied into the 3-byte hole of the struct. This is needed
* to allow easy comparison of the data in struct cf_mod.
*/
dst->can_id = src->can_id;
- dst->can_dlc = src->can_dlc;
+ dst->len = src->can_dlc;
*(u64 *)dst->data = *(u64 *)src->data;
}
-static int cgw_chk_csum_parms(s8 fr, s8 to, s8 re)
+static void canfdframecpy(struct canfd_frame *dst, struct canfd_frame *src)
{
- /*
- * absolute dlc values 0 .. 7 => 0 .. 7, e.g. data [0]
+ /* Copy the struct members separately to ensure that no uninitialized
+ * data are copied into the 2-byte hole of the struct. This is needed
+ * to allow easy comparison of the data in struct cf_mod.
+ */
+
+ dst->can_id = src->can_id;
+ dst->flags = src->flags;
+ dst->len = src->len;
+ memcpy(dst->data, src->data, CANFD_MAX_DLEN);
+}
+
+static int cgw_chk_csum_parms(s8 fr, s8 to, s8 re, struct rtcanmsg *r)
+{
+ s8 dlen = CAN_MAX_DLEN;
+
+ if (r->flags & CGW_FLAGS_CAN_FD)
+ dlen = CANFD_MAX_DLEN;
+
+ /* absolute dlc values 0 .. 7 => 0 .. 7, e.g. data [0]
* relative to received dlc -1 .. -8 :
* e.g. for received dlc = 8
* -1 => index = 7 (data[7])
@@ -191,27 +240,27 @@ static int cgw_chk_csum_parms(s8 fr, s8 to, s8 re)
* -8 => index = 0 (data[0])
*/
- if (fr > -9 && fr < 8 &&
- to > -9 && to < 8 &&
- re > -9 && re < 8)
+ if (fr >= -dlen && fr < dlen &&
+ to >= -dlen && to < dlen &&
+ re >= -dlen && re < dlen)
return 0;
else
return -EINVAL;
}
-static inline int calc_idx(int idx, int rx_dlc)
+static inline int calc_idx(int idx, int rx_len)
{
if (idx < 0)
- return rx_dlc + idx;
+ return rx_len + idx;
else
return idx;
}
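calc_idx() maps a (possibly negative, i.e. relative) user-supplied index to a real data[] offset; a few worked values under the widened CAN FD bounds:

/* received CAN FD frame with cf->len == 12 */
idx = calc_idx(5, 12);		/* ->  5, i.e. cf->data[5]  */
idx = calc_idx(-1, 12);		/* -> 11, i.e. cf->data[11] */
idx = calc_idx(-12, 12);	/* ->  0, i.e. cf->data[0]  */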
-static void cgw_csum_xor_rel(struct can_frame *cf, struct cgw_csum_xor *xor)
+static void cgw_csum_xor_rel(struct canfd_frame *cf, struct cgw_csum_xor *xor)
{
- int from = calc_idx(xor->from_idx, cf->can_dlc);
- int to = calc_idx(xor->to_idx, cf->can_dlc);
- int res = calc_idx(xor->result_idx, cf->can_dlc);
+ int from = calc_idx(xor->from_idx, cf->len);
+ int to = calc_idx(xor->to_idx, cf->len);
+ int res = calc_idx(xor->result_idx, cf->len);
u8 val = xor->init_xor_val;
int i;
@@ -229,7 +278,7 @@ static void cgw_csum_xor_rel(struct can_frame *cf, struct cgw_csum_xor *xor)
cf->data[res] = val;
}
-static void cgw_csum_xor_pos(struct can_frame *cf, struct cgw_csum_xor *xor)
+static void cgw_csum_xor_pos(struct canfd_frame *cf, struct cgw_csum_xor *xor)
{
u8 val = xor->init_xor_val;
int i;
@@ -240,7 +289,7 @@ static void cgw_csum_xor_pos(struct can_frame *cf, struct cgw_csum_xor *xor)
cf->data[xor->result_idx] = val;
}
-static void cgw_csum_xor_neg(struct can_frame *cf, struct cgw_csum_xor *xor)
+static void cgw_csum_xor_neg(struct canfd_frame *cf, struct cgw_csum_xor *xor)
{
u8 val = xor->init_xor_val;
int i;
@@ -251,11 +300,12 @@ static void cgw_csum_xor_neg(struct can_frame *cf, struct cgw_csum_xor *xor)
cf->data[xor->result_idx] = val;
}
-static void cgw_csum_crc8_rel(struct can_frame *cf, struct cgw_csum_crc8 *crc8)
+static void cgw_csum_crc8_rel(struct canfd_frame *cf,
+ struct cgw_csum_crc8 *crc8)
{
- int from = calc_idx(crc8->from_idx, cf->can_dlc);
- int to = calc_idx(crc8->to_idx, cf->can_dlc);
- int res = calc_idx(crc8->result_idx, cf->can_dlc);
+ int from = calc_idx(crc8->from_idx, cf->len);
+ int to = calc_idx(crc8->to_idx, cf->len);
+ int res = calc_idx(crc8->result_idx, cf->len);
u8 crc = crc8->init_crc_val;
int i;
@@ -264,96 +314,102 @@ static void cgw_csum_crc8_rel(struct can_frame *cf, struct cgw_csum_crc8 *crc8)
if (from <= to) {
for (i = crc8->from_idx; i <= crc8->to_idx; i++)
- crc = crc8->crctab[crc^cf->data[i]];
+ crc = crc8->crctab[crc ^ cf->data[i]];
} else {
for (i = crc8->from_idx; i >= crc8->to_idx; i--)
- crc = crc8->crctab[crc^cf->data[i]];
+ crc = crc8->crctab[crc ^ cf->data[i]];
}
switch (crc8->profile) {
-
case CGW_CRC8PRF_1U8:
- crc = crc8->crctab[crc^crc8->profile_data[0]];
+ crc = crc8->crctab[crc ^ crc8->profile_data[0]];
break;
case CGW_CRC8PRF_16U8:
- crc = crc8->crctab[crc^crc8->profile_data[cf->data[1] & 0xF]];
+ crc = crc8->crctab[crc ^ crc8->profile_data[cf->data[1] & 0xF]];
break;
case CGW_CRC8PRF_SFFID_XOR:
- crc = crc8->crctab[crc^(cf->can_id & 0xFF)^
+ crc = crc8->crctab[crc ^ (cf->can_id & 0xFF) ^
(cf->can_id >> 8 & 0xFF)];
break;
-
}
- cf->data[crc8->result_idx] = crc^crc8->final_xor_val;
+ cf->data[crc8->result_idx] = crc ^ crc8->final_xor_val;
}
-static void cgw_csum_crc8_pos(struct can_frame *cf, struct cgw_csum_crc8 *crc8)
+static void cgw_csum_crc8_pos(struct canfd_frame *cf,
+ struct cgw_csum_crc8 *crc8)
{
u8 crc = crc8->init_crc_val;
int i;
for (i = crc8->from_idx; i <= crc8->to_idx; i++)
- crc = crc8->crctab[crc^cf->data[i]];
+ crc = crc8->crctab[crc ^ cf->data[i]];
switch (crc8->profile) {
-
case CGW_CRC8PRF_1U8:
- crc = crc8->crctab[crc^crc8->profile_data[0]];
+ crc = crc8->crctab[crc ^ crc8->profile_data[0]];
break;
case CGW_CRC8PRF_16U8:
- crc = crc8->crctab[crc^crc8->profile_data[cf->data[1] & 0xF]];
+ crc = crc8->crctab[crc ^ crc8->profile_data[cf->data[1] & 0xF]];
break;
case CGW_CRC8PRF_SFFID_XOR:
- crc = crc8->crctab[crc^(cf->can_id & 0xFF)^
+ crc = crc8->crctab[crc ^ (cf->can_id & 0xFF) ^
(cf->can_id >> 8 & 0xFF)];
break;
}
- cf->data[crc8->result_idx] = crc^crc8->final_xor_val;
+ cf->data[crc8->result_idx] = crc ^ crc8->final_xor_val;
}
-static void cgw_csum_crc8_neg(struct can_frame *cf, struct cgw_csum_crc8 *crc8)
+static void cgw_csum_crc8_neg(struct canfd_frame *cf,
+ struct cgw_csum_crc8 *crc8)
{
u8 crc = crc8->init_crc_val;
int i;
for (i = crc8->from_idx; i >= crc8->to_idx; i--)
- crc = crc8->crctab[crc^cf->data[i]];
+ crc = crc8->crctab[crc ^ cf->data[i]];
switch (crc8->profile) {
-
case CGW_CRC8PRF_1U8:
- crc = crc8->crctab[crc^crc8->profile_data[0]];
+ crc = crc8->crctab[crc ^ crc8->profile_data[0]];
break;
case CGW_CRC8PRF_16U8:
- crc = crc8->crctab[crc^crc8->profile_data[cf->data[1] & 0xF]];
+ crc = crc8->crctab[crc ^ crc8->profile_data[cf->data[1] & 0xF]];
break;
case CGW_CRC8PRF_SFFID_XOR:
- crc = crc8->crctab[crc^(cf->can_id & 0xFF)^
+ crc = crc8->crctab[crc ^ (cf->can_id & 0xFF) ^
(cf->can_id >> 8 & 0xFF)];
break;
}
- cf->data[crc8->result_idx] = crc^crc8->final_xor_val;
+ cf->data[crc8->result_idx] = crc ^ crc8->final_xor_val;
}
/* the receive & process & send function */
static void can_can_gw_rcv(struct sk_buff *skb, void *data)
{
struct cgw_job *gwj = (struct cgw_job *)data;
- struct can_frame *cf;
+ struct canfd_frame *cf;
struct sk_buff *nskb;
int modidx = 0;
- /*
- * Do not handle CAN frames routed more than 'max_hops' times.
+ /* process strictly Classic CAN or CAN FD frames */
+ if (gwj->flags & CGW_FLAGS_CAN_FD) {
+ if (skb->len != CANFD_MTU)
+ return;
+ } else {
+ if (skb->len != CAN_MTU)
+ return;
+ }
+
+ /* Do not handle CAN frames routed more than 'max_hops' times.
* In general we should never catch this delimiter, which is intended
* to protect against misconfiguration (e.g. circular CAN routes).
*
@@ -384,8 +440,7 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
can_skb_prv(skb)->ifindex == gwj->dst.dev->ifindex)
return;
- /*
- * clone the given skb, which has not been done in can_rcv()
+ /* clone the given skb, which has not been done in can_rcv()
*
* When there is at least one modification function activated,
* we need to copy the skb as we want to modify skb->data.
@@ -410,7 +465,7 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
nskb->dev = gwj->dst.dev;
/* pointer to modifiable CAN frame */
- cf = (struct can_frame *)nskb->data;
+ cf = (struct canfd_frame *)nskb->data;
/* perform preprocessed modification functions if there are any */
while (modidx < MAX_MODFUNCTIONS && gwj->mod.modfunc[modidx])
@@ -419,26 +474,22 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
/* Has the CAN frame been modified? */
if (modidx) {
/* get available space for the processed CAN frame type */
- int max_len = nskb->len - offsetof(struct can_frame, data);
+ int max_len = nskb->len - offsetof(struct canfd_frame, data);
/* dlc may have changed, make sure it fits to the CAN frame */
- if (cf->can_dlc > max_len)
- goto out_delete;
-
- /* check for checksum updates in classic CAN length only */
- if (gwj->mod.csumfunc.crc8) {
- if (cf->can_dlc > 8)
- goto out_delete;
-
- (*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8);
+ if (cf->len > max_len) {
+ /* delete frame due to misconfiguration */
+ gwj->deleted_frames++;
+ kfree_skb(nskb);
+ return;
}
- if (gwj->mod.csumfunc.xor) {
- if (cf->can_dlc > 8)
- goto out_delete;
+ /* check for checksum updates */
+ if (gwj->mod.csumfunc.crc8)
+ (*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8);
+ if (gwj->mod.csumfunc.xor)
(*gwj->mod.csumfunc.xor)(cf, &gwj->mod.csum.xor);
- }
}
/* clear the skb timestamp if not configured the other way */
@@ -450,14 +501,6 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
gwj->dropped_frames++;
else
gwj->handled_frames++;
-
- return;
-
- out_delete:
- /* delete frame due to misconfiguration */
- gwj->deleted_frames++;
- kfree_skb(nskb);
- return;
}
static inline int cgw_register_filter(struct net *net, struct cgw_job *gwj)
@@ -483,14 +526,12 @@ static int cgw_notifier(struct notifier_block *nb,
return NOTIFY_DONE;
if (msg == NETDEV_UNREGISTER) {
-
struct cgw_job *gwj = NULL;
struct hlist_node *nx;
ASSERT_RTNL();
hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) {
-
if (gwj->src.dev == dev || gwj->dst.dev == dev) {
hlist_del(&gwj->list);
cgw_unregister_filter(net, gwj);
@@ -505,7 +546,6 @@ static int cgw_notifier(struct notifier_block *nb,
static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type,
u32 pid, u32 seq, int flags)
{
- struct cgw_frame_mod mb;
struct rtcanmsg *rtcan;
struct nlmsghdr *nlh;
@@ -542,32 +582,66 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type,
goto cancel;
}
- if (gwj->mod.modtype.and) {
- memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf));
- mb.modtype = gwj->mod.modtype.and;
- if (nla_put(skb, CGW_MOD_AND, sizeof(mb), &mb) < 0)
- goto cancel;
- }
+ if (gwj->flags & CGW_FLAGS_CAN_FD) {
+ struct cgw_fdframe_mod mb;
- if (gwj->mod.modtype.or) {
- memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf));
- mb.modtype = gwj->mod.modtype.or;
- if (nla_put(skb, CGW_MOD_OR, sizeof(mb), &mb) < 0)
- goto cancel;
- }
+ if (gwj->mod.modtype.and) {
+ memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf));
+ mb.modtype = gwj->mod.modtype.and;
+ if (nla_put(skb, CGW_FDMOD_AND, sizeof(mb), &mb) < 0)
+ goto cancel;
+ }
- if (gwj->mod.modtype.xor) {
- memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf));
- mb.modtype = gwj->mod.modtype.xor;
- if (nla_put(skb, CGW_MOD_XOR, sizeof(mb), &mb) < 0)
- goto cancel;
- }
+ if (gwj->mod.modtype.or) {
+ memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf));
+ mb.modtype = gwj->mod.modtype.or;
+ if (nla_put(skb, CGW_FDMOD_OR, sizeof(mb), &mb) < 0)
+ goto cancel;
+ }
- if (gwj->mod.modtype.set) {
- memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf));
- mb.modtype = gwj->mod.modtype.set;
- if (nla_put(skb, CGW_MOD_SET, sizeof(mb), &mb) < 0)
- goto cancel;
+ if (gwj->mod.modtype.xor) {
+ memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf));
+ mb.modtype = gwj->mod.modtype.xor;
+ if (nla_put(skb, CGW_FDMOD_XOR, sizeof(mb), &mb) < 0)
+ goto cancel;
+ }
+
+ if (gwj->mod.modtype.set) {
+ memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf));
+ mb.modtype = gwj->mod.modtype.set;
+ if (nla_put(skb, CGW_FDMOD_SET, sizeof(mb), &mb) < 0)
+ goto cancel;
+ }
+ } else {
+ struct cgw_frame_mod mb;
+
+ if (gwj->mod.modtype.and) {
+ memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf));
+ mb.modtype = gwj->mod.modtype.and;
+ if (nla_put(skb, CGW_MOD_AND, sizeof(mb), &mb) < 0)
+ goto cancel;
+ }
+
+ if (gwj->mod.modtype.or) {
+ memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf));
+ mb.modtype = gwj->mod.modtype.or;
+ if (nla_put(skb, CGW_MOD_OR, sizeof(mb), &mb) < 0)
+ goto cancel;
+ }
+
+ if (gwj->mod.modtype.xor) {
+ memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf));
+ mb.modtype = gwj->mod.modtype.xor;
+ if (nla_put(skb, CGW_MOD_XOR, sizeof(mb), &mb) < 0)
+ goto cancel;
+ }
+
+ if (gwj->mod.modtype.set) {
+ memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf));
+ mb.modtype = gwj->mod.modtype.set;
+ if (nla_put(skb, CGW_MOD_SET, sizeof(mb), &mb) < 0)
+ goto cancel;
+ }
}
if (gwj->mod.uid) {
@@ -588,7 +662,6 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type,
}
if (gwj->gwtype == CGW_TYPE_CAN_CAN) {
-
if (gwj->ccgw.filter.can_id || gwj->ccgw.filter.can_mask) {
if (nla_put(skb, CGW_FILTER, sizeof(struct can_filter),
&gwj->ccgw.filter) < 0)
@@ -623,8 +696,9 @@ static int cgw_dump_jobs(struct sk_buff *skb, struct netlink_callback *cb)
if (idx < s_idx)
goto cont;
- if (cgw_put_job(skb, gwj, RTM_NEWROUTE, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI) < 0)
+ if (cgw_put_job(skb, gwj, RTM_NEWROUTE,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI) < 0)
break;
cont:
idx++;
@@ -636,7 +710,7 @@ cont:
return skb->len;
}
-static const struct nla_policy cgw_policy[CGW_MAX+1] = {
+static const struct nla_policy cgw_policy[CGW_MAX + 1] = {
[CGW_MOD_AND] = { .len = sizeof(struct cgw_frame_mod) },
[CGW_MOD_OR] = { .len = sizeof(struct cgw_frame_mod) },
[CGW_MOD_XOR] = { .len = sizeof(struct cgw_frame_mod) },
@@ -648,14 +722,18 @@ static const struct nla_policy cgw_policy[CGW_MAX+1] = {
[CGW_FILTER] = { .len = sizeof(struct can_filter) },
[CGW_LIM_HOPS] = { .type = NLA_U8 },
[CGW_MOD_UID] = { .type = NLA_U32 },
+ [CGW_FDMOD_AND] = { .len = sizeof(struct cgw_fdframe_mod) },
+ [CGW_FDMOD_OR] = { .len = sizeof(struct cgw_fdframe_mod) },
+ [CGW_FDMOD_XOR] = { .len = sizeof(struct cgw_fdframe_mod) },
+ [CGW_FDMOD_SET] = { .len = sizeof(struct cgw_fdframe_mod) },
};
/* check for common and gwtype specific attributes */
static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
u8 gwtype, void *gwtypeattr, u8 *limhops)
{
- struct nlattr *tb[CGW_MAX+1];
- struct cgw_frame_mod mb;
+ struct nlattr *tb[CGW_MAX + 1];
+ struct rtcanmsg *r = nlmsg_data(nlh);
int modidx = 0;
int err = 0;
@@ -675,87 +753,166 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
}
/* check for AND/OR/XOR/SET modifications */
+ if (r->flags & CGW_FLAGS_CAN_FD) {
+ struct cgw_fdframe_mod mb;
- if (tb[CGW_MOD_AND]) {
- nla_memcpy(&mb, tb[CGW_MOD_AND], CGW_MODATTR_LEN);
+ if (tb[CGW_FDMOD_AND]) {
+ nla_memcpy(&mb, tb[CGW_FDMOD_AND], CGW_FDMODATTR_LEN);
- canframecpy(&mod->modframe.and, &mb.cf);
- mod->modtype.and = mb.modtype;
+ canfdframecpy(&mod->modframe.and, &mb.cf);
+ mod->modtype.and = mb.modtype;
- if (mb.modtype & CGW_MOD_ID)
- mod->modfunc[modidx++] = mod_and_id;
+ if (mb.modtype & CGW_MOD_ID)
+ mod->modfunc[modidx++] = mod_and_id;
- if (mb.modtype & CGW_MOD_DLC)
- mod->modfunc[modidx++] = mod_and_dlc;
+ if (mb.modtype & CGW_MOD_LEN)
+ mod->modfunc[modidx++] = mod_and_len;
- if (mb.modtype & CGW_MOD_DATA)
- mod->modfunc[modidx++] = mod_and_data;
- }
+ if (mb.modtype & CGW_MOD_FLAGS)
+ mod->modfunc[modidx++] = mod_and_flags;
+
+ if (mb.modtype & CGW_MOD_DATA)
+ mod->modfunc[modidx++] = mod_and_fddata;
+ }
- if (tb[CGW_MOD_OR]) {
- nla_memcpy(&mb, tb[CGW_MOD_OR], CGW_MODATTR_LEN);
+ if (tb[CGW_FDMOD_OR]) {
+ nla_memcpy(&mb, tb[CGW_FDMOD_OR], CGW_FDMODATTR_LEN);
- canframecpy(&mod->modframe.or, &mb.cf);
- mod->modtype.or = mb.modtype;
+ canfdframecpy(&mod->modframe.or, &mb.cf);
+ mod->modtype.or = mb.modtype;
- if (mb.modtype & CGW_MOD_ID)
- mod->modfunc[modidx++] = mod_or_id;
+ if (mb.modtype & CGW_MOD_ID)
+ mod->modfunc[modidx++] = mod_or_id;
- if (mb.modtype & CGW_MOD_DLC)
- mod->modfunc[modidx++] = mod_or_dlc;
+ if (mb.modtype & CGW_MOD_LEN)
+ mod->modfunc[modidx++] = mod_or_len;
- if (mb.modtype & CGW_MOD_DATA)
- mod->modfunc[modidx++] = mod_or_data;
- }
+ if (mb.modtype & CGW_MOD_FLAGS)
+ mod->modfunc[modidx++] = mod_or_flags;
- if (tb[CGW_MOD_XOR]) {
- nla_memcpy(&mb, tb[CGW_MOD_XOR], CGW_MODATTR_LEN);
+ if (mb.modtype & CGW_MOD_DATA)
+ mod->modfunc[modidx++] = mod_or_fddata;
+ }
- canframecpy(&mod->modframe.xor, &mb.cf);
- mod->modtype.xor = mb.modtype;
+ if (tb[CGW_FDMOD_XOR]) {
+ nla_memcpy(&mb, tb[CGW_FDMOD_XOR], CGW_FDMODATTR_LEN);
- if (mb.modtype & CGW_MOD_ID)
- mod->modfunc[modidx++] = mod_xor_id;
+ canfdframecpy(&mod->modframe.xor, &mb.cf);
+ mod->modtype.xor = mb.modtype;
- if (mb.modtype & CGW_MOD_DLC)
- mod->modfunc[modidx++] = mod_xor_dlc;
+ if (mb.modtype & CGW_MOD_ID)
+ mod->modfunc[modidx++] = mod_xor_id;
- if (mb.modtype & CGW_MOD_DATA)
- mod->modfunc[modidx++] = mod_xor_data;
- }
+ if (mb.modtype & CGW_MOD_LEN)
+ mod->modfunc[modidx++] = mod_xor_len;
- if (tb[CGW_MOD_SET]) {
- nla_memcpy(&mb, tb[CGW_MOD_SET], CGW_MODATTR_LEN);
+ if (mb.modtype & CGW_MOD_FLAGS)
+ mod->modfunc[modidx++] = mod_xor_flags;
+
+ if (mb.modtype & CGW_MOD_DATA)
+ mod->modfunc[modidx++] = mod_xor_fddata;
+ }
- canframecpy(&mod->modframe.set, &mb.cf);
- mod->modtype.set = mb.modtype;
+ if (tb[CGW_FDMOD_SET]) {
+ nla_memcpy(&mb, tb[CGW_FDMOD_SET], CGW_FDMODATTR_LEN);
+
+ canfdframecpy(&mod->modframe.set, &mb.cf);
+ mod->modtype.set = mb.modtype;
+
+ if (mb.modtype & CGW_MOD_ID)
+ mod->modfunc[modidx++] = mod_set_id;
+
+ if (mb.modtype & CGW_MOD_LEN)
+ mod->modfunc[modidx++] = mod_set_len;
+
+ if (mb.modtype & CGW_MOD_FLAGS)
+ mod->modfunc[modidx++] = mod_set_flags;
+
+ if (mb.modtype & CGW_MOD_DATA)
+ mod->modfunc[modidx++] = mod_set_fddata;
+ }
+ } else {
+ struct cgw_frame_mod mb;
- if (mb.modtype & CGW_MOD_ID)
- mod->modfunc[modidx++] = mod_set_id;
+ if (tb[CGW_MOD_AND]) {
+ nla_memcpy(&mb, tb[CGW_MOD_AND], CGW_MODATTR_LEN);
- if (mb.modtype & CGW_MOD_DLC)
- mod->modfunc[modidx++] = mod_set_dlc;
+ canframecpy(&mod->modframe.and, &mb.cf);
+ mod->modtype.and = mb.modtype;
- if (mb.modtype & CGW_MOD_DATA)
- mod->modfunc[modidx++] = mod_set_data;
+ if (mb.modtype & CGW_MOD_ID)
+ mod->modfunc[modidx++] = mod_and_id;
+
+ if (mb.modtype & CGW_MOD_LEN)
+ mod->modfunc[modidx++] = mod_and_len;
+
+ if (mb.modtype & CGW_MOD_DATA)
+ mod->modfunc[modidx++] = mod_and_data;
+ }
+
+ if (tb[CGW_MOD_OR]) {
+ nla_memcpy(&mb, tb[CGW_MOD_OR], CGW_MODATTR_LEN);
+
+ canframecpy(&mod->modframe.or, &mb.cf);
+ mod->modtype.or = mb.modtype;
+
+ if (mb.modtype & CGW_MOD_ID)
+ mod->modfunc[modidx++] = mod_or_id;
+
+ if (mb.modtype & CGW_MOD_LEN)
+ mod->modfunc[modidx++] = mod_or_len;
+
+ if (mb.modtype & CGW_MOD_DATA)
+ mod->modfunc[modidx++] = mod_or_data;
+ }
+
+ if (tb[CGW_MOD_XOR]) {
+ nla_memcpy(&mb, tb[CGW_MOD_XOR], CGW_MODATTR_LEN);
+
+ canframecpy(&mod->modframe.xor, &mb.cf);
+ mod->modtype.xor = mb.modtype;
+
+ if (mb.modtype & CGW_MOD_ID)
+ mod->modfunc[modidx++] = mod_xor_id;
+
+ if (mb.modtype & CGW_MOD_LEN)
+ mod->modfunc[modidx++] = mod_xor_len;
+
+ if (mb.modtype & CGW_MOD_DATA)
+ mod->modfunc[modidx++] = mod_xor_data;
+ }
+
+ if (tb[CGW_MOD_SET]) {
+ nla_memcpy(&mb, tb[CGW_MOD_SET], CGW_MODATTR_LEN);
+
+ canframecpy(&mod->modframe.set, &mb.cf);
+ mod->modtype.set = mb.modtype;
+
+ if (mb.modtype & CGW_MOD_ID)
+ mod->modfunc[modidx++] = mod_set_id;
+
+ if (mb.modtype & CGW_MOD_LEN)
+ mod->modfunc[modidx++] = mod_set_len;
+
+ if (mb.modtype & CGW_MOD_DATA)
+ mod->modfunc[modidx++] = mod_set_data;
+ }
}
/* check for checksum operations after CAN frame modifications */
if (modidx) {
-
if (tb[CGW_CS_CRC8]) {
struct cgw_csum_crc8 *c = nla_data(tb[CGW_CS_CRC8]);
err = cgw_chk_csum_parms(c->from_idx, c->to_idx,
- c->result_idx);
+ c->result_idx, r);
if (err)
return err;
nla_memcpy(&mod->csum.crc8, tb[CGW_CS_CRC8],
CGW_CS_CRC8_LEN);
- /*
- * select dedicated processing function to reduce
+ /* select dedicated processing function to reduce
* runtime operations in receive hot path.
*/
if (c->from_idx < 0 || c->to_idx < 0 ||
@@ -771,15 +928,14 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
struct cgw_csum_xor *c = nla_data(tb[CGW_CS_XOR]);
err = cgw_chk_csum_parms(c->from_idx, c->to_idx,
- c->result_idx);
+ c->result_idx, r);
if (err)
return err;
nla_memcpy(&mod->csum.xor, tb[CGW_CS_XOR],
CGW_CS_XOR_LEN);
- /*
- * select dedicated processing function to reduce
+ /* select dedicated processing function to reduce
* runtime operations in receive hot path.
*/
if (c->from_idx < 0 || c->to_idx < 0 ||
@@ -791,16 +947,14 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
mod->csumfunc.xor = cgw_csum_xor_neg;
}
- if (tb[CGW_MOD_UID]) {
+ if (tb[CGW_MOD_UID])
nla_memcpy(&mod->uid, tb[CGW_MOD_UID], sizeof(u32));
- }
}
if (gwtype == CGW_TYPE_CAN_CAN) {
-
/* check CGW_TYPE_CAN_CAN specific attributes */
-
struct can_can_gw *ccgw = (struct can_can_gw *)gwtypeattr;
+
memset(ccgw, 0, sizeof(*ccgw));
/* check for can_filter in attributes */
@@ -861,12 +1015,10 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh,
return err;
if (mod.uid) {
-
ASSERT_RTNL();
/* check for updating an existing job with identical uid */
hlist_for_each_entry(gwj, &net->can.cgw_list, list) {
-
if (gwj->mod.uid != mod.uid)
continue;
@@ -987,7 +1139,6 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh,
/* remove only the first matching entry */
hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) {
-
if (gwj->flags != r->flags)
continue;
diff --git a/net/can/j1939/Kconfig b/net/can/j1939/Kconfig
new file mode 100644
index 000000000000..2998298b71ec
--- /dev/null
+++ b/net/can/j1939/Kconfig
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# SAE J1939 network layer core configuration
+#
+
+config CAN_J1939
+ tristate "SAE J1939"
+ depends on CAN
+ help
+ SAE J1939
+ Say Y to have in-kernel support for the SAE J1939 socket type.
+ This allows communication according to SAE J1939.
+ The relevant parts in the kernel are
+ SAE J1939-21 (datalink & transport protocol)
+ & SAE J1939-81 (network management).
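A minimal userspace sketch of opening and binding such a socket, assuming the sockaddr_can j1939 fields and the J1939_NO_* constants from the new UAPI headers; the interface name and source address are illustrative and error handling is trimmed:

#include <linux/can.h>
#include <linux/can/j1939.h>
#include <net/if.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int j1939_open(const char *ifname, __u8 sa)
{
	struct sockaddr_can addr;
	int s;

	s = socket(PF_CAN, SOCK_DGRAM, CAN_J1939);
	if (s < 0)
		return -1;

	memset(&addr, 0, sizeof(addr));
	addr.can_family = AF_CAN;
	addr.can_ifindex = if_nametoindex(ifname);	/* e.g. "can0" */
	addr.can_addr.j1939.name = J1939_NO_NAME;
	addr.can_addr.j1939.pgn = J1939_NO_PGN;
	addr.can_addr.j1939.addr = sa;			/* e.g. 0x20 */

	if (bind(s, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		close(s);
		return -1;
	}

	return s;
}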
diff --git a/net/can/j1939/Makefile b/net/can/j1939/Makefile
new file mode 100644
index 000000000000..19181bdae173
--- /dev/null
+++ b/net/can/j1939/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_CAN_J1939) += can-j1939.o
+
+can-j1939-objs := \
+ address-claim.o \
+ bus.o \
+ main.o \
+ socket.o \
+ transport.o
diff --git a/net/can/j1939/address-claim.c b/net/can/j1939/address-claim.c
new file mode 100644
index 000000000000..f33c47327927
--- /dev/null
+++ b/net/can/j1939/address-claim.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2010-2011 EIA Electronics,
+// Kurt Van Dijck <kurt.van.dijck@eia.be>
+// Copyright (c) 2010-2011 EIA Electronics,
+// Pieter Beyens <pieter.beyens@eia.be>
+// Copyright (c) 2017-2019 Pengutronix,
+// Marc Kleine-Budde <kernel@pengutronix.de>
+// Copyright (c) 2017-2019 Pengutronix,
+// Oleksij Rempel <kernel@pengutronix.de>
+
+/* J1939 Address Claiming.
+ * Address Claiming in the kernel
+ * - keeps track of the AC states of ECUs,
+ * - resolves NAME<=>SA taking into account the AC states of ECUs.
+ *
+ * All Address Claim msgs (including host-originated msgs) are processed
+ * in the receive path (a sent msg is always received again via CAN echo).
+ * As such, AC msgs are processed in the order in which they are sent
+ * on the bus.
+ *
+ * This module doesn't send msgs itself (e.g. replies to Address Claims);
+ * that is the responsibility of a user space application or daemon.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+
+#include "j1939-priv.h"
+
+static inline name_t j1939_skb_to_name(const struct sk_buff *skb)
+{
+ return le64_to_cpup((__le64 *)skb->data);
+}
+
+static inline bool j1939_ac_msg_is_request(struct sk_buff *skb)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ int req_pgn;
+
+ if (skb->len < 3 || skcb->addr.pgn != J1939_PGN_REQUEST)
+ return false;
+
+ req_pgn = skb->data[0] | (skb->data[1] << 8) | (skb->data[2] << 16);
+
+ return req_pgn == J1939_PGN_ADDRESS_CLAIMED;
+}
+
+static int j1939_ac_verify_outgoing(struct j1939_priv *priv,
+ struct sk_buff *skb)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+
+ if (skb->len != 8) {
+ netdev_notice(priv->ndev, "tx address claim with dlc %i\n",
+ skb->len);
+ return -EPROTO;
+ }
+
+ if (skcb->addr.src_name != j1939_skb_to_name(skb)) {
+ netdev_notice(priv->ndev, "tx address claim with different name\n");
+ return -EPROTO;
+ }
+
+ if (skcb->addr.sa == J1939_NO_ADDR) {
+ netdev_notice(priv->ndev, "tx address claim with broadcast sa\n");
+ return -EPROTO;
+ }
+
+ /* ac must always be a broadcast */
+ if (skcb->addr.dst_name || skcb->addr.da != J1939_NO_ADDR) {
+ netdev_notice(priv->ndev, "tx address claim with dest, not broadcast\n");
+ return -EPROTO;
+ }
+ return 0;
+}
+
+int j1939_ac_fixup(struct j1939_priv *priv, struct sk_buff *skb)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ int ret;
+ u8 addr;
+
+ /* network mgmt: address claiming msgs */
+ if (skcb->addr.pgn == J1939_PGN_ADDRESS_CLAIMED) {
+ struct j1939_ecu *ecu;
+
+ ret = j1939_ac_verify_outgoing(priv, skb);
+ /* bail out when the outgoing address claim is malformed */
+ if (ret < 0)
+ return ret;
+ ecu = j1939_ecu_get_by_name(priv, skcb->addr.src_name);
+ if (!ecu)
+ return -ENODEV;
+
+ if (ecu->addr != skcb->addr.sa)
+ /* hold further traffic for ecu, remove from parent */
+ j1939_ecu_unmap(ecu);
+ j1939_ecu_put(ecu);
+ } else if (skcb->addr.src_name) {
+ /* assign source address */
+ addr = j1939_name_to_addr(priv, skcb->addr.src_name);
+ if (!j1939_address_is_unicast(addr) &&
+ !j1939_ac_msg_is_request(skb)) {
+ netdev_notice(priv->ndev, "tx drop: invalid sa for name 0x%016llx\n",
+ skcb->addr.src_name);
+ return -EADDRNOTAVAIL;
+ }
+ skcb->addr.sa = addr;
+ }
+
+ /* assign destination address */
+ if (skcb->addr.dst_name) {
+ addr = j1939_name_to_addr(priv, skcb->addr.dst_name);
+ if (!j1939_address_is_unicast(addr)) {
+ netdev_notice(priv->ndev, "tx drop: invalid da for name 0x%016llx\n",
+ skcb->addr.dst_name);
+ return -EADDRNOTAVAIL;
+ }
+ skcb->addr.da = addr;
+ }
+ return 0;
+}
+
+static void j1939_ac_process(struct j1939_priv *priv, struct sk_buff *skb)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ struct j1939_ecu *ecu, *prev;
+ name_t name;
+
+ if (skb->len != 8) {
+ netdev_notice(priv->ndev, "rx address claim with wrong dlc %i\n",
+ skb->len);
+ return;
+ }
+
+ name = j1939_skb_to_name(skb);
+ skcb->addr.src_name = name;
+ if (!name) {
+ netdev_notice(priv->ndev, "rx address claim without name\n");
+ return;
+ }
+
+ if (!j1939_address_is_valid(skcb->addr.sa)) {
+ netdev_notice(priv->ndev, "rx address claim with broadcast sa\n");
+ return;
+ }
+
+ write_lock_bh(&priv->lock);
+
+ /* Few words on the ECU ref counting:
+ *
+ * First we get an ECU handle, either with
+ * j1939_ecu_get_by_name_locked() (increments the ref counter)
+ * or j1939_ecu_create_locked() (initializes an ECU object
+ * with a ref counter of 1).
+ *
+ * j1939_ecu_unmap_locked() will decrement the ref counter,
+ * but only if the ECU was mapped before. So "ecu" still
+ * belongs to us.
+ *
+ * j1939_ecu_timer_start() will increment the ref counter
+ * before it starts the timer, so we can put the ecu when
+ * leaving this function.
+ */
+ ecu = j1939_ecu_get_by_name_locked(priv, name);
+ if (!ecu && j1939_address_is_unicast(skcb->addr.sa))
+ ecu = j1939_ecu_create_locked(priv, name);
+
+ if (IS_ERR_OR_NULL(ecu))
+ goto out_unlock_bh;
+
+ /* cancel pending (previous) address claim */
+ j1939_ecu_timer_cancel(ecu);
+
+ if (j1939_address_is_idle(skcb->addr.sa)) {
+ j1939_ecu_unmap_locked(ecu);
+ goto out_ecu_put;
+ }
+
+ /* save new addr */
+ if (ecu->addr != skcb->addr.sa)
+ j1939_ecu_unmap_locked(ecu);
+ ecu->addr = skcb->addr.sa;
+
+ prev = j1939_ecu_get_by_addr_locked(priv, skcb->addr.sa);
+ if (prev) {
+ if (ecu->name > prev->name) {
+ j1939_ecu_unmap_locked(ecu);
+ j1939_ecu_put(prev);
+ goto out_ecu_put;
+ } else {
+ /* kick prev if less or equal */
+ j1939_ecu_unmap_locked(prev);
+ j1939_ecu_put(prev);
+ }
+ }
+
+ j1939_ecu_timer_start(ecu);
+ out_ecu_put:
+ j1939_ecu_put(ecu);
+ out_unlock_bh:
+ write_unlock_bh(&priv->lock);
+}
+
+void j1939_ac_recv(struct j1939_priv *priv, struct sk_buff *skb)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ struct j1939_ecu *ecu;
+
+ /* network mgmt */
+ if (skcb->addr.pgn == J1939_PGN_ADDRESS_CLAIMED) {
+ j1939_ac_process(priv, skb);
+ } else if (j1939_address_is_unicast(skcb->addr.sa)) {
+ /* assign source name */
+ ecu = j1939_ecu_get_by_addr(priv, skcb->addr.sa);
+ if (ecu) {
+ skcb->addr.src_name = ecu->name;
+ j1939_ecu_put(ecu);
+ }
+ }
+
+ /* assign destination name */
+ ecu = j1939_ecu_get_by_addr(priv, skcb->addr.da);
+ if (ecu) {
+ skcb->addr.dst_name = ecu->name;
+ j1939_ecu_put(ecu);
+ }
+}
diff --git a/net/can/j1939/bus.c b/net/can/j1939/bus.c
new file mode 100644
index 000000000000..486687901602
--- /dev/null
+++ b/net/can/j1939/bus.c
@@ -0,0 +1,333 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2010-2011 EIA Electronics,
+// Kurt Van Dijck <kurt.van.dijck@eia.be>
+// Copyright (c) 2017-2019 Pengutronix,
+// Marc Kleine-Budde <kernel@pengutronix.de>
+// Copyright (c) 2017-2019 Pengutronix,
+// Oleksij Rempel <kernel@pengutronix.de>
+
+/* bus for j1939 remote devices
+ * Since rtnetlink, no real bus is used.
+ */
+
+#include <net/sock.h>
+
+#include "j1939-priv.h"
+
+static void __j1939_ecu_release(struct kref *kref)
+{
+ struct j1939_ecu *ecu = container_of(kref, struct j1939_ecu, kref);
+ struct j1939_priv *priv = ecu->priv;
+
+ list_del(&ecu->list);
+ kfree(ecu);
+ j1939_priv_put(priv);
+}
+
+void j1939_ecu_put(struct j1939_ecu *ecu)
+{
+ kref_put(&ecu->kref, __j1939_ecu_release);
+}
+
+static void j1939_ecu_get(struct j1939_ecu *ecu)
+{
+ kref_get(&ecu->kref);
+}
+
+static bool j1939_ecu_is_mapped_locked(struct j1939_ecu *ecu)
+{
+ struct j1939_priv *priv = ecu->priv;
+
+ lockdep_assert_held(&priv->lock);
+
+ return j1939_ecu_find_by_addr_locked(priv, ecu->addr) == ecu;
+}
+
+/* ECU device interface */
+/* map ECU to a bus address space */
+static void j1939_ecu_map_locked(struct j1939_ecu *ecu)
+{
+ struct j1939_priv *priv = ecu->priv;
+ struct j1939_addr_ent *ent;
+
+ lockdep_assert_held(&priv->lock);
+
+ if (!j1939_address_is_unicast(ecu->addr))
+ return;
+
+ ent = &priv->ents[ecu->addr];
+
+ if (ent->ecu) {
+ netdev_warn(priv->ndev, "Trying to map already mapped ECU, addr: 0x%02x, name: 0x%016llx. Skip it.\n",
+ ecu->addr, ecu->name);
+ return;
+ }
+
+ j1939_ecu_get(ecu);
+ ent->ecu = ecu;
+ ent->nusers += ecu->nusers;
+}
+
+/* unmap ECU from a bus address space */
+void j1939_ecu_unmap_locked(struct j1939_ecu *ecu)
+{
+ struct j1939_priv *priv = ecu->priv;
+ struct j1939_addr_ent *ent;
+
+ lockdep_assert_held(&priv->lock);
+
+ if (!j1939_address_is_unicast(ecu->addr))
+ return;
+
+ if (!j1939_ecu_is_mapped_locked(ecu))
+ return;
+
+ ent = &priv->ents[ecu->addr];
+ ent->ecu = NULL;
+ ent->nusers -= ecu->nusers;
+ j1939_ecu_put(ecu);
+}
+
+void j1939_ecu_unmap(struct j1939_ecu *ecu)
+{
+ write_lock_bh(&ecu->priv->lock);
+ j1939_ecu_unmap_locked(ecu);
+ write_unlock_bh(&ecu->priv->lock);
+}
+
+void j1939_ecu_unmap_all(struct j1939_priv *priv)
+{
+ int i;
+
+ write_lock_bh(&priv->lock);
+ for (i = 0; i < ARRAY_SIZE(priv->ents); i++)
+ if (priv->ents[i].ecu)
+ j1939_ecu_unmap_locked(priv->ents[i].ecu);
+ write_unlock_bh(&priv->lock);
+}
+
+void j1939_ecu_timer_start(struct j1939_ecu *ecu)
+{
+ /* The ECU is held here and released in the
+ * j1939_ecu_timer_handler() or j1939_ecu_timer_cancel().
+ */
+ j1939_ecu_get(ecu);
+
+ /* Schedule timer in 250 msec to commit address change. */
+ hrtimer_start(&ecu->ac_timer, ms_to_ktime(250),
+ HRTIMER_MODE_REL_SOFT);
+}
+
+void j1939_ecu_timer_cancel(struct j1939_ecu *ecu)
+{
+ if (hrtimer_cancel(&ecu->ac_timer))
+ j1939_ecu_put(ecu);
+}
+
+static enum hrtimer_restart j1939_ecu_timer_handler(struct hrtimer *hrtimer)
+{
+ struct j1939_ecu *ecu =
+ container_of(hrtimer, struct j1939_ecu, ac_timer);
+ struct j1939_priv *priv = ecu->priv;
+
+ write_lock_bh(&priv->lock);
+ /* TODO: can we test if ecu->addr is unicast before starting
+ * the timer?
+ */
+ j1939_ecu_map_locked(ecu);
+
+ /* The corresponding j1939_ecu_get() is in
+ * j1939_ecu_timer_start().
+ */
+ j1939_ecu_put(ecu);
+ write_unlock_bh(&priv->lock);
+
+ return HRTIMER_NORESTART;
+}
+
+struct j1939_ecu *j1939_ecu_create_locked(struct j1939_priv *priv, name_t name)
+{
+ struct j1939_ecu *ecu;
+
+ lockdep_assert_held(&priv->lock);
+
+ ecu = kzalloc(sizeof(*ecu), gfp_any());
+ if (!ecu)
+ return ERR_PTR(-ENOMEM);
+ kref_init(&ecu->kref);
+ ecu->addr = J1939_IDLE_ADDR;
+ ecu->name = name;
+
+ hrtimer_init(&ecu->ac_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT);
+ ecu->ac_timer.function = j1939_ecu_timer_handler;
+ INIT_LIST_HEAD(&ecu->list);
+
+ j1939_priv_get(priv);
+ ecu->priv = priv;
+ list_add_tail(&ecu->list, &priv->ecus);
+
+ return ecu;
+}
+
+struct j1939_ecu *j1939_ecu_find_by_addr_locked(struct j1939_priv *priv,
+ u8 addr)
+{
+ lockdep_assert_held(&priv->lock);
+
+ return priv->ents[addr].ecu;
+}
+
+struct j1939_ecu *j1939_ecu_get_by_addr_locked(struct j1939_priv *priv, u8 addr)
+{
+ struct j1939_ecu *ecu;
+
+ lockdep_assert_held(&priv->lock);
+
+ if (!j1939_address_is_unicast(addr))
+ return NULL;
+
+ ecu = j1939_ecu_find_by_addr_locked(priv, addr);
+ if (ecu)
+ j1939_ecu_get(ecu);
+
+ return ecu;
+}
+
+struct j1939_ecu *j1939_ecu_get_by_addr(struct j1939_priv *priv, u8 addr)
+{
+ struct j1939_ecu *ecu;
+
+ read_lock_bh(&priv->lock);
+ ecu = j1939_ecu_get_by_addr_locked(priv, addr);
+ read_unlock_bh(&priv->lock);
+
+ return ecu;
+}
+
+/* get pointer to ecu without increasing ref counter */
+static struct j1939_ecu *j1939_ecu_find_by_name_locked(struct j1939_priv *priv,
+ name_t name)
+{
+ struct j1939_ecu *ecu;
+
+ lockdep_assert_held(&priv->lock);
+
+ list_for_each_entry(ecu, &priv->ecus, list) {
+ if (ecu->name == name)
+ return ecu;
+ }
+
+ return NULL;
+}
+
+struct j1939_ecu *j1939_ecu_get_by_name_locked(struct j1939_priv *priv,
+ name_t name)
+{
+ struct j1939_ecu *ecu;
+
+ lockdep_assert_held(&priv->lock);
+
+ if (!name)
+ return NULL;
+
+ ecu = j1939_ecu_find_by_name_locked(priv, name);
+ if (ecu)
+ j1939_ecu_get(ecu);
+
+ return ecu;
+}
+
+struct j1939_ecu *j1939_ecu_get_by_name(struct j1939_priv *priv, name_t name)
+{
+ struct j1939_ecu *ecu;
+
+ read_lock_bh(&priv->lock);
+ ecu = j1939_ecu_get_by_name_locked(priv, name);
+ read_unlock_bh(&priv->lock);
+
+ return ecu;
+}
+
+u8 j1939_name_to_addr(struct j1939_priv *priv, name_t name)
+{
+ struct j1939_ecu *ecu;
+ int addr = J1939_IDLE_ADDR;
+
+ if (!name)
+ return J1939_NO_ADDR;
+
+ read_lock_bh(&priv->lock);
+ ecu = j1939_ecu_find_by_name_locked(priv, name);
+ if (ecu && j1939_ecu_is_mapped_locked(ecu))
+ /* ecu's SA is registered */
+ addr = ecu->addr;
+
+ read_unlock_bh(&priv->lock);
+
+ return addr;
+}
+
+/* TX addr/name accounting
+ * The transport protocol needs to know whether an SA is local or not.
+ * These functions originate from userspace manipulating sockets,
+ * so locking is straightforward.
+ */
+
+int j1939_local_ecu_get(struct j1939_priv *priv, name_t name, u8 sa)
+{
+ struct j1939_ecu *ecu;
+ int err = 0;
+
+ write_lock_bh(&priv->lock);
+
+ if (j1939_address_is_unicast(sa))
+ priv->ents[sa].nusers++;
+
+ if (!name)
+ goto done;
+
+ ecu = j1939_ecu_get_by_name_locked(priv, name);
+ if (!ecu)
+ ecu = j1939_ecu_create_locked(priv, name);
+ err = PTR_ERR_OR_ZERO(ecu);
+ if (err)
+ goto done;
+
+ ecu->nusers++;
+ /* TODO: do we care if ecu->addr != sa? */
+ if (j1939_ecu_is_mapped_locked(ecu))
+ /* ecu's sa is active already */
+ priv->ents[ecu->addr].nusers++;
+
+ done:
+ write_unlock_bh(&priv->lock);
+
+ return err;
+}
+
+void j1939_local_ecu_put(struct j1939_priv *priv, name_t name, u8 sa)
+{
+ struct j1939_ecu *ecu;
+
+ write_lock_bh(&priv->lock);
+
+ if (j1939_address_is_unicast(sa))
+ priv->ents[sa].nusers--;
+
+ if (!name)
+ goto done;
+
+ ecu = j1939_ecu_find_by_name_locked(priv, name);
+ if (WARN_ON_ONCE(!ecu))
+ goto done;
+
+ ecu->nusers--;
+ /* TODO: do we care if ecu->addr != sa? */
+ if (j1939_ecu_is_mapped_locked(ecu))
+ /* ecu's sa is active already */
+ priv->ents[ecu->addr].nusers--;
+ j1939_ecu_put(ecu);
+
+ done:
+ write_unlock_bh(&priv->lock);
+}
diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h
new file mode 100644
index 000000000000..12369b604ce9
--- /dev/null
+++ b/net/can/j1939/j1939-priv.h
@@ -0,0 +1,338 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (c) 2010-2011 EIA Electronics,
+// Kurt Van Dijck <kurt.van.dijck@eia.be>
+// Copyright (c) 2017-2019 Pengutronix,
+// Marc Kleine-Budde <kernel@pengutronix.de>
+// Copyright (c) 2017-2019 Pengutronix,
+// Oleksij Rempel <kernel@pengutronix.de>
+
+#ifndef _J1939_PRIV_H_
+#define _J1939_PRIV_H_
+
+#include <linux/can/j1939.h>
+#include <net/sock.h>
+
+/* Timeout to receive the abort signal over loopback. In case the CAN
+ * bus is open, the timeout should be triggered.
+ */
+#define J1939_XTP_ABORT_TIMEOUT_MS 500
+#define J1939_SIMPLE_ECHO_TIMEOUT_MS (10 * 1000)
+
+struct j1939_session;
+enum j1939_sk_errqueue_type {
+ J1939_ERRQUEUE_ACK,
+ J1939_ERRQUEUE_SCHED,
+ J1939_ERRQUEUE_ABORT,
+};
+
+/* j1939 devices */
+struct j1939_ecu {
+ struct list_head list;
+ name_t name;
+ u8 addr;
+
+ /* address claim timer: once it fires, this ecu has successfully
+ * claimed @addr as its address
+ */
+ struct hrtimer ac_timer;
+ struct kref kref;
+ struct j1939_priv *priv;
+
+ /* count users, to help transport protocol decide for interaction */
+ int nusers;
+};
+
+struct j1939_priv {
+ struct list_head ecus;
+ /* local ECU list in priv
+ * It allows irq (& softirq) context lookups on j1939 devices.
+ * A separate list is used because the two alternatives are no
+ * easier, or even wrong:
+ * 1) using the pure kobject methods involves mutexes, which are not
+ * allowed in irq context.
+ * 2) duplicating data structures would require a lot of
+ * synchronization code.
+ */
+
+ /* segments need a lock to protect the above list */
+ rwlock_t lock;
+
+ struct net_device *ndev;
+
+ /* array of 256 entries that caches the ECUs with claimed addresses.
+ * also protected by the above lock
+ */
+ struct j1939_addr_ent {
+ struct j1939_ecu *ecu;
+ /* count users, to help transport protocol */
+ int nusers;
+ } ents[256];
+
+ struct kref kref;
+
+ /* List of active sessions, used to prevent starting a
+ * conflicting one.
+ *
+ * Two sessions with the same type, addresses and direction
+ * must not run at the same time.
+ */
+ struct list_head active_session_list;
+
+ /* protects active_session_list */
+ spinlock_t active_session_list_lock;
+
+ unsigned int tp_max_packet_size;
+
+ /* lock for j1939_socks list */
+ spinlock_t j1939_socks_lock;
+ struct list_head j1939_socks;
+
+ struct kref rx_kref;
+};
+
+void j1939_ecu_put(struct j1939_ecu *ecu);
+
+/* keep the cache of what is local */
+int j1939_local_ecu_get(struct j1939_priv *priv, name_t name, u8 sa);
+void j1939_local_ecu_put(struct j1939_priv *priv, name_t name, u8 sa);
+
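+/* J1939 address space: 0x00..0xfd are unicast ECU addresses, 0xfe
+ * (J1939_IDLE_ADDR) is the idle/null address used while no address is
+ * claimed, and 0xff (J1939_NO_ADDR) is the global (broadcast) address.
+ */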
+static inline bool j1939_address_is_unicast(u8 addr)
+{
+ return addr <= J1939_MAX_UNICAST_ADDR;
+}
+
+static inline bool j1939_address_is_idle(u8 addr)
+{
+ return addr == J1939_IDLE_ADDR;
+}
+
+static inline bool j1939_address_is_valid(u8 addr)
+{
+ return addr != J1939_NO_ADDR;
+}
+
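+/* A PGN is "PDU1" when its PDU format byte (PF, bits 15..8 of the PGN) is
+ * below 0xf0; such PGNs are destination specific, e.g. the Request PGN
+ * 0x0ea00. PGNs with PF >= 0xf0, e.g. DM1 (0x0feca), are PDU2 and always
+ * broadcast.
+ */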
+static inline bool j1939_pgn_is_pdu1(pgn_t pgn)
+{
+ /* ignore dp & res bits for this */
+ return (pgn & 0xff00) < 0xf000;
+}
+
+/* utility to correctly unmap an ECU */
+void j1939_ecu_unmap_locked(struct j1939_ecu *ecu);
+void j1939_ecu_unmap(struct j1939_ecu *ecu);
+
+u8 j1939_name_to_addr(struct j1939_priv *priv, name_t name);
+struct j1939_ecu *j1939_ecu_find_by_addr_locked(struct j1939_priv *priv,
+ u8 addr);
+struct j1939_ecu *j1939_ecu_get_by_addr(struct j1939_priv *priv, u8 addr);
+struct j1939_ecu *j1939_ecu_get_by_addr_locked(struct j1939_priv *priv,
+ u8 addr);
+struct j1939_ecu *j1939_ecu_get_by_name(struct j1939_priv *priv, name_t name);
+struct j1939_ecu *j1939_ecu_get_by_name_locked(struct j1939_priv *priv,
+ name_t name);
+
+enum j1939_transfer_type {
+ J1939_TP,
+ J1939_ETP,
+ J1939_SIMPLE,
+};
+
+struct j1939_addr {
+ name_t src_name;
+ name_t dst_name;
+ pgn_t pgn;
+
+ u8 sa;
+ u8 da;
+
+ u8 type;
+};
+
+/* control buffer of the sk_buff */
+struct j1939_sk_buff_cb {
+ /* Offset in bytes within one ETP session */
+ u32 offset;
+
+ /* for tx, MSG_SYN will be used to sync on sockets */
+ u32 msg_flags;
+ u32 tskey;
+
+ struct j1939_addr addr;
+
+ /* Flags for quick lookups during skb processing.
+ * These are set in the receive path only.
+ */
+#define J1939_ECU_LOCAL_SRC BIT(0)
+#define J1939_ECU_LOCAL_DST BIT(1)
+ u8 flags;
+
+ priority_t priority;
+};
+
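+/* j1939_skb_to_cb() below overlays the j1939 control buffer on skb->cb;
+ * the BUILD_BUG_ON() makes sure it fits into the 48 byte scratch area.
+ */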
+static inline
+struct j1939_sk_buff_cb *j1939_skb_to_cb(const struct sk_buff *skb)
+{
+ BUILD_BUG_ON(sizeof(struct j1939_sk_buff_cb) > sizeof(skb->cb));
+
+ return (struct j1939_sk_buff_cb *)skb->cb;
+}
+
+int j1939_send_one(struct j1939_priv *priv, struct sk_buff *skb);
+void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb);
+bool j1939_sk_recv_match(struct j1939_priv *priv,
+ struct j1939_sk_buff_cb *skcb);
+void j1939_sk_send_loop_abort(struct sock *sk, int err);
+void j1939_sk_errqueue(struct j1939_session *session,
+ enum j1939_sk_errqueue_type type);
+void j1939_sk_queue_activate_next(struct j1939_session *session);
+
+/* stack entries */
+struct j1939_session *j1939_tp_send(struct j1939_priv *priv,
+ struct sk_buff *skb, size_t size);
+int j1939_tp_recv(struct j1939_priv *priv, struct sk_buff *skb);
+int j1939_ac_fixup(struct j1939_priv *priv, struct sk_buff *skb);
+void j1939_ac_recv(struct j1939_priv *priv, struct sk_buff *skb);
+void j1939_simple_recv(struct j1939_priv *priv, struct sk_buff *skb);
+
+/* network management */
+struct j1939_ecu *j1939_ecu_create_locked(struct j1939_priv *priv, name_t name);
+
+void j1939_ecu_timer_start(struct j1939_ecu *ecu);
+void j1939_ecu_timer_cancel(struct j1939_ecu *ecu);
+void j1939_ecu_unmap_all(struct j1939_priv *priv);
+
+struct j1939_priv *j1939_netdev_start(struct net_device *ndev);
+void j1939_netdev_stop(struct j1939_priv *priv);
+
+void j1939_priv_put(struct j1939_priv *priv);
+void j1939_priv_get(struct j1939_priv *priv);
+
+/* notify/alert all j1939 sockets bound to ifindex */
+void j1939_sk_netdev_event_netdown(struct j1939_priv *priv);
+int j1939_cancel_active_session(struct j1939_priv *priv, struct sock *sk);
+void j1939_tp_init(struct j1939_priv *priv);
+
+/* decrement pending skb for a j1939 socket */
+void j1939_sock_pending_del(struct sock *sk);
+
+enum j1939_session_state {
+ J1939_SESSION_NEW,
+ J1939_SESSION_ACTIVE,
+ /* waiting for abort signal on the bus */
+ J1939_SESSION_WAITING_ABORT,
+ J1939_SESSION_ACTIVE_MAX,
+ J1939_SESSION_DONE,
+};
+
+struct j1939_session {
+ struct j1939_priv *priv;
+ struct list_head active_session_list_entry;
+ struct list_head sk_session_queue_entry;
+ struct kref kref;
+ struct sock *sk;
+
+ /* ifindex, src, dst, pgn define the session block
+ * they are _never_ modified after insertion in the list
+ * this decreases locking problems a _lot_
+ */
+ struct j1939_sk_buff_cb skcb;
+ struct sk_buff_head skb_queue;
+
+ /* All tx related state (last_txcmd, pkt.tx) is only modified
+ * from within the txtimer hrtimer.
+ * 'total' & 'block' are never changed,
+ * last_cmd, last & block are protected by ->lock.
+ * This means that tx may still run after a CTS that should have
+ * stopped it was received, but this time discrepancy cannot be
+ * avoided anyhow.
+ */
+ u8 last_cmd, last_txcmd;
+ bool transmission;
+ bool extd;
+ /* Total message size, number of bytes */
+ unsigned int total_message_size;
+ /* Total number of bytes queued from the socket to the session */
+ unsigned int total_queued_size;
+ unsigned int tx_retry;
+
+ int err;
+ u32 tskey;
+ enum j1939_session_state state;
+
+ /* Packet counters for a (extended) transfer session. Each packet
+ * carries at most 7 bytes of payload.
+ */
+ struct {
+ /* total - total number of packets for this session */
+ unsigned int total;
+ /* last - last packet of a transfer block after which
+ * responder should send ETP.CM_CTS and originator
+ * ETP.CM_DPO
+ */
+ unsigned int last;
+ /* tx - number of packets sent by the originator node.
+ * this counter can be set back if the responder node
+ * didn't receive all packets sent by the originator.
+ */
+ unsigned int tx;
+ unsigned int tx_acked;
+ /* rx - number of packets received */
+ unsigned int rx;
+ /* block - amount of packets expected in one block */
+ unsigned int block;
+ /* dpo - ETP.CM_DPO, Data Packet Offset */
+ unsigned int dpo;
+ } pkt;
+ struct hrtimer txtimer, rxtimer;
+};
+
+struct j1939_sock {
+ struct sock sk; /* must be first to skip with memset */
+ struct j1939_priv *priv;
+ struct list_head list;
+
+#define J1939_SOCK_BOUND BIT(0)
+#define J1939_SOCK_CONNECTED BIT(1)
+#define J1939_SOCK_PROMISC BIT(2)
+#define J1939_SOCK_ERRQUEUE BIT(3)
+ int state;
+
+ int ifindex;
+ struct j1939_addr addr;
+ struct j1939_filter *filters;
+ int nfilters;
+ pgn_t pgn_rx_filter;
+
+ /* j1939 may emit equal PGN (!= equal CAN-id's) out of order
+ * when transport protocol comes in.
+ * To allow emitting in order, keep a 'pending' nr. of packets
+ */
+ atomic_t skb_pending;
+ wait_queue_head_t waitq;
+
+ /* lock for the sk_session_queue list */
+ spinlock_t sk_session_queue_lock;
+ struct list_head sk_session_queue;
+};
+
+static inline struct j1939_sock *j1939_sk(const struct sock *sk)
+{
+ return container_of(sk, struct j1939_sock, sk);
+}
+
+void j1939_session_get(struct j1939_session *session);
+void j1939_session_put(struct j1939_session *session);
+void j1939_session_skb_queue(struct j1939_session *session,
+ struct sk_buff *skb);
+int j1939_session_activate(struct j1939_session *session);
+void j1939_tp_schedule_txtimer(struct j1939_session *session, int msec);
+void j1939_session_timers_cancel(struct j1939_session *session);
+
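+/* A TP session carries at most 255 packets of 7 data bytes (1785 bytes),
+ * an ETP session up to 0xffffff packets of 7 bytes (~112 MiB); the first
+ * byte of every data frame holds the sequence number.
+ */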
+#define J1939_MAX_TP_PACKET_SIZE (7 * 0xff)
+#define J1939_MAX_ETP_PACKET_SIZE (7 * 0x00ffffff)
+
+#define J1939_REGULAR 0
+#define J1939_EXTENDED 1
+
+/* CAN protocol */
+extern const struct can_proto j1939_can_proto;
+
+#endif /* _J1939_PRIV_H_ */
diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
new file mode 100644
index 000000000000..def2f813ffce
--- /dev/null
+++ b/net/can/j1939/main.c
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2010-2011 EIA Electronics,
+// Pieter Beyens <pieter.beyens@eia.be>
+// Copyright (c) 2010-2011 EIA Electronics,
+// Kurt Van Dijck <kurt.van.dijck@eia.be>
+// Copyright (c) 2018 Protonic,
+// Robin van der Gracht <robin@protonic.nl>
+// Copyright (c) 2017-2019 Pengutronix,
+// Marc Kleine-Budde <kernel@pengutronix.de>
+// Copyright (c) 2017-2019 Pengutronix,
+// Oleksij Rempel <kernel@pengutronix.de>
+
+/* Core of can-j1939 that links j1939 to CAN. */
+
+#include <linux/can/can-ml.h>
+#include <linux/can/core.h>
+#include <linux/can/skb.h>
+#include <linux/if_arp.h>
+#include <linux/module.h>
+
+#include "j1939-priv.h"
+
+MODULE_DESCRIPTION("PF_CAN SAE J1939");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("EIA Electronics (Kurt Van Dijck & Pieter Beyens)");
+MODULE_ALIAS("can-proto-" __stringify(CAN_J1939));
+
+/* LOWLEVEL CAN interface */
+
+/* CAN_HDR: #bytes before can_frame data part */
+#define J1939_CAN_HDR (offsetof(struct can_frame, data))
+
+/* CAN_FTR: #bytes beyond data part */
+#define J1939_CAN_FTR (sizeof(struct can_frame) - J1939_CAN_HDR - \
+ sizeof(((struct can_frame *)0)->data))
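+
+/* On the CAN layer a j1939 skb holds a full struct can_frame. The rx path
+ * pulls J1939_CAN_HDR so that skb->data points at the payload; the tx path
+ * pushes it back (and pads up to the frame footer) before can_send().
+ */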
+
+/* lowest layer */
+static void j1939_can_recv(struct sk_buff *iskb, void *data)
+{
+ struct j1939_priv *priv = data;
+ struct sk_buff *skb;
+ struct j1939_sk_buff_cb *skcb, *iskcb;
+ struct can_frame *cf;
+
+ /* create a copy of the skb
+ * j1939 only delivers the real data bytes,
+ * the header goes into sockaddr,
+ * so j1939 must not modify the incoming skb itself
+ */
+ skb = skb_clone(iskb, GFP_ATOMIC);
+ if (!skb)
+ return;
+
+ can_skb_set_owner(skb, iskb->sk);
+
+ /* get a pointer to the header of the skb
+ * the skb data pointer is then moved, so that skb->data
+ * points at the actual payload
+ */
+ cf = (void *)skb->data;
+ skb_pull(skb, J1939_CAN_HDR);
+
+ /* fix length, set to dlc, with 8 maximum */
+ skb_trim(skb, min_t(uint8_t, cf->can_dlc, 8));
+
+ /* set addr */
+ skcb = j1939_skb_to_cb(skb);
+ memset(skcb, 0, sizeof(*skcb));
+
+ iskcb = j1939_skb_to_cb(iskb);
+ skcb->tskey = iskcb->tskey;
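+
+ /* 29 bit J1939 CAN id: priority in bits 28..26, the 18 bit PGN in
+ * bits 25..8 and the source address in bits 7..0. For PDU1 PGNs the
+ * low PGN byte is the destination address and is stripped below.
+ */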
+ skcb->priority = (cf->can_id >> 26) & 0x7;
+ skcb->addr.sa = cf->can_id;
+ skcb->addr.pgn = (cf->can_id >> 8) & J1939_PGN_MAX;
+ /* set default message type */
+ skcb->addr.type = J1939_TP;
+ if (j1939_pgn_is_pdu1(skcb->addr.pgn)) {
+ /* Type 1: with destination address */
+ skcb->addr.da = skcb->addr.pgn;
+ /* normalize pgn: strip dst address */
+ skcb->addr.pgn &= 0x3ff00;
+ } else {
+ /* set broadcast address */
+ skcb->addr.da = J1939_NO_ADDR;
+ }
+
+ /* update localflags */
+ read_lock_bh(&priv->lock);
+ if (j1939_address_is_unicast(skcb->addr.sa) &&
+ priv->ents[skcb->addr.sa].nusers)
+ skcb->flags |= J1939_ECU_LOCAL_SRC;
+ if (j1939_address_is_unicast(skcb->addr.da) &&
+ priv->ents[skcb->addr.da].nusers)
+ skcb->flags |= J1939_ECU_LOCAL_DST;
+ read_unlock_bh(&priv->lock);
+
+ /* deliver into the j1939 stack ... */
+ j1939_ac_recv(priv, skb);
+
+ if (j1939_tp_recv(priv, skb))
+ /* this means the transport layer processed the message */
+ goto done;
+
+ j1939_simple_recv(priv, skb);
+ j1939_sk_recv(priv, skb);
+ done:
+ kfree_skb(skb);
+}
+
+/* NETDEV MANAGEMENT */
+
+/* values for can_rx_(un)register */
+#define J1939_CAN_ID CAN_EFF_FLAG
+#define J1939_CAN_MASK (CAN_EFF_FLAG | CAN_RTR_FLAG)
+
+static DEFINE_SPINLOCK(j1939_netdev_lock);
+
+static struct j1939_priv *j1939_priv_create(struct net_device *ndev)
+{
+ struct j1939_priv *priv;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return NULL;
+
+ rwlock_init(&priv->lock);
+ INIT_LIST_HEAD(&priv->ecus);
+ priv->ndev = ndev;
+ kref_init(&priv->kref);
+ kref_init(&priv->rx_kref);
+ dev_hold(ndev);
+
+ netdev_dbg(priv->ndev, "%s : 0x%p\n", __func__, priv);
+
+ return priv;
+}
+
+static inline void j1939_priv_set(struct net_device *ndev,
+ struct j1939_priv *priv)
+{
+ struct can_ml_priv *can_ml_priv = ndev->ml_priv;
+
+ can_ml_priv->j1939_priv = priv;
+}
+
+static void __j1939_priv_release(struct kref *kref)
+{
+ struct j1939_priv *priv = container_of(kref, struct j1939_priv, kref);
+ struct net_device *ndev = priv->ndev;
+
+ netdev_dbg(priv->ndev, "%s: 0x%p\n", __func__, priv);
+
+ dev_put(ndev);
+ kfree(priv);
+}
+
+void j1939_priv_put(struct j1939_priv *priv)
+{
+ kref_put(&priv->kref, __j1939_priv_release);
+}
+
+void j1939_priv_get(struct j1939_priv *priv)
+{
+ kref_get(&priv->kref);
+}
+
+static int j1939_can_rx_register(struct j1939_priv *priv)
+{
+ struct net_device *ndev = priv->ndev;
+ int ret;
+
+ j1939_priv_get(priv);
+ ret = can_rx_register(dev_net(ndev), ndev, J1939_CAN_ID, J1939_CAN_MASK,
+ j1939_can_recv, priv, "j1939", NULL);
+ if (ret < 0) {
+ j1939_priv_put(priv);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void j1939_can_rx_unregister(struct j1939_priv *priv)
+{
+ struct net_device *ndev = priv->ndev;
+
+ can_rx_unregister(dev_net(ndev), ndev, J1939_CAN_ID, J1939_CAN_MASK,
+ j1939_can_recv, priv);
+
+ j1939_priv_put(priv);
+}
+
+static void __j1939_rx_release(struct kref *kref)
+ __releases(&j1939_netdev_lock)
+{
+ struct j1939_priv *priv = container_of(kref, struct j1939_priv,
+ rx_kref);
+
+ j1939_can_rx_unregister(priv);
+ j1939_ecu_unmap_all(priv);
+ j1939_priv_set(priv->ndev, NULL);
+ spin_unlock(&j1939_netdev_lock);
+}
+
+/* get pointer to priv without increasing ref counter */
+static inline struct j1939_priv *j1939_ndev_to_priv(struct net_device *ndev)
+{
+ struct can_ml_priv *can_ml_priv = ndev->ml_priv;
+
+ return can_ml_priv->j1939_priv;
+}
+
+static struct j1939_priv *j1939_priv_get_by_ndev_locked(struct net_device *ndev)
+{
+ struct j1939_priv *priv;
+
+ lockdep_assert_held(&j1939_netdev_lock);
+
+ if (ndev->type != ARPHRD_CAN)
+ return NULL;
+
+ priv = j1939_ndev_to_priv(ndev);
+ if (priv)
+ j1939_priv_get(priv);
+
+ return priv;
+}
+
+static struct j1939_priv *j1939_priv_get_by_ndev(struct net_device *ndev)
+{
+ struct j1939_priv *priv;
+
+ spin_lock(&j1939_netdev_lock);
+ priv = j1939_priv_get_by_ndev_locked(ndev);
+ spin_unlock(&j1939_netdev_lock);
+
+ return priv;
+}
+
+struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
+{
+ struct j1939_priv *priv, *priv_new;
+ int ret;
+
+ priv = j1939_priv_get_by_ndev(ndev);
+ if (priv) {
+ kref_get(&priv->rx_kref);
+ return priv;
+ }
+
+ priv = j1939_priv_create(ndev);
+ if (!priv)
+ return ERR_PTR(-ENOMEM);
+
+ j1939_tp_init(priv);
+ spin_lock_init(&priv->j1939_socks_lock);
+ INIT_LIST_HEAD(&priv->j1939_socks);
+
+ spin_lock(&j1939_netdev_lock);
+ priv_new = j1939_priv_get_by_ndev_locked(ndev);
+ if (priv_new) {
+ /* Someone was faster than us, use their priv and roll
+ * back ours.
+ */
+ spin_unlock(&j1939_netdev_lock);
+ dev_put(ndev);
+ kfree(priv);
+ kref_get(&priv_new->rx_kref);
+ return priv_new;
+ }
+ j1939_priv_set(ndev, priv);
+ spin_unlock(&j1939_netdev_lock);
+
+ ret = j1939_can_rx_register(priv);
+ if (ret < 0)
+ goto out_priv_put;
+
+ return priv;
+
+ out_priv_put:
+ j1939_priv_set(ndev, NULL);
+ dev_put(ndev);
+ kfree(priv);
+
+ return ERR_PTR(ret);
+}
+
+void j1939_netdev_stop(struct j1939_priv *priv)
+{
+ kref_put_lock(&priv->rx_kref, __j1939_rx_release, &j1939_netdev_lock);
+ j1939_priv_put(priv);
+}
+
+int j1939_send_one(struct j1939_priv *priv, struct sk_buff *skb)
+{
+ int ret, dlc;
+ canid_t canid;
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ struct can_frame *cf;
+
+ /* apply sanity checks */
+ if (j1939_pgn_is_pdu1(skcb->addr.pgn))
+ skcb->addr.pgn &= J1939_PGN_PDU1_MAX;
+ else
+ skcb->addr.pgn &= J1939_PGN_MAX;
+
+ if (skcb->priority > 7)
+ skcb->priority = 6;
+
+ ret = j1939_ac_fixup(priv, skb);
+ if (unlikely(ret))
+ goto failed;
+ dlc = skb->len;
+
+ /* re-claim the CAN_HDR from the SKB */
+ cf = skb_push(skb, J1939_CAN_HDR);
+
+ /* make it a full can frame again */
+ skb_put(skb, J1939_CAN_FTR + (8 - dlc));
+
+ canid = CAN_EFF_FLAG |
+ (skcb->priority << 26) |
+ (skcb->addr.pgn << 8) |
+ skcb->addr.sa;
+ if (j1939_pgn_is_pdu1(skcb->addr.pgn))
+ canid |= skcb->addr.da << 8;
+
+ cf->can_id = canid;
+ cf->can_dlc = dlc;
+
+ return can_send(skb, 1);
+
+ failed:
+ kfree_skb(skb);
+ return ret;
+}
+
+static int j1939_netdev_notify(struct notifier_block *nb,
+ unsigned long msg, void *data)
+{
+ struct net_device *ndev = netdev_notifier_info_to_dev(data);
+ struct j1939_priv *priv;
+
+ priv = j1939_priv_get_by_ndev(ndev);
+ if (!priv)
+ goto notify_done;
+
+ if (ndev->type != ARPHRD_CAN)
+ goto notify_put;
+
+ switch (msg) {
+ case NETDEV_DOWN:
+ j1939_cancel_active_session(priv, NULL);
+ j1939_sk_netdev_event_netdown(priv);
+ j1939_ecu_unmap_all(priv);
+ break;
+ }
+
+notify_put:
+ j1939_priv_put(priv);
+
+notify_done:
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block j1939_netdev_notifier = {
+ .notifier_call = j1939_netdev_notify,
+};
+
+/* MODULE interface */
+static __init int j1939_module_init(void)
+{
+ int ret;
+
+ pr_info("can: SAE J1939\n");
+
+ ret = register_netdevice_notifier(&j1939_netdev_notifier);
+ if (ret)
+ goto fail_notifier;
+
+ ret = can_proto_register(&j1939_can_proto);
+ if (ret < 0) {
+ pr_err("can: registration of j1939 protocol failed\n");
+ goto fail_sk;
+ }
+
+ return 0;
+
+ fail_sk:
+ unregister_netdevice_notifier(&j1939_netdev_notifier);
+ fail_notifier:
+ return ret;
+}
+
+static __exit void j1939_module_exit(void)
+{
+ can_proto_unregister(&j1939_can_proto);
+
+ unregister_netdevice_notifier(&j1939_netdev_notifier);
+}
+
+module_init(j1939_module_init);
+module_exit(j1939_module_exit);
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
new file mode 100644
index 000000000000..37c1040bcb9c
--- /dev/null
+++ b/net/can/j1939/socket.c
@@ -0,0 +1,1160 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2010-2011 EIA Electronics,
+// Pieter Beyens <pieter.beyens@eia.be>
+// Copyright (c) 2010-2011 EIA Electronics,
+// Kurt Van Dijck <kurt.van.dijck@eia.be>
+// Copyright (c) 2018 Protonic,
+// Robin van der Gracht <robin@protonic.nl>
+// Copyright (c) 2017-2019 Pengutronix,
+// Marc Kleine-Budde <kernel@pengutronix.de>
+// Copyright (c) 2017-2019 Pengutronix,
+// Oleksij Rempel <kernel@pengutronix.de>
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/can/core.h>
+#include <linux/can/skb.h>
+#include <linux/errqueue.h>
+#include <linux/if_arp.h>
+
+#include "j1939-priv.h"
+
+#define J1939_MIN_NAMELEN CAN_REQUIRED_SIZE(struct sockaddr_can, can_addr.j1939)
+
+/* conversion functions between struct sock::sk_priority from linux and
+ * the j1939 priority field
+ */
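+/* J1939 priority 0 is the highest and 7 the lowest, which is the inverse
+ * of sk_priority; e.g. the default j1939 priority 6 corresponds to
+ * sk_priority 1.
+ */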
+static inline priority_t j1939_prio(u32 sk_priority)
+{
+ sk_priority = min(sk_priority, 7U);
+
+ return 7 - sk_priority;
+}
+
+static inline u32 j1939_to_sk_priority(priority_t prio)
+{
+ return 7 - prio;
+}
+
+/* function to see if pgn is to be evaluated */
+static inline bool j1939_pgn_is_valid(pgn_t pgn)
+{
+ return pgn <= J1939_PGN_MAX;
+}
+
+/* test function to avoid non-zero DA placeholder for pdu1 pgn's */
+static inline bool j1939_pgn_is_clean_pdu(pgn_t pgn)
+{
+ if (j1939_pgn_is_pdu1(pgn))
+ return !(pgn & 0xff);
+ else
+ return true;
+}
+
+static inline void j1939_sock_pending_add(struct sock *sk)
+{
+ struct j1939_sock *jsk = j1939_sk(sk);
+
+ atomic_inc(&jsk->skb_pending);
+}
+
+static int j1939_sock_pending_get(struct sock *sk)
+{
+ struct j1939_sock *jsk = j1939_sk(sk);
+
+ return atomic_read(&jsk->skb_pending);
+}
+
+void j1939_sock_pending_del(struct sock *sk)
+{
+ struct j1939_sock *jsk = j1939_sk(sk);
+
+ /* atomic_dec_return returns the new value */
+ if (!atomic_dec_return(&jsk->skb_pending))
+ wake_up(&jsk->waitq); /* no pending SKB's */
+}
+
+static void j1939_jsk_add(struct j1939_priv *priv, struct j1939_sock *jsk)
+{
+ jsk->state |= J1939_SOCK_BOUND;
+ j1939_priv_get(priv);
+ jsk->priv = priv;
+
+ spin_lock_bh(&priv->j1939_socks_lock);
+ list_add_tail(&jsk->list, &priv->j1939_socks);
+ spin_unlock_bh(&priv->j1939_socks_lock);
+}
+
+static void j1939_jsk_del(struct j1939_priv *priv, struct j1939_sock *jsk)
+{
+ spin_lock_bh(&priv->j1939_socks_lock);
+ list_del_init(&jsk->list);
+ spin_unlock_bh(&priv->j1939_socks_lock);
+
+ jsk->priv = NULL;
+ j1939_priv_put(priv);
+ jsk->state &= ~J1939_SOCK_BOUND;
+}
+
+static bool j1939_sk_queue_session(struct j1939_session *session)
+{
+ struct j1939_sock *jsk = j1939_sk(session->sk);
+ bool empty;
+
+ spin_lock_bh(&jsk->sk_session_queue_lock);
+ empty = list_empty(&jsk->sk_session_queue);
+ j1939_session_get(session);
+ list_add_tail(&session->sk_session_queue_entry, &jsk->sk_session_queue);
+ spin_unlock_bh(&jsk->sk_session_queue_lock);
+ j1939_sock_pending_add(&jsk->sk);
+
+ return empty;
+}
+
+static struct
+j1939_session *j1939_sk_get_incomplete_session(struct j1939_sock *jsk)
+{
+ struct j1939_session *session = NULL;
+
+ spin_lock_bh(&jsk->sk_session_queue_lock);
+ if (!list_empty(&jsk->sk_session_queue)) {
+ session = list_last_entry(&jsk->sk_session_queue,
+ struct j1939_session,
+ sk_session_queue_entry);
+ if (session->total_queued_size == session->total_message_size)
+ session = NULL;
+ else
+ j1939_session_get(session);
+ }
+ spin_unlock_bh(&jsk->sk_session_queue_lock);
+
+ return session;
+}
+
+static void j1939_sk_queue_drop_all(struct j1939_priv *priv,
+ struct j1939_sock *jsk, int err)
+{
+ struct j1939_session *session, *tmp;
+
+ netdev_dbg(priv->ndev, "%s: err: %i\n", __func__, err);
+ spin_lock_bh(&jsk->sk_session_queue_lock);
+ list_for_each_entry_safe(session, tmp, &jsk->sk_session_queue,
+ sk_session_queue_entry) {
+ list_del_init(&session->sk_session_queue_entry);
+ session->err = err;
+ j1939_session_put(session);
+ }
+ spin_unlock_bh(&jsk->sk_session_queue_lock);
+}
+
+static void j1939_sk_queue_activate_next_locked(struct j1939_session *session)
+{
+ struct j1939_sock *jsk;
+ struct j1939_session *first;
+ int err;
+
+ /* RX sessions don't have a socket (yet) */
+ if (!session->sk)
+ return;
+
+ jsk = j1939_sk(session->sk);
+ lockdep_assert_held(&jsk->sk_session_queue_lock);
+
+ err = session->err;
+
+ first = list_first_entry_or_null(&jsk->sk_session_queue,
+ struct j1939_session,
+ sk_session_queue_entry);
+
+ /* Someone else has already activated the next session */
+ if (first != session)
+ return;
+
+activate_next:
+ list_del_init(&first->sk_session_queue_entry);
+ j1939_session_put(first);
+ first = list_first_entry_or_null(&jsk->sk_session_queue,
+ struct j1939_session,
+ sk_session_queue_entry);
+ if (!first)
+ return;
+
+ if (WARN_ON_ONCE(j1939_session_activate(first))) {
+ first->err = -EBUSY;
+ goto activate_next;
+ } else {
+ /* Give the receiver some time (arbitrarily chosen) to recover */
+ int time_ms = 0;
+
+ if (err)
+ time_ms = 10 + prandom_u32_max(16);
+
+ j1939_tp_schedule_txtimer(first, time_ms);
+ }
+}
+
+void j1939_sk_queue_activate_next(struct j1939_session *session)
+{
+ struct j1939_sock *jsk;
+
+ if (!session->sk)
+ return;
+
+ jsk = j1939_sk(session->sk);
+
+ spin_lock_bh(&jsk->sk_session_queue_lock);
+ j1939_sk_queue_activate_next_locked(session);
+ spin_unlock_bh(&jsk->sk_session_queue_lock);
+}
+
+static bool j1939_sk_match_dst(struct j1939_sock *jsk,
+ const struct j1939_sk_buff_cb *skcb)
+{
+ if ((jsk->state & J1939_SOCK_PROMISC))
+ return true;
+
+ /* Destination address filter */
+ if (jsk->addr.src_name && skcb->addr.dst_name) {
+ if (jsk->addr.src_name != skcb->addr.dst_name)
+ return false;
+ } else {
+ /* receive (all sockets) if
+ * - the packet matches our bind() address
+ * - the packet is a broadcast and SO_BROADCAST
+ * is set
+ */
+ if (j1939_address_is_unicast(skcb->addr.da)) {
+ if (jsk->addr.sa != skcb->addr.da)
+ return false;
+ } else if (!sock_flag(&jsk->sk, SOCK_BROADCAST)) {
+ /* receiving broadcast without SO_BROADCAST
+ * flag is not allowed
+ */
+ return false;
+ }
+ }
+
+ /* Source address filter */
+ if (jsk->state & J1939_SOCK_CONNECTED) {
+ /* receive (all sockets) if
+ * - the packet matches our connect() name or address
+ */
+ if (jsk->addr.dst_name && skcb->addr.src_name) {
+ if (jsk->addr.dst_name != skcb->addr.src_name)
+ return false;
+ } else {
+ if (jsk->addr.da != skcb->addr.sa)
+ return false;
+ }
+ }
+
+ /* PGN filter */
+ if (j1939_pgn_is_valid(jsk->pgn_rx_filter) &&
+ jsk->pgn_rx_filter != skcb->addr.pgn)
+ return false;
+
+ return true;
+}
+
+/* matches skb control buffer (addr) with a j1939 filter */
+static bool j1939_sk_match_filter(struct j1939_sock *jsk,
+ const struct j1939_sk_buff_cb *skcb)
+{
+ const struct j1939_filter *f = jsk->filters;
+ int nfilter = jsk->nfilters;
+
+ if (!nfilter)
+ /* receive all when no filters are assigned */
+ return true;
+
+ for (; nfilter; ++f, --nfilter) {
+ if ((skcb->addr.pgn & f->pgn_mask) != f->pgn)
+ continue;
+ if ((skcb->addr.sa & f->addr_mask) != f->addr)
+ continue;
+ if ((skcb->addr.src_name & f->name_mask) != f->name)
+ continue;
+ return true;
+ }
+ return false;
+}
+
+static bool j1939_sk_recv_match_one(struct j1939_sock *jsk,
+ const struct j1939_sk_buff_cb *skcb)
+{
+ if (!(jsk->state & J1939_SOCK_BOUND))
+ return false;
+
+ if (!j1939_sk_match_dst(jsk, skcb))
+ return false;
+
+ if (!j1939_sk_match_filter(jsk, skcb))
+ return false;
+
+ return true;
+}
+
+static void j1939_sk_recv_one(struct j1939_sock *jsk, struct sk_buff *oskb)
+{
+ const struct j1939_sk_buff_cb *oskcb = j1939_skb_to_cb(oskb);
+ struct j1939_sk_buff_cb *skcb;
+ struct sk_buff *skb;
+
+ if (oskb->sk == &jsk->sk)
+ return;
+
+ if (!j1939_sk_recv_match_one(jsk, oskcb))
+ return;
+
+ skb = skb_clone(oskb, GFP_ATOMIC);
+ if (!skb) {
+ pr_warn("skb clone failed\n");
+ return;
+ }
+ can_skb_set_owner(skb, oskb->sk);
+
+ skcb = j1939_skb_to_cb(skb);
+ skcb->msg_flags &= ~(MSG_DONTROUTE);
+ if (skb->sk)
+ skcb->msg_flags |= MSG_DONTROUTE;
+
+ if (sock_queue_rcv_skb(&jsk->sk, skb) < 0)
+ kfree_skb(skb);
+}
+
+bool j1939_sk_recv_match(struct j1939_priv *priv, struct j1939_sk_buff_cb *skcb)
+{
+ struct j1939_sock *jsk;
+ bool match = false;
+
+ spin_lock_bh(&priv->j1939_socks_lock);
+ list_for_each_entry(jsk, &priv->j1939_socks, list) {
+ match = j1939_sk_recv_match_one(jsk, skcb);
+ if (match)
+ break;
+ }
+ spin_unlock_bh(&priv->j1939_socks_lock);
+
+ return match;
+}
+
+void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb)
+{
+ struct j1939_sock *jsk;
+
+ spin_lock_bh(&priv->j1939_socks_lock);
+ list_for_each_entry(jsk, &priv->j1939_socks, list) {
+ j1939_sk_recv_one(jsk, skb);
+ }
+ spin_unlock_bh(&priv->j1939_socks_lock);
+}
+
+static int j1939_sk_init(struct sock *sk)
+{
+ struct j1939_sock *jsk = j1939_sk(sk);
+
+ /* Ensure that "sk" is first member in "struct j1939_sock", so that we
+ * can skip it during memset().
+ */
+ BUILD_BUG_ON(offsetof(struct j1939_sock, sk) != 0);
+ memset((void *)jsk + sizeof(jsk->sk), 0x0,
+ sizeof(*jsk) - sizeof(jsk->sk));
+
+ INIT_LIST_HEAD(&jsk->list);
+ init_waitqueue_head(&jsk->waitq);
+ jsk->sk.sk_priority = j1939_to_sk_priority(6);
+ jsk->sk.sk_reuse = 1; /* per default */
+ jsk->addr.sa = J1939_NO_ADDR;
+ jsk->addr.da = J1939_NO_ADDR;
+ jsk->addr.pgn = J1939_NO_PGN;
+ jsk->pgn_rx_filter = J1939_NO_PGN;
+ atomic_set(&jsk->skb_pending, 0);
+ spin_lock_init(&jsk->sk_session_queue_lock);
+ INIT_LIST_HEAD(&jsk->sk_session_queue);
+
+ return 0;
+}
+
+static int j1939_sk_sanity_check(struct sockaddr_can *addr, int len)
+{
+ if (!addr)
+ return -EDESTADDRREQ;
+ if (len < J1939_MIN_NAMELEN)
+ return -EINVAL;
+ if (addr->can_family != AF_CAN)
+ return -EINVAL;
+ if (!addr->can_ifindex)
+ return -ENODEV;
+ if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn) &&
+ !j1939_pgn_is_clean_pdu(addr->can_addr.j1939.pgn))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len)
+{
+ struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
+ struct j1939_sock *jsk = j1939_sk(sock->sk);
+ struct j1939_priv *priv = jsk->priv;
+ struct sock *sk = sock->sk;
+ struct net *net = sock_net(sk);
+ int ret = 0;
+
+ ret = j1939_sk_sanity_check(addr, len);
+ if (ret)
+ return ret;
+
+ lock_sock(sock->sk);
+
+ /* Already bound to an interface? */
+ if (jsk->state & J1939_SOCK_BOUND) {
+ /* A re-bind() to a different interface is not
+ * supported.
+ */
+ if (jsk->ifindex != addr->can_ifindex) {
+ ret = -EINVAL;
+ goto out_release_sock;
+ }
+
+ /* drop old references */
+ j1939_jsk_del(priv, jsk);
+ j1939_local_ecu_put(priv, jsk->addr.src_name, jsk->addr.sa);
+ } else {
+ struct net_device *ndev;
+
+ ndev = dev_get_by_index(net, addr->can_ifindex);
+ if (!ndev) {
+ ret = -ENODEV;
+ goto out_release_sock;
+ }
+
+ if (ndev->type != ARPHRD_CAN) {
+ dev_put(ndev);
+ ret = -ENODEV;
+ goto out_release_sock;
+ }
+
+ priv = j1939_netdev_start(ndev);
+ dev_put(ndev);
+ if (IS_ERR(priv)) {
+ ret = PTR_ERR(priv);
+ goto out_release_sock;
+ }
+
+ jsk->ifindex = addr->can_ifindex;
+ }
+
+ /* set default transmit pgn */
+ if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn))
+ jsk->pgn_rx_filter = addr->can_addr.j1939.pgn;
+ jsk->addr.src_name = addr->can_addr.j1939.name;
+ jsk->addr.sa = addr->can_addr.j1939.addr;
+
+ /* get new references */
+ ret = j1939_local_ecu_get(priv, jsk->addr.src_name, jsk->addr.sa);
+ if (ret) {
+ j1939_netdev_stop(priv);
+ goto out_release_sock;
+ }
+
+ j1939_jsk_add(priv, jsk);
+
+ out_release_sock: /* fall through */
+ release_sock(sock->sk);
+
+ return ret;
+}
+
+static int j1939_sk_connect(struct socket *sock, struct sockaddr *uaddr,
+ int len, int flags)
+{
+ struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
+ struct j1939_sock *jsk = j1939_sk(sock->sk);
+ int ret = 0;
+
+ ret = j1939_sk_sanity_check(addr, len);
+ if (ret)
+ return ret;
+
+ lock_sock(sock->sk);
+
+ /* bind() before connect() is mandatory */
+ if (!(jsk->state & J1939_SOCK_BOUND)) {
+ ret = -EINVAL;
+ goto out_release_sock;
+ }
+
+ /* A connect() to a different interface is not supported. */
+ if (jsk->ifindex != addr->can_ifindex) {
+ ret = -EINVAL;
+ goto out_release_sock;
+ }
+
+ if (!addr->can_addr.j1939.name &&
+ addr->can_addr.j1939.addr == J1939_NO_ADDR &&
+ !sock_flag(&jsk->sk, SOCK_BROADCAST)) {
+ /* broadcast, but SO_BROADCAST not set */
+ ret = -EACCES;
+ goto out_release_sock;
+ }
+
+ jsk->addr.dst_name = addr->can_addr.j1939.name;
+ jsk->addr.da = addr->can_addr.j1939.addr;
+
+ if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn))
+ jsk->addr.pgn = addr->can_addr.j1939.pgn;
+
+ jsk->state |= J1939_SOCK_CONNECTED;
+
+ out_release_sock: /* fall through */
+ release_sock(sock->sk);
+
+ return ret;
+}
+
+static void j1939_sk_sock2sockaddr_can(struct sockaddr_can *addr,
+ const struct j1939_sock *jsk, int peer)
+{
+ addr->can_family = AF_CAN;
+ addr->can_ifindex = jsk->ifindex;
+ addr->can_addr.j1939.pgn = jsk->addr.pgn;
+ if (peer) {
+ addr->can_addr.j1939.name = jsk->addr.dst_name;
+ addr->can_addr.j1939.addr = jsk->addr.da;
+ } else {
+ addr->can_addr.j1939.name = jsk->addr.src_name;
+ addr->can_addr.j1939.addr = jsk->addr.sa;
+ }
+}
+
+static int j1939_sk_getname(struct socket *sock, struct sockaddr *uaddr,
+ int peer)
+{
+ struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
+ struct sock *sk = sock->sk;
+ struct j1939_sock *jsk = j1939_sk(sk);
+ int ret = 0;
+
+ lock_sock(sk);
+
+ if (peer && !(jsk->state & J1939_SOCK_CONNECTED)) {
+ ret = -EADDRNOTAVAIL;
+ goto failure;
+ }
+
+ j1939_sk_sock2sockaddr_can(addr, jsk, peer);
+ ret = J1939_MIN_NAMELEN;
+
+ failure:
+ release_sock(sk);
+
+ return ret;
+}
+
+static int j1939_sk_release(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+ struct j1939_sock *jsk;
+
+ if (!sk)
+ return 0;
+
+ jsk = j1939_sk(sk);
+ lock_sock(sk);
+
+ if (jsk->state & J1939_SOCK_BOUND) {
+ struct j1939_priv *priv = jsk->priv;
+
+ if (wait_event_interruptible(jsk->waitq,
+ !j1939_sock_pending_get(&jsk->sk))) {
+ j1939_cancel_active_session(priv, sk);
+ j1939_sk_queue_drop_all(priv, jsk, ESHUTDOWN);
+ }
+
+ j1939_jsk_del(priv, jsk);
+
+ j1939_local_ecu_put(priv, jsk->addr.src_name,
+ jsk->addr.sa);
+
+ j1939_netdev_stop(priv);
+ }
+
+ sock_orphan(sk);
+ sock->sk = NULL;
+
+ release_sock(sk);
+ sock_put(sk);
+
+ return 0;
+}
+
+static int j1939_sk_setsockopt_flag(struct j1939_sock *jsk, char __user *optval,
+ unsigned int optlen, int flag)
+{
+ int tmp;
+
+ if (optlen != sizeof(tmp))
+ return -EINVAL;
+ if (copy_from_user(&tmp, optval, optlen))
+ return -EFAULT;
+ lock_sock(&jsk->sk);
+ if (tmp)
+ jsk->state |= flag;
+ else
+ jsk->state &= ~flag;
+ release_sock(&jsk->sk);
+ return tmp;
+}
+
+static int j1939_sk_setsockopt(struct socket *sock, int level, int optname,
+ char __user *optval, unsigned int optlen)
+{
+ struct sock *sk = sock->sk;
+ struct j1939_sock *jsk = j1939_sk(sk);
+ int tmp, count = 0, ret = 0;
+ struct j1939_filter *filters = NULL, *ofilters;
+
+ if (level != SOL_CAN_J1939)
+ return -EINVAL;
+
+ switch (optname) {
+ case SO_J1939_FILTER:
+ if (optval) {
+ struct j1939_filter *f;
+ int c;
+
+ if (optlen % sizeof(*filters) != 0)
+ return -EINVAL;
+
+ if (optlen > J1939_FILTER_MAX *
+ sizeof(struct j1939_filter))
+ return -EINVAL;
+
+ count = optlen / sizeof(*filters);
+ filters = memdup_user(optval, optlen);
+ if (IS_ERR(filters))
+ return PTR_ERR(filters);
+
+ for (f = filters, c = count; c; f++, c--) {
+ f->name &= f->name_mask;
+ f->pgn &= f->pgn_mask;
+ f->addr &= f->addr_mask;
+ }
+ }
+
+ lock_sock(&jsk->sk);
+ ofilters = jsk->filters;
+ jsk->filters = filters;
+ jsk->nfilters = count;
+ release_sock(&jsk->sk);
+ kfree(ofilters);
+ return 0;
+ case SO_J1939_PROMISC:
+ return j1939_sk_setsockopt_flag(jsk, optval, optlen,
+ J1939_SOCK_PROMISC);
+ case SO_J1939_ERRQUEUE:
+ ret = j1939_sk_setsockopt_flag(jsk, optval, optlen,
+ J1939_SOCK_ERRQUEUE);
+ if (ret < 0)
+ return ret;
+
+ if (!(jsk->state & J1939_SOCK_ERRQUEUE))
+ skb_queue_purge(&sk->sk_error_queue);
+ return ret;
+ case SO_J1939_SEND_PRIO:
+ if (optlen != sizeof(tmp))
+ return -EINVAL;
+ if (copy_from_user(&tmp, optval, optlen))
+ return -EFAULT;
+ if (tmp < 0 || tmp > 7)
+ return -EDOM;
+ if (tmp < 2 && !capable(CAP_NET_ADMIN))
+ return -EPERM;
+ lock_sock(&jsk->sk);
+ jsk->sk.sk_priority = j1939_to_sk_priority(tmp);
+ release_sock(&jsk->sk);
+ return 0;
+ default:
+ return -ENOPROTOOPT;
+ }
+}
+
+static int j1939_sk_getsockopt(struct socket *sock, int level, int optname,
+ char __user *optval, int __user *optlen)
+{
+ struct sock *sk = sock->sk;
+ struct j1939_sock *jsk = j1939_sk(sk);
+ int ret, ulen;
+ /* set defaults for using 'int' properties */
+ int tmp = 0;
+ int len = sizeof(tmp);
+ void *val = &tmp;
+
+ if (level != SOL_CAN_J1939)
+ return -EINVAL;
+ if (get_user(ulen, optlen))
+ return -EFAULT;
+ if (ulen < 0)
+ return -EINVAL;
+
+ lock_sock(&jsk->sk);
+ switch (optname) {
+ case SO_J1939_PROMISC:
+ tmp = (jsk->state & J1939_SOCK_PROMISC) ? 1 : 0;
+ break;
+ case SO_J1939_ERRQUEUE:
+ tmp = (jsk->state & J1939_SOCK_ERRQUEUE) ? 1 : 0;
+ break;
+ case SO_J1939_SEND_PRIO:
+ tmp = j1939_prio(jsk->sk.sk_priority);
+ break;
+ default:
+ ret = -ENOPROTOOPT;
+ goto no_copy;
+ }
+
+ /* copy to user, based on 'len' & 'val'.
+ * Most sockopts are 'int' properties and leave 'len' & 'val'
+ * unchanged; they modify 'tmp' instead.
+ */
+ if (len > ulen)
+ ret = -EFAULT;
+ else if (put_user(len, optlen))
+ ret = -EFAULT;
+ else if (copy_to_user(optval, val, len))
+ ret = -EFAULT;
+ else
+ ret = 0;
+ no_copy:
+ release_sock(&jsk->sk);
+ return ret;
+}
+
+static int j1939_sk_recvmsg(struct socket *sock, struct msghdr *msg,
+ size_t size, int flags)
+{
+ struct sock *sk = sock->sk;
+ struct sk_buff *skb;
+ struct j1939_sk_buff_cb *skcb;
+ int ret = 0;
+
+ if (flags & ~(MSG_DONTWAIT | MSG_ERRQUEUE))
+ return -EINVAL;
+
+ if (flags & MSG_ERRQUEUE)
+ return sock_recv_errqueue(sock->sk, msg, size, SOL_CAN_J1939,
+ SCM_J1939_ERRQUEUE);
+
+ skb = skb_recv_datagram(sk, flags, 0, &ret);
+ if (!skb)
+ return ret;
+
+ if (size < skb->len)
+ msg->msg_flags |= MSG_TRUNC;
+ else
+ size = skb->len;
+
+ ret = memcpy_to_msg(msg, skb->data, size);
+ if (ret < 0) {
+ skb_free_datagram(sk, skb);
+ return ret;
+ }
+
+ skcb = j1939_skb_to_cb(skb);
+ if (j1939_address_is_valid(skcb->addr.da))
+ put_cmsg(msg, SOL_CAN_J1939, SCM_J1939_DEST_ADDR,
+ sizeof(skcb->addr.da), &skcb->addr.da);
+
+ if (skcb->addr.dst_name)
+ put_cmsg(msg, SOL_CAN_J1939, SCM_J1939_DEST_NAME,
+ sizeof(skcb->addr.dst_name), &skcb->addr.dst_name);
+
+ put_cmsg(msg, SOL_CAN_J1939, SCM_J1939_PRIO,
+ sizeof(skcb->priority), &skcb->priority);
+
+ if (msg->msg_name) {
+ struct sockaddr_can *paddr = msg->msg_name;
+
+ msg->msg_namelen = J1939_MIN_NAMELEN;
+ memset(msg->msg_name, 0, msg->msg_namelen);
+ paddr->can_family = AF_CAN;
+ paddr->can_ifindex = skb->skb_iif;
+ paddr->can_addr.j1939.name = skcb->addr.src_name;
+ paddr->can_addr.j1939.addr = skcb->addr.sa;
+ paddr->can_addr.j1939.pgn = skcb->addr.pgn;
+ }
+
+ sock_recv_ts_and_drops(msg, sk, skb);
+ msg->msg_flags |= skcb->msg_flags;
+ skb_free_datagram(sk, skb);
+
+ return size;
+}
+
+static struct sk_buff *j1939_sk_alloc_skb(struct net_device *ndev,
+ struct sock *sk,
+ struct msghdr *msg, size_t size,
+ int *errcode)
+{
+ struct j1939_sock *jsk = j1939_sk(sk);
+ struct j1939_sk_buff_cb *skcb;
+ struct sk_buff *skb;
+ int ret;
+
+ skb = sock_alloc_send_skb(sk,
+ size +
+ sizeof(struct can_frame) -
+ sizeof(((struct can_frame *)NULL)->data) +
+ sizeof(struct can_skb_priv),
+ msg->msg_flags & MSG_DONTWAIT, &ret);
+ if (!skb)
+ goto failure;
+
+ can_skb_reserve(skb);
+ can_skb_prv(skb)->ifindex = ndev->ifindex;
+ can_skb_prv(skb)->skbcnt = 0;
+ skb_reserve(skb, offsetof(struct can_frame, data));
+
+ ret = memcpy_from_msg(skb_put(skb, size), msg, size);
+ if (ret < 0)
+ goto free_skb;
+
+ skb->dev = ndev;
+
+ skcb = j1939_skb_to_cb(skb);
+ memset(skcb, 0, sizeof(*skcb));
+ skcb->addr = jsk->addr;
+ skcb->priority = j1939_prio(sk->sk_priority);
+
+ if (msg->msg_name) {
+ struct sockaddr_can *addr = msg->msg_name;
+
+ if (addr->can_addr.j1939.name ||
+ addr->can_addr.j1939.addr != J1939_NO_ADDR) {
+ skcb->addr.dst_name = addr->can_addr.j1939.name;
+ skcb->addr.da = addr->can_addr.j1939.addr;
+ }
+ if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn))
+ skcb->addr.pgn = addr->can_addr.j1939.pgn;
+ }
+
+ *errcode = ret;
+ return skb;
+
+free_skb:
+ kfree_skb(skb);
+failure:
+ *errcode = ret;
+ return NULL;
+}
+
+static size_t j1939_sk_opt_stats_get_size(void)
+{
+ return
+ nla_total_size(sizeof(u32)) + /* J1939_NLA_BYTES_ACKED */
+ 0;
+}
+
+static struct sk_buff *
+j1939_sk_get_timestamping_opt_stats(struct j1939_session *session)
+{
+ struct sk_buff *stats;
+ u32 size;
+
+ stats = alloc_skb(j1939_sk_opt_stats_get_size(), GFP_ATOMIC);
+ if (!stats)
+ return NULL;
+
+ if (session->skcb.addr.type == J1939_SIMPLE)
+ size = session->total_message_size;
+ else
+ size = min(session->pkt.tx_acked * 7,
+ session->total_message_size);
+
+ nla_put_u32(stats, J1939_NLA_BYTES_ACKED, size);
+
+ return stats;
+}
+
+void j1939_sk_errqueue(struct j1939_session *session,
+ enum j1939_sk_errqueue_type type)
+{
+ struct j1939_priv *priv = session->priv;
+ struct sock *sk = session->sk;
+ struct j1939_sock *jsk;
+ struct sock_exterr_skb *serr;
+ struct sk_buff *skb;
+ char *state = "UNK";
+ int err;
+
+ /* currently we have no sk for the RX session */
+ if (!sk)
+ return;
+
+ jsk = j1939_sk(sk);
+
+ if (!(jsk->state & J1939_SOCK_ERRQUEUE))
+ return;
+
+ skb = j1939_sk_get_timestamping_opt_stats(session);
+ if (!skb)
+ return;
+
+ skb->tstamp = ktime_get_real();
+
+ BUILD_BUG_ON(sizeof(struct sock_exterr_skb) > sizeof(skb->cb));
+
+ serr = SKB_EXT_ERR(skb);
+ memset(serr, 0, sizeof(*serr));
+ switch (type) {
+ case J1939_ERRQUEUE_ACK:
+ if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK))
+ return;
+
+ serr->ee.ee_errno = ENOMSG;
+ serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
+ serr->ee.ee_info = SCM_TSTAMP_ACK;
+ state = "ACK";
+ break;
+ case J1939_ERRQUEUE_SCHED:
+ if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED))
+ return;
+
+ serr->ee.ee_errno = ENOMSG;
+ serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
+ serr->ee.ee_info = SCM_TSTAMP_SCHED;
+ state = "SCH";
+ break;
+ case J1939_ERRQUEUE_ABORT:
+ serr->ee.ee_errno = session->err;
+ serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
+ serr->ee.ee_info = J1939_EE_INFO_TX_ABORT;
+ state = "ABT";
+ break;
+ default:
+ netdev_err(priv->ndev, "Unknown errqueue type %i\n", type);
+ }
+
+ serr->opt_stats = true;
+ if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+ serr->ee.ee_data = session->tskey;
+
+ netdev_dbg(session->priv->ndev, "%s: 0x%p tskey: %i, state: %s\n",
+ __func__, session, session->tskey, state);
+ err = sock_queue_err_skb(sk, skb);
+
+ if (err)
+ kfree_skb(skb);
+}
+
+void j1939_sk_send_loop_abort(struct sock *sk, int err)
+{
+ sk->sk_err = err;
+
+ sk->sk_error_report(sk);
+}
+
+static int j1939_sk_send_loop(struct j1939_priv *priv, struct sock *sk,
+ struct msghdr *msg, size_t size)
+
+{
+ struct j1939_sock *jsk = j1939_sk(sk);
+ struct j1939_session *session = j1939_sk_get_incomplete_session(jsk);
+ struct sk_buff *skb;
+ size_t segment_size, todo_size;
+ int ret = 0;
+
+ if (session &&
+ session->total_message_size != session->total_queued_size + size) {
+ j1939_session_put(session);
+ return -EIO;
+ }
+
+ todo_size = size;
+
+ while (todo_size) {
+ struct j1939_sk_buff_cb *skcb;
+
+ segment_size = min_t(size_t, J1939_MAX_TP_PACKET_SIZE,
+ todo_size);
+
+ /* Allocate skb for one segment */
+ skb = j1939_sk_alloc_skb(priv->ndev, sk, msg, segment_size,
+ &ret);
+ if (ret)
+ break;
+
+ skcb = j1939_skb_to_cb(skb);
+
+ if (!session) {
+ /* at this point the size should be the full size
+ * of the session
+ */
+ skcb->offset = 0;
+ session = j1939_tp_send(priv, skb, size);
+ if (IS_ERR(session)) {
+ ret = PTR_ERR(session);
+ goto kfree_skb;
+ }
+ if (j1939_sk_queue_session(session)) {
+ /* try to activate the session if we are
+ * first in the queue
+ */
+ if (!j1939_session_activate(session)) {
+ j1939_tp_schedule_txtimer(session, 0);
+ } else {
+ ret = -EBUSY;
+ session->err = ret;
+ j1939_sk_queue_drop_all(priv, jsk,
+ EBUSY);
+ break;
+ }
+ }
+ } else {
+ skcb->offset = session->total_queued_size;
+ j1939_session_skb_queue(session, skb);
+ }
+
+ todo_size -= segment_size;
+ session->total_queued_size += segment_size;
+ }
+
+ switch (ret) {
+ case 0: /* OK */
+ if (todo_size)
+ netdev_warn(priv->ndev,
+ "no error found and not completely queued?! %zu\n",
+ todo_size);
+ ret = size;
+ break;
+ case -ERESTARTSYS:
+ ret = -EINTR;
+ /* fall through */
+ case -EAGAIN: /* OK */
+ if (todo_size != size)
+ ret = size - todo_size;
+ break;
+ default: /* ERROR */
+ break;
+ }
+
+ if (session)
+ j1939_session_put(session);
+
+ return ret;
+
+ kfree_skb:
+ kfree_skb(skb);
+ return ret;
+}
+
+static int j1939_sk_sendmsg(struct socket *sock, struct msghdr *msg,
+ size_t size)
+{
+ struct sock *sk = sock->sk;
+ struct j1939_sock *jsk = j1939_sk(sk);
+ struct j1939_priv *priv = jsk->priv;
+ int ifindex;
+ int ret;
+
+ /* various socket state tests */
+ if (!(jsk->state & J1939_SOCK_BOUND))
+ return -EBADFD;
+
+ ifindex = jsk->ifindex;
+
+ if (!jsk->addr.src_name && jsk->addr.sa == J1939_NO_ADDR)
+ /* no source address assigned yet */
+ return -EBADFD;
+
+ /* deal with provided destination address info */
+ if (msg->msg_name) {
+ struct sockaddr_can *addr = msg->msg_name;
+
+ if (msg->msg_namelen < J1939_MIN_NAMELEN)
+ return -EINVAL;
+
+ if (addr->can_family != AF_CAN)
+ return -EINVAL;
+
+ if (addr->can_ifindex && addr->can_ifindex != ifindex)
+ return -EBADFD;
+
+ if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn) &&
+ !j1939_pgn_is_clean_pdu(addr->can_addr.j1939.pgn))
+ return -EINVAL;
+
+ if (!addr->can_addr.j1939.name &&
+ addr->can_addr.j1939.addr == J1939_NO_ADDR &&
+ !sock_flag(sk, SOCK_BROADCAST))
+ /* broadcast, but SO_BROADCAST not set */
+ return -EACCES;
+ } else {
+ if (!jsk->addr.dst_name && jsk->addr.da == J1939_NO_ADDR &&
+ !sock_flag(sk, SOCK_BROADCAST))
+ /* broadcast, but SO_BROADCAST not set */
+ return -EACCES;
+ }
+
+ ret = j1939_sk_send_loop(priv, sk, msg, size);
+
+ return ret;
+}
+
+void j1939_sk_netdev_event_netdown(struct j1939_priv *priv)
+{
+ struct j1939_sock *jsk;
+ int error_code = ENETDOWN;
+
+ spin_lock_bh(&priv->j1939_socks_lock);
+ list_for_each_entry(jsk, &priv->j1939_socks, list) {
+ jsk->sk.sk_err = error_code;
+ if (!sock_flag(&jsk->sk, SOCK_DEAD))
+ jsk->sk.sk_error_report(&jsk->sk);
+
+ j1939_sk_queue_drop_all(priv, jsk, error_code);
+ }
+ spin_unlock_bh(&priv->j1939_socks_lock);
+}
+
+static int j1939_sk_no_ioctlcmd(struct socket *sock, unsigned int cmd,
+ unsigned long arg)
+{
+ /* no ioctls for socket layer -> hand it down to NIC layer */
+ return -ENOIOCTLCMD;
+}
+
+static const struct proto_ops j1939_ops = {
+ .family = PF_CAN,
+ .release = j1939_sk_release,
+ .bind = j1939_sk_bind,
+ .connect = j1939_sk_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = sock_no_accept,
+ .getname = j1939_sk_getname,
+ .poll = datagram_poll,
+ .ioctl = j1939_sk_no_ioctlcmd,
+ .listen = sock_no_listen,
+ .shutdown = sock_no_shutdown,
+ .setsockopt = j1939_sk_setsockopt,
+ .getsockopt = j1939_sk_getsockopt,
+ .sendmsg = j1939_sk_sendmsg,
+ .recvmsg = j1939_sk_recvmsg,
+ .mmap = sock_no_mmap,
+ .sendpage = sock_no_sendpage,
+};
+
+static struct proto j1939_proto __read_mostly = {
+ .name = "CAN_J1939",
+ .owner = THIS_MODULE,
+ .obj_size = sizeof(struct j1939_sock),
+ .init = j1939_sk_init,
+};
+
+const struct can_proto j1939_can_proto = {
+ .type = SOCK_DGRAM,
+ .protocol = CAN_J1939,
+ .ops = &j1939_ops,
+ .prot = &j1939_proto,
+};
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
new file mode 100644
index 000000000000..fe000ea757ea
--- /dev/null
+++ b/net/can/j1939/transport.c
@@ -0,0 +1,2027 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2010-2011 EIA Electronics,
+// Kurt Van Dijck <kurt.van.dijck@eia.be>
+// Copyright (c) 2018 Protonic,
+// Robin van der Gracht <robin@protonic.nl>
+// Copyright (c) 2017-2019 Pengutronix,
+// Marc Kleine-Budde <kernel@pengutronix.de>
+// Copyright (c) 2017-2019 Pengutronix,
+// Oleksij Rempel <kernel@pengutronix.de>
+
+#include <linux/can/skb.h>
+
+#include "j1939-priv.h"
+
+#define J1939_XTP_TX_RETRY_LIMIT 100
+
+#define J1939_ETP_PGN_CTL 0xc800
+#define J1939_ETP_PGN_DAT 0xc700
+#define J1939_TP_PGN_CTL 0xec00
+#define J1939_TP_PGN_DAT 0xeb00
+
+#define J1939_TP_CMD_RTS 0x10
+#define J1939_TP_CMD_CTS 0x11
+#define J1939_TP_CMD_EOMA 0x13
+#define J1939_TP_CMD_BAM 0x20
+#define J1939_TP_CMD_ABORT 0xff
+
+#define J1939_ETP_CMD_RTS 0x14
+#define J1939_ETP_CMD_CTS 0x15
+#define J1939_ETP_CMD_DPO 0x16
+#define J1939_ETP_CMD_EOMA 0x17
+#define J1939_ETP_CMD_ABORT 0xff
+
+enum j1939_xtp_abort {
+ J1939_XTP_NO_ABORT = 0,
+ J1939_XTP_ABORT_BUSY = 1,
+ /* Already in one or more connection managed sessions and
+ * cannot support another.
+ *
+ * EALREADY:
+ * Operation already in progress
+ */
+
+ J1939_XTP_ABORT_RESOURCE = 2,
+ /* System resources were needed for another task so this
+ * connection managed session was terminated.
+ *
+ * EMSGSIZE:
+ * The socket type requires that message be sent atomically,
+ * and the size of the message to be sent made this
+ * impossible.
+ */
+
+ J1939_XTP_ABORT_TIMEOUT = 3,
+ /* A timeout occurred and this is the connection abort to
+ * close the session.
+ *
+ * EHOSTUNREACH:
+ * The destination host cannot be reached (probably because
+ * the host is down or a remote router cannot reach it).
+ */
+
+ J1939_XTP_ABORT_GENERIC = 4,
+ /* CTS messages received when data transfer is in progress
+ *
+ * EBADMSG:
+ * Not a data message
+ */
+
+ J1939_XTP_ABORT_FAULT = 5,
+ /* Maximal retransmit request limit reached
+ *
+ * ENOTRECOVERABLE:
+ * State not recoverable
+ */
+
+ J1939_XTP_ABORT_UNEXPECTED_DATA = 6,
+ /* Unexpected data transfer packet
+ *
+ * ENOTCONN:
+ * Transport endpoint is not connected
+ */
+
+ J1939_XTP_ABORT_BAD_SEQ = 7,
+ /* Bad sequence number (and software is not able to recover)
+ *
+ * EILSEQ:
+ * Illegal byte sequence
+ */
+
+ J1939_XTP_ABORT_DUP_SEQ = 8,
+ /* Duplicate sequence number (and software is not able to
+ * recover)
+ */
+
+ J1939_XTP_ABORT_EDPO_UNEXPECTED = 9,
+ /* Unexpected EDPO packet (ETP) or Message size > 1785 bytes
+ * (TP)
+ */
+
+ J1939_XTP_ABORT_BAD_EDPO_PGN = 10,
+ /* Unexpected EDPO PGN (PGN in EDPO is bad) */
+
+ J1939_XTP_ABORT_EDPO_OUTOF_CTS = 11,
+ /* EDPO number of packets is greater than CTS */
+
+ J1939_XTP_ABORT_BAD_EDPO_OFFSET = 12,
+ /* Bad EDPO offset */
+
+ J1939_XTP_ABORT_OTHER_DEPRECATED = 13,
+ /* Deprecated. Use 250 instead (Any other reason) */
+
+ J1939_XTP_ABORT_ECTS_UNXPECTED_PGN = 14,
+ /* Unexpected ECTS PGN (PGN in ECTS is bad) */
+
+ J1939_XTP_ABORT_ECTS_TOO_BIG = 15,
+ /* ECTS requested packets exceeds message size */
+
+ J1939_XTP_ABORT_OTHER = 250,
+ /* Any other reason (if a Connection Abort reason is
+ * identified that is not listed in the table use code 250)
+ */
+};
+
+static unsigned int j1939_tp_block = 255;
+static unsigned int j1939_tp_packet_delay;
+static unsigned int j1939_tp_padding = 1;
+
+/* helpers */
+static const char *j1939_xtp_abort_to_str(enum j1939_xtp_abort abort)
+{
+ switch (abort) {
+ case J1939_XTP_ABORT_BUSY:
+ return "Already in one or more connection managed sessions and cannot support another.";
+ case J1939_XTP_ABORT_RESOURCE:
+ return "System resources were needed for another task so this connection managed session was terminated.";
+ case J1939_XTP_ABORT_TIMEOUT:
+ return "A timeout occurred and this is the connection abort to close the session.";
+ case J1939_XTP_ABORT_GENERIC:
+ return "CTS messages received when data transfer is in progress";
+ case J1939_XTP_ABORT_FAULT:
+ return "Maximal retransmit request limit reached";
+ case J1939_XTP_ABORT_UNEXPECTED_DATA:
+ return "Unexpected data transfer packet";
+ case J1939_XTP_ABORT_BAD_SEQ:
+ return "Bad sequence number (and software is not able to recover)";
+ case J1939_XTP_ABORT_DUP_SEQ:
+ return "Duplicate sequence number (and software is not able to recover)";
+ case J1939_XTP_ABORT_EDPO_UNEXPECTED:
+ return "Unexpected EDPO packet (ETP) or Message size > 1785 bytes (TP)";
+ case J1939_XTP_ABORT_BAD_EDPO_PGN:
+ return "Unexpected EDPO PGN (PGN in EDPO is bad)";
+ case J1939_XTP_ABORT_EDPO_OUTOF_CTS:
+ return "EDPO number of packets is greater than CTS";
+ case J1939_XTP_ABORT_BAD_EDPO_OFFSET:
+ return "Bad EDPO offset";
+ case J1939_XTP_ABORT_OTHER_DEPRECATED:
+ return "Deprecated. Use 250 instead (Any other reason)";
+ case J1939_XTP_ABORT_ECTS_UNXPECTED_PGN:
+ return "Unexpected ECTS PGN (PGN in ECTS is bad)";
+ case J1939_XTP_ABORT_ECTS_TOO_BIG:
+ return "ECTS requested packets exceeds message size";
+ case J1939_XTP_ABORT_OTHER:
+ return "Any other reason (if a Connection Abort reason is identified that is not listed in the table use code 250)";
+ default:
+ return "<unknown>";
+ }
+}
+
+static int j1939_xtp_abort_to_errno(struct j1939_priv *priv,
+ enum j1939_xtp_abort abort)
+{
+ int err;
+
+ switch (abort) {
+ case J1939_XTP_NO_ABORT:
+ WARN_ON_ONCE(abort == J1939_XTP_NO_ABORT);
+ err = 0;
+ break;
+ case J1939_XTP_ABORT_BUSY:
+ err = EALREADY;
+ break;
+ case J1939_XTP_ABORT_RESOURCE:
+ err = EMSGSIZE;
+ break;
+ case J1939_XTP_ABORT_TIMEOUT:
+ err = EHOSTUNREACH;
+ break;
+ case J1939_XTP_ABORT_GENERIC:
+ err = EBADMSG;
+ break;
+ case J1939_XTP_ABORT_FAULT:
+ err = ENOTRECOVERABLE;
+ break;
+ case J1939_XTP_ABORT_UNEXPECTED_DATA:
+ err = ENOTCONN;
+ break;
+ case J1939_XTP_ABORT_BAD_SEQ:
+ err = EILSEQ;
+ break;
+ case J1939_XTP_ABORT_DUP_SEQ:
+ err = EPROTO;
+ break;
+ case J1939_XTP_ABORT_EDPO_UNEXPECTED:
+ err = EPROTO;
+ break;
+ case J1939_XTP_ABORT_BAD_EDPO_PGN:
+ err = EPROTO;
+ break;
+ case J1939_XTP_ABORT_EDPO_OUTOF_CTS:
+ err = EPROTO;
+ break;
+ case J1939_XTP_ABORT_BAD_EDPO_OFFSET:
+ err = EPROTO;
+ break;
+ case J1939_XTP_ABORT_OTHER_DEPRECATED:
+ err = EPROTO;
+ break;
+ case J1939_XTP_ABORT_ECTS_UNXPECTED_PGN:
+ err = EPROTO;
+ break;
+ case J1939_XTP_ABORT_ECTS_TOO_BIG:
+ err = EPROTO;
+ break;
+ case J1939_XTP_ABORT_OTHER:
+ err = EPROTO;
+ break;
+ default:
+ netdev_warn(priv->ndev, "Unknown abort code %i", abort);
+ err = EPROTO;
+ }
+
+ return err;
+}
+
+static inline void j1939_session_list_lock(struct j1939_priv *priv)
+{
+ spin_lock_bh(&priv->active_session_list_lock);
+}
+
+static inline void j1939_session_list_unlock(struct j1939_priv *priv)
+{
+ spin_unlock_bh(&priv->active_session_list_lock);
+}
+
+void j1939_session_get(struct j1939_session *session)
+{
+ kref_get(&session->kref);
+}
+
+/* session completion functions */
+static void __j1939_session_drop(struct j1939_session *session)
+{
+ if (!session->transmission)
+ return;
+
+ j1939_sock_pending_del(session->sk);
+}
+
+static void j1939_session_destroy(struct j1939_session *session)
+{
+ if (session->err)
+ j1939_sk_errqueue(session, J1939_ERRQUEUE_ABORT);
+ else
+ j1939_sk_errqueue(session, J1939_ERRQUEUE_ACK);
+
+ netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+ skb_queue_purge(&session->skb_queue);
+ __j1939_session_drop(session);
+ j1939_priv_put(session->priv);
+ kfree(session);
+}
+
+static void __j1939_session_release(struct kref *kref)
+{
+ struct j1939_session *session = container_of(kref, struct j1939_session,
+ kref);
+
+ j1939_session_destroy(session);
+}
+
+void j1939_session_put(struct j1939_session *session)
+{
+ kref_put(&session->kref, __j1939_session_release);
+}
+
+static void j1939_session_txtimer_cancel(struct j1939_session *session)
+{
+ if (hrtimer_cancel(&session->txtimer))
+ j1939_session_put(session);
+}
+
+static void j1939_session_rxtimer_cancel(struct j1939_session *session)
+{
+ if (hrtimer_cancel(&session->rxtimer))
+ j1939_session_put(session);
+}
+
+void j1939_session_timers_cancel(struct j1939_session *session)
+{
+ j1939_session_txtimer_cancel(session);
+ j1939_session_rxtimer_cancel(session);
+}
+
+static inline bool j1939_cb_is_broadcast(const struct j1939_sk_buff_cb *skcb)
+{
+ return (!skcb->addr.dst_name && (skcb->addr.da == 0xff));
+}
+
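+/* Drop the skb at the head of the queue once all of its data has been
+ * acknowledged by the peer (tx_acked is counted in 7 byte packets), so
+ * the queue does not grow needlessly during long (E)TP transfers.
+ */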
+static void j1939_session_skb_drop_old(struct j1939_session *session)
+{
+ struct sk_buff *do_skb;
+ struct j1939_sk_buff_cb *do_skcb;
+ unsigned int offset_start;
+ unsigned long flags;
+
+ if (skb_queue_len(&session->skb_queue) < 2)
+ return;
+
+ offset_start = session->pkt.tx_acked * 7;
+
+ spin_lock_irqsave(&session->skb_queue.lock, flags);
+ do_skb = skb_peek(&session->skb_queue);
+ do_skcb = j1939_skb_to_cb(do_skb);
+
+ if ((do_skcb->offset + do_skb->len) < offset_start) {
+ __skb_unlink(do_skb, &session->skb_queue);
+ kfree_skb(do_skb);
+ }
+ spin_unlock_irqrestore(&session->skb_queue.lock, flags);
+}
+
+void j1939_session_skb_queue(struct j1939_session *session,
+ struct sk_buff *skb)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ struct j1939_priv *priv = session->priv;
+
+ j1939_ac_fixup(priv, skb);
+
+ if (j1939_address_is_unicast(skcb->addr.da) &&
+ priv->ents[skcb->addr.da].nusers)
+ skcb->flags |= J1939_ECU_LOCAL_DST;
+
+ skcb->flags |= J1939_ECU_LOCAL_SRC;
+
+ skb_queue_tail(&session->skb_queue, skb);
+}
+
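+/* Find the queued skb that covers the current data page offset
+ * (session->pkt.dpo, counted in 7 byte packets).
+ */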
+static struct sk_buff *j1939_session_skb_find(struct j1939_session *session)
+{
+ struct j1939_priv *priv = session->priv;
+ struct sk_buff *skb = NULL;
+ struct sk_buff *do_skb;
+ struct j1939_sk_buff_cb *do_skcb;
+ unsigned int offset_start;
+ unsigned long flags;
+
+ offset_start = session->pkt.dpo * 7;
+
+ spin_lock_irqsave(&session->skb_queue.lock, flags);
+ skb_queue_walk(&session->skb_queue, do_skb) {
+ do_skcb = j1939_skb_to_cb(do_skb);
+
+ if (offset_start >= do_skcb->offset &&
+ offset_start < (do_skcb->offset + do_skb->len)) {
+ skb = do_skb;
+ }
+ }
+ spin_unlock_irqrestore(&session->skb_queue.lock, flags);
+
+ if (!skb)
+ netdev_dbg(priv->ndev, "%s: 0x%p: no skb found for start: %i, queue size: %i\n",
+ __func__, session, offset_start,
+ skb_queue_len(&session->skb_queue));
+
+ return skb;
+}
+
+/* see if we are receiver
+ * returns 0 for broadcasts, although we will receive them
+ */
+static inline int j1939_tp_im_receiver(const struct j1939_sk_buff_cb *skcb)
+{
+ return skcb->flags & J1939_ECU_LOCAL_DST;
+}
+
+/* see if we are sender */
+static inline int j1939_tp_im_transmitter(const struct j1939_sk_buff_cb *skcb)
+{
+ return skcb->flags & J1939_ECU_LOCAL_SRC;
+}
+
+/* see if we are involved as either receiver or transmitter */
+static int j1939_tp_im_involved(const struct j1939_sk_buff_cb *skcb, bool swap)
+{
+ if (swap)
+ return j1939_tp_im_receiver(skcb);
+ else
+ return j1939_tp_im_transmitter(skcb);
+}
+
+static int j1939_tp_im_involved_anydir(struct j1939_sk_buff_cb *skcb)
+{
+ return skcb->flags & (J1939_ECU_LOCAL_SRC | J1939_ECU_LOCAL_DST);
+}
+
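+/* Common layout of (E)TP.CM control frames as decoded by the helpers
+ * below: dat[0] carries the command, dat[1..4] carry command specific
+ * parameters (message size, packet counts/numbers) and dat[5..7] always
+ * carry the PGN of the transported message, least significant byte first.
+ */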
+/* extract pgn from flow-ctl message */
+static inline pgn_t j1939_xtp_ctl_to_pgn(const u8 *dat)
+{
+ pgn_t pgn;
+
+ pgn = (dat[7] << 16) | (dat[6] << 8) | (dat[5] << 0);
+ if (j1939_pgn_is_pdu1(pgn))
+ pgn &= 0xffff00;
+ return pgn;
+}
+
+static inline unsigned int j1939_tp_ctl_to_size(const u8 *dat)
+{
+ return (dat[2] << 8) + (dat[1] << 0);
+}
+
+static inline unsigned int j1939_etp_ctl_to_packet(const u8 *dat)
+{
+ return (dat[4] << 16) | (dat[3] << 8) | (dat[2] << 0);
+}
+
+static inline unsigned int j1939_etp_ctl_to_size(const u8 *dat)
+{
+ return (dat[4] << 24) | (dat[3] << 16) |
+ (dat[2] << 8) | (dat[1] << 0);
+}
+
+/* find existing session:
+ * reverse: swap cb's src & dst
+ * there is no problem with matching broadcasts, since
+ * broadcasts (no dst, no da) would never call this
+ * with reverse == true
+ */
+static bool j1939_session_match(struct j1939_addr *se_addr,
+ struct j1939_addr *sk_addr, bool reverse)
+{
+ if (se_addr->type != sk_addr->type)
+ return false;
+
+ if (reverse) {
+ if (se_addr->src_name) {
+ if (se_addr->src_name != sk_addr->dst_name)
+ return false;
+ } else if (se_addr->sa != sk_addr->da) {
+ return false;
+ }
+
+ if (se_addr->dst_name) {
+ if (se_addr->dst_name != sk_addr->src_name)
+ return false;
+ } else if (se_addr->da != sk_addr->sa) {
+ return false;
+ }
+ } else {
+ if (se_addr->src_name) {
+ if (se_addr->src_name != sk_addr->src_name)
+ return false;
+ } else if (se_addr->sa != sk_addr->sa) {
+ return false;
+ }
+
+ if (se_addr->dst_name) {
+ if (se_addr->dst_name != sk_addr->dst_name)
+ return false;
+ } else if (se_addr->da != sk_addr->da) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static struct
+j1939_session *j1939_session_get_by_addr_locked(struct j1939_priv *priv,
+ struct list_head *root,
+ struct j1939_addr *addr,
+ bool reverse, bool transmitter)
+{
+ struct j1939_session *session;
+
+ lockdep_assert_held(&priv->active_session_list_lock);
+
+ list_for_each_entry(session, root, active_session_list_entry) {
+ j1939_session_get(session);
+ if (j1939_session_match(&session->skcb.addr, addr, reverse) &&
+ session->transmission == transmitter)
+ return session;
+ j1939_session_put(session);
+ }
+
+ return NULL;
+}
+
+static struct
+j1939_session *j1939_session_get_simple(struct j1939_priv *priv,
+ struct sk_buff *skb)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ struct j1939_session *session;
+
+ lockdep_assert_held(&priv->active_session_list_lock);
+
+ list_for_each_entry(session, &priv->active_session_list,
+ active_session_list_entry) {
+ j1939_session_get(session);
+ if (session->skcb.addr.type == J1939_SIMPLE &&
+ session->tskey == skcb->tskey && session->sk == skb->sk)
+ return session;
+ j1939_session_put(session);
+ }
+
+ return NULL;
+}
+
+static struct
+j1939_session *j1939_session_get_by_addr(struct j1939_priv *priv,
+ struct j1939_addr *addr,
+ bool reverse, bool transmitter)
+{
+ struct j1939_session *session;
+
+ j1939_session_list_lock(priv);
+ session = j1939_session_get_by_addr_locked(priv,
+ &priv->active_session_list,
+ addr, reverse, transmitter);
+ j1939_session_list_unlock(priv);
+
+ return session;
+}
+
+static void j1939_skbcb_swap(struct j1939_sk_buff_cb *skcb)
+{
+ u8 tmp = 0;
+
+ swap(skcb->addr.dst_name, skcb->addr.src_name);
+ swap(skcb->addr.da, skcb->addr.sa);
+
+ /* swap SRC and DST flags, leave other untouched */
+ if (skcb->flags & J1939_ECU_LOCAL_SRC)
+ tmp |= J1939_ECU_LOCAL_DST;
+ if (skcb->flags & J1939_ECU_LOCAL_DST)
+ tmp |= J1939_ECU_LOCAL_SRC;
+ skcb->flags &= ~(J1939_ECU_LOCAL_SRC | J1939_ECU_LOCAL_DST);
+ skcb->flags |= tmp;
+}
+
+static struct
+sk_buff *j1939_tp_tx_dat_new(struct j1939_priv *priv,
+ const struct j1939_sk_buff_cb *re_skcb,
+ bool ctl,
+ bool swap_src_dst)
+{
+ struct sk_buff *skb;
+ struct j1939_sk_buff_cb *skcb;
+
+ skb = alloc_skb(sizeof(struct can_frame) + sizeof(struct can_skb_priv),
+ GFP_ATOMIC);
+ if (unlikely(!skb))
+ return ERR_PTR(-ENOMEM);
+
+ skb->dev = priv->ndev;
+ can_skb_reserve(skb);
+ can_skb_prv(skb)->ifindex = priv->ndev->ifindex;
+ /* reserve CAN header */
+ skb_reserve(skb, offsetof(struct can_frame, data));
+
+ memcpy(skb->cb, re_skcb, sizeof(skb->cb));
+ skcb = j1939_skb_to_cb(skb);
+ if (swap_src_dst)
+ j1939_skbcb_swap(skcb);
+
+ if (ctl) {
+ if (skcb->addr.type == J1939_ETP)
+ skcb->addr.pgn = J1939_ETP_PGN_CTL;
+ else
+ skcb->addr.pgn = J1939_TP_PGN_CTL;
+ } else {
+ if (skcb->addr.type == J1939_ETP)
+ skcb->addr.pgn = J1939_ETP_PGN_DAT;
+ else
+ skcb->addr.pgn = J1939_TP_PGN_DAT;
+ }
+
+ return skb;
+}
+
+/* TP transmit packet functions */
+static int j1939_tp_tx_dat(struct j1939_session *session,
+ const u8 *dat, int len)
+{
+ struct j1939_priv *priv = session->priv;
+ struct sk_buff *skb;
+
+ skb = j1939_tp_tx_dat_new(priv, &session->skcb,
+ false, false);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ skb_put_data(skb, dat, len);
+ if (j1939_tp_padding && len < 8)
+ memset(skb_put(skb, 8 - len), 0xff, 8 - len);
+
+ return j1939_send_one(priv, skb);
+}
+
+static int j1939_xtp_do_tx_ctl(struct j1939_priv *priv,
+ const struct j1939_sk_buff_cb *re_skcb,
+ bool swap_src_dst, pgn_t pgn, const u8 *dat)
+{
+ struct sk_buff *skb;
+ u8 *skdat;
+
+ if (!j1939_tp_im_involved(re_skcb, swap_src_dst))
+ return 0;
+
+ skb = j1939_tp_tx_dat_new(priv, re_skcb, true, swap_src_dst);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ skdat = skb_put(skb, 8);
+ memcpy(skdat, dat, 5);
+ skdat[5] = (pgn >> 0);
+ skdat[6] = (pgn >> 8);
+ skdat[7] = (pgn >> 16);
+
+ return j1939_send_one(priv, skb);
+}
+
+static inline int j1939_tp_tx_ctl(struct j1939_session *session,
+ bool swap_src_dst, const u8 *dat)
+{
+ struct j1939_priv *priv = session->priv;
+
+ return j1939_xtp_do_tx_ctl(priv, &session->skcb,
+ swap_src_dst,
+ session->skcb.addr.pgn, dat);
+}
+
+static int j1939_xtp_tx_abort(struct j1939_priv *priv,
+ const struct j1939_sk_buff_cb *re_skcb,
+ bool swap_src_dst,
+ enum j1939_xtp_abort err,
+ pgn_t pgn)
+{
+ u8 dat[5];
+
+ if (!j1939_tp_im_involved(re_skcb, swap_src_dst))
+ return 0;
+
+ memset(dat, 0xff, sizeof(dat));
+ dat[0] = J1939_TP_CMD_ABORT;
+ dat[1] = err;
+ return j1939_xtp_do_tx_ctl(priv, re_skcb, swap_src_dst, pgn, dat);
+}
+
+void j1939_tp_schedule_txtimer(struct j1939_session *session, int msec)
+{
+ j1939_session_get(session);
+ hrtimer_start(&session->txtimer, ms_to_ktime(msec),
+ HRTIMER_MODE_REL_SOFT);
+}
+
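+/* Note: the rx timeout values used throughout this file (250, 550, 750
+ * and 1250 ms) presumably map onto the Tr/Th/T1/T2/T3 timeouts that
+ * SAE J1939-21 defines for the transport protocol state machine.
+ */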
+static inline void j1939_tp_set_rxtimeout(struct j1939_session *session,
+ int msec)
+{
+ j1939_session_rxtimer_cancel(session);
+ j1939_session_get(session);
+ hrtimer_start(&session->rxtimer, ms_to_ktime(msec),
+ HRTIMER_MODE_REL_SOFT);
+}
+
+static int j1939_session_tx_rts(struct j1939_session *session)
+{
+ u8 dat[8];
+ int ret;
+
+ memset(dat, 0xff, sizeof(dat));
+
+ dat[1] = (session->total_message_size >> 0);
+ dat[2] = (session->total_message_size >> 8);
+ dat[3] = session->pkt.total;
+
+ if (session->skcb.addr.type == J1939_ETP) {
+ dat[0] = J1939_ETP_CMD_RTS;
+ dat[1] = (session->total_message_size >> 0);
+ dat[2] = (session->total_message_size >> 8);
+ dat[3] = (session->total_message_size >> 16);
+ dat[4] = (session->total_message_size >> 24);
+ } else if (j1939_cb_is_broadcast(&session->skcb)) {
+ dat[0] = J1939_TP_CMD_BAM;
+ /* fake cts for broadcast */
+ session->pkt.tx = 0;
+ } else {
+ dat[0] = J1939_TP_CMD_RTS;
+ dat[4] = dat[3];
+ }
+
+ if (dat[0] == session->last_txcmd)
+ /* done already */
+ return 0;
+
+ ret = j1939_tp_tx_ctl(session, false, dat);
+ if (ret < 0)
+ return ret;
+
+ session->last_txcmd = dat[0];
+ if (dat[0] == J1939_TP_CMD_BAM)
+ j1939_tp_schedule_txtimer(session, 50);
+
+ j1939_tp_set_rxtimeout(session, 1250);
+
+ netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+ return 0;
+}
+
+static int j1939_session_tx_dpo(struct j1939_session *session)
+{
+ unsigned int pkt;
+ u8 dat[8];
+ int ret;
+
+ memset(dat, 0xff, sizeof(dat));
+
+ dat[0] = J1939_ETP_CMD_DPO;
+ session->pkt.dpo = session->pkt.tx_acked;
+ pkt = session->pkt.dpo;
+ dat[1] = session->pkt.last - session->pkt.tx_acked;
+ dat[2] = (pkt >> 0);
+ dat[3] = (pkt >> 8);
+ dat[4] = (pkt >> 16);
+
+ ret = j1939_tp_tx_ctl(session, false, dat);
+ if (ret < 0)
+ return ret;
+
+ session->last_txcmd = dat[0];
+ j1939_tp_set_rxtimeout(session, 1250);
+ session->pkt.tx = session->pkt.tx_acked;
+
+ netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+ return 0;
+}
+
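+/* Each (E)TP.DT frame carries a one byte sequence number in dat[0]
+ * followed by up to 7 payload bytes, so a message of N bytes needs
+ * (N + 6) / 7 data packets; e.g. a 100 byte message is sent as 15
+ * packets, the last one padded with 0xff when padding is enabled.
+ */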
+static int j1939_session_tx_dat(struct j1939_session *session)
+{
+ struct j1939_priv *priv = session->priv;
+ struct j1939_sk_buff_cb *skcb;
+ int offset, pkt_done, pkt_end;
+ unsigned int len, pdelay;
+ struct sk_buff *se_skb;
+ const u8 *tpdat;
+ int ret = 0;
+ u8 dat[8];
+
+ se_skb = j1939_session_skb_find(session);
+ if (!se_skb)
+ return -ENOBUFS;
+
+ skcb = j1939_skb_to_cb(se_skb);
+ tpdat = se_skb->data;
+ ret = 0;
+ pkt_done = 0;
+ if (session->skcb.addr.type != J1939_ETP &&
+ j1939_cb_is_broadcast(&session->skcb))
+ pkt_end = session->pkt.total;
+ else
+ pkt_end = session->pkt.last;
+
+ while (session->pkt.tx < pkt_end) {
+ dat[0] = session->pkt.tx - session->pkt.dpo + 1;
+ offset = (session->pkt.tx * 7) - skcb->offset;
+ len = se_skb->len - offset;
+ if (len > 7)
+ len = 7;
+
+ memcpy(&dat[1], &tpdat[offset], len);
+ ret = j1939_tp_tx_dat(session, dat, len + 1);
+ if (ret < 0) {
+ /* ENOBUFS means the CAN interface TX queue is full */
+ if (ret != -ENOBUFS)
+ netdev_alert(priv->ndev,
+ "%s: 0x%p: queue data error: %i\n",
+ __func__, session, ret);
+ break;
+ }
+
+ session->last_txcmd = 0xff;
+ pkt_done++;
+ session->pkt.tx++;
+ pdelay = j1939_cb_is_broadcast(&session->skcb) ? 50 :
+ j1939_tp_packet_delay;
+
+ if (session->pkt.tx < session->pkt.total && pdelay) {
+ j1939_tp_schedule_txtimer(session, pdelay);
+ break;
+ }
+ }
+
+ if (pkt_done)
+ j1939_tp_set_rxtimeout(session, 250);
+
+ return ret;
+}
+
+static int j1939_xtp_txnext_transmitter(struct j1939_session *session)
+{
+ struct j1939_priv *priv = session->priv;
+ int ret = 0;
+
+ if (!j1939_tp_im_transmitter(&session->skcb)) {
+ netdev_alert(priv->ndev, "%s: 0x%p: called by not transmitter!\n",
+ __func__, session);
+ return -EINVAL;
+ }
+
+ switch (session->last_cmd) {
+ case 0:
+ ret = j1939_session_tx_rts(session);
+ break;
+
+ case J1939_ETP_CMD_CTS:
+ if (session->last_txcmd != J1939_ETP_CMD_DPO) {
+ ret = j1939_session_tx_dpo(session);
+ if (ret)
+ return ret;
+ }
+
+ /* fall through */
+ case J1939_TP_CMD_CTS:
+ case 0xff: /* did some data */
+ case J1939_ETP_CMD_DPO:
+ case J1939_TP_CMD_BAM:
+ ret = j1939_session_tx_dat(session);
+
+ break;
+ default:
+ netdev_alert(priv->ndev, "%s: 0x%p: unexpected last_cmd: %x\n",
+ __func__, session, session->last_cmd);
+ }
+
+ return ret;
+}
+
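+/* Receiver side: send a CTS granting the transmitter the next batch of
+ * data packets. dat[1] holds the number of packets we are willing to
+ * receive next, starting at packet dat[2] (TP) resp. dat[2..4] (ETP);
+ * a CTS announcing 0 packets asks the peer to hold off while keeping
+ * the connection alive.
+ */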
+static int j1939_session_tx_cts(struct j1939_session *session)
+{
+ struct j1939_priv *priv = session->priv;
+ unsigned int pkt, len;
+ int ret;
+ u8 dat[8];
+
+ if (!j1939_sk_recv_match(priv, &session->skcb))
+ return -ENOENT;
+
+ len = session->pkt.total - session->pkt.rx;
+ len = min3(len, session->pkt.block, j1939_tp_block ?: 255);
+ memset(dat, 0xff, sizeof(dat));
+
+ if (session->skcb.addr.type == J1939_ETP) {
+ pkt = session->pkt.rx + 1;
+ dat[0] = J1939_ETP_CMD_CTS;
+ dat[1] = len;
+ dat[2] = (pkt >> 0);
+ dat[3] = (pkt >> 8);
+ dat[4] = (pkt >> 16);
+ } else {
+ dat[0] = J1939_TP_CMD_CTS;
+ dat[1] = len;
+ dat[2] = session->pkt.rx + 1;
+ }
+
+ if (dat[0] == session->last_txcmd)
+ /* done already */
+ return 0;
+
+ ret = j1939_tp_tx_ctl(session, true, dat);
+ if (ret < 0)
+ return ret;
+
+ if (len)
+ /* only mark cts done when len is set */
+ session->last_txcmd = dat[0];
+ j1939_tp_set_rxtimeout(session, 1250);
+
+ netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+ return 0;
+}
+
+static int j1939_session_tx_eoma(struct j1939_session *session)
+{
+ struct j1939_priv *priv = session->priv;
+ u8 dat[8];
+ int ret;
+
+ if (!j1939_sk_recv_match(priv, &session->skcb))
+ return -ENOENT;
+
+ memset(dat, 0xff, sizeof(dat));
+
+ if (session->skcb.addr.type == J1939_ETP) {
+ dat[0] = J1939_ETP_CMD_EOMA;
+ dat[1] = session->total_message_size >> 0;
+ dat[2] = session->total_message_size >> 8;
+ dat[3] = session->total_message_size >> 16;
+ dat[4] = session->total_message_size >> 24;
+ } else {
+ dat[0] = J1939_TP_CMD_EOMA;
+ dat[1] = session->total_message_size;
+ dat[2] = session->total_message_size >> 8;
+ dat[3] = session->pkt.total;
+ }
+
+ if (dat[0] == session->last_txcmd)
+ /* done already */
+ return 0;
+
+ ret = j1939_tp_tx_ctl(session, true, dat);
+ if (ret < 0)
+ return ret;
+
+ session->last_txcmd = dat[0];
+
+ /* wait for the EOMA packet to come in */
+ j1939_tp_set_rxtimeout(session, 1250);
+
+ netdev_dbg(session->priv->ndev, "%p: 0x%p\n", __func__, session);
+
+ return 0;
+}
+
+static int j1939_xtp_txnext_receiver(struct j1939_session *session)
+{
+ struct j1939_priv *priv = session->priv;
+ int ret = 0;
+
+ if (!j1939_tp_im_receiver(&session->skcb)) {
+ netdev_alert(priv->ndev, "%s: 0x%p: called by not receiver!\n",
+ __func__, session);
+ return -EINVAL;
+ }
+
+ switch (session->last_cmd) {
+ case J1939_TP_CMD_RTS:
+ case J1939_ETP_CMD_RTS:
+ ret = j1939_session_tx_cts(session);
+ break;
+
+ case J1939_ETP_CMD_CTS:
+ case J1939_TP_CMD_CTS:
+ case 0xff: /* did some data */
+ case J1939_ETP_CMD_DPO:
+ if ((session->skcb.addr.type == J1939_TP &&
+ j1939_cb_is_broadcast(&session->skcb)))
+ break;
+
+ if (session->pkt.rx >= session->pkt.total) {
+ ret = j1939_session_tx_eoma(session);
+ } else if (session->pkt.rx >= session->pkt.last) {
+ session->last_txcmd = 0;
+ ret = j1939_session_tx_cts(session);
+ }
+ break;
+ default:
+ netdev_alert(priv->ndev, "%s: 0x%p: unexpected last_cmd: %x\n",
+ __func__, session, session->last_cmd);
+ }
+
+ return ret;
+}
+
+static int j1939_simple_txnext(struct j1939_session *session)
+{
+ struct j1939_priv *priv = session->priv;
+ struct sk_buff *se_skb = j1939_session_skb_find(session);
+ struct sk_buff *skb;
+ int ret;
+
+ if (!se_skb)
+ return 0;
+
+ skb = skb_clone(se_skb, GFP_ATOMIC);
+ if (!skb)
+ return -ENOMEM;
+
+ can_skb_set_owner(skb, se_skb->sk);
+
+ j1939_tp_set_rxtimeout(session, J1939_SIMPLE_ECHO_TIMEOUT_MS);
+
+ ret = j1939_send_one(priv, skb);
+ if (ret)
+ return ret;
+
+ j1939_sk_errqueue(session, J1939_ERRQUEUE_SCHED);
+ j1939_sk_queue_activate_next(session);
+
+ return 0;
+}
+
+static bool j1939_session_deactivate_locked(struct j1939_session *session)
+{
+ bool active = false;
+
+ lockdep_assert_held(&session->priv->active_session_list_lock);
+
+ if (session->state >= J1939_SESSION_ACTIVE &&
+ session->state < J1939_SESSION_ACTIVE_MAX) {
+ active = true;
+
+ list_del_init(&session->active_session_list_entry);
+ session->state = J1939_SESSION_DONE;
+ j1939_session_put(session);
+ }
+
+ return active;
+}
+
+static bool j1939_session_deactivate(struct j1939_session *session)
+{
+ bool active;
+
+ j1939_session_list_lock(session->priv);
+ active = j1939_session_deactivate_locked(session);
+ j1939_session_list_unlock(session->priv);
+
+ return active;
+}
+
+static void
+j1939_session_deactivate_activate_next(struct j1939_session *session)
+{
+ if (j1939_session_deactivate(session))
+ j1939_sk_queue_activate_next(session);
+}
+
+static void j1939_session_cancel(struct j1939_session *session,
+ enum j1939_xtp_abort err)
+{
+ struct j1939_priv *priv = session->priv;
+
+ WARN_ON_ONCE(!err);
+
+ session->err = j1939_xtp_abort_to_errno(priv, err);
+ /* do not send aborts on incoming broadcasts */
+ if (!j1939_cb_is_broadcast(&session->skcb)) {
+ session->state = J1939_SESSION_WAITING_ABORT;
+ j1939_xtp_tx_abort(priv, &session->skcb,
+ !session->transmission,
+ err, session->skcb.addr.pgn);
+ }
+
+ if (session->sk)
+ j1939_sk_send_loop_abort(session->sk, session->err);
+}
+
+static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer)
+{
+ struct j1939_session *session =
+ container_of(hrtimer, struct j1939_session, txtimer);
+ struct j1939_priv *priv = session->priv;
+ int ret = 0;
+
+ if (session->skcb.addr.type == J1939_SIMPLE) {
+ ret = j1939_simple_txnext(session);
+ } else {
+ if (session->transmission)
+ ret = j1939_xtp_txnext_transmitter(session);
+ else
+ ret = j1939_xtp_txnext_receiver(session);
+ }
+
+ switch (ret) {
+ case -ENOBUFS:
+ /* Retry limit is currently arbitrarily chosen */
+ if (session->tx_retry < J1939_XTP_TX_RETRY_LIMIT) {
+ session->tx_retry++;
+ j1939_tp_schedule_txtimer(session,
+ 10 + prandom_u32_max(16));
+ } else {
+ netdev_alert(priv->ndev, "%s: 0x%p: tx retry count reached\n",
+ __func__, session);
+ session->err = -ENETUNREACH;
+ j1939_session_rxtimer_cancel(session);
+ j1939_session_deactivate_activate_next(session);
+ }
+ break;
+ case -ENETDOWN:
+ /* In this case we should get a netdev_event(), all active
+ * sessions will be cleared by
+ * j1939_cancel_active_session(). So handle this as an
+ * error, but let j1939_cancel_active_session() do the
+ * cleanup including propagation of the error to user space.
+ */
+ break;
+ case 0:
+ session->tx_retry = 0;
+ break;
+ default:
+ netdev_alert(priv->ndev, "%s: 0x%p: tx aborted with unknown reason: %i\n",
+ __func__, session, ret);
+ if (session->skcb.addr.type != J1939_SIMPLE) {
+ j1939_tp_set_rxtimeout(session,
+ J1939_XTP_ABORT_TIMEOUT_MS);
+ j1939_session_cancel(session, J1939_XTP_ABORT_OTHER);
+ } else {
+ session->err = ret;
+ j1939_session_rxtimer_cancel(session);
+ j1939_session_deactivate_activate_next(session);
+ }
+ }
+
+ j1939_session_put(session);
+
+ return HRTIMER_NORESTART;
+}
+
+static void j1939_session_completed(struct j1939_session *session)
+{
+ struct sk_buff *skb;
+
+ if (!session->transmission) {
+ skb = j1939_session_skb_find(session);
+ /* distribute among j1939 receivers */
+ j1939_sk_recv(session->priv, skb);
+ }
+
+ j1939_session_deactivate_activate_next(session);
+}
+
+static enum hrtimer_restart j1939_tp_rxtimer(struct hrtimer *hrtimer)
+{
+ struct j1939_session *session = container_of(hrtimer,
+ struct j1939_session,
+ rxtimer);
+ struct j1939_priv *priv = session->priv;
+
+ if (session->state == J1939_SESSION_WAITING_ABORT) {
+ netdev_alert(priv->ndev, "%s: 0x%p: abort rx timeout. Force session deactivation\n",
+ __func__, session);
+
+ j1939_session_deactivate_activate_next(session);
+
+ } else if (session->skcb.addr.type == J1939_SIMPLE) {
+ netdev_alert(priv->ndev, "%s: 0x%p: Timeout. Failed to send simple message.\n",
+ __func__, session);
+
+ /* The message is probably stuck in the CAN controller and can
+ * be sent as soon as the CAN bus is in a working state again.
+ */
+ session->err = -ETIME;
+ j1939_session_deactivate(session);
+ } else {
+ netdev_alert(priv->ndev, "%s: 0x%p: rx timeout, send abort\n",
+ __func__, session);
+
+ j1939_session_list_lock(session->priv);
+ if (session->state >= J1939_SESSION_ACTIVE &&
+ session->state < J1939_SESSION_ACTIVE_MAX) {
+ j1939_session_get(session);
+ hrtimer_start(&session->rxtimer,
+ ms_to_ktime(J1939_XTP_ABORT_TIMEOUT_MS),
+ HRTIMER_MODE_REL_SOFT);
+ j1939_session_cancel(session, J1939_XTP_ABORT_TIMEOUT);
+ }
+ j1939_session_list_unlock(session->priv);
+ }
+
+ j1939_session_put(session);
+
+ return HRTIMER_NORESTART;
+}
+
+static bool j1939_xtp_rx_cmd_bad_pgn(struct j1939_session *session,
+ const struct sk_buff *skb)
+{
+ const struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ pgn_t pgn = j1939_xtp_ctl_to_pgn(skb->data);
+ struct j1939_priv *priv = session->priv;
+ enum j1939_xtp_abort abort = J1939_XTP_NO_ABORT;
+ u8 cmd = skb->data[0];
+
+ if (session->skcb.addr.pgn == pgn)
+ return false;
+
+ switch (cmd) {
+ case J1939_TP_CMD_BAM:
+ abort = J1939_XTP_NO_ABORT;
+ break;
+
+ case J1939_ETP_CMD_RTS:
+ case J1939_TP_CMD_RTS: /* fall through */
+ abort = J1939_XTP_ABORT_BUSY;
+ break;
+
+ case J1939_ETP_CMD_CTS:
+ case J1939_TP_CMD_CTS: /* fall through */
+ abort = J1939_XTP_ABORT_ECTS_UNXPECTED_PGN;
+ break;
+
+ case J1939_ETP_CMD_DPO:
+ abort = J1939_XTP_ABORT_BAD_EDPO_PGN;
+ break;
+
+ case J1939_ETP_CMD_EOMA:
+ case J1939_TP_CMD_EOMA: /* fall through */
+ abort = J1939_XTP_ABORT_OTHER;
+ break;
+
+ case J1939_ETP_CMD_ABORT: /* && J1939_TP_CMD_ABORT */
+ abort = J1939_XTP_NO_ABORT;
+ break;
+
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ netdev_warn(priv->ndev, "%s: 0x%p: CMD 0x%02x with PGN 0x%05x for running session with different PGN 0x%05x.\n",
+ __func__, session, cmd, pgn, session->skcb.addr.pgn);
+ if (abort != J1939_XTP_NO_ABORT)
+ j1939_xtp_tx_abort(priv, skcb, true, abort, pgn);
+
+ return true;
+}
+
+static void j1939_xtp_rx_abort_one(struct j1939_priv *priv, struct sk_buff *skb,
+ bool reverse, bool transmitter)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ struct j1939_session *session;
+ u8 abort = skb->data[1];
+
+ session = j1939_session_get_by_addr(priv, &skcb->addr, reverse,
+ transmitter);
+ if (!session)
+ return;
+
+ if (j1939_xtp_rx_cmd_bad_pgn(session, skb))
+ goto abort_put;
+
+ netdev_info(priv->ndev, "%s: 0x%p: 0x%05x: (%u) %s\n", __func__,
+ session, j1939_xtp_ctl_to_pgn(skb->data), abort,
+ j1939_xtp_abort_to_str(abort));
+
+ j1939_session_timers_cancel(session);
+ session->err = j1939_xtp_abort_to_errno(priv, abort);
+ if (session->sk)
+ j1939_sk_send_loop_abort(session->sk, session->err);
+ j1939_session_deactivate_activate_next(session);
+
+abort_put:
+ j1939_session_put(session);
+}
+
+/* abort packets may come in 2 directions */
+static void
+j1939_xtp_rx_abort(struct j1939_priv *priv, struct sk_buff *skb,
+ bool transmitter)
+{
+ j1939_xtp_rx_abort_one(priv, skb, false, transmitter);
+ j1939_xtp_rx_abort_one(priv, skb, true, transmitter);
+}
+
+static void
+j1939_xtp_rx_eoma_one(struct j1939_session *session, struct sk_buff *skb)
+{
+ if (j1939_xtp_rx_cmd_bad_pgn(session, skb))
+ return;
+
+ netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+ session->pkt.tx_acked = session->pkt.total;
+ j1939_session_timers_cancel(session);
+ /* transmitted without problems */
+ j1939_session_completed(session);
+}
+
+static void
+j1939_xtp_rx_eoma(struct j1939_priv *priv, struct sk_buff *skb,
+ bool transmitter)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ struct j1939_session *session;
+
+ session = j1939_session_get_by_addr(priv, &skcb->addr, true,
+ transmitter);
+ if (!session)
+ return;
+
+ j1939_xtp_rx_eoma_one(session, skb);
+ j1939_session_put(session);
+}
+
+static void
+j1939_xtp_rx_cts_one(struct j1939_session *session, struct sk_buff *skb)
+{
+ enum j1939_xtp_abort err = J1939_XTP_ABORT_FAULT;
+ unsigned int pkt;
+ const u8 *dat;
+
+ dat = skb->data;
+
+ if (j1939_xtp_rx_cmd_bad_pgn(session, skb))
+ return;
+
+ netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+ if (session->last_cmd == dat[0]) {
+ err = J1939_XTP_ABORT_DUP_SEQ;
+ goto out_session_cancel;
+ }
+
+ if (session->skcb.addr.type == J1939_ETP)
+ pkt = j1939_etp_ctl_to_packet(dat);
+ else
+ pkt = dat[2];
+
+ if (!pkt)
+ goto out_session_cancel;
+ else if (dat[1] > session->pkt.block /* 0xff for etp */)
+ goto out_session_cancel;
+
+ /* set packet counters only when not CTS(0) */
+ session->pkt.tx_acked = pkt - 1;
+ j1939_session_skb_drop_old(session);
+ session->pkt.last = session->pkt.tx_acked + dat[1];
+ if (session->pkt.last > session->pkt.total)
+ /* safety measure */
+ session->pkt.last = session->pkt.total;
+ /* TODO: do not set tx here, do it in txtimer */
+ session->pkt.tx = session->pkt.tx_acked;
+
+ session->last_cmd = dat[0];
+ if (dat[1]) {
+ j1939_tp_set_rxtimeout(session, 1250);
+ if (session->transmission) {
+ if (session->pkt.tx_acked)
+ j1939_sk_errqueue(session,
+ J1939_ERRQUEUE_SCHED);
+ j1939_session_txtimer_cancel(session);
+ j1939_tp_schedule_txtimer(session, 0);
+ }
+ } else {
+ /* CTS(0) */
+ j1939_tp_set_rxtimeout(session, 550);
+ }
+ return;
+
+ out_session_cancel:
+ j1939_session_timers_cancel(session);
+ j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS);
+ j1939_session_cancel(session, err);
+}
+
+static void
+j1939_xtp_rx_cts(struct j1939_priv *priv, struct sk_buff *skb, bool transmitter)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ struct j1939_session *session;
+
+ session = j1939_session_get_by_addr(priv, &skcb->addr, true,
+ transmitter);
+ if (!session)
+ return;
+ j1939_xtp_rx_cts_one(session, skb);
+ j1939_session_put(session);
+}
+
+static struct j1939_session *j1939_session_new(struct j1939_priv *priv,
+ struct sk_buff *skb, size_t size)
+{
+ struct j1939_session *session;
+ struct j1939_sk_buff_cb *skcb;
+
+ session = kzalloc(sizeof(*session), gfp_any());
+ if (!session)
+ return NULL;
+
+ INIT_LIST_HEAD(&session->active_session_list_entry);
+ INIT_LIST_HEAD(&session->sk_session_queue_entry);
+ kref_init(&session->kref);
+
+ j1939_priv_get(priv);
+ session->priv = priv;
+ session->total_message_size = size;
+ session->state = J1939_SESSION_NEW;
+
+ skb_queue_head_init(&session->skb_queue);
+ skb_queue_tail(&session->skb_queue, skb);
+
+ skcb = j1939_skb_to_cb(skb);
+ memcpy(&session->skcb, skcb, sizeof(session->skcb));
+
+ hrtimer_init(&session->txtimer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL_SOFT);
+ session->txtimer.function = j1939_tp_txtimer;
+ hrtimer_init(&session->rxtimer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL_SOFT);
+ session->rxtimer.function = j1939_tp_rxtimer;
+
+ netdev_dbg(priv->ndev, "%s: 0x%p: sa: %02x, da: %02x\n",
+ __func__, session, skcb->addr.sa, skcb->addr.da);
+
+ return session;
+}
+
+static struct
+j1939_session *j1939_session_fresh_new(struct j1939_priv *priv,
+ int size,
+ const struct j1939_sk_buff_cb *rel_skcb)
+{
+ struct sk_buff *skb;
+ struct j1939_sk_buff_cb *skcb;
+ struct j1939_session *session;
+
+ skb = alloc_skb(size + sizeof(struct can_skb_priv), GFP_ATOMIC);
+ if (unlikely(!skb))
+ return NULL;
+
+ skb->dev = priv->ndev;
+ can_skb_reserve(skb);
+ can_skb_prv(skb)->ifindex = priv->ndev->ifindex;
+ skcb = j1939_skb_to_cb(skb);
+ memcpy(skcb, rel_skcb, sizeof(*skcb));
+
+ session = j1939_session_new(priv, skb, skb->len);
+ if (!session) {
+ kfree_skb(skb);
+ return NULL;
+ }
+
+ /* alloc data area */
+ skb_put(skb, size);
+ /* skb is refcounted in j1939_session_new() */
+ return session;
+}
+
+int j1939_session_activate(struct j1939_session *session)
+{
+ struct j1939_priv *priv = session->priv;
+ struct j1939_session *active = NULL;
+ int ret = 0;
+
+ j1939_session_list_lock(priv);
+ if (session->skcb.addr.type != J1939_SIMPLE)
+ active = j1939_session_get_by_addr_locked(priv,
+ &priv->active_session_list,
+ &session->skcb.addr, false,
+ session->transmission);
+ if (active) {
+ j1939_session_put(active);
+ ret = -EAGAIN;
+ } else {
+ WARN_ON_ONCE(session->state != J1939_SESSION_NEW);
+ list_add_tail(&session->active_session_list_entry,
+ &priv->active_session_list);
+ j1939_session_get(session);
+ session->state = J1939_SESSION_ACTIVE;
+
+ netdev_dbg(session->priv->ndev, "%s: 0x%p\n",
+ __func__, session);
+ }
+ j1939_session_list_unlock(priv);
+
+ return ret;
+}
+
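+/* An (E)TP.CM_RTS or TP.CM_BAM arrived for which no session exists yet:
+ * validate the announced message size against our limits and, if it is
+ * acceptable, set up a fresh receive session with a buffer large enough
+ * for the complete message.
+ */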
+static struct
+j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv,
+ struct sk_buff *skb)
+{
+ enum j1939_xtp_abort abort = J1939_XTP_NO_ABORT;
+ struct j1939_sk_buff_cb skcb = *j1939_skb_to_cb(skb);
+ struct j1939_session *session;
+ const u8 *dat;
+ pgn_t pgn;
+ int len;
+
+ netdev_dbg(priv->ndev, "%s\n", __func__);
+
+ dat = skb->data;
+ pgn = j1939_xtp_ctl_to_pgn(dat);
+ skcb.addr.pgn = pgn;
+
+ if (!j1939_sk_recv_match(priv, &skcb))
+ return NULL;
+
+ if (skcb.addr.type == J1939_ETP) {
+ len = j1939_etp_ctl_to_size(dat);
+ if (len > J1939_MAX_ETP_PACKET_SIZE)
+ abort = J1939_XTP_ABORT_FAULT;
+ else if (len > priv->tp_max_packet_size)
+ abort = J1939_XTP_ABORT_RESOURCE;
+ else if (len <= J1939_MAX_TP_PACKET_SIZE)
+ abort = J1939_XTP_ABORT_FAULT;
+ } else {
+ len = j1939_tp_ctl_to_size(dat);
+ if (len > J1939_MAX_TP_PACKET_SIZE)
+ abort = J1939_XTP_ABORT_FAULT;
+ else if (len > priv->tp_max_packet_size)
+ abort = J1939_XTP_ABORT_RESOURCE;
+ }
+
+ if (abort != J1939_XTP_NO_ABORT) {
+ j1939_xtp_tx_abort(priv, &skcb, true, abort, pgn);
+ return NULL;
+ }
+
+ session = j1939_session_fresh_new(priv, len, &skcb);
+ if (!session) {
+ j1939_xtp_tx_abort(priv, &skcb, true,
+ J1939_XTP_ABORT_RESOURCE, pgn);
+ return NULL;
+ }
+
+ /* initialize the control buffer: plain copy */
+ session->pkt.total = (len + 6) / 7;
+ session->pkt.block = 0xff;
+ if (skcb.addr.type != J1939_ETP) {
+ if (dat[3] != session->pkt.total)
+ netdev_alert(priv->ndev, "%s: 0x%p: strange total, %u != %u\n",
+ __func__, session, session->pkt.total,
+ dat[3]);
+ session->pkt.total = dat[3];
+ session->pkt.block = min(dat[3], dat[4]);
+ }
+
+ session->pkt.rx = 0;
+ session->pkt.tx = 0;
+
+ WARN_ON_ONCE(j1939_session_activate(session));
+
+ return session;
+}
+
+static int j1939_xtp_rx_rts_session_active(struct j1939_session *session,
+ struct sk_buff *skb)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ struct j1939_priv *priv = session->priv;
+
+ if (!session->transmission) {
+ if (j1939_xtp_rx_cmd_bad_pgn(session, skb))
+ return -EBUSY;
+
+ /* RTS on active session */
+ j1939_session_timers_cancel(session);
+ j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS);
+ j1939_session_cancel(session, J1939_XTP_ABORT_BUSY);
+ }
+
+ if (session->last_cmd != 0) {
+ /* we received a second rts on the same connection */
+ netdev_alert(priv->ndev, "%s: 0x%p: connection exists (%02x %02x). last cmd: %x\n",
+ __func__, session, skcb->addr.sa, skcb->addr.da,
+ session->last_cmd);
+
+ j1939_session_timers_cancel(session);
+ j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS);
+ j1939_session_cancel(session, J1939_XTP_ABORT_BUSY);
+
+ return -EBUSY;
+ }
+
+ if (session->skcb.addr.sa != skcb->addr.sa ||
+ session->skcb.addr.da != skcb->addr.da)
+ netdev_warn(priv->ndev, "%s: 0x%p: session->skcb.addr.sa=0x%02x skcb->addr.sa=0x%02x session->skcb.addr.da=0x%02x skcb->addr.da=0x%02x\n",
+ __func__, session,
+ session->skcb.addr.sa, skcb->addr.sa,
+ session->skcb.addr.da, skcb->addr.da);
+ /* make sure 'sa' & 'da' are correct!
+ * They may not be filled in yet for skbs we are sending,
+ * since those never passed the address claim handling.
+ */
+ session->skcb.addr.sa = skcb->addr.sa;
+ session->skcb.addr.da = skcb->addr.da;
+
+ netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+ return 0;
+}
+
+static void j1939_xtp_rx_rts(struct j1939_priv *priv, struct sk_buff *skb,
+ bool transmitter)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ struct j1939_session *session;
+ u8 cmd = skb->data[0];
+
+ session = j1939_session_get_by_addr(priv, &skcb->addr, false,
+ transmitter);
+
+ if (!session) {
+ if (transmitter) {
+ /* If we're the transmitter and this function is called,
+ * we received our own RTS. A session has already been
+ * created.
+ *
+ * For some reasons however it might have been destroyed
+ * already. So don't create a new one here (using
+ * "j1939_xtp_rx_rts_session_new()") as this will be a
+ * receiver session.
+ *
+ * The reasons the session is already destroyed might
+ * be:
+ * - user space closed the socket and the session was
+ * aborted
+ * - session was aborted due to external abort message
+ */
+ return;
+ }
+ session = j1939_xtp_rx_rts_session_new(priv, skb);
+ if (!session)
+ return;
+ } else {
+ if (j1939_xtp_rx_rts_session_active(session, skb)) {
+ j1939_session_put(session);
+ return;
+ }
+ }
+ session->last_cmd = cmd;
+
+ j1939_tp_set_rxtimeout(session, 1250);
+
+ if (cmd != J1939_TP_CMD_BAM && !session->transmission) {
+ j1939_session_txtimer_cancel(session);
+ j1939_tp_schedule_txtimer(session, 0);
+ }
+
+ j1939_session_put(session);
+}
+
+static void j1939_xtp_rx_dpo_one(struct j1939_session *session,
+ struct sk_buff *skb)
+{
+ const u8 *dat = skb->data;
+
+ if (j1939_xtp_rx_cmd_bad_pgn(session, skb))
+ return;
+
+ netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+ /* transmitted without problems */
+ session->pkt.dpo = j1939_etp_ctl_to_packet(skb->data);
+ session->last_cmd = dat[0];
+ j1939_tp_set_rxtimeout(session, 750);
+}
+
+static void j1939_xtp_rx_dpo(struct j1939_priv *priv, struct sk_buff *skb,
+ bool transmitter)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ struct j1939_session *session;
+
+ session = j1939_session_get_by_addr(priv, &skcb->addr, false,
+ transmitter);
+ if (!session) {
+ netdev_info(priv->ndev,
+ "%s: no connection found\n", __func__);
+ return;
+ }
+
+ j1939_xtp_rx_dpo_one(session, skb);
+ j1939_session_put(session);
+}
+
+static void j1939_xtp_rx_dat_one(struct j1939_session *session,
+ struct sk_buff *skb)
+{
+ struct j1939_priv *priv = session->priv;
+ struct j1939_sk_buff_cb *skcb;
+ struct sk_buff *se_skb;
+ const u8 *dat;
+ u8 *tpdat;
+ int offset;
+ int nbytes;
+ bool final = false;
+ bool do_cts_eoma = false;
+ int packet;
+
+ skcb = j1939_skb_to_cb(skb);
+ dat = skb->data;
+ if (skb->len <= 1)
+ /* makes no sense */
+ goto out_session_cancel;
+
+ switch (session->last_cmd) {
+ case 0xff:
+ break;
+ case J1939_ETP_CMD_DPO:
+ if (skcb->addr.type == J1939_ETP)
+ break;
+ /* fall through */
+ case J1939_TP_CMD_BAM: /* fall through */
+ case J1939_TP_CMD_CTS: /* fall through */
+ if (skcb->addr.type != J1939_ETP)
+ break;
+ /* fall through */
+ default:
+ netdev_info(priv->ndev, "%s: 0x%p: last %02x\n", __func__,
+ session, session->last_cmd);
+ goto out_session_cancel;
+ }
+
+ packet = (dat[0] - 1 + session->pkt.dpo);
+ if (packet > session->pkt.total ||
+ (session->pkt.rx + 1) > session->pkt.total) {
+ netdev_info(priv->ndev, "%s: 0x%p: should have been completed\n",
+ __func__, session);
+ goto out_session_cancel;
+ }
+ se_skb = j1939_session_skb_find(session);
+ if (!se_skb) {
+ netdev_warn(priv->ndev, "%s: 0x%p: no skb found\n", __func__,
+ session);
+ goto out_session_cancel;
+ }
+
+ skcb = j1939_skb_to_cb(se_skb);
+ offset = packet * 7 - skcb->offset;
+ nbytes = se_skb->len - offset;
+ if (nbytes > 7)
+ nbytes = 7;
+ if (nbytes <= 0 || (nbytes + 1) > skb->len) {
+ netdev_info(priv->ndev, "%s: 0x%p: nbytes %i, len %i\n",
+ __func__, session, nbytes, skb->len);
+ goto out_session_cancel;
+ }
+
+ tpdat = se_skb->data;
+ memcpy(&tpdat[offset], &dat[1], nbytes);
+ if (packet == session->pkt.rx)
+ session->pkt.rx++;
+
+ if (skcb->addr.type != J1939_ETP &&
+ j1939_cb_is_broadcast(&session->skcb)) {
+ if (session->pkt.rx >= session->pkt.total)
+ final = true;
+ } else {
+ /* never final, an EOMA must follow */
+ if (session->pkt.rx >= session->pkt.last)
+ do_cts_eoma = true;
+ }
+
+ if (final) {
+ j1939_session_completed(session);
+ } else if (do_cts_eoma) {
+ j1939_tp_set_rxtimeout(session, 1250);
+ if (!session->transmission)
+ j1939_tp_schedule_txtimer(session, 0);
+ } else {
+ j1939_tp_set_rxtimeout(session, 250);
+ }
+ session->last_cmd = 0xff;
+ j1939_session_put(session);
+
+ return;
+
+ out_session_cancel:
+ j1939_session_timers_cancel(session);
+ j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS);
+ j1939_session_cancel(session, J1939_XTP_ABORT_FAULT);
+ j1939_session_put(session);
+}
+
+static void j1939_xtp_rx_dat(struct j1939_priv *priv, struct sk_buff *skb)
+{
+ struct j1939_sk_buff_cb *skcb;
+ struct j1939_session *session;
+
+ skcb = j1939_skb_to_cb(skb);
+
+ if (j1939_tp_im_transmitter(skcb)) {
+ session = j1939_session_get_by_addr(priv, &skcb->addr, false,
+ true);
+ if (!session)
+ netdev_info(priv->ndev, "%s: no tx connection found\n",
+ __func__);
+ else
+ j1939_xtp_rx_dat_one(session, skb);
+ }
+
+ if (j1939_tp_im_receiver(skcb)) {
+ session = j1939_session_get_by_addr(priv, &skcb->addr, false,
+ false);
+ if (!session)
+ netdev_info(priv->ndev, "%s: no rx connection found\n",
+ __func__);
+ else
+ j1939_xtp_rx_dat_one(session, skb);
+ }
+}
+
+/* j1939 main intf */
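+/* A rough, hypothetical userspace sketch of what ends up here via
+ * sendmsg(), assuming the CAN_J1939 socket uapi added elsewhere in
+ * this series (names like J1939_NO_NAME/J1939_NO_PGN and the
+ * can_addr.j1939 members are those of that uapi; the PGN and the
+ * addresses are arbitrary examples):
+ *
+ * int s = socket(PF_CAN, SOCK_DGRAM, CAN_J1939);
+ *
+ * struct sockaddr_can src = {
+ * .can_family = AF_CAN, .can_ifindex = ifindex,
+ * .can_addr.j1939 = { .name = J1939_NO_NAME,
+ * .pgn = J1939_NO_PGN, .addr = 0x20 },
+ * };
+ * bind(s, (struct sockaddr *)&src, sizeof(src));
+ *
+ * struct sockaddr_can dst = src;
+ * dst.can_addr.j1939.pgn = 0x12300;
+ * dst.can_addr.j1939.addr = 0x30;
+ * sendto(s, buf, 10000, 0, (struct sockaddr *)&dst, sizeof(dst));
+ *
+ * A size of up to 8 bytes is sent as a plain frame (J1939_SIMPLE), up
+ * to 1785 bytes uses a TP session, anything larger an ETP session
+ * (unicast only).
+ */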
+struct j1939_session *j1939_tp_send(struct j1939_priv *priv,
+ struct sk_buff *skb, size_t size)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ struct j1939_session *session;
+ int ret;
+
+ if (skcb->addr.pgn == J1939_TP_PGN_DAT ||
+ skcb->addr.pgn == J1939_TP_PGN_CTL ||
+ skcb->addr.pgn == J1939_ETP_PGN_DAT ||
+ skcb->addr.pgn == J1939_ETP_PGN_CTL)
+ /* avoid conflict */
+ return ERR_PTR(-EDOM);
+
+ if (size > priv->tp_max_packet_size)
+ return ERR_PTR(-EMSGSIZE);
+
+ if (size <= 8)
+ skcb->addr.type = J1939_SIMPLE;
+ else if (size > J1939_MAX_TP_PACKET_SIZE)
+ skcb->addr.type = J1939_ETP;
+ else
+ skcb->addr.type = J1939_TP;
+
+ if (skcb->addr.type == J1939_ETP &&
+ j1939_cb_is_broadcast(skcb))
+ return ERR_PTR(-EDESTADDRREQ);
+
+ /* fill in addresses from names */
+ ret = j1939_ac_fixup(priv, skb);
+ if (unlikely(ret))
+ return ERR_PTR(ret);
+
+ /* fix DST flags, it may be used there soon */
+ if (j1939_address_is_unicast(skcb->addr.da) &&
+ priv->ents[skcb->addr.da].nusers)
+ skcb->flags |= J1939_ECU_LOCAL_DST;
+
+ /* src is always local, I'm sending ... */
+ skcb->flags |= J1939_ECU_LOCAL_SRC;
+
+ /* prepare new session */
+ session = j1939_session_new(priv, skb, size);
+ if (!session)
+ return ERR_PTR(-ENOMEM);
+
+ /* skb is refcounted in j1939_session_new() */
+ session->sk = skb->sk;
+ session->transmission = true;
+ session->pkt.total = (size + 6) / 7;
+ session->pkt.block = skcb->addr.type == J1939_ETP ? 255 :
+ min(j1939_tp_block ?: 255, session->pkt.total);
+
+ if (j1939_cb_is_broadcast(&session->skcb))
+ /* set the end-packet for broadcast */
+ session->pkt.last = session->pkt.total;
+
+ skcb->tskey = session->sk->sk_tskey++;
+ session->tskey = skcb->tskey;
+
+ return session;
+}
+
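+/* Dispatch an incoming (E)TP.CM control frame to the matching handler,
+ * depending on whether a local ECU is the source and/or the destination
+ * of the frame.
+ */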
+static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ int extd = J1939_TP;
+ u8 cmd = skb->data[0];
+
+ switch (cmd) {
+ case J1939_ETP_CMD_RTS:
+ extd = J1939_ETP;
+ /* fall through */
+ case J1939_TP_CMD_BAM: /* fall through */
+ case J1939_TP_CMD_RTS: /* fall through */
+ if (skcb->addr.type != extd)
+ return;
+
+ if (cmd == J1939_TP_CMD_RTS && j1939_cb_is_broadcast(skcb)) {
+ netdev_alert(priv->ndev, "%s: rts without destination (%02x)\n",
+ __func__, skcb->addr.sa);
+ return;
+ }
+
+ if (j1939_tp_im_transmitter(skcb))
+ j1939_xtp_rx_rts(priv, skb, true);
+
+ if (j1939_tp_im_receiver(skcb))
+ j1939_xtp_rx_rts(priv, skb, false);
+
+ break;
+
+ case J1939_ETP_CMD_CTS:
+ extd = J1939_ETP;
+ /* fall through */
+ case J1939_TP_CMD_CTS:
+ if (skcb->addr.type != extd)
+ return;
+
+ if (j1939_tp_im_transmitter(skcb))
+ j1939_xtp_rx_cts(priv, skb, false);
+
+ if (j1939_tp_im_receiver(skcb))
+ j1939_xtp_rx_cts(priv, skb, true);
+
+ break;
+
+ case J1939_ETP_CMD_DPO:
+ if (skcb->addr.type != J1939_ETP)
+ return;
+
+ if (j1939_tp_im_transmitter(skcb))
+ j1939_xtp_rx_dpo(priv, skb, true);
+
+ if (j1939_tp_im_receiver(skcb))
+ j1939_xtp_rx_dpo(priv, skb, false);
+
+ break;
+
+ case J1939_ETP_CMD_EOMA:
+ extd = J1939_ETP;
+ /* fall through */
+ case J1939_TP_CMD_EOMA:
+ if (skcb->addr.type != extd)
+ return;
+
+ if (j1939_tp_im_transmitter(skcb))
+ j1939_xtp_rx_eoma(priv, skb, false);
+
+ if (j1939_tp_im_receiver(skcb))
+ j1939_xtp_rx_eoma(priv, skb, true);
+
+ break;
+
+ case J1939_ETP_CMD_ABORT: /* && J1939_TP_CMD_ABORT */
+ if (j1939_tp_im_transmitter(skcb))
+ j1939_xtp_rx_abort(priv, skb, true);
+
+ if (j1939_tp_im_receiver(skcb))
+ j1939_xtp_rx_abort(priv, skb, false);
+
+ break;
+ default:
+ return;
+ }
+}
+
+int j1939_tp_recv(struct j1939_priv *priv, struct sk_buff *skb)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+
+ if (!j1939_tp_im_involved_anydir(skcb))
+ return 0;
+
+ switch (skcb->addr.pgn) {
+ case J1939_ETP_PGN_DAT:
+ skcb->addr.type = J1939_ETP;
+ /* fall through */
+ case J1939_TP_PGN_DAT:
+ j1939_xtp_rx_dat(priv, skb);
+ break;
+
+ case J1939_ETP_PGN_CTL:
+ skcb->addr.type = J1939_ETP;
+ /* fall through */
+ case J1939_TP_PGN_CTL:
+ if (skb->len < 8)
+ return 0; /* Don't care. Nothing to extract here */
+
+ j1939_tp_cmd_recv(priv, skb);
+ break;
+ default:
+ return 0; /* no problem */
+ }
+ return 1; /* "I processed the message" */
+}
+
+void j1939_simple_recv(struct j1939_priv *priv, struct sk_buff *skb)
+{
+ struct j1939_session *session;
+
+ if (!skb->sk)
+ return;
+
+ j1939_session_list_lock(priv);
+ session = j1939_session_get_simple(priv, skb);
+ j1939_session_list_unlock(priv);
+ if (!session) {
+ netdev_warn(priv->ndev,
+ "%s: Received already invalidated message\n",
+ __func__);
+ return;
+ }
+
+ j1939_session_timers_cancel(session);
+ j1939_session_deactivate(session);
+ j1939_session_put(session);
+}
+
+int j1939_cancel_active_session(struct j1939_priv *priv, struct sock *sk)
+{
+ struct j1939_session *session, *saved;
+
+ netdev_dbg(priv->ndev, "%s, sk: %p\n", __func__, sk);
+ j1939_session_list_lock(priv);
+ list_for_each_entry_safe(session, saved,
+ &priv->active_session_list,
+ active_session_list_entry) {
+ if (!sk || sk == session->sk) {
+ j1939_session_timers_cancel(session);
+ session->err = ESHUTDOWN;
+ j1939_session_deactivate_locked(session);
+ }
+ }
+ j1939_session_list_unlock(priv);
+ return NOTIFY_DONE;
+}
+
+void j1939_tp_init(struct j1939_priv *priv)
+{
+ spin_lock_init(&priv->active_session_list_lock);
+ INIT_LIST_HEAD(&priv->active_session_list);
+ priv->tp_max_packet_size = J1939_MAX_ETP_PACKET_SIZE;
+}
diff --git a/net/can/proc.c b/net/can/proc.c
index 70fea17bb04c..e6881bfc3ed1 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
/*
* proc.c - procfs support for Protocol family CAN core module
*
@@ -44,6 +45,7 @@
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/if_arp.h>
+#include <linux/can/can-ml.h>
#include <linux/can/core.h>
#include "af_can.h"
@@ -77,21 +79,21 @@ static const char rx_list_name[][8] = {
static void can_init_stats(struct net *net)
{
- struct s_stats *can_stats = net->can.can_stats;
- struct s_pstats *can_pstats = net->can.can_pstats;
+ struct can_pkg_stats *pkg_stats = net->can.pkg_stats;
+ struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats;
/*
* This memset function is called from a timer context (when
* can_stattimer is active which is the default) OR in a process
* context (reading the proc_fs when can_stattimer is disabled).
*/
- memset(can_stats, 0, sizeof(struct s_stats));
- can_stats->jiffies_init = jiffies;
+ memset(pkg_stats, 0, sizeof(struct can_pkg_stats));
+ pkg_stats->jiffies_init = jiffies;
- can_pstats->stats_reset++;
+ rcv_lists_stats->stats_reset++;
if (user_reset) {
user_reset = 0;
- can_pstats->user_reset++;
+ rcv_lists_stats->user_reset++;
}
}
@@ -117,8 +119,8 @@ static unsigned long calc_rate(unsigned long oldjif, unsigned long newjif,
void can_stat_update(struct timer_list *t)
{
- struct net *net = from_timer(net, t, can.can_stattimer);
- struct s_stats *can_stats = net->can.can_stats;
+ struct net *net = from_timer(net, t, can.stattimer);
+ struct can_pkg_stats *pkg_stats = net->can.pkg_stats;
unsigned long j = jiffies; /* snapshot */
/* restart counting in timer context on user request */
@@ -126,57 +128,57 @@ void can_stat_update(struct timer_list *t)
can_init_stats(net);
/* restart counting on jiffies overflow */
- if (j < can_stats->jiffies_init)
+ if (j < pkg_stats->jiffies_init)
can_init_stats(net);
/* prevent overflow in calc_rate() */
- if (can_stats->rx_frames > (ULONG_MAX / HZ))
+ if (pkg_stats->rx_frames > (ULONG_MAX / HZ))
can_init_stats(net);
/* prevent overflow in calc_rate() */
- if (can_stats->tx_frames > (ULONG_MAX / HZ))
+ if (pkg_stats->tx_frames > (ULONG_MAX / HZ))
can_init_stats(net);
/* matches overflow - very improbable */
- if (can_stats->matches > (ULONG_MAX / 100))
+ if (pkg_stats->matches > (ULONG_MAX / 100))
can_init_stats(net);
/* calc total values */
- if (can_stats->rx_frames)
- can_stats->total_rx_match_ratio = (can_stats->matches * 100) /
- can_stats->rx_frames;
+ if (pkg_stats->rx_frames)
+ pkg_stats->total_rx_match_ratio = (pkg_stats->matches * 100) /
+ pkg_stats->rx_frames;
- can_stats->total_tx_rate = calc_rate(can_stats->jiffies_init, j,
- can_stats->tx_frames);
- can_stats->total_rx_rate = calc_rate(can_stats->jiffies_init, j,
- can_stats->rx_frames);
+ pkg_stats->total_tx_rate = calc_rate(pkg_stats->jiffies_init, j,
+ pkg_stats->tx_frames);
+ pkg_stats->total_rx_rate = calc_rate(pkg_stats->jiffies_init, j,
+ pkg_stats->rx_frames);
/* calc current values */
- if (can_stats->rx_frames_delta)
- can_stats->current_rx_match_ratio =
- (can_stats->matches_delta * 100) /
- can_stats->rx_frames_delta;
+ if (pkg_stats->rx_frames_delta)
+ pkg_stats->current_rx_match_ratio =
+ (pkg_stats->matches_delta * 100) /
+ pkg_stats->rx_frames_delta;
- can_stats->current_tx_rate = calc_rate(0, HZ, can_stats->tx_frames_delta);
- can_stats->current_rx_rate = calc_rate(0, HZ, can_stats->rx_frames_delta);
+ pkg_stats->current_tx_rate = calc_rate(0, HZ, pkg_stats->tx_frames_delta);
+ pkg_stats->current_rx_rate = calc_rate(0, HZ, pkg_stats->rx_frames_delta);
/* check / update maximum values */
- if (can_stats->max_tx_rate < can_stats->current_tx_rate)
- can_stats->max_tx_rate = can_stats->current_tx_rate;
+ if (pkg_stats->max_tx_rate < pkg_stats->current_tx_rate)
+ pkg_stats->max_tx_rate = pkg_stats->current_tx_rate;
- if (can_stats->max_rx_rate < can_stats->current_rx_rate)
- can_stats->max_rx_rate = can_stats->current_rx_rate;
+ if (pkg_stats->max_rx_rate < pkg_stats->current_rx_rate)
+ pkg_stats->max_rx_rate = pkg_stats->current_rx_rate;
- if (can_stats->max_rx_match_ratio < can_stats->current_rx_match_ratio)
- can_stats->max_rx_match_ratio = can_stats->current_rx_match_ratio;
+ if (pkg_stats->max_rx_match_ratio < pkg_stats->current_rx_match_ratio)
+ pkg_stats->max_rx_match_ratio = pkg_stats->current_rx_match_ratio;
/* clear values for 'current rate' calculation */
- can_stats->tx_frames_delta = 0;
- can_stats->rx_frames_delta = 0;
- can_stats->matches_delta = 0;
+ pkg_stats->tx_frames_delta = 0;
+ pkg_stats->rx_frames_delta = 0;
+ pkg_stats->matches_delta = 0;
/* restart timer (one second) */
- mod_timer(&net->can.can_stattimer, round_jiffies(jiffies + HZ));
+ mod_timer(&net->can.stattimer, round_jiffies(jiffies + HZ));
}
/*
@@ -211,60 +213,60 @@ static void can_print_recv_banner(struct seq_file *m)
static int can_stats_proc_show(struct seq_file *m, void *v)
{
struct net *net = m->private;
- struct s_stats *can_stats = net->can.can_stats;
- struct s_pstats *can_pstats = net->can.can_pstats;
+ struct can_pkg_stats *pkg_stats = net->can.pkg_stats;
+ struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats;
seq_putc(m, '\n');
- seq_printf(m, " %8ld transmitted frames (TXF)\n", can_stats->tx_frames);
- seq_printf(m, " %8ld received frames (RXF)\n", can_stats->rx_frames);
- seq_printf(m, " %8ld matched frames (RXMF)\n", can_stats->matches);
+ seq_printf(m, " %8ld transmitted frames (TXF)\n", pkg_stats->tx_frames);
+ seq_printf(m, " %8ld received frames (RXF)\n", pkg_stats->rx_frames);
+ seq_printf(m, " %8ld matched frames (RXMF)\n", pkg_stats->matches);
seq_putc(m, '\n');
- if (net->can.can_stattimer.function == can_stat_update) {
+ if (net->can.stattimer.function == can_stat_update) {
seq_printf(m, " %8ld %% total match ratio (RXMR)\n",
- can_stats->total_rx_match_ratio);
+ pkg_stats->total_rx_match_ratio);
seq_printf(m, " %8ld frames/s total tx rate (TXR)\n",
- can_stats->total_tx_rate);
+ pkg_stats->total_tx_rate);
seq_printf(m, " %8ld frames/s total rx rate (RXR)\n",
- can_stats->total_rx_rate);
+ pkg_stats->total_rx_rate);
seq_putc(m, '\n');
seq_printf(m, " %8ld %% current match ratio (CRXMR)\n",
- can_stats->current_rx_match_ratio);
+ pkg_stats->current_rx_match_ratio);
seq_printf(m, " %8ld frames/s current tx rate (CTXR)\n",
- can_stats->current_tx_rate);
+ pkg_stats->current_tx_rate);
seq_printf(m, " %8ld frames/s current rx rate (CRXR)\n",
- can_stats->current_rx_rate);
+ pkg_stats->current_rx_rate);
seq_putc(m, '\n');
seq_printf(m, " %8ld %% max match ratio (MRXMR)\n",
- can_stats->max_rx_match_ratio);
+ pkg_stats->max_rx_match_ratio);
seq_printf(m, " %8ld frames/s max tx rate (MTXR)\n",
- can_stats->max_tx_rate);
+ pkg_stats->max_tx_rate);
seq_printf(m, " %8ld frames/s max rx rate (MRXR)\n",
- can_stats->max_rx_rate);
+ pkg_stats->max_rx_rate);
seq_putc(m, '\n');
}
seq_printf(m, " %8ld current receive list entries (CRCV)\n",
- can_pstats->rcv_entries);
+ rcv_lists_stats->rcv_entries);
seq_printf(m, " %8ld maximum receive list entries (MRCV)\n",
- can_pstats->rcv_entries_max);
+ rcv_lists_stats->rcv_entries_max);
- if (can_pstats->stats_reset)
+ if (rcv_lists_stats->stats_reset)
seq_printf(m, "\n %8ld statistic resets (STR)\n",
- can_pstats->stats_reset);
+ rcv_lists_stats->stats_reset);
- if (can_pstats->user_reset)
+ if (rcv_lists_stats->user_reset)
seq_printf(m, " %8ld user statistic resets (USTR)\n",
- can_pstats->user_reset);
+ rcv_lists_stats->user_reset);
seq_putc(m, '\n');
return 0;
@@ -273,20 +275,20 @@ static int can_stats_proc_show(struct seq_file *m, void *v)
static int can_reset_stats_proc_show(struct seq_file *m, void *v)
{
struct net *net = m->private;
- struct s_pstats *can_pstats = net->can.can_pstats;
- struct s_stats *can_stats = net->can.can_stats;
+ struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats;
+ struct can_pkg_stats *pkg_stats = net->can.pkg_stats;
user_reset = 1;
- if (net->can.can_stattimer.function == can_stat_update) {
+ if (net->can.stattimer.function == can_stat_update) {
seq_printf(m, "Scheduled statistic reset #%ld.\n",
- can_pstats->stats_reset + 1);
+ rcv_lists_stats->stats_reset + 1);
} else {
- if (can_stats->jiffies_init != jiffies)
+ if (pkg_stats->jiffies_init != jiffies)
can_init_stats(net);
seq_printf(m, "Performed statistic reset #%ld.\n",
- can_pstats->stats_reset);
+ rcv_lists_stats->stats_reset);
}
return 0;
}
@@ -299,11 +301,11 @@ static int can_version_proc_show(struct seq_file *m, void *v)
static inline void can_rcvlist_proc_show_one(struct seq_file *m, int idx,
struct net_device *dev,
- struct can_dev_rcv_lists *d)
+ struct can_dev_rcv_lists *dev_rcv_lists)
{
- if (!hlist_empty(&d->rx[idx])) {
+ if (!hlist_empty(&dev_rcv_lists->rx[idx])) {
can_print_recv_banner(m);
- can_print_rcvlist(m, &d->rx[idx], dev);
+ can_print_rcvlist(m, &dev_rcv_lists->rx[idx], dev);
} else
seq_printf(m, " (%s: no entry)\n", DNAME(dev));
@@ -314,7 +316,7 @@ static int can_rcvlist_proc_show(struct seq_file *m, void *v)
/* double cast to prevent GCC warning */
int idx = (int)(long)PDE_DATA(m->file->f_inode);
struct net_device *dev;
- struct can_dev_rcv_lists *d;
+ struct can_dev_rcv_lists *dev_rcv_lists;
struct net *net = m->private;
seq_printf(m, "\nreceive list '%s':\n", rx_list_name[idx]);
@@ -322,8 +324,8 @@ static int can_rcvlist_proc_show(struct seq_file *m, void *v)
rcu_read_lock();
/* receive list for 'all' CAN devices (dev == NULL) */
- d = net->can.can_rx_alldev_list;
- can_rcvlist_proc_show_one(m, idx, NULL, d);
+ dev_rcv_lists = net->can.rx_alldev_list;
+ can_rcvlist_proc_show_one(m, idx, NULL, dev_rcv_lists);
/* receive list for registered CAN devices */
for_each_netdev_rcu(net, dev) {
@@ -365,7 +367,7 @@ static inline void can_rcvlist_proc_show_array(struct seq_file *m,
static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v)
{
struct net_device *dev;
- struct can_dev_rcv_lists *d;
+ struct can_dev_rcv_lists *dev_rcv_lists;
struct net *net = m->private;
/* RX_SFF */
@@ -374,15 +376,16 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v)
rcu_read_lock();
/* sff receive list for 'all' CAN devices (dev == NULL) */
- d = net->can.can_rx_alldev_list;
- can_rcvlist_proc_show_array(m, NULL, d->rx_sff, ARRAY_SIZE(d->rx_sff));
+ dev_rcv_lists = net->can.rx_alldev_list;
+ can_rcvlist_proc_show_array(m, NULL, dev_rcv_lists->rx_sff,
+ ARRAY_SIZE(dev_rcv_lists->rx_sff));
/* sff receive list for registered CAN devices */
for_each_netdev_rcu(net, dev) {
if (dev->type == ARPHRD_CAN && dev->ml_priv) {
- d = dev->ml_priv;
- can_rcvlist_proc_show_array(m, dev, d->rx_sff,
- ARRAY_SIZE(d->rx_sff));
+ dev_rcv_lists = dev->ml_priv;
+ can_rcvlist_proc_show_array(m, dev, dev_rcv_lists->rx_sff,
+ ARRAY_SIZE(dev_rcv_lists->rx_sff));
}
}
@@ -395,7 +398,7 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v)
static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v)
{
struct net_device *dev;
- struct can_dev_rcv_lists *d;
+ struct can_dev_rcv_lists *dev_rcv_lists;
struct net *net = m->private;
/* RX_EFF */
@@ -404,15 +407,16 @@ static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v)
rcu_read_lock();
/* eff receive list for 'all' CAN devices (dev == NULL) */
- d = net->can.can_rx_alldev_list;
- can_rcvlist_proc_show_array(m, NULL, d->rx_eff, ARRAY_SIZE(d->rx_eff));
+ dev_rcv_lists = net->can.rx_alldev_list;
+ can_rcvlist_proc_show_array(m, NULL, dev_rcv_lists->rx_eff,
+ ARRAY_SIZE(dev_rcv_lists->rx_eff));
/* eff receive list for registered CAN devices */
for_each_netdev_rcu(net, dev) {
if (dev->type == ARPHRD_CAN && dev->ml_priv) {
- d = dev->ml_priv;
- can_rcvlist_proc_show_array(m, dev, d->rx_eff,
- ARRAY_SIZE(d->rx_eff));
+ dev_rcv_lists = dev->ml_priv;
+ can_rcvlist_proc_show_array(m, dev, dev_rcv_lists->rx_eff,
+ ARRAY_SIZE(dev_rcv_lists->rx_eff));
}
}
diff --git a/net/can/raw.c b/net/can/raw.c
index afcbff063a67..59c039d73c6d 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -1,5 +1,5 @@
-/*
- * raw.c - Raw sockets for protocol family CAN
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+/* raw.c - Raw sockets for protocol family CAN
*
* Copyright (c) 2002-2007 Volkswagen Group Electronic Research
* All rights reserved.
@@ -64,8 +64,7 @@ MODULE_ALIAS("can-proto-1");
#define MASK_ALL 0
-/*
- * A raw socket has a list of can_filters attached to it, each receiving
+/* A raw socket has a list of can_filters attached to it, each receiving
* the CAN frames matching that filter. If the filter list is empty,
* no CAN frames will be received by the socket. The default after
* opening the socket, is to have one filter which receives all frames.
@@ -96,8 +95,7 @@ struct raw_sock {
struct uniqframe __percpu *uniq;
};
-/*
- * Return pointer to store the extra msg flags for raw_recvmsg().
+/* Return pointer to store the extra msg flags for raw_recvmsg().
* We use the space of one unsigned int beyond the 'struct sockaddr_can'
* in skb->cb.
*/
@@ -156,8 +154,7 @@ static void raw_rcv(struct sk_buff *oskb, void *data)
if (!skb)
return;
- /*
- * Put the datagram to the queue so that raw_recvmsg() can
+ /* Put the datagram to the queue so that raw_recvmsg() can
* get it from there. We need to pass the interface index to
* raw_recvmsg(). We pass a whole struct sockaddr_can in skb->cb
* containing the interface index.
@@ -283,7 +280,6 @@ static int raw_notifier(struct notifier_block *nb,
return NOTIFY_DONE;
switch (msg) {
-
case NETDEV_UNREGISTER:
lock_sock(sk);
/* remove current filters & unregister */
@@ -369,8 +365,9 @@ static int raw_release(struct socket *sock)
raw_disable_allfilters(dev_net(dev), dev, sk);
dev_put(dev);
}
- } else
+ } else {
raw_disable_allfilters(sock_net(sk), NULL, sk);
+ }
}
if (ro->count > 1)
@@ -399,7 +396,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
int err = 0;
int notify_enetdown = 0;
- if (len < sizeof(*addr))
+ if (len < CAN_REQUIRED_SIZE(*addr, can_ifindex))
return -EINVAL;
if (addr->can_family != AF_CAN)
return -EINVAL;
@@ -450,8 +447,9 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
dev, sk);
dev_put(dev);
}
- } else
+ } else {
raw_disable_allfilters(sock_net(sk), NULL, sk);
+ }
}
ro->ifindex = ifindex;
ro->bound = 1;
@@ -502,7 +500,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
return -EINVAL;
switch (optname) {
-
case CAN_RAW_FILTER:
if (optlen % sizeof(struct can_filter) != 0)
return -EINVAL;
@@ -665,17 +662,18 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
return -EINVAL;
switch (optname) {
-
case CAN_RAW_FILTER:
lock_sock(sk);
if (ro->count > 0) {
int fsize = ro->count * sizeof(struct can_filter);
+
if (len > fsize)
len = fsize;
if (copy_to_user(optval, ro->filter, len))
err = -EFAULT;
- } else
+ } else {
len = 0;
+ }
release_sock(sk);
if (!err)
@@ -735,15 +733,16 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
if (msg->msg_name) {
DECLARE_SOCKADDR(struct sockaddr_can *, addr, msg->msg_name);
- if (msg->msg_namelen < sizeof(*addr))
+ if (msg->msg_namelen < CAN_REQUIRED_SIZE(*addr, can_ifindex))
return -EINVAL;
if (addr->can_family != AF_CAN)
return -EINVAL;
ifindex = addr->can_ifindex;
- } else
+ } else {
ifindex = ro->ifindex;
+ }
dev = dev_get_by_index(sock_net(sk), ifindex);
if (!dev)
@@ -836,6 +835,13 @@ static int raw_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
return size;
}
+static int raw_sock_no_ioctlcmd(struct socket *sock, unsigned int cmd,
+ unsigned long arg)
+{
+ /* no ioctls for socket layer -> hand it down to NIC layer */
+ return -ENOIOCTLCMD;
+}
+
static const struct proto_ops raw_ops = {
.family = PF_CAN,
.release = raw_release,
@@ -845,7 +851,7 @@ static const struct proto_ops raw_ops = {
.accept = sock_no_accept,
.getname = raw_getname,
.poll = datagram_poll,
- .ioctl = can_ioctl, /* use can_ioctl() from af_can.c */
+ .ioctl = raw_sock_no_ioctlcmd,
.gettstamp = sock_gettstamp,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
@@ -879,7 +885,7 @@ static __init int raw_module_init(void)
err = can_proto_register(&raw_can_proto);
if (err < 0)
- printk(KERN_ERR "can: registration of raw protocol failed\n");
+ pr_err("can: registration of raw protocol failed\n");
return err;
}
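
With the CAN_REQUIRED_SIZE() checks above, raw_bind() and raw_sendmsg() only insist on the sockaddr_can bytes up to and including can_ifindex instead of the full structure size. A minimal userspace sketch of the socket()/bind() flow this affects; open_can_raw() and the "can0"-style interface name are illustrative only, not part of this patch:

#include <string.h>
#include <unistd.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/can.h>
#include <linux/can/raw.h>

int open_can_raw(const char *ifname)
{
	struct sockaddr_can addr = { .can_family = AF_CAN };
	struct ifreq ifr = { 0 };
	int s;

	s = socket(PF_CAN, SOCK_RAW, CAN_RAW);
	if (s < 0)
		return -1;

	strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
	if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {	/* e.g. "can0" -> ifindex */
		close(s);
		return -1;
	}

	addr.can_ifindex = ifr.ifr_ifindex;
	/* raw_bind() now only requires the bytes up to can_ifindex */
	if (bind(s, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		close(s);
		return -1;
	}

	/* the socket starts with one filter that receives all frames */
	return s;
}
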
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 94c7f77ecb6b..da5639a5bd3b 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -12,6 +12,9 @@
static atomic_t cache_idx;
+#define SK_STORAGE_CREATE_FLAG_MASK \
+ (BPF_F_NO_PREALLOC | BPF_F_CLONE)
+
struct bucket {
struct hlist_head list;
raw_spinlock_t lock;
@@ -209,7 +212,6 @@ static void selem_unlink_sk(struct bpf_sk_storage_elem *selem)
kfree_rcu(sk_storage, rcu);
}
-/* sk_storage->lock must be held and sk_storage->list cannot be empty */
static void __selem_link_sk(struct bpf_sk_storage *sk_storage,
struct bpf_sk_storage_elem *selem)
{
@@ -509,7 +511,7 @@ static int sk_storage_delete(struct sock *sk, struct bpf_map *map)
return 0;
}
-/* Called by __sk_destruct() */
+/* Called by __sk_destruct() & bpf_sk_storage_clone() */
void bpf_sk_storage_free(struct sock *sk)
{
struct bpf_sk_storage_elem *selem;
@@ -557,6 +559,11 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
smap = (struct bpf_sk_storage_map *)map;
+	/* Note that this map might be concurrently cloned by
+	 * bpf_sk_storage_clone(). Wait for any existing bpf_sk_storage_clone()
+ * RCU read section to finish before proceeding. New RCU
+ * read sections should be prevented via bpf_map_inc_not_zero.
+ */
synchronize_rcu();
/* bpf prog and the userspace can no longer access this map
@@ -601,7 +608,9 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr)
{
- if (attr->map_flags != BPF_F_NO_PREALLOC || attr->max_entries ||
+ if (attr->map_flags & ~SK_STORAGE_CREATE_FLAG_MASK ||
+ !(attr->map_flags & BPF_F_NO_PREALLOC) ||
+ attr->max_entries ||
attr->key_size != sizeof(int) || !attr->value_size ||
/* Enforce BTF for userspace sk dumping */
!attr->btf_key_type_id || !attr->btf_value_type_id)
@@ -739,6 +748,95 @@ static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key)
return err;
}
+static struct bpf_sk_storage_elem *
+bpf_sk_storage_clone_elem(struct sock *newsk,
+ struct bpf_sk_storage_map *smap,
+ struct bpf_sk_storage_elem *selem)
+{
+ struct bpf_sk_storage_elem *copy_selem;
+
+ copy_selem = selem_alloc(smap, newsk, NULL, true);
+ if (!copy_selem)
+ return NULL;
+
+ if (map_value_has_spin_lock(&smap->map))
+ copy_map_value_locked(&smap->map, SDATA(copy_selem)->data,
+ SDATA(selem)->data, true);
+ else
+ copy_map_value(&smap->map, SDATA(copy_selem)->data,
+ SDATA(selem)->data);
+
+ return copy_selem;
+}
+
+int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
+{
+ struct bpf_sk_storage *new_sk_storage = NULL;
+ struct bpf_sk_storage *sk_storage;
+ struct bpf_sk_storage_elem *selem;
+ int ret = 0;
+
+ RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);
+
+ rcu_read_lock();
+ sk_storage = rcu_dereference(sk->sk_bpf_storage);
+
+ if (!sk_storage || hlist_empty(&sk_storage->list))
+ goto out;
+
+ hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
+ struct bpf_sk_storage_elem *copy_selem;
+ struct bpf_sk_storage_map *smap;
+ struct bpf_map *map;
+
+ smap = rcu_dereference(SDATA(selem)->smap);
+ if (!(smap->map.map_flags & BPF_F_CLONE))
+ continue;
+
+		/* Note that for lockless listeners, adding a new element
+ * here can race with cleanup in bpf_sk_storage_map_free.
+ * Try to grab map refcnt to make sure that it's still
+ * alive and prevent concurrent removal.
+ */
+ map = bpf_map_inc_not_zero(&smap->map, false);
+ if (IS_ERR(map))
+ continue;
+
+ copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem);
+ if (!copy_selem) {
+ ret = -ENOMEM;
+ bpf_map_put(map);
+ goto out;
+ }
+
+ if (new_sk_storage) {
+ selem_link_map(smap, copy_selem);
+ __selem_link_sk(new_sk_storage, copy_selem);
+ } else {
+ ret = sk_storage_alloc(newsk, smap, copy_selem);
+ if (ret) {
+ kfree(copy_selem);
+ atomic_sub(smap->elem_size,
+ &newsk->sk_omem_alloc);
+ bpf_map_put(map);
+ goto out;
+ }
+
+ new_sk_storage = rcu_dereference(copy_selem->sk_storage);
+ }
+ bpf_map_put(map);
+ }
+
+out:
+ rcu_read_unlock();
+
+ /* In case of an error, don't free anything explicitly here, the
+ * caller is responsible to call bpf_sk_storage_free.
+ */
+
+ return ret;
+}
+
BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
void *, value, u64, flags)
{
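
bpf_sk_storage_clone() above copies a listener's socket storage to newly accepted sockets, but only for maps created with the new BPF_F_CLONE flag (now accepted by bpf_sk_storage_map_alloc_check()). A minimal BPF-side sketch under those assumptions; the map name, program name and cgroup_skb attach point are illustrative and not taken from this patch:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE);	/* cloned at accept() */
	__type(key, int);
	__type(value, __u64);
} bytes_per_sk SEC(".maps");

SEC("cgroup_skb/egress")
int count_egress_bytes(struct __sk_buff *skb)
{
	struct bpf_sock *sk = skb->sk;
	__u64 *val;

	if (!sk)
		return 1;
	sk = bpf_sk_fullsock(sk);
	if (!sk)
		return 1;

	/* created on first use; children of a listener inherit a copy */
	val = bpf_sk_storage_get(&bytes_per_sk, sk, 0,
				 BPF_SK_STORAGE_GET_F_CREATE);
	if (val)
		__sync_fetch_and_add(val, skb->len);

	return 1;
}

char _license[] SEC("license") = "GPL";
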
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 45a162ef5e02..4cc8dc5db2b7 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -442,8 +442,8 @@ static int __skb_datagram_iter(const struct sk_buff *skb, int offset,
if (copy > len)
copy = len;
- n = cb(vaddr + frag->page_offset +
- offset - start, copy, data, to);
+ n = cb(vaddr + skb_frag_off(frag) + offset - start,
+ copy, data, to);
kunmap(page);
offset += n;
if (n != copy)
@@ -573,7 +573,7 @@ int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
if (copy > len)
copy = len;
copied = copy_page_from_iter(skb_frag_page(frag),
- frag->page_offset + offset - start,
+ skb_frag_off(frag) + offset - start,
copy, from);
if (copied != copy)
goto fault;
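
The two hunks above are part of the wider conversion from open-coded frag->page_offset accesses to the skb_frag_off() accessor family. An illustrative kernel-side sketch of the accessor style; foo_count_frag_bytes() is a hypothetical helper, not part of this patch:

#include <linux/skbuff.h>

/* Sum the paged bytes of an skb using the accessors instead of
 * dereferencing frag->page_offset directly.
 */
static unsigned int foo_count_frag_bytes(const struct sk_buff *skb)
{
	const struct skb_shared_info *shinfo = skb_shinfo(skb);
	unsigned int bytes = 0;
	int i;

	for (i = 0; i < shinfo->nr_frags; i++) {
		const skb_frag_t *frag = &shinfo->frags[i];

		pr_debug("frag %d: page %p off %u size %u\n", i,
			 skb_frag_page(frag), skb_frag_off(frag),
			 skb_frag_size(frag));
		bytes += skb_frag_size(frag);
	}

	return bytes;
}
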
diff --git a/net/core/dev.c b/net/core/dev.c
index 5156c0edebe8..71b18e80389f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3467,18 +3467,22 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
qdisc_calculate_pkt_len(skb, q);
if (q->flags & TCQ_F_NOLOCK) {
- if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
- __qdisc_drop(skb, &to_free);
- rc = NET_XMIT_DROP;
- } else if ((q->flags & TCQ_F_CAN_BYPASS) && q->empty &&
- qdisc_run_begin(q)) {
+ if ((q->flags & TCQ_F_CAN_BYPASS) && q->empty &&
+ qdisc_run_begin(q)) {
+ if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED,
+ &q->state))) {
+ __qdisc_drop(skb, &to_free);
+ rc = NET_XMIT_DROP;
+ goto end_run;
+ }
qdisc_bstats_cpu_update(q, skb);
+ rc = NET_XMIT_SUCCESS;
if (sch_direct_xmit(skb, q, dev, txq, NULL, true))
__qdisc_run(q);
+end_run:
qdisc_run_end(q);
- rc = NET_XMIT_SUCCESS;
} else {
rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
qdisc_run(q);
@@ -3963,6 +3967,8 @@ int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */
int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */
int dev_rx_weight __read_mostly = 64;
int dev_tx_weight __read_mostly = 64;
+/* Maximum number of GRO_NORMAL skbs to batch up for list-RX */
+int gro_normal_batch __read_mostly = 8;
/* Called with irq disabled */
static inline void ____napi_schedule(struct softnet_data *sd,
@@ -5486,7 +5492,7 @@ static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
skb->data_len -= grow;
skb->tail += grow;
- pinfo->frags[0].page_offset += grow;
+ skb_frag_off_add(&pinfo->frags[0], grow);
skb_frag_size_sub(&pinfo->frags[0], grow);
if (unlikely(!skb_frag_size(&pinfo->frags[0]))) {
@@ -5747,6 +5753,26 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
}
EXPORT_SYMBOL(napi_get_frags);
+/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
+static void gro_normal_list(struct napi_struct *napi)
+{
+ if (!napi->rx_count)
+ return;
+ netif_receive_skb_list_internal(&napi->rx_list);
+ INIT_LIST_HEAD(&napi->rx_list);
+ napi->rx_count = 0;
+}
+
+/* Queue one GRO_NORMAL SKB up for list processing. If the batch size is reached,
+ * pass the whole batch up to the stack.
+ */
+static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
+{
+ list_add_tail(&skb->list, &napi->rx_list);
+ if (++napi->rx_count >= gro_normal_batch)
+ gro_normal_list(napi);
+}
+
static gro_result_t napi_frags_finish(struct napi_struct *napi,
struct sk_buff *skb,
gro_result_t ret)
@@ -5756,8 +5782,8 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi,
case GRO_HELD:
__skb_push(skb, ETH_HLEN);
skb->protocol = eth_type_trans(skb, skb->dev);
- if (ret == GRO_NORMAL && netif_receive_skb_internal(skb))
- ret = GRO_DROP;
+ if (ret == GRO_NORMAL)
+ gro_normal_one(napi, skb);
break;
case GRO_DROP:
@@ -6034,6 +6060,8 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
NAPIF_STATE_IN_BUSY_POLL)))
return false;
+ gro_normal_list(n);
+
if (n->gro_bitmask) {
unsigned long timeout = 0;
@@ -6119,10 +6147,19 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
* Ideally, a new ndo_busy_poll_stop() could avoid another round.
*/
rc = napi->poll(napi, BUSY_POLL_BUDGET);
+ /* We can't gro_normal_list() here, because napi->poll() might have
+	 * rearmed the napi (napi_complete_done()), in which case it could
+ * already be running on another CPU.
+ */
trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
netpoll_poll_unlock(have_poll_lock);
- if (rc == BUSY_POLL_BUDGET)
+ if (rc == BUSY_POLL_BUDGET) {
+		/* As the whole budget was spent, we still own the napi, so we can
+ * safely handle the rx_list.
+ */
+ gro_normal_list(napi);
__napi_schedule(napi);
+ }
local_bh_enable();
}
@@ -6167,6 +6204,7 @@ restart:
}
work = napi_poll(napi, BUSY_POLL_BUDGET);
trace_napi_poll(napi, work, BUSY_POLL_BUDGET);
+ gro_normal_list(napi);
count:
if (work > 0)
__NET_ADD_STATS(dev_net(napi->dev),
@@ -6272,6 +6310,8 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
napi->timer.function = napi_watchdog;
init_gro_hash(napi);
napi->skb = NULL;
+ INIT_LIST_HEAD(&napi->rx_list);
+ napi->rx_count = 0;
napi->poll = poll;
if (weight > NAPI_POLL_WEIGHT)
netdev_err_once(dev, "%s() called with weight %d\n", __func__,
@@ -6368,6 +6408,8 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
goto out_unlock;
}
+ gro_normal_list(n);
+
if (n->gro_bitmask) {
/* flush too old packets
* If HZ < 1000, flush all packets.
@@ -8088,12 +8130,15 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
bpf_chk = generic_xdp_install;
if (fd >= 0) {
+ u32 prog_id;
+
if (!offload && __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG)) {
NL_SET_ERR_MSG(extack, "native and generic XDP can't be active at the same time");
return -EEXIST;
}
- if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
- __dev_xdp_query(dev, bpf_op, query)) {
+
+ prog_id = __dev_xdp_query(dev, bpf_op, query);
+ if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && prog_id) {
NL_SET_ERR_MSG(extack, "XDP program already attached");
return -EBUSY;
}
@@ -8108,6 +8153,14 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
bpf_prog_put(prog);
return -EINVAL;
}
+
+ if (prog->aux->id == prog_id) {
+ bpf_prog_put(prog);
+ return 0;
+ }
+ } else {
+ if (!__dev_xdp_query(dev, bpf_op, query))
+ return 0;
}
err = dev_xdp_install(dev, bpf_op, extack, flags, prog);
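
The gro_normal_one()/gro_normal_list() helpers added to dev.c above batch GRO_NORMAL skbs from the napi_gro_frags() path on napi->rx_list and deliver them through netif_receive_skb_list_internal() once gro_normal_batch skbs have accumulated, with napi_complete_done(), napi_poll() and the busy-poll paths flushing any partial batch. A hypothetical driver poll routine, only to show where the batching takes effect; every foo_* name is made up:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

struct foo_rx_ring {
	struct napi_struct napi;
	/* ... hypothetical ring state ... */
};

/* Hypothetical: attach the next received buffer as frags of @skb;
 * returns false when the ring is empty.
 */
static bool foo_rx_pull_frags(struct foo_rx_ring *ring, struct sk_buff *skb);

static int foo_napi_poll(struct napi_struct *napi, int budget)
{
	struct foo_rx_ring *ring = container_of(napi, struct foo_rx_ring, napi);
	int work_done = 0;

	while (work_done < budget) {
		struct sk_buff *skb = napi_get_frags(napi);

		if (!skb || !foo_rx_pull_frags(ring, skb))
			break;

		/* GRO_NORMAL results are queued on napi->rx_list and passed
		 * to netif_receive_skb_list_internal() in batches of
		 * gro_normal_batch instead of one skb at a time.
		 */
		napi_gro_frags(napi);
		work_done++;
	}

	if (work_done < budget)
		napi_complete_done(napi, work_done);	/* flushes a partial batch */

	return work_done;
}
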
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 4f40aeace902..e48680efe54a 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -18,6 +18,8 @@
#include <linux/spinlock.h>
#include <linux/refcount.h>
#include <linux/workqueue.h>
+#include <linux/u64_stats_sync.h>
+#include <linux/timekeeping.h>
#include <rdma/ib_verbs.h>
#include <net/netlink.h>
#include <net/genetlink.h>
@@ -25,6 +27,7 @@
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/devlink.h>
+#include <net/drop_monitor.h>
#define CREATE_TRACE_POINTS
#include <trace/events/devlink.h>
@@ -133,7 +136,7 @@ static struct devlink *devlink_get_from_info(struct genl_info *info)
}
static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink,
- int port_index)
+ unsigned int port_index)
{
struct devlink_port *devlink_port;
@@ -144,7 +147,8 @@ static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink,
return NULL;
}
-static bool devlink_port_index_exists(struct devlink *devlink, int port_index)
+static bool devlink_port_index_exists(struct devlink *devlink,
+ unsigned int port_index)
{
return devlink_port_get_by_index(devlink, port_index);
}
@@ -342,7 +346,6 @@ struct devlink_snapshot {
struct list_head list;
struct devlink_region *region;
devlink_snapshot_data_dest_t *data_destructor;
- u64 data_len;
u8 *data;
u32 id;
};
@@ -371,14 +374,6 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
return NULL;
}
-static void devlink_region_snapshot_del(struct devlink_snapshot *snapshot)
-{
- snapshot->region->cur_snapshots--;
- list_del(&snapshot->list);
- (*snapshot->data_destructor)(snapshot->data);
- kfree(snapshot);
-}
-
#define DEVLINK_NL_FLAG_NEED_DEVLINK BIT(0)
#define DEVLINK_NL_FLAG_NEED_PORT BIT(1)
#define DEVLINK_NL_FLAG_NEED_SB BIT(2)
@@ -476,6 +471,8 @@ static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
if (devlink_nl_put_handle(msg, devlink))
goto nla_put_failure;
+ if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_FAILED, devlink->reload_failed))
+ goto nla_put_failure;
genlmsg_end(msg, hdr);
return 0;
@@ -515,32 +512,37 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg,
return 0;
if (nla_put_u16(msg, DEVLINK_ATTR_PORT_FLAVOUR, attrs->flavour))
return -EMSGSIZE;
- if (devlink_port->attrs.flavour == DEVLINK_PORT_FLAVOUR_PCI_PF) {
+ switch (devlink_port->attrs.flavour) {
+ case DEVLINK_PORT_FLAVOUR_PCI_PF:
if (nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER,
attrs->pci_pf.pf))
return -EMSGSIZE;
- } else if (devlink_port->attrs.flavour == DEVLINK_PORT_FLAVOUR_PCI_VF) {
+ break;
+ case DEVLINK_PORT_FLAVOUR_PCI_VF:
if (nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER,
attrs->pci_vf.pf) ||
nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_VF_NUMBER,
attrs->pci_vf.vf))
return -EMSGSIZE;
+ break;
+ case DEVLINK_PORT_FLAVOUR_PHYSICAL:
+ case DEVLINK_PORT_FLAVOUR_CPU:
+ case DEVLINK_PORT_FLAVOUR_DSA:
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER,
+ attrs->phys.port_number))
+ return -EMSGSIZE;
+ if (!attrs->split)
+ return 0;
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP,
+ attrs->phys.port_number))
+ return -EMSGSIZE;
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER,
+ attrs->phys.split_subport_number))
+ return -EMSGSIZE;
+ break;
+ default:
+ break;
}
- if (devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PHYSICAL &&
- devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_CPU &&
- devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_DSA)
- return 0;
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER,
- attrs->phys.port_number))
- return -EMSGSIZE;
- if (!attrs->split)
- return 0;
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP,
- attrs->phys.port_number))
- return -EMSGSIZE;
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER,
- attrs->phys.split_subport_number))
- return -EMSGSIZE;
return 0;
}
@@ -560,7 +562,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
goto nla_put_failure;
- spin_lock(&devlink_port->type_lock);
+ spin_lock_bh(&devlink_port->type_lock);
if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type))
goto nla_put_failure_type_locked;
if (devlink_port->desired_type != DEVLINK_PORT_TYPE_NOTSET &&
@@ -585,7 +587,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
ibdev->name))
goto nla_put_failure_type_locked;
}
- spin_unlock(&devlink_port->type_lock);
+ spin_unlock_bh(&devlink_port->type_lock);
if (devlink_nl_port_attrs_put(msg, devlink_port))
goto nla_put_failure;
@@ -593,7 +595,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
return 0;
nla_put_failure_type_locked:
- spin_unlock(&devlink_port->type_lock);
+ spin_unlock_bh(&devlink_port->type_lock);
nla_put_failure:
genlmsg_cancel(msg, hdr);
return -EMSGSIZE;
@@ -2672,12 +2674,32 @@ devlink_resources_validate(struct devlink *devlink,
return err;
}
+static bool devlink_reload_supported(struct devlink *devlink)
+{
+ return devlink->ops->reload_down && devlink->ops->reload_up;
+}
+
+static void devlink_reload_failed_set(struct devlink *devlink,
+ bool reload_failed)
+{
+ if (devlink->reload_failed == reload_failed)
+ return;
+ devlink->reload_failed = reload_failed;
+ devlink_notify(devlink, DEVLINK_CMD_NEW);
+}
+
+bool devlink_is_reload_failed(const struct devlink *devlink)
+{
+ return devlink->reload_failed;
+}
+EXPORT_SYMBOL_GPL(devlink_is_reload_failed);
+
static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
int err;
- if (!devlink->ops->reload)
+ if (!devlink_reload_supported(devlink))
return -EOPNOTSUPP;
err = devlink_resources_validate(devlink, NULL, info);
@@ -2685,7 +2707,12 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
NL_SET_ERR_MSG_MOD(info->extack, "resources size validation failed");
return err;
}
- return devlink->ops->reload(devlink, info->extack);
+ err = devlink->ops->reload_down(devlink, info->extack);
+ if (err)
+ return err;
+ err = devlink->ops->reload_up(devlink, info->extack);
+ devlink_reload_failed_set(devlink, !!err);
+ return err;
}
static int devlink_nl_flash_update_fill(struct sk_buff *msg,
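
The reload hunk above replaces the single reload() op with a reload_down()/reload_up() pair and only treats reload as supported when a driver implements both. A hypothetical driver-side sketch under that assumption; every foo_* symbol is made up:

#include <linux/netlink.h>
#include <net/devlink.h>

struct foo_priv {
	/* hypothetical driver state */
	bool loaded;
};

static void foo_teardown(struct foo_priv *priv) { priv->loaded = false; }
static int foo_setup(struct foo_priv *priv) { priv->loaded = true; return 0; }

static int foo_devlink_reload_down(struct devlink *devlink,
				   struct netlink_ext_ack *extack)
{
	struct foo_priv *priv = devlink_priv(devlink);

	foo_teardown(priv);
	return 0;
}

static int foo_devlink_reload_up(struct devlink *devlink,
				 struct netlink_ext_ack *extack)
{
	struct foo_priv *priv = devlink_priv(devlink);

	return foo_setup(priv);
}

/* Both ops must be set, or devlink_reload_supported() makes the
 * reload command return -EOPNOTSUPP.
 */
static const struct devlink_ops foo_devlink_ops = {
	.reload_down = foo_devlink_reload_down,
	.reload_up = foo_devlink_reload_up,
};
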
@@ -2852,6 +2879,11 @@ static const struct devlink_param devlink_param_generic[] = {
.name = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME,
.type = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE,
},
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_RESET_DEV_ON_DRV_PROBE,
+ .name = DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_NAME,
+ .type = DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_TYPE,
+ },
};
static int devlink_param_generic_verify(const struct devlink_param *param)
@@ -3596,6 +3628,16 @@ out_free_msg:
nlmsg_free(msg);
}
+static void devlink_region_snapshot_del(struct devlink_region *region,
+ struct devlink_snapshot *snapshot)
+{
+ devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_DEL);
+ region->cur_snapshots--;
+ list_del(&snapshot->list);
+ (*snapshot->data_destructor)(snapshot->data);
+ kfree(snapshot);
+}
+
static int devlink_nl_cmd_region_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
@@ -3691,8 +3733,7 @@ static int devlink_nl_cmd_region_del(struct sk_buff *skb,
if (!snapshot)
return -EINVAL;
- devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_DEL);
- devlink_region_snapshot_del(snapshot);
+ devlink_region_snapshot_del(region, snapshot);
return 0;
}
@@ -3748,8 +3789,8 @@ static int devlink_nl_region_read_snapshot_fill(struct sk_buff *skb,
if (!snapshot)
return -EINVAL;
- if (end_offset > snapshot->data_len || dump)
- end_offset = snapshot->data_len;
+ if (end_offset > region->size || dump)
+ end_offset = region->size;
while (curr_offset < end_offset) {
u32 data_size;
@@ -5154,6 +5195,571 @@ devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb,
return 0;
}
+struct devlink_stats {
+ u64 rx_bytes;
+ u64 rx_packets;
+ struct u64_stats_sync syncp;
+};
+
+/**
+ * struct devlink_trap_group_item - Packet trap group attributes.
+ * @group: Immutable packet trap group attributes.
+ * @refcount: Number of trap items using the group.
+ * @list: trap_group_list member.
+ * @stats: Trap group statistics.
+ *
+ * Describes packet trap group attributes. Created by devlink during trap
+ * registration.
+ */
+struct devlink_trap_group_item {
+ const struct devlink_trap_group *group;
+ refcount_t refcount;
+ struct list_head list;
+ struct devlink_stats __percpu *stats;
+};
+
+/**
+ * struct devlink_trap_item - Packet trap attributes.
+ * @trap: Immutable packet trap attributes.
+ * @group_item: Associated group item.
+ * @list: trap_list member.
+ * @action: Trap action.
+ * @stats: Trap statistics.
+ * @priv: Driver private information.
+ *
+ * Describes both mutable and immutable packet trap attributes. Created by
+ * devlink during trap registration and used for all trap related operations.
+ */
+struct devlink_trap_item {
+ const struct devlink_trap *trap;
+ struct devlink_trap_group_item *group_item;
+ struct list_head list;
+ enum devlink_trap_action action;
+ struct devlink_stats __percpu *stats;
+ void *priv;
+};
+
+static struct devlink_trap_item *
+devlink_trap_item_lookup(struct devlink *devlink, const char *name)
+{
+ struct devlink_trap_item *trap_item;
+
+ list_for_each_entry(trap_item, &devlink->trap_list, list) {
+ if (!strcmp(trap_item->trap->name, name))
+ return trap_item;
+ }
+
+ return NULL;
+}
+
+static struct devlink_trap_item *
+devlink_trap_item_get_from_info(struct devlink *devlink,
+ struct genl_info *info)
+{
+ struct nlattr *attr;
+
+ if (!info->attrs[DEVLINK_ATTR_TRAP_NAME])
+ return NULL;
+ attr = info->attrs[DEVLINK_ATTR_TRAP_NAME];
+
+ return devlink_trap_item_lookup(devlink, nla_data(attr));
+}
+
+static int
+devlink_trap_action_get_from_info(struct genl_info *info,
+ enum devlink_trap_action *p_trap_action)
+{
+ u8 val;
+
+ val = nla_get_u8(info->attrs[DEVLINK_ATTR_TRAP_ACTION]);
+ switch (val) {
+ case DEVLINK_TRAP_ACTION_DROP: /* fall-through */
+ case DEVLINK_TRAP_ACTION_TRAP:
+ *p_trap_action = val;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int devlink_trap_metadata_put(struct sk_buff *msg,
+ const struct devlink_trap *trap)
+{
+ struct nlattr *attr;
+
+ attr = nla_nest_start(msg, DEVLINK_ATTR_TRAP_METADATA);
+ if (!attr)
+ return -EMSGSIZE;
+
+ if ((trap->metadata_cap & DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT) &&
+ nla_put_flag(msg, DEVLINK_ATTR_TRAP_METADATA_TYPE_IN_PORT))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, attr);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, attr);
+ return -EMSGSIZE;
+}
+
+static void devlink_trap_stats_read(struct devlink_stats __percpu *trap_stats,
+ struct devlink_stats *stats)
+{
+ int i;
+
+ memset(stats, 0, sizeof(*stats));
+ for_each_possible_cpu(i) {
+ struct devlink_stats *cpu_stats;
+ u64 rx_packets, rx_bytes;
+ unsigned int start;
+
+ cpu_stats = per_cpu_ptr(trap_stats, i);
+ do {
+ start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
+ rx_packets = cpu_stats->rx_packets;
+ rx_bytes = cpu_stats->rx_bytes;
+ } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
+
+ stats->rx_packets += rx_packets;
+ stats->rx_bytes += rx_bytes;
+ }
+}
+
+static int devlink_trap_stats_put(struct sk_buff *msg,
+ struct devlink_stats __percpu *trap_stats)
+{
+ struct devlink_stats stats;
+ struct nlattr *attr;
+
+ devlink_trap_stats_read(trap_stats, &stats);
+
+ attr = nla_nest_start(msg, DEVLINK_ATTR_STATS);
+ if (!attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
+ stats.rx_packets, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES,
+ stats.rx_bytes, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, attr);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, attr);
+ return -EMSGSIZE;
+}
+
+static int devlink_nl_trap_fill(struct sk_buff *msg, struct devlink *devlink,
+ const struct devlink_trap_item *trap_item,
+ enum devlink_command cmd, u32 portid, u32 seq,
+ int flags)
+{
+ struct devlink_trap_group_item *group_item = trap_item->group_item;
+ void *hdr;
+ int err;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ if (nla_put_string(msg, DEVLINK_ATTR_TRAP_GROUP_NAME,
+ group_item->group->name))
+ goto nla_put_failure;
+
+ if (nla_put_string(msg, DEVLINK_ATTR_TRAP_NAME, trap_item->trap->name))
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, DEVLINK_ATTR_TRAP_TYPE, trap_item->trap->type))
+ goto nla_put_failure;
+
+ if (trap_item->trap->generic &&
+ nla_put_flag(msg, DEVLINK_ATTR_TRAP_GENERIC))
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, DEVLINK_ATTR_TRAP_ACTION, trap_item->action))
+ goto nla_put_failure;
+
+ err = devlink_trap_metadata_put(msg, trap_item->trap);
+ if (err)
+ goto nla_put_failure;
+
+ err = devlink_trap_stats_put(msg, trap_item->stats);
+ if (err)
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static int devlink_nl_cmd_trap_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_trap_item *trap_item;
+ struct sk_buff *msg;
+ int err;
+
+ if (list_empty(&devlink->trap_list))
+ return -EOPNOTSUPP;
+
+ trap_item = devlink_trap_item_get_from_info(devlink, info);
+ if (!trap_item) {
+ NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap");
+ return -ENOENT;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_trap_fill(msg, devlink, trap_item,
+ DEVLINK_CMD_TRAP_NEW, info->snd_portid,
+ info->snd_seq, 0);
+ if (err)
+ goto err_trap_fill;
+
+ return genlmsg_reply(msg, info);
+
+err_trap_fill:
+ nlmsg_free(msg);
+ return err;
+}
+
+static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ struct devlink_trap_item *trap_item;
+ struct devlink *devlink;
+ int start = cb->args[0];
+ int idx = 0;
+ int err;
+
+ mutex_lock(&devlink_mutex);
+ list_for_each_entry(devlink, &devlink_list, list) {
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ continue;
+ mutex_lock(&devlink->lock);
+ list_for_each_entry(trap_item, &devlink->trap_list, list) {
+ if (idx < start) {
+ idx++;
+ continue;
+ }
+ err = devlink_nl_trap_fill(msg, devlink, trap_item,
+ DEVLINK_CMD_TRAP_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI);
+ if (err) {
+ mutex_unlock(&devlink->lock);
+ goto out;
+ }
+ idx++;
+ }
+ mutex_unlock(&devlink->lock);
+ }
+out:
+ mutex_unlock(&devlink_mutex);
+
+ cb->args[0] = idx;
+ return msg->len;
+}
+
+static int __devlink_trap_action_set(struct devlink *devlink,
+ struct devlink_trap_item *trap_item,
+ enum devlink_trap_action trap_action,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ if (trap_item->action != trap_action &&
+ trap_item->trap->type != DEVLINK_TRAP_TYPE_DROP) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot change action of non-drop traps. Skipping");
+ return 0;
+ }
+
+ err = devlink->ops->trap_action_set(devlink, trap_item->trap,
+ trap_action);
+ if (err)
+ return err;
+
+ trap_item->action = trap_action;
+
+ return 0;
+}
+
+static int devlink_trap_action_set(struct devlink *devlink,
+ struct devlink_trap_item *trap_item,
+ struct genl_info *info)
+{
+ enum devlink_trap_action trap_action;
+ int err;
+
+ if (!info->attrs[DEVLINK_ATTR_TRAP_ACTION])
+ return 0;
+
+ err = devlink_trap_action_get_from_info(info, &trap_action);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Invalid trap action");
+ return -EINVAL;
+ }
+
+ return __devlink_trap_action_set(devlink, trap_item, trap_action,
+ info->extack);
+}
+
+static int devlink_nl_cmd_trap_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_trap_item *trap_item;
+ int err;
+
+ if (list_empty(&devlink->trap_list))
+ return -EOPNOTSUPP;
+
+ trap_item = devlink_trap_item_get_from_info(devlink, info);
+ if (!trap_item) {
+ NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap");
+ return -ENOENT;
+ }
+
+ err = devlink_trap_action_set(devlink, trap_item, info);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static struct devlink_trap_group_item *
+devlink_trap_group_item_lookup(struct devlink *devlink, const char *name)
+{
+ struct devlink_trap_group_item *group_item;
+
+ list_for_each_entry(group_item, &devlink->trap_group_list, list) {
+ if (!strcmp(group_item->group->name, name))
+ return group_item;
+ }
+
+ return NULL;
+}
+
+static struct devlink_trap_group_item *
+devlink_trap_group_item_get_from_info(struct devlink *devlink,
+ struct genl_info *info)
+{
+ char *name;
+
+ if (!info->attrs[DEVLINK_ATTR_TRAP_GROUP_NAME])
+ return NULL;
+ name = nla_data(info->attrs[DEVLINK_ATTR_TRAP_GROUP_NAME]);
+
+ return devlink_trap_group_item_lookup(devlink, name);
+}
+
+static int
+devlink_nl_trap_group_fill(struct sk_buff *msg, struct devlink *devlink,
+ const struct devlink_trap_group_item *group_item,
+ enum devlink_command cmd, u32 portid, u32 seq,
+ int flags)
+{
+ void *hdr;
+ int err;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ if (nla_put_string(msg, DEVLINK_ATTR_TRAP_GROUP_NAME,
+ group_item->group->name))
+ goto nla_put_failure;
+
+ if (group_item->group->generic &&
+ nla_put_flag(msg, DEVLINK_ATTR_TRAP_GENERIC))
+ goto nla_put_failure;
+
+ err = devlink_trap_stats_put(msg, group_item->stats);
+ if (err)
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static int devlink_nl_cmd_trap_group_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_trap_group_item *group_item;
+ struct sk_buff *msg;
+ int err;
+
+ if (list_empty(&devlink->trap_group_list))
+ return -EOPNOTSUPP;
+
+ group_item = devlink_trap_group_item_get_from_info(devlink, info);
+ if (!group_item) {
+ NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap group");
+ return -ENOENT;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_trap_group_fill(msg, devlink, group_item,
+ DEVLINK_CMD_TRAP_GROUP_NEW,
+ info->snd_portid, info->snd_seq, 0);
+ if (err)
+ goto err_trap_group_fill;
+
+ return genlmsg_reply(msg, info);
+
+err_trap_group_fill:
+ nlmsg_free(msg);
+ return err;
+}
+
+static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ enum devlink_command cmd = DEVLINK_CMD_TRAP_GROUP_NEW;
+ struct devlink_trap_group_item *group_item;
+ u32 portid = NETLINK_CB(cb->skb).portid;
+ struct devlink *devlink;
+ int start = cb->args[0];
+ int idx = 0;
+ int err;
+
+ mutex_lock(&devlink_mutex);
+ list_for_each_entry(devlink, &devlink_list, list) {
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ continue;
+ mutex_lock(&devlink->lock);
+ list_for_each_entry(group_item, &devlink->trap_group_list,
+ list) {
+ if (idx < start) {
+ idx++;
+ continue;
+ }
+ err = devlink_nl_trap_group_fill(msg, devlink,
+ group_item, cmd,
+ portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI);
+ if (err) {
+ mutex_unlock(&devlink->lock);
+ goto out;
+ }
+ idx++;
+ }
+ mutex_unlock(&devlink->lock);
+ }
+out:
+ mutex_unlock(&devlink_mutex);
+
+ cb->args[0] = idx;
+ return msg->len;
+}
+
+static int
+__devlink_trap_group_action_set(struct devlink *devlink,
+ struct devlink_trap_group_item *group_item,
+ enum devlink_trap_action trap_action,
+ struct netlink_ext_ack *extack)
+{
+ const char *group_name = group_item->group->name;
+ struct devlink_trap_item *trap_item;
+ int err;
+
+ list_for_each_entry(trap_item, &devlink->trap_list, list) {
+ if (strcmp(trap_item->trap->group.name, group_name))
+ continue;
+ err = __devlink_trap_action_set(devlink, trap_item,
+ trap_action, extack);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int
+devlink_trap_group_action_set(struct devlink *devlink,
+ struct devlink_trap_group_item *group_item,
+ struct genl_info *info)
+{
+ enum devlink_trap_action trap_action;
+ int err;
+
+ if (!info->attrs[DEVLINK_ATTR_TRAP_ACTION])
+ return 0;
+
+ err = devlink_trap_action_get_from_info(info, &trap_action);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Invalid trap action");
+ return -EINVAL;
+ }
+
+ err = __devlink_trap_group_action_set(devlink, group_item, trap_action,
+ info->extack);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int devlink_nl_cmd_trap_group_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_trap_group_item *group_item;
+ int err;
+
+ if (list_empty(&devlink->trap_group_list))
+ return -EOPNOTSUPP;
+
+ group_item = devlink_trap_group_item_get_from_info(devlink, info);
+ if (!group_item) {
+ NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap group");
+ return -ENOENT;
+ }
+
+ err = devlink_trap_group_action_set(devlink, group_item, info);
+ if (err)
+ return err;
+
+ return 0;
+}
+
static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -5184,6 +5790,9 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 },
[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_TRAP_ACTION] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING },
};
static const struct genl_ops devlink_nl_ops[] = {
@@ -5483,6 +6092,32 @@ static const struct genl_ops devlink_nl_ops[] = {
.flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
+ {
+ .cmd = DEVLINK_CMD_TRAP_GET,
+ .doit = devlink_nl_cmd_trap_get_doit,
+ .dumpit = devlink_nl_cmd_trap_get_dumpit,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_SET,
+ .doit = devlink_nl_cmd_trap_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_GROUP_GET,
+ .doit = devlink_nl_cmd_trap_group_get_doit,
+ .dumpit = devlink_nl_cmd_trap_group_get_dumpit,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_GROUP_SET,
+ .doit = devlink_nl_cmd_trap_group_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
};
static struct genl_family devlink_nl_family __ro_after_init = {
@@ -5528,6 +6163,8 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
INIT_LIST_HEAD(&devlink->param_list);
INIT_LIST_HEAD(&devlink->region_list);
INIT_LIST_HEAD(&devlink->reporter_list);
+ INIT_LIST_HEAD(&devlink->trap_list);
+ INIT_LIST_HEAD(&devlink->trap_group_list);
mutex_init(&devlink->lock);
mutex_init(&devlink->reporters_lock);
return devlink;
@@ -5574,6 +6211,8 @@ void devlink_free(struct devlink *devlink)
{
mutex_destroy(&devlink->reporters_lock);
mutex_destroy(&devlink->lock);
+ WARN_ON(!list_empty(&devlink->trap_group_list));
+ WARN_ON(!list_empty(&devlink->trap_list));
WARN_ON(!list_empty(&devlink->reporter_list));
WARN_ON(!list_empty(&devlink->region_list));
WARN_ON(!list_empty(&devlink->param_list));
@@ -5678,10 +6317,10 @@ static void __devlink_port_type_set(struct devlink_port *devlink_port,
if (WARN_ON(!devlink_port->registered))
return;
devlink_port_type_warn_cancel(devlink_port);
- spin_lock(&devlink_port->type_lock);
+ spin_lock_bh(&devlink_port->type_lock);
devlink_port->type = type;
devlink_port->type_dev = type_dev;
- spin_unlock(&devlink_port->type_lock);
+ spin_unlock_bh(&devlink_port->type_lock);
devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
}
@@ -6538,7 +7177,7 @@ __devlink_param_driverinit_value_set(struct devlink *devlink,
int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
union devlink_param_value *init_val)
{
- if (!devlink->ops->reload)
+ if (!devlink_reload_supported(devlink))
return -EOPNOTSUPP;
return __devlink_param_driverinit_value_get(&devlink->param_list,
@@ -6585,7 +7224,7 @@ int devlink_port_param_driverinit_value_get(struct devlink_port *devlink_port,
{
struct devlink *devlink = devlink_port->devlink;
- if (!devlink->ops->reload)
+ if (!devlink_reload_supported(devlink))
return -EOPNOTSUPP;
return __devlink_param_driverinit_value_get(&devlink_port->param_list,
@@ -6744,7 +7383,7 @@ void devlink_region_destroy(struct devlink_region *region)
/* Free all snapshots of region */
list_for_each_entry_safe(snapshot, ts, &region->snapshot_list, list)
- devlink_region_snapshot_del(snapshot);
+ devlink_region_snapshot_del(region, snapshot);
list_del(&region->list);
@@ -6784,12 +7423,11 @@ EXPORT_SYMBOL_GPL(devlink_region_shapshot_id_get);
* The @snapshot_id should be obtained using the getter function.
*
* @region: devlink region of the snapshot
- * @data_len: size of snapshot data
* @data: snapshot data
* @snapshot_id: snapshot id to be created
* @data_destructor: pointer to destructor function to free data
*/
-int devlink_region_snapshot_create(struct devlink_region *region, u64 data_len,
+int devlink_region_snapshot_create(struct devlink_region *region,
u8 *data, u32 snapshot_id,
devlink_snapshot_data_dest_t *data_destructor)
{
@@ -6819,7 +7457,6 @@ int devlink_region_snapshot_create(struct devlink_region *region, u64 data_len,
snapshot->id = snapshot_id;
snapshot->region = region;
snapshot->data = data;
- snapshot->data_len = data_len;
snapshot->data_destructor = data_destructor;
list_add_tail(&snapshot->list, &region->snapshot_list);
@@ -6836,6 +7473,475 @@ unlock:
}
EXPORT_SYMBOL_GPL(devlink_region_snapshot_create);
+#define DEVLINK_TRAP(_id, _type) \
+ { \
+ .type = DEVLINK_TRAP_TYPE_##_type, \
+ .id = DEVLINK_TRAP_GENERIC_ID_##_id, \
+ .name = DEVLINK_TRAP_GENERIC_NAME_##_id, \
+ }
+
+static const struct devlink_trap devlink_trap_generic[] = {
+ DEVLINK_TRAP(SMAC_MC, DROP),
+ DEVLINK_TRAP(VLAN_TAG_MISMATCH, DROP),
+ DEVLINK_TRAP(INGRESS_VLAN_FILTER, DROP),
+ DEVLINK_TRAP(INGRESS_STP_FILTER, DROP),
+ DEVLINK_TRAP(EMPTY_TX_LIST, DROP),
+ DEVLINK_TRAP(PORT_LOOPBACK_FILTER, DROP),
+ DEVLINK_TRAP(BLACKHOLE_ROUTE, DROP),
+ DEVLINK_TRAP(TTL_ERROR, EXCEPTION),
+ DEVLINK_TRAP(TAIL_DROP, DROP),
+};
+
+#define DEVLINK_TRAP_GROUP(_id) \
+ { \
+ .id = DEVLINK_TRAP_GROUP_GENERIC_ID_##_id, \
+ .name = DEVLINK_TRAP_GROUP_GENERIC_NAME_##_id, \
+ }
+
+static const struct devlink_trap_group devlink_trap_group_generic[] = {
+ DEVLINK_TRAP_GROUP(L2_DROPS),
+ DEVLINK_TRAP_GROUP(L3_DROPS),
+ DEVLINK_TRAP_GROUP(BUFFER_DROPS),
+};
+
+static int devlink_trap_generic_verify(const struct devlink_trap *trap)
+{
+ if (trap->id > DEVLINK_TRAP_GENERIC_ID_MAX)
+ return -EINVAL;
+
+ if (strcmp(trap->name, devlink_trap_generic[trap->id].name))
+ return -EINVAL;
+
+ if (trap->type != devlink_trap_generic[trap->id].type)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int devlink_trap_driver_verify(const struct devlink_trap *trap)
+{
+ int i;
+
+ if (trap->id <= DEVLINK_TRAP_GENERIC_ID_MAX)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(devlink_trap_generic); i++) {
+ if (!strcmp(trap->name, devlink_trap_generic[i].name))
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+static int devlink_trap_verify(const struct devlink_trap *trap)
+{
+ if (!trap || !trap->name || !trap->group.name)
+ return -EINVAL;
+
+ if (trap->generic)
+ return devlink_trap_generic_verify(trap);
+ else
+ return devlink_trap_driver_verify(trap);
+}
+
+static int
+devlink_trap_group_generic_verify(const struct devlink_trap_group *group)
+{
+ if (group->id > DEVLINK_TRAP_GROUP_GENERIC_ID_MAX)
+ return -EINVAL;
+
+ if (strcmp(group->name, devlink_trap_group_generic[group->id].name))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int
+devlink_trap_group_driver_verify(const struct devlink_trap_group *group)
+{
+ int i;
+
+ if (group->id <= DEVLINK_TRAP_GROUP_GENERIC_ID_MAX)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(devlink_trap_group_generic); i++) {
+ if (!strcmp(group->name, devlink_trap_group_generic[i].name))
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+static int devlink_trap_group_verify(const struct devlink_trap_group *group)
+{
+ if (group->generic)
+ return devlink_trap_group_generic_verify(group);
+ else
+ return devlink_trap_group_driver_verify(group);
+}
+
+static void
+devlink_trap_group_notify(struct devlink *devlink,
+ const struct devlink_trap_group_item *group_item,
+ enum devlink_command cmd)
+{
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_GROUP_NEW &&
+ cmd != DEVLINK_CMD_TRAP_GROUP_DEL);
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_trap_group_fill(msg, devlink, group_item, cmd, 0, 0,
+ 0);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+static struct devlink_trap_group_item *
+devlink_trap_group_item_create(struct devlink *devlink,
+ const struct devlink_trap_group *group)
+{
+ struct devlink_trap_group_item *group_item;
+ int err;
+
+ err = devlink_trap_group_verify(group);
+ if (err)
+ return ERR_PTR(err);
+
+ group_item = kzalloc(sizeof(*group_item), GFP_KERNEL);
+ if (!group_item)
+ return ERR_PTR(-ENOMEM);
+
+ group_item->stats = netdev_alloc_pcpu_stats(struct devlink_stats);
+ if (!group_item->stats) {
+ err = -ENOMEM;
+ goto err_stats_alloc;
+ }
+
+ group_item->group = group;
+ refcount_set(&group_item->refcount, 1);
+
+ if (devlink->ops->trap_group_init) {
+ err = devlink->ops->trap_group_init(devlink, group);
+ if (err)
+ goto err_group_init;
+ }
+
+ list_add_tail(&group_item->list, &devlink->trap_group_list);
+ devlink_trap_group_notify(devlink, group_item,
+ DEVLINK_CMD_TRAP_GROUP_NEW);
+
+ return group_item;
+
+err_group_init:
+ free_percpu(group_item->stats);
+err_stats_alloc:
+ kfree(group_item);
+ return ERR_PTR(err);
+}
+
+static void
+devlink_trap_group_item_destroy(struct devlink *devlink,
+ struct devlink_trap_group_item *group_item)
+{
+ devlink_trap_group_notify(devlink, group_item,
+ DEVLINK_CMD_TRAP_GROUP_DEL);
+ list_del(&group_item->list);
+ free_percpu(group_item->stats);
+ kfree(group_item);
+}
+
+static struct devlink_trap_group_item *
+devlink_trap_group_item_get(struct devlink *devlink,
+ const struct devlink_trap_group *group)
+{
+ struct devlink_trap_group_item *group_item;
+
+ group_item = devlink_trap_group_item_lookup(devlink, group->name);
+ if (group_item) {
+ refcount_inc(&group_item->refcount);
+ return group_item;
+ }
+
+ return devlink_trap_group_item_create(devlink, group);
+}
+
+static void
+devlink_trap_group_item_put(struct devlink *devlink,
+ struct devlink_trap_group_item *group_item)
+{
+ if (!refcount_dec_and_test(&group_item->refcount))
+ return;
+
+ devlink_trap_group_item_destroy(devlink, group_item);
+}
+
+static int
+devlink_trap_item_group_link(struct devlink *devlink,
+ struct devlink_trap_item *trap_item)
+{
+ struct devlink_trap_group_item *group_item;
+
+ group_item = devlink_trap_group_item_get(devlink,
+ &trap_item->trap->group);
+ if (IS_ERR(group_item))
+ return PTR_ERR(group_item);
+
+ trap_item->group_item = group_item;
+
+ return 0;
+}
+
+static void
+devlink_trap_item_group_unlink(struct devlink *devlink,
+ struct devlink_trap_item *trap_item)
+{
+ devlink_trap_group_item_put(devlink, trap_item->group_item);
+}
+
+static void devlink_trap_notify(struct devlink *devlink,
+ const struct devlink_trap_item *trap_item,
+ enum devlink_command cmd)
+{
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_NEW &&
+ cmd != DEVLINK_CMD_TRAP_DEL);
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_trap_fill(msg, devlink, trap_item, cmd, 0, 0, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+static int
+devlink_trap_register(struct devlink *devlink,
+ const struct devlink_trap *trap, void *priv)
+{
+ struct devlink_trap_item *trap_item;
+ int err;
+
+ if (devlink_trap_item_lookup(devlink, trap->name))
+ return -EEXIST;
+
+ trap_item = kzalloc(sizeof(*trap_item), GFP_KERNEL);
+ if (!trap_item)
+ return -ENOMEM;
+
+ trap_item->stats = netdev_alloc_pcpu_stats(struct devlink_stats);
+ if (!trap_item->stats) {
+ err = -ENOMEM;
+ goto err_stats_alloc;
+ }
+
+ trap_item->trap = trap;
+ trap_item->action = trap->init_action;
+ trap_item->priv = priv;
+
+ err = devlink_trap_item_group_link(devlink, trap_item);
+ if (err)
+ goto err_group_link;
+
+ err = devlink->ops->trap_init(devlink, trap, trap_item);
+ if (err)
+ goto err_trap_init;
+
+ list_add_tail(&trap_item->list, &devlink->trap_list);
+ devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_NEW);
+
+ return 0;
+
+err_trap_init:
+ devlink_trap_item_group_unlink(devlink, trap_item);
+err_group_link:
+ free_percpu(trap_item->stats);
+err_stats_alloc:
+ kfree(trap_item);
+ return err;
+}
+
+static void devlink_trap_unregister(struct devlink *devlink,
+ const struct devlink_trap *trap)
+{
+ struct devlink_trap_item *trap_item;
+
+ trap_item = devlink_trap_item_lookup(devlink, trap->name);
+ if (WARN_ON_ONCE(!trap_item))
+ return;
+
+ devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_DEL);
+ list_del(&trap_item->list);
+ if (devlink->ops->trap_fini)
+ devlink->ops->trap_fini(devlink, trap, trap_item);
+ devlink_trap_item_group_unlink(devlink, trap_item);
+ free_percpu(trap_item->stats);
+ kfree(trap_item);
+}
+
+static void devlink_trap_disable(struct devlink *devlink,
+ const struct devlink_trap *trap)
+{
+ struct devlink_trap_item *trap_item;
+
+ trap_item = devlink_trap_item_lookup(devlink, trap->name);
+ if (WARN_ON_ONCE(!trap_item))
+ return;
+
+ devlink->ops->trap_action_set(devlink, trap, DEVLINK_TRAP_ACTION_DROP);
+ trap_item->action = DEVLINK_TRAP_ACTION_DROP;
+}
+
+/**
+ * devlink_traps_register - Register packet traps with devlink.
+ * @devlink: devlink.
+ * @traps: Packet traps.
+ * @traps_count: Count of provided packet traps.
+ * @priv: Driver private information.
+ *
+ * Return: Non-zero value on failure.
+ */
+int devlink_traps_register(struct devlink *devlink,
+ const struct devlink_trap *traps,
+ size_t traps_count, void *priv)
+{
+ int i, err;
+
+ if (!devlink->ops->trap_init || !devlink->ops->trap_action_set)
+ return -EINVAL;
+
+ mutex_lock(&devlink->lock);
+ for (i = 0; i < traps_count; i++) {
+ const struct devlink_trap *trap = &traps[i];
+
+ err = devlink_trap_verify(trap);
+ if (err)
+ goto err_trap_verify;
+
+ err = devlink_trap_register(devlink, trap, priv);
+ if (err)
+ goto err_trap_register;
+ }
+ mutex_unlock(&devlink->lock);
+
+ return 0;
+
+err_trap_register:
+err_trap_verify:
+ for (i--; i >= 0; i--)
+ devlink_trap_unregister(devlink, &traps[i]);
+ mutex_unlock(&devlink->lock);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devlink_traps_register);
+
+/**
+ * devlink_traps_unregister - Unregister packet traps from devlink.
+ * @devlink: devlink.
+ * @traps: Packet traps.
+ * @traps_count: Count of provided packet traps.
+ */
+void devlink_traps_unregister(struct devlink *devlink,
+ const struct devlink_trap *traps,
+ size_t traps_count)
+{
+ int i;
+
+ mutex_lock(&devlink->lock);
+ /* Make sure we do not have any packets in-flight while unregistering
+ * traps by disabling all of them and waiting for a grace period.
+ */
+ for (i = traps_count - 1; i >= 0; i--)
+ devlink_trap_disable(devlink, &traps[i]);
+ synchronize_rcu();
+ for (i = traps_count - 1; i >= 0; i--)
+ devlink_trap_unregister(devlink, &traps[i]);
+ mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devlink_traps_unregister);
+
+static void
+devlink_trap_stats_update(struct devlink_stats __percpu *trap_stats,
+ size_t skb_len)
+{
+ struct devlink_stats *stats;
+
+ stats = this_cpu_ptr(trap_stats);
+ u64_stats_update_begin(&stats->syncp);
+ stats->rx_bytes += skb_len;
+ stats->rx_packets++;
+ u64_stats_update_end(&stats->syncp);
+}
+
+static void
+devlink_trap_report_metadata_fill(struct net_dm_hw_metadata *hw_metadata,
+ const struct devlink_trap_item *trap_item,
+ struct devlink_port *in_devlink_port)
+{
+ struct devlink_trap_group_item *group_item = trap_item->group_item;
+
+ hw_metadata->trap_group_name = group_item->group->name;
+ hw_metadata->trap_name = trap_item->trap->name;
+
+ spin_lock(&in_devlink_port->type_lock);
+ if (in_devlink_port->type == DEVLINK_PORT_TYPE_ETH)
+ hw_metadata->input_dev = in_devlink_port->type_dev;
+ spin_unlock(&in_devlink_port->type_lock);
+}
+
+/**
+ * devlink_trap_report - Report trapped packet to drop monitor.
+ * @devlink: devlink.
+ * @skb: Trapped packet.
+ * @trap_ctx: Trap context.
+ * @in_devlink_port: Input devlink port.
+ */
+void devlink_trap_report(struct devlink *devlink, struct sk_buff *skb,
+ void *trap_ctx, struct devlink_port *in_devlink_port)
+{
+ struct devlink_trap_item *trap_item = trap_ctx;
+ struct net_dm_hw_metadata hw_metadata = {};
+
+ devlink_trap_stats_update(trap_item->stats, skb->len);
+ devlink_trap_stats_update(trap_item->group_item->stats, skb->len);
+
+ devlink_trap_report_metadata_fill(&hw_metadata, trap_item,
+ in_devlink_port);
+ net_dm_hw_report(skb, &hw_metadata);
+}
+EXPORT_SYMBOL_GPL(devlink_trap_report);
+
+/**
+ * devlink_trap_ctx_priv - Trap context to driver private information.
+ * @trap_ctx: Trap context.
+ *
+ * Return: Driver private information passed during registration.
+ */
+void *devlink_trap_ctx_priv(void *trap_ctx)
+{
+ struct devlink_trap_item *trap_item = trap_ctx;
+
+ return trap_item->priv;
+}
+EXPORT_SYMBOL_GPL(devlink_trap_ctx_priv);
+
static void __devlink_compat_running_version(struct devlink *devlink,
char *buf, size_t len)
{
@@ -6941,11 +8047,10 @@ int devlink_compat_switch_id_get(struct net_device *dev,
{
struct devlink_port *devlink_port;
- /* RTNL mutex is held here which ensures that devlink_port
- * instance cannot disappear in the middle. No need to take
+	/* Caller must hold the RTNL mutex or a reference to dev, which ensures
+	 * that the devlink_port instance cannot disappear in the middle. No need to take
* any devlink lock as only permanent values are accessed.
*/
- ASSERT_RTNL();
devlink_port = netdev_to_devlink_port(dev);
if (!devlink_port || !devlink_port->attrs.switch_port)
return -EOPNOTSUPP;
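
devlink_traps_register(), devlink_traps_unregister() and devlink_trap_report() above form the driver-facing side of the new trap API. A condensed, hypothetical driver sketch using only constants that appear in this patch; the foo_* symbols are made up, and the devlink instance is assumed to provide trap_init() and trap_action_set() ops:

#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/devlink.h>

static const struct devlink_trap foo_traps[] = {
	{
		.type = DEVLINK_TRAP_TYPE_DROP,
		.init_action = DEVLINK_TRAP_ACTION_DROP,
		.generic = true,
		.id = DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER,
		.name = DEVLINK_TRAP_GENERIC_NAME_INGRESS_VLAN_FILTER,
		.group = {
			.name = DEVLINK_TRAP_GROUP_GENERIC_NAME_L2_DROPS,
			.id = DEVLINK_TRAP_GROUP_GENERIC_ID_L2_DROPS,
			.generic = true,
		},
		.metadata_cap = DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT,
	},
};

static int foo_traps_init(struct devlink *devlink, void *priv)
{
	/* devlink->ops must implement trap_init() and trap_action_set() */
	return devlink_traps_register(devlink, foo_traps,
				      ARRAY_SIZE(foo_traps), priv);
}

/* Hypothetical rx handler for a packet the hardware dropped and trapped:
 * trap_ctx is the context pointer devlink handed to the driver's
 * trap_init(), and the report feeds drop monitor via net_dm_hw_report().
 */
static void foo_trap_rx(struct devlink *devlink, struct sk_buff *skb,
			void *trap_ctx, struct devlink_port *in_port)
{
	devlink_trap_report(devlink, skb, trap_ctx, in_port);
	consume_skb(skb);
}
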
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 4ea4347f5062..536e032d95c8 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -26,6 +26,7 @@
#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/module.h>
+#include <net/drop_monitor.h>
#include <net/genetlink.h>
#include <net/netevent.h>
@@ -43,13 +44,44 @@
* netlink alerts
*/
static int trace_state = TRACE_OFF;
-static DEFINE_MUTEX(trace_state_mutex);
+static bool monitor_hw;
+
+/* net_dm_mutex
+ *
+ * An overall lock guarding every operation coming from userspace.
+ * It also guards the global 'hw_stats_list' list.
+ */
+static DEFINE_MUTEX(net_dm_mutex);
+
+struct net_dm_stats {
+ u64 dropped;
+ struct u64_stats_sync syncp;
+};
+
+#define NET_DM_MAX_HW_TRAP_NAME_LEN 40
+
+struct net_dm_hw_entry {
+ char trap_name[NET_DM_MAX_HW_TRAP_NAME_LEN];
+ u32 count;
+};
+
+struct net_dm_hw_entries {
+ u32 num_entries;
+ struct net_dm_hw_entry entries[0];
+};
struct per_cpu_dm_data {
- spinlock_t lock;
- struct sk_buff *skb;
+ spinlock_t lock; /* Protects 'skb', 'hw_entries' and
+ * 'send_timer'
+ */
+ union {
+ struct sk_buff *skb;
+ struct net_dm_hw_entries *hw_entries;
+ };
+ struct sk_buff_head drop_queue;
struct work_struct dm_alert_work;
struct timer_list send_timer;
+ struct net_dm_stats stats;
};
struct dm_hw_stat_delta {
@@ -63,12 +95,37 @@ struct dm_hw_stat_delta {
static struct genl_family net_drop_monitor_family;
static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);
+static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_hw_cpu_data);
static int dm_hit_limit = 64;
static int dm_delay = 1;
static unsigned long dm_hw_check_delta = 2*HZ;
static LIST_HEAD(hw_stats_list);
+static enum net_dm_alert_mode net_dm_alert_mode = NET_DM_ALERT_MODE_SUMMARY;
+static u32 net_dm_trunc_len;
+static u32 net_dm_queue_len = 1000;
+
+struct net_dm_alert_ops {
+ void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb,
+ void *location);
+ void (*napi_poll_probe)(void *ignore, struct napi_struct *napi,
+ int work, int budget);
+ void (*work_item_func)(struct work_struct *work);
+ void (*hw_work_item_func)(struct work_struct *work);
+ void (*hw_probe)(struct sk_buff *skb,
+ const struct net_dm_hw_metadata *hw_metadata);
+};
+
+struct net_dm_skb_cb {
+ union {
+ struct net_dm_hw_metadata *hw_metadata;
+ void *pc;
+ };
+};
+
+#define NET_DM_SKB_CB(__skb) ((struct net_dm_skb_cb *)&((__skb)->cb[0]))
+
static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
{
size_t al;
@@ -235,48 +292,844 @@ static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
rcu_read_unlock();
}
-static int set_all_monitor_traces(int state)
+static struct net_dm_hw_entries *
+net_dm_hw_reset_per_cpu_data(struct per_cpu_dm_data *hw_data)
{
- int rc = 0;
- struct dm_hw_stat_delta *new_stat = NULL;
- struct dm_hw_stat_delta *temp;
+ struct net_dm_hw_entries *hw_entries;
+ unsigned long flags;
+
+ hw_entries = kzalloc(struct_size(hw_entries, entries, dm_hit_limit),
+ GFP_KERNEL);
+ if (!hw_entries) {
+ /* If the memory allocation failed, we try to perform another
+ * allocation in 1/10 second. Otherwise, the probe function
+ * will constantly bail out.
+ */
+ mod_timer(&hw_data->send_timer, jiffies + HZ / 10);
+ }
- mutex_lock(&trace_state_mutex);
+ spin_lock_irqsave(&hw_data->lock, flags);
+ swap(hw_data->hw_entries, hw_entries);
+ spin_unlock_irqrestore(&hw_data->lock, flags);
- if (state == trace_state) {
- rc = -EAGAIN;
- goto out_unlock;
+ return hw_entries;
+}
+
+static int net_dm_hw_entry_put(struct sk_buff *msg,
+ const struct net_dm_hw_entry *hw_entry)
+{
+ struct nlattr *attr;
+
+ attr = nla_nest_start(msg, NET_DM_ATTR_HW_ENTRY);
+ if (!attr)
+ return -EMSGSIZE;
+
+ if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_NAME, hw_entry->trap_name))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, NET_DM_ATTR_HW_TRAP_COUNT, hw_entry->count))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, attr);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, attr);
+ return -EMSGSIZE;
+}
+
+static int net_dm_hw_entries_put(struct sk_buff *msg,
+ const struct net_dm_hw_entries *hw_entries)
+{
+ struct nlattr *attr;
+ int i;
+
+ attr = nla_nest_start(msg, NET_DM_ATTR_HW_ENTRIES);
+ if (!attr)
+ return -EMSGSIZE;
+
+ for (i = 0; i < hw_entries->num_entries; i++) {
+ int rc;
+
+ rc = net_dm_hw_entry_put(msg, &hw_entries->entries[i]);
+ if (rc)
+ goto nla_put_failure;
}
- switch (state) {
- case TRACE_ON:
- if (!try_module_get(THIS_MODULE)) {
- rc = -ENODEV;
- break;
+ nla_nest_end(msg, attr);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, attr);
+ return -EMSGSIZE;
+}
+
+static int
+net_dm_hw_summary_report_fill(struct sk_buff *msg,
+ const struct net_dm_hw_entries *hw_entries)
+{
+ struct net_dm_alert_msg anc_hdr = { 0 };
+ void *hdr;
+ int rc;
+
+ hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0,
+ NET_DM_CMD_ALERT);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ /* We need to put the ancillary header in order not to break user
+ * space.
+ */
+ if (nla_put(msg, NLA_UNSPEC, sizeof(anc_hdr), &anc_hdr))
+ goto nla_put_failure;
+
+ rc = net_dm_hw_entries_put(msg, hw_entries);
+ if (rc)
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static void net_dm_hw_summary_work(struct work_struct *work)
+{
+ struct net_dm_hw_entries *hw_entries;
+ struct per_cpu_dm_data *hw_data;
+ struct sk_buff *msg;
+ int rc;
+
+ hw_data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
+
+ hw_entries = net_dm_hw_reset_per_cpu_data(hw_data);
+ if (!hw_entries)
+ return;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ goto out;
+
+ rc = net_dm_hw_summary_report_fill(msg, hw_entries);
+ if (rc) {
+ nlmsg_free(msg);
+ goto out;
+ }
+
+ genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL);
+
+out:
+ kfree(hw_entries);
+}
+
+static void
+net_dm_hw_summary_probe(struct sk_buff *skb,
+ const struct net_dm_hw_metadata *hw_metadata)
+{
+ struct net_dm_hw_entries *hw_entries;
+ struct net_dm_hw_entry *hw_entry;
+ struct per_cpu_dm_data *hw_data;
+ unsigned long flags;
+ int i;
+
+ hw_data = this_cpu_ptr(&dm_hw_cpu_data);
+ spin_lock_irqsave(&hw_data->lock, flags);
+ hw_entries = hw_data->hw_entries;
+
+ if (!hw_entries)
+ goto out;
+
+ for (i = 0; i < hw_entries->num_entries; i++) {
+ hw_entry = &hw_entries->entries[i];
+ if (!strncmp(hw_entry->trap_name, hw_metadata->trap_name,
+ NET_DM_MAX_HW_TRAP_NAME_LEN - 1)) {
+ hw_entry->count++;
+ goto out;
}
+ }
+ if (WARN_ON_ONCE(hw_entries->num_entries == dm_hit_limit))
+ goto out;
- rc |= register_trace_kfree_skb(trace_kfree_skb_hit, NULL);
- rc |= register_trace_napi_poll(trace_napi_poll_hit, NULL);
- break;
+ hw_entry = &hw_entries->entries[hw_entries->num_entries];
+ strlcpy(hw_entry->trap_name, hw_metadata->trap_name,
+ NET_DM_MAX_HW_TRAP_NAME_LEN - 1);
+ hw_entry->count = 1;
+ hw_entries->num_entries++;
- case TRACE_OFF:
- rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
- rc |= unregister_trace_napi_poll(trace_napi_poll_hit, NULL);
+ if (!timer_pending(&hw_data->send_timer)) {
+ hw_data->send_timer.expires = jiffies + dm_delay * HZ;
+ add_timer(&hw_data->send_timer);
+ }
- tracepoint_synchronize_unregister();
+out:
+ spin_unlock_irqrestore(&hw_data->lock, flags);
+}
- /*
- * Clean the device list
+static const struct net_dm_alert_ops net_dm_alert_summary_ops = {
+ .kfree_skb_probe = trace_kfree_skb_hit,
+ .napi_poll_probe = trace_napi_poll_hit,
+ .work_item_func = send_dm_alert,
+ .hw_work_item_func = net_dm_hw_summary_work,
+ .hw_probe = net_dm_hw_summary_probe,
+};
+
+static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
+ struct sk_buff *skb,
+ void *location)
+{
+ ktime_t tstamp = ktime_get_real();
+ struct per_cpu_dm_data *data;
+ struct sk_buff *nskb;
+ unsigned long flags;
+
+ if (!skb_mac_header_was_set(skb))
+ return;
+
+ nskb = skb_clone(skb, GFP_ATOMIC);
+ if (!nskb)
+ return;
+
+ NET_DM_SKB_CB(nskb)->pc = location;
+ /* Override the timestamp because we care about the time when the
+ * packet was dropped.
+ */
+ nskb->tstamp = tstamp;
+
+ data = this_cpu_ptr(&dm_cpu_data);
+
+ spin_lock_irqsave(&data->drop_queue.lock, flags);
+ if (skb_queue_len(&data->drop_queue) < net_dm_queue_len)
+ __skb_queue_tail(&data->drop_queue, nskb);
+ else
+ goto unlock_free;
+ spin_unlock_irqrestore(&data->drop_queue.lock, flags);
+
+ schedule_work(&data->dm_alert_work);
+
+ return;
+
+unlock_free:
+ spin_unlock_irqrestore(&data->drop_queue.lock, flags);
+ u64_stats_update_begin(&data->stats.syncp);
+ data->stats.dropped++;
+ u64_stats_update_end(&data->stats.syncp);
+ consume_skb(nskb);
+}
+
+static void net_dm_packet_trace_napi_poll_hit(void *ignore,
+ struct napi_struct *napi,
+ int work, int budget)
+{
+}
+
+static size_t net_dm_in_port_size(void)
+{
+ /* NET_DM_ATTR_IN_PORT nest */
+ return nla_total_size(0) +
+ /* NET_DM_ATTR_PORT_NETDEV_IFINDEX */
+ nla_total_size(sizeof(u32)) +
+ /* NET_DM_ATTR_PORT_NETDEV_NAME */
+ nla_total_size(IFNAMSIZ + 1);
+}
+
+#define NET_DM_MAX_SYMBOL_LEN 40
+
+static size_t net_dm_packet_report_size(size_t payload_len)
+{
+ size_t size;
+
+ size = nlmsg_msg_size(GENL_HDRLEN + net_drop_monitor_family.hdrsize);
+
+ return NLMSG_ALIGN(size) +
+ /* NET_DM_ATTR_ORIGIN */
+ nla_total_size(sizeof(u16)) +
+ /* NET_DM_ATTR_PC */
+ nla_total_size(sizeof(u64)) +
+ /* NET_DM_ATTR_SYMBOL */
+ nla_total_size(NET_DM_MAX_SYMBOL_LEN + 1) +
+ /* NET_DM_ATTR_IN_PORT */
+ net_dm_in_port_size() +
+ /* NET_DM_ATTR_TIMESTAMP */
+ nla_total_size(sizeof(u64)) +
+ /* NET_DM_ATTR_ORIG_LEN */
+ nla_total_size(sizeof(u32)) +
+ /* NET_DM_ATTR_PROTO */
+ nla_total_size(sizeof(u16)) +
+ /* NET_DM_ATTR_PAYLOAD */
+ nla_total_size(payload_len);
+}
+
+static int net_dm_packet_report_in_port_put(struct sk_buff *msg, int ifindex,
+ const char *name)
+{
+ struct nlattr *attr;
+
+ attr = nla_nest_start(msg, NET_DM_ATTR_IN_PORT);
+ if (!attr)
+ return -EMSGSIZE;
+
+ if (ifindex &&
+ nla_put_u32(msg, NET_DM_ATTR_PORT_NETDEV_IFINDEX, ifindex))
+ goto nla_put_failure;
+
+ if (name && nla_put_string(msg, NET_DM_ATTR_PORT_NETDEV_NAME, name))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, attr);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, attr);
+ return -EMSGSIZE;
+}
+
+static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
+ size_t payload_len)
+{
+ u64 pc = (u64)(uintptr_t) NET_DM_SKB_CB(skb)->pc;
+ char buf[NET_DM_MAX_SYMBOL_LEN];
+ struct nlattr *attr;
+ void *hdr;
+ int rc;
+
+ hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0,
+ NET_DM_CMD_PACKET_ALERT);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_SW))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, NET_DM_ATTR_PC, pc, NET_DM_ATTR_PAD))
+ goto nla_put_failure;
+
+ snprintf(buf, sizeof(buf), "%pS", NET_DM_SKB_CB(skb)->pc);
+ if (nla_put_string(msg, NET_DM_ATTR_SYMBOL, buf))
+ goto nla_put_failure;
+
+ rc = net_dm_packet_report_in_port_put(msg, skb->skb_iif, NULL);
+ if (rc)
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, NET_DM_ATTR_TIMESTAMP,
+ ktime_to_ns(skb->tstamp), NET_DM_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, NET_DM_ATTR_ORIG_LEN, skb->len))
+ goto nla_put_failure;
+
+ if (!payload_len)
+ goto out;
+
+ if (nla_put_u16(msg, NET_DM_ATTR_PROTO, be16_to_cpu(skb->protocol)))
+ goto nla_put_failure;
+
+ attr = skb_put(msg, nla_total_size(payload_len));
+ attr->nla_type = NET_DM_ATTR_PAYLOAD;
+ attr->nla_len = nla_attr_size(payload_len);
+ if (skb_copy_bits(skb, 0, nla_data(attr), payload_len))
+ goto nla_put_failure;
+
+out:
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+#define NET_DM_MAX_PACKET_SIZE (0xffff - NLA_HDRLEN - NLA_ALIGNTO)
+
+static void net_dm_packet_report(struct sk_buff *skb)
+{
+ struct sk_buff *msg;
+ size_t payload_len;
+ int rc;
+
+ /* Make sure we start copying the packet from the MAC header */
+ if (skb->data > skb_mac_header(skb))
+ skb_push(skb, skb->data - skb_mac_header(skb));
+ else
+ skb_pull(skb, skb_mac_header(skb) - skb->data);
+
+ /* Ensure packet fits inside a single netlink attribute */
+ payload_len = min_t(size_t, skb->len, NET_DM_MAX_PACKET_SIZE);
+ if (net_dm_trunc_len)
+ payload_len = min_t(size_t, net_dm_trunc_len, payload_len);
+
+ msg = nlmsg_new(net_dm_packet_report_size(payload_len), GFP_KERNEL);
+ if (!msg)
+ goto out;
+
+ rc = net_dm_packet_report_fill(msg, skb, payload_len);
+ if (rc) {
+ nlmsg_free(msg);
+ goto out;
+ }
+
+ genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL);
+
+out:
+ consume_skb(skb);
+}
+
+static void net_dm_packet_work(struct work_struct *work)
+{
+ struct per_cpu_dm_data *data;
+ struct sk_buff_head list;
+ struct sk_buff *skb;
+ unsigned long flags;
+
+ data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
+
+ __skb_queue_head_init(&list);
+
+ spin_lock_irqsave(&data->drop_queue.lock, flags);
+ skb_queue_splice_tail_init(&data->drop_queue, &list);
+ spin_unlock_irqrestore(&data->drop_queue.lock, flags);
+
+ while ((skb = __skb_dequeue(&list)))
+ net_dm_packet_report(skb);
+}
+
+static size_t
+net_dm_hw_packet_report_size(size_t payload_len,
+ const struct net_dm_hw_metadata *hw_metadata)
+{
+ size_t size;
+
+ size = nlmsg_msg_size(GENL_HDRLEN + net_drop_monitor_family.hdrsize);
+
+ return NLMSG_ALIGN(size) +
+ /* NET_DM_ATTR_ORIGIN */
+ nla_total_size(sizeof(u16)) +
+ /* NET_DM_ATTR_HW_TRAP_GROUP_NAME */
+ nla_total_size(strlen(hw_metadata->trap_group_name) + 1) +
+ /* NET_DM_ATTR_HW_TRAP_NAME */
+ nla_total_size(strlen(hw_metadata->trap_name) + 1) +
+ /* NET_DM_ATTR_IN_PORT */
+ net_dm_in_port_size() +
+ /* NET_DM_ATTR_TIMESTAMP */
+ nla_total_size(sizeof(u64)) +
+ /* NET_DM_ATTR_ORIG_LEN */
+ nla_total_size(sizeof(u32)) +
+ /* NET_DM_ATTR_PROTO */
+ nla_total_size(sizeof(u16)) +
+ /* NET_DM_ATTR_PAYLOAD */
+ nla_total_size(payload_len);
+}
+
+static int net_dm_hw_packet_report_fill(struct sk_buff *msg,
+ struct sk_buff *skb, size_t payload_len)
+{
+ struct net_dm_hw_metadata *hw_metadata;
+ struct nlattr *attr;
+ void *hdr;
+
+ hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
+
+ hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0,
+ NET_DM_CMD_PACKET_ALERT);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_HW))
+ goto nla_put_failure;
+
+ if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_GROUP_NAME,
+ hw_metadata->trap_group_name))
+ goto nla_put_failure;
+
+ if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_NAME,
+ hw_metadata->trap_name))
+ goto nla_put_failure;
+
+ if (hw_metadata->input_dev) {
+ struct net_device *dev = hw_metadata->input_dev;
+ int rc;
+
+ rc = net_dm_packet_report_in_port_put(msg, dev->ifindex,
+ dev->name);
+ if (rc)
+ goto nla_put_failure;
+ }
+
+ if (nla_put_u64_64bit(msg, NET_DM_ATTR_TIMESTAMP,
+ ktime_to_ns(skb->tstamp), NET_DM_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, NET_DM_ATTR_ORIG_LEN, skb->len))
+ goto nla_put_failure;
+
+ if (!payload_len)
+ goto out;
+
+ if (nla_put_u16(msg, NET_DM_ATTR_PROTO, be16_to_cpu(skb->protocol)))
+ goto nla_put_failure;
+
+ attr = skb_put(msg, nla_total_size(payload_len));
+ attr->nla_type = NET_DM_ATTR_PAYLOAD;
+ attr->nla_len = nla_attr_size(payload_len);
+ if (skb_copy_bits(skb, 0, nla_data(attr), payload_len))
+ goto nla_put_failure;
+
+out:
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static struct net_dm_hw_metadata *
+net_dm_hw_metadata_clone(const struct net_dm_hw_metadata *hw_metadata)
+{
+ struct net_dm_hw_metadata *n_hw_metadata;
+ const char *trap_group_name;
+ const char *trap_name;
+
+ n_hw_metadata = kmalloc(sizeof(*hw_metadata), GFP_ATOMIC);
+ if (!n_hw_metadata)
+ return NULL;
+
+ trap_group_name = kmemdup(hw_metadata->trap_group_name,
+ strlen(hw_metadata->trap_group_name) + 1,
+ GFP_ATOMIC | __GFP_ZERO);
+ if (!trap_group_name)
+ goto free_hw_metadata;
+ n_hw_metadata->trap_group_name = trap_group_name;
+
+ trap_name = kmemdup(hw_metadata->trap_name,
+ strlen(hw_metadata->trap_name) + 1,
+ GFP_ATOMIC | __GFP_ZERO);
+ if (!trap_name)
+ goto free_trap_group;
+ n_hw_metadata->trap_name = trap_name;
+
+ n_hw_metadata->input_dev = hw_metadata->input_dev;
+ if (n_hw_metadata->input_dev)
+ dev_hold(n_hw_metadata->input_dev);
+
+ return n_hw_metadata;
+
+free_trap_group:
+ kfree(trap_group_name);
+free_hw_metadata:
+ kfree(n_hw_metadata);
+ return NULL;
+}
+
+static void
+net_dm_hw_metadata_free(const struct net_dm_hw_metadata *hw_metadata)
+{
+ if (hw_metadata->input_dev)
+ dev_put(hw_metadata->input_dev);
+ kfree(hw_metadata->trap_name);
+ kfree(hw_metadata->trap_group_name);
+ kfree(hw_metadata);
+}
+
+static void net_dm_hw_packet_report(struct sk_buff *skb)
+{
+ struct net_dm_hw_metadata *hw_metadata;
+ struct sk_buff *msg;
+ size_t payload_len;
+ int rc;
+
+ if (skb->data > skb_mac_header(skb))
+ skb_push(skb, skb->data - skb_mac_header(skb));
+ else
+ skb_pull(skb, skb_mac_header(skb) - skb->data);
+
+ payload_len = min_t(size_t, skb->len, NET_DM_MAX_PACKET_SIZE);
+ if (net_dm_trunc_len)
+ payload_len = min_t(size_t, net_dm_trunc_len, payload_len);
+
+ hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
+ msg = nlmsg_new(net_dm_hw_packet_report_size(payload_len, hw_metadata),
+ GFP_KERNEL);
+ if (!msg)
+ goto out;
+
+ rc = net_dm_hw_packet_report_fill(msg, skb, payload_len);
+ if (rc) {
+ nlmsg_free(msg);
+ goto out;
+ }
+
+ genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL);
+
+out:
+ net_dm_hw_metadata_free(NET_DM_SKB_CB(skb)->hw_metadata);
+ consume_skb(skb);
+}
+
+static void net_dm_hw_packet_work(struct work_struct *work)
+{
+ struct per_cpu_dm_data *hw_data;
+ struct sk_buff_head list;
+ struct sk_buff *skb;
+ unsigned long flags;
+
+ hw_data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
+
+ __skb_queue_head_init(&list);
+
+ spin_lock_irqsave(&hw_data->drop_queue.lock, flags);
+ skb_queue_splice_tail_init(&hw_data->drop_queue, &list);
+ spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags);
+
+ while ((skb = __skb_dequeue(&list)))
+ net_dm_hw_packet_report(skb);
+}
+
+static void
+net_dm_hw_packet_probe(struct sk_buff *skb,
+ const struct net_dm_hw_metadata *hw_metadata)
+{
+ struct net_dm_hw_metadata *n_hw_metadata;
+ ktime_t tstamp = ktime_get_real();
+ struct per_cpu_dm_data *hw_data;
+ struct sk_buff *nskb;
+ unsigned long flags;
+
+ if (!skb_mac_header_was_set(skb))
+ return;
+
+ nskb = skb_clone(skb, GFP_ATOMIC);
+ if (!nskb)
+ return;
+
+ n_hw_metadata = net_dm_hw_metadata_clone(hw_metadata);
+ if (!n_hw_metadata)
+ goto free;
+
+ NET_DM_SKB_CB(nskb)->hw_metadata = n_hw_metadata;
+ nskb->tstamp = tstamp;
+
+ hw_data = this_cpu_ptr(&dm_hw_cpu_data);
+
+ spin_lock_irqsave(&hw_data->drop_queue.lock, flags);
+ if (skb_queue_len(&hw_data->drop_queue) < net_dm_queue_len)
+ __skb_queue_tail(&hw_data->drop_queue, nskb);
+ else
+ goto unlock_free;
+ spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags);
+
+ schedule_work(&hw_data->dm_alert_work);
+
+ return;
+
+unlock_free:
+ spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags);
+ u64_stats_update_begin(&hw_data->stats.syncp);
+ hw_data->stats.dropped++;
+ u64_stats_update_end(&hw_data->stats.syncp);
+ net_dm_hw_metadata_free(n_hw_metadata);
+free:
+ consume_skb(nskb);
+}
+
+static const struct net_dm_alert_ops net_dm_alert_packet_ops = {
+ .kfree_skb_probe = net_dm_packet_trace_kfree_skb_hit,
+ .napi_poll_probe = net_dm_packet_trace_napi_poll_hit,
+ .work_item_func = net_dm_packet_work,
+ .hw_work_item_func = net_dm_hw_packet_work,
+ .hw_probe = net_dm_hw_packet_probe,
+};
+
+static const struct net_dm_alert_ops *net_dm_alert_ops_arr[] = {
+ [NET_DM_ALERT_MODE_SUMMARY] = &net_dm_alert_summary_ops,
+ [NET_DM_ALERT_MODE_PACKET] = &net_dm_alert_packet_ops,
+};
+
+void net_dm_hw_report(struct sk_buff *skb,
+ const struct net_dm_hw_metadata *hw_metadata)
+{
+ rcu_read_lock();
+
+ if (!monitor_hw)
+ goto out;
+
+ net_dm_alert_ops_arr[net_dm_alert_mode]->hw_probe(skb, hw_metadata);
+
+out:
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(net_dm_hw_report);
+
+static int net_dm_hw_monitor_start(struct netlink_ext_ack *extack)
+{
+ const struct net_dm_alert_ops *ops;
+ int cpu;
+
+ if (monitor_hw) {
+ NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already enabled");
+ return -EAGAIN;
+ }
+
+ ops = net_dm_alert_ops_arr[net_dm_alert_mode];
+
+ if (!try_module_get(THIS_MODULE)) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module");
+ return -ENODEV;
+ }
+
+ for_each_possible_cpu(cpu) {
+ struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
+ struct net_dm_hw_entries *hw_entries;
+
+ INIT_WORK(&hw_data->dm_alert_work, ops->hw_work_item_func);
+ timer_setup(&hw_data->send_timer, sched_send_work, 0);
+ hw_entries = net_dm_hw_reset_per_cpu_data(hw_data);
+ kfree(hw_entries);
+ }
+
+ monitor_hw = true;
+
+ return 0;
+}
+
+static void net_dm_hw_monitor_stop(struct netlink_ext_ack *extack)
+{
+ int cpu;
+
+ if (!monitor_hw)
+ NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already disabled");
+
+ monitor_hw = false;
+
+ /* After this call returns we are guaranteed that no CPU is processing
+ * any hardware drops.
+ */
+ synchronize_rcu();
+
+ for_each_possible_cpu(cpu) {
+ struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
+ struct sk_buff *skb;
+
+ del_timer_sync(&hw_data->send_timer);
+ cancel_work_sync(&hw_data->dm_alert_work);
+ while ((skb = __skb_dequeue(&hw_data->drop_queue))) {
+ struct net_dm_hw_metadata *hw_metadata;
+
+ hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
+ net_dm_hw_metadata_free(hw_metadata);
+ consume_skb(skb);
+ }
+ }
+
+ module_put(THIS_MODULE);
+}
+
+static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
+{
+ const struct net_dm_alert_ops *ops;
+ int cpu, rc;
+
+ ops = net_dm_alert_ops_arr[net_dm_alert_mode];
+
+ if (!try_module_get(THIS_MODULE)) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module");
+ return -ENODEV;
+ }
+
+ for_each_possible_cpu(cpu) {
+ struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
+ struct sk_buff *skb;
+
+ INIT_WORK(&data->dm_alert_work, ops->work_item_func);
+ timer_setup(&data->send_timer, sched_send_work, 0);
+ /* Allocate a new per-CPU skb for the summary alert message and
+ * free the old one which might contain stale data from
+ * previous tracing.
*/
- list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
- if (new_stat->dev == NULL) {
- list_del_rcu(&new_stat->list);
- kfree_rcu(new_stat, rcu);
- }
+ skb = reset_per_cpu_data(data);
+ consume_skb(skb);
+ }
+
+ rc = register_trace_kfree_skb(ops->kfree_skb_probe, NULL);
+ if (rc) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to kfree_skb() tracepoint");
+ goto err_module_put;
+ }
+
+ rc = register_trace_napi_poll(ops->napi_poll_probe, NULL);
+ if (rc) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to napi_poll() tracepoint");
+ goto err_unregister_trace;
+ }
+
+ return 0;
+
+err_unregister_trace:
+ unregister_trace_kfree_skb(ops->kfree_skb_probe, NULL);
+err_module_put:
+ module_put(THIS_MODULE);
+ return rc;
+}
+
+static void net_dm_trace_off_set(void)
+{
+ struct dm_hw_stat_delta *new_stat, *temp;
+ const struct net_dm_alert_ops *ops;
+ int cpu;
+
+ ops = net_dm_alert_ops_arr[net_dm_alert_mode];
+
+ unregister_trace_napi_poll(ops->napi_poll_probe, NULL);
+ unregister_trace_kfree_skb(ops->kfree_skb_probe, NULL);
+
+ tracepoint_synchronize_unregister();
+
+ /* Make sure we do not send notifications to user space after the
+ * request to stop tracing returns.
+ */
+ for_each_possible_cpu(cpu) {
+ struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
+ struct sk_buff *skb;
+
+ del_timer_sync(&data->send_timer);
+ cancel_work_sync(&data->dm_alert_work);
+ while ((skb = __skb_dequeue(&data->drop_queue)))
+ consume_skb(skb);
+ }
+
+ list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
+ if (new_stat->dev == NULL) {
+ list_del_rcu(&new_stat->list);
+ kfree_rcu(new_stat, rcu);
}
+ }
+
+ module_put(THIS_MODULE);
+}
+
+static int set_all_monitor_traces(int state, struct netlink_ext_ack *extack)
+{
+ int rc = 0;
- module_put(THIS_MODULE);
+ if (state == trace_state) {
+ NL_SET_ERR_MSG_MOD(extack, "Trace state already set to requested state");
+ return -EAGAIN;
+ }
+ switch (state) {
+ case TRACE_ON:
+ rc = net_dm_trace_on_set(extack);
+ break;
+ case TRACE_OFF:
+ net_dm_trace_off_set();
break;
default:
rc = 1;
@@ -288,30 +1141,331 @@ static int set_all_monitor_traces(int state)
else
rc = -EINPROGRESS;
-out_unlock:
- mutex_unlock(&trace_state_mutex);
-
return rc;
}
+static bool net_dm_is_monitoring(void)
+{
+ return trace_state == TRACE_ON || monitor_hw;
+}
+
+static int net_dm_alert_mode_get_from_info(struct genl_info *info,
+ enum net_dm_alert_mode *p_alert_mode)
+{
+ u8 val;
+
+ val = nla_get_u8(info->attrs[NET_DM_ATTR_ALERT_MODE]);
+
+ switch (val) {
+ case NET_DM_ALERT_MODE_SUMMARY: /* fall-through */
+ case NET_DM_ALERT_MODE_PACKET:
+ *p_alert_mode = val;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int net_dm_alert_mode_set(struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ enum net_dm_alert_mode alert_mode;
+ int rc;
+
+ if (!info->attrs[NET_DM_ATTR_ALERT_MODE])
+ return 0;
+
+ rc = net_dm_alert_mode_get_from_info(info, &alert_mode);
+ if (rc) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid alert mode");
+ return -EINVAL;
+ }
+
+ net_dm_alert_mode = alert_mode;
+
+ return 0;
+}
+
+static void net_dm_trunc_len_set(struct genl_info *info)
+{
+ if (!info->attrs[NET_DM_ATTR_TRUNC_LEN])
+ return;
+
+ net_dm_trunc_len = nla_get_u32(info->attrs[NET_DM_ATTR_TRUNC_LEN]);
+}
+
+static void net_dm_queue_len_set(struct genl_info *info)
+{
+ if (!info->attrs[NET_DM_ATTR_QUEUE_LEN])
+ return;
+
+ net_dm_queue_len = nla_get_u32(info->attrs[NET_DM_ATTR_QUEUE_LEN]);
+}
static int net_dm_cmd_config(struct sk_buff *skb,
struct genl_info *info)
{
- return -ENOTSUPP;
+ struct netlink_ext_ack *extack = info->extack;
+ int rc;
+
+ if (net_dm_is_monitoring()) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot configure drop monitor during monitoring");
+ return -EBUSY;
+ }
+
+ rc = net_dm_alert_mode_set(info);
+ if (rc)
+ return rc;
+
+ net_dm_trunc_len_set(info);
+
+ net_dm_queue_len_set(info);
+
+ return 0;
+}
+
+static int net_dm_monitor_start(bool set_sw, bool set_hw,
+ struct netlink_ext_ack *extack)
+{
+ bool sw_set = false;
+ int rc;
+
+ if (set_sw) {
+ rc = set_all_monitor_traces(TRACE_ON, extack);
+ if (rc)
+ return rc;
+ sw_set = true;
+ }
+
+ if (set_hw) {
+ rc = net_dm_hw_monitor_start(extack);
+ if (rc)
+ goto err_monitor_hw;
+ }
+
+ return 0;
+
+err_monitor_hw:
+ if (sw_set)
+ set_all_monitor_traces(TRACE_OFF, extack);
+ return rc;
+}
+
+static void net_dm_monitor_stop(bool set_sw, bool set_hw,
+ struct netlink_ext_ack *extack)
+{
+ if (set_hw)
+ net_dm_hw_monitor_stop(extack);
+ if (set_sw)
+ set_all_monitor_traces(TRACE_OFF, extack);
}
static int net_dm_cmd_trace(struct sk_buff *skb,
struct genl_info *info)
{
+ bool set_sw = !!info->attrs[NET_DM_ATTR_SW_DROPS];
+ bool set_hw = !!info->attrs[NET_DM_ATTR_HW_DROPS];
+ struct netlink_ext_ack *extack = info->extack;
+
+ /* To maintain backward compatibility, we start / stop monitoring of
+ * software drops if no flag is specified.
+ */
+ if (!set_sw && !set_hw)
+ set_sw = true;
+
switch (info->genlhdr->cmd) {
case NET_DM_CMD_START:
- return set_all_monitor_traces(TRACE_ON);
+ return net_dm_monitor_start(set_sw, set_hw, extack);
case NET_DM_CMD_STOP:
- return set_all_monitor_traces(TRACE_OFF);
+ net_dm_monitor_stop(set_sw, set_hw, extack);
+ return 0;
}
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
+}
+
+static int net_dm_config_fill(struct sk_buff *msg, struct genl_info *info)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
+ &net_drop_monitor_family, 0, NET_DM_CMD_CONFIG_NEW);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (nla_put_u8(msg, NET_DM_ATTR_ALERT_MODE, net_dm_alert_mode))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, NET_DM_ATTR_TRUNC_LEN, net_dm_trunc_len))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, NET_DM_ATTR_QUEUE_LEN, net_dm_queue_len))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static int net_dm_cmd_config_get(struct sk_buff *skb, struct genl_info *info)
+{
+ struct sk_buff *msg;
+ int rc;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ rc = net_dm_config_fill(msg, info);
+ if (rc)
+ goto free_msg;
+
+ return genlmsg_reply(msg, info);
+
+free_msg:
+ nlmsg_free(msg);
+ return rc;
+}
+
+static void net_dm_stats_read(struct net_dm_stats *stats)
+{
+ int cpu;
+
+ memset(stats, 0, sizeof(*stats));
+ for_each_possible_cpu(cpu) {
+ struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
+ struct net_dm_stats *cpu_stats = &data->stats;
+ unsigned int start;
+ u64 dropped;
+
+ do {
+ start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
+ dropped = cpu_stats->dropped;
+ } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
+
+ stats->dropped += dropped;
+ }
+}
+
+static int net_dm_stats_put(struct sk_buff *msg)
+{
+ struct net_dm_stats stats;
+ struct nlattr *attr;
+
+ net_dm_stats_read(&stats);
+
+ attr = nla_nest_start(msg, NET_DM_ATTR_STATS);
+ if (!attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED,
+ stats.dropped, NET_DM_ATTR_PAD))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, attr);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, attr);
+ return -EMSGSIZE;
+}
+
+static void net_dm_hw_stats_read(struct net_dm_stats *stats)
+{
+ int cpu;
+
+ memset(stats, 0, sizeof(*stats));
+ for_each_possible_cpu(cpu) {
+ struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
+ struct net_dm_stats *cpu_stats = &hw_data->stats;
+ unsigned int start;
+ u64 dropped;
+
+ do {
+ start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
+ dropped = cpu_stats->dropped;
+ } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
+
+ stats->dropped += dropped;
+ }
+}
+
+static int net_dm_hw_stats_put(struct sk_buff *msg)
+{
+ struct net_dm_stats stats;
+ struct nlattr *attr;
+
+ net_dm_hw_stats_read(&stats);
+
+ attr = nla_nest_start(msg, NET_DM_ATTR_HW_STATS);
+ if (!attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED,
+ stats.dropped, NET_DM_ATTR_PAD))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, attr);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, attr);
+ return -EMSGSIZE;
+}
+
+static int net_dm_stats_fill(struct sk_buff *msg, struct genl_info *info)
+{
+ void *hdr;
+ int rc;
+
+ hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
+ &net_drop_monitor_family, 0, NET_DM_CMD_STATS_NEW);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ rc = net_dm_stats_put(msg);
+ if (rc)
+ goto nla_put_failure;
+
+ rc = net_dm_hw_stats_put(msg);
+ if (rc)
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static int net_dm_cmd_stats_get(struct sk_buff *skb, struct genl_info *info)
+{
+ struct sk_buff *msg;
+ int rc;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ rc = net_dm_stats_fill(msg, info);
+ if (rc)
+ goto free_msg;
+
+ return genlmsg_reply(msg, info);
+
+free_msg:
+ nlmsg_free(msg);
+ return rc;
}
static int dropmon_net_event(struct notifier_block *ev_block,
@@ -330,12 +1484,12 @@ static int dropmon_net_event(struct notifier_block *ev_block,
new_stat->dev = dev;
new_stat->last_rx = jiffies;
- mutex_lock(&trace_state_mutex);
+ mutex_lock(&net_dm_mutex);
list_add_rcu(&new_stat->list, &hw_stats_list);
- mutex_unlock(&trace_state_mutex);
+ mutex_unlock(&net_dm_mutex);
break;
case NETDEV_UNREGISTER:
- mutex_lock(&trace_state_mutex);
+ mutex_lock(&net_dm_mutex);
list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) {
if (new_stat->dev == dev) {
new_stat->dev = NULL;
@@ -346,18 +1500,28 @@ static int dropmon_net_event(struct notifier_block *ev_block,
}
}
}
- mutex_unlock(&trace_state_mutex);
+ mutex_unlock(&net_dm_mutex);
break;
}
out:
return NOTIFY_DONE;
}
+static const struct nla_policy net_dm_nl_policy[NET_DM_ATTR_MAX + 1] = {
+ [NET_DM_ATTR_UNSPEC] = { .strict_start_type = NET_DM_ATTR_UNSPEC + 1 },
+ [NET_DM_ATTR_ALERT_MODE] = { .type = NLA_U8 },
+ [NET_DM_ATTR_TRUNC_LEN] = { .type = NLA_U32 },
+ [NET_DM_ATTR_QUEUE_LEN] = { .type = NLA_U32 },
+ [NET_DM_ATTR_SW_DROPS] = { .type = NLA_FLAG },
+ [NET_DM_ATTR_HW_DROPS] = { .type = NLA_FLAG },
+};
+
static const struct genl_ops dropmon_ops[] = {
{
.cmd = NET_DM_CMD_CONFIG,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = net_dm_cmd_config,
+ .flags = GENL_ADMIN_PERM,
},
{
.cmd = NET_DM_CMD_START,
@@ -369,12 +1533,38 @@ static const struct genl_ops dropmon_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = net_dm_cmd_trace,
},
+ {
+ .cmd = NET_DM_CMD_CONFIG_GET,
+ .doit = net_dm_cmd_config_get,
+ },
+ {
+ .cmd = NET_DM_CMD_STATS_GET,
+ .doit = net_dm_cmd_stats_get,
+ },
};
+static int net_dm_nl_pre_doit(const struct genl_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ mutex_lock(&net_dm_mutex);
+
+ return 0;
+}
+
+static void net_dm_nl_post_doit(const struct genl_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ mutex_unlock(&net_dm_mutex);
+}
+
static struct genl_family net_drop_monitor_family __ro_after_init = {
.hdrsize = 0,
.name = "NET_DM",
.version = 2,
+ .maxattr = NET_DM_ATTR_MAX,
+ .policy = net_dm_nl_policy,
+ .pre_doit = net_dm_nl_pre_doit,
+ .post_doit = net_dm_nl_post_doit,
.module = THIS_MODULE,
.ops = dropmon_ops,
.n_ops = ARRAY_SIZE(dropmon_ops),
@@ -386,9 +1576,57 @@ static struct notifier_block dropmon_net_notifier = {
.notifier_call = dropmon_net_event
};
-static int __init init_net_drop_monitor(void)
+static void __net_dm_cpu_data_init(struct per_cpu_dm_data *data)
+{
+ spin_lock_init(&data->lock);
+ skb_queue_head_init(&data->drop_queue);
+ u64_stats_init(&data->stats.syncp);
+}
+
+static void __net_dm_cpu_data_fini(struct per_cpu_dm_data *data)
+{
+ WARN_ON(!skb_queue_empty(&data->drop_queue));
+}
+
+static void net_dm_cpu_data_init(int cpu)
+{
+ struct per_cpu_dm_data *data;
+
+ data = &per_cpu(dm_cpu_data, cpu);
+ __net_dm_cpu_data_init(data);
+}
+
+static void net_dm_cpu_data_fini(int cpu)
{
struct per_cpu_dm_data *data;
+
+ data = &per_cpu(dm_cpu_data, cpu);
+ /* At this point, we should have exclusive access
+ * to this struct and can free the skb inside it.
+ */
+ consume_skb(data->skb);
+ __net_dm_cpu_data_fini(data);
+}
+
+static void net_dm_hw_cpu_data_init(int cpu)
+{
+ struct per_cpu_dm_data *hw_data;
+
+ hw_data = &per_cpu(dm_hw_cpu_data, cpu);
+ __net_dm_cpu_data_init(hw_data);
+}
+
+static void net_dm_hw_cpu_data_fini(int cpu)
+{
+ struct per_cpu_dm_data *hw_data;
+
+ hw_data = &per_cpu(dm_hw_cpu_data, cpu);
+ kfree(hw_data->hw_entries);
+ __net_dm_cpu_data_fini(hw_data);
+}
+
+static int __init init_net_drop_monitor(void)
+{
int cpu, rc;
pr_info("Initializing network drop monitor service\n");
@@ -414,14 +1652,10 @@ static int __init init_net_drop_monitor(void)
rc = 0;
for_each_possible_cpu(cpu) {
- data = &per_cpu(dm_cpu_data, cpu);
- INIT_WORK(&data->dm_alert_work, send_dm_alert);
- timer_setup(&data->send_timer, sched_send_work, 0);
- spin_lock_init(&data->lock);
- reset_per_cpu_data(data);
+ net_dm_cpu_data_init(cpu);
+ net_dm_hw_cpu_data_init(cpu);
}
-
goto out;
out_unreg:
@@ -432,7 +1666,6 @@ out:
static void exit_net_drop_monitor(void)
{
- struct per_cpu_dm_data *data;
int cpu;
BUG_ON(unregister_netdevice_notifier(&dropmon_net_notifier));
@@ -440,19 +1673,11 @@ static void exit_net_drop_monitor(void)
/*
* Because of the module_get/put we do in the trace state change path
* we are guaranteed not to have any current users when we get here
- * all we need to do is make sure that we don't have any running timers
- * or pending schedule calls
*/
for_each_possible_cpu(cpu) {
- data = &per_cpu(dm_cpu_data, cpu);
- del_timer_sync(&data->send_timer);
- cancel_work_sync(&data->dm_alert_work);
- /*
- * At this point, we should have exclusive access
- * to this struct and can free the skb inside it
- */
- kfree_skb(data->skb);
+ net_dm_hw_cpu_data_fini(cpu);
+ net_dm_cpu_data_fini(cpu);
}
BUG_ON(genl_unregister_family(&net_drop_monitor_family));
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 6288e69e94fc..c763106c73fc 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -133,6 +133,7 @@ phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
[ETHTOOL_ID_UNSPEC] = "Unspec",
[ETHTOOL_PHY_DOWNSHIFT] = "phy-downshift",
[ETHTOOL_PHY_FAST_LINK_DOWN] = "phy-fast-link-down",
+ [ETHTOOL_PHY_EDPD] = "phy-energy-detect-power-down",
};
static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
@@ -2451,6 +2452,11 @@ static int ethtool_phy_tunable_valid(const struct ethtool_tunable *tuna)
tuna->type_id != ETHTOOL_TUNABLE_U8)
return -EINVAL;
break;
+ case ETHTOOL_PHY_EDPD:
+ if (tuna->len != sizeof(u16) ||
+ tuna->type_id != ETHTOOL_TUNABLE_U16)
+ return -EINVAL;
+ break;
default:
return -EINVAL;
}
diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c
index 13a40b831d6d..470a606d5e8d 100644
--- a/net/core/fib_notifier.c
+++ b/net/core/fib_notifier.c
@@ -5,8 +5,15 @@
#include <linux/module.h>
#include <linux/init.h>
#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include <net/fib_notifier.h>
+static unsigned int fib_notifier_net_id;
+
+struct fib_notifier_net {
+ struct list_head fib_notifier_ops;
+};
+
static ATOMIC_NOTIFIER_HEAD(fib_chain);
int call_fib_notifier(struct notifier_block *nb, struct net *net,
@@ -34,6 +41,7 @@ EXPORT_SYMBOL(call_fib_notifiers);
static unsigned int fib_seq_sum(void)
{
+ struct fib_notifier_net *fn_net;
struct fib_notifier_ops *ops;
unsigned int fib_seq = 0;
struct net *net;
@@ -41,8 +49,9 @@ static unsigned int fib_seq_sum(void)
rtnl_lock();
down_read(&net_rwsem);
for_each_net(net) {
+ fn_net = net_generic(net, fib_notifier_net_id);
rcu_read_lock();
- list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
+ list_for_each_entry_rcu(ops, &fn_net->fib_notifier_ops, list) {
if (!try_module_get(ops->owner))
continue;
fib_seq += ops->fib_seq_read(net);
@@ -58,9 +67,10 @@ static unsigned int fib_seq_sum(void)
static int fib_net_dump(struct net *net, struct notifier_block *nb)
{
+ struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);
struct fib_notifier_ops *ops;
- list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
+ list_for_each_entry_rcu(ops, &fn_net->fib_notifier_ops, list) {
int err;
if (!try_module_get(ops->owner))
@@ -127,12 +137,13 @@ EXPORT_SYMBOL(unregister_fib_notifier);
static int __fib_notifier_ops_register(struct fib_notifier_ops *ops,
struct net *net)
{
+ struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);
struct fib_notifier_ops *o;
- list_for_each_entry(o, &net->fib_notifier_ops, list)
+ list_for_each_entry(o, &fn_net->fib_notifier_ops, list)
if (ops->family == o->family)
return -EEXIST;
- list_add_tail_rcu(&ops->list, &net->fib_notifier_ops);
+ list_add_tail_rcu(&ops->list, &fn_net->fib_notifier_ops);
return 0;
}
@@ -167,18 +178,24 @@ EXPORT_SYMBOL(fib_notifier_ops_unregister);
static int __net_init fib_notifier_net_init(struct net *net)
{
- INIT_LIST_HEAD(&net->fib_notifier_ops);
+ struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);
+
+ INIT_LIST_HEAD(&fn_net->fib_notifier_ops);
return 0;
}
static void __net_exit fib_notifier_net_exit(struct net *net)
{
- WARN_ON_ONCE(!list_empty(&net->fib_notifier_ops));
+ struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);
+
+ WARN_ON_ONCE(!list_empty(&fn_net->fib_notifier_ops));
}
static struct pernet_operations fib_notifier_net_ops = {
.init = fib_notifier_net_init,
.exit = fib_notifier_net_exit,
+ .id = &fib_notifier_net_id,
+ .size = sizeof(struct fib_notifier_net),
};
static int __init fib_notifier_init(void)
diff --git a/net/core/filter.c b/net/core/filter.c
index 4c6a252d4212..ed6563622ce3 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3517,7 +3517,8 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
int err;
switch (map->map_type) {
- case BPF_MAP_TYPE_DEVMAP: {
+ case BPF_MAP_TYPE_DEVMAP:
+ case BPF_MAP_TYPE_DEVMAP_HASH: {
struct bpf_dtab_netdev *dst = fwd;
err = dev_map_enqueue(dst, xdp, dev_rx);
@@ -3554,6 +3555,7 @@ void xdp_do_flush_map(void)
if (map) {
switch (map->map_type) {
case BPF_MAP_TYPE_DEVMAP:
+ case BPF_MAP_TYPE_DEVMAP_HASH:
__dev_map_flush(map);
break;
case BPF_MAP_TYPE_CPUMAP:
@@ -3574,6 +3576,8 @@ static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
switch (map->map_type) {
case BPF_MAP_TYPE_DEVMAP:
return __dev_map_lookup_elem(map, index);
+ case BPF_MAP_TYPE_DEVMAP_HASH:
+ return __dev_map_hash_lookup_elem(map, index);
case BPF_MAP_TYPE_CPUMAP:
return __cpu_map_lookup_elem(map, index);
case BPF_MAP_TYPE_XSKMAP:
@@ -3655,7 +3659,8 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
ri->tgt_value = NULL;
WRITE_ONCE(ri->map, NULL);
- if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
+ if (map->map_type == BPF_MAP_TYPE_DEVMAP ||
+ map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
struct bpf_dtab_netdev *dst = fwd;
err = dev_map_generic_redirect(dst, skb, xdp_prog);
@@ -5850,6 +5855,75 @@ static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = {
.arg5_type = ARG_CONST_SIZE,
};
+BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
+ struct tcphdr *, th, u32, th_len)
+{
+#ifdef CONFIG_SYN_COOKIES
+ u32 cookie;
+ u16 mss;
+
+ if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4))
+ return -EINVAL;
+
+ if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
+ return -EINVAL;
+
+ if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
+ return -ENOENT;
+
+ if (!th->syn || th->ack || th->fin || th->rst)
+ return -EINVAL;
+
+ if (unlikely(iph_len < sizeof(struct iphdr)))
+ return -EINVAL;
+
+ /* Both struct iphdr and struct ipv6hdr have the version field at the
+ * same offset so we can cast to the shorter header (struct iphdr).
+ */
+ switch (((struct iphdr *)iph)->version) {
+ case 4:
+ if (sk->sk_family == AF_INET6 && sk->sk_ipv6only)
+ return -EINVAL;
+
+ mss = tcp_v4_get_syncookie(sk, iph, th, &cookie);
+ break;
+
+#if IS_BUILTIN(CONFIG_IPV6)
+ case 6:
+ if (unlikely(iph_len < sizeof(struct ipv6hdr)))
+ return -EINVAL;
+
+ if (sk->sk_family != AF_INET6)
+ return -EINVAL;
+
+ mss = tcp_v6_get_syncookie(sk, iph, th, &cookie);
+ break;
+#endif /* CONFIG_IPV6 */
+
+ default:
+ return -EPROTONOSUPPORT;
+ }
+ if (mss == 0)
+ return -ENOENT;
+
+ return cookie | ((u64)mss << 32);
+#else
+ return -EOPNOTSUPP;
+#endif /* CONFIG_SYN_COOKIES */
+}
+
+static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = {
+ .func = bpf_tcp_gen_syncookie,
+ .gpl_only = true, /* __cookie_v*_init_sequence() is GPL */
+ .pkt_access = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_SOCK_COMMON,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_PTR_TO_MEM,
+ .arg5_type = ARG_CONST_SIZE,
+};
+
#endif /* CONFIG_INET */
bool bpf_helper_changes_pkt_data(void *func)
@@ -5999,6 +6073,8 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_socket_cookie_proto;
case BPF_FUNC_get_socket_uid:
return &bpf_get_socket_uid_proto;
+ case BPF_FUNC_perf_event_output:
+ return &bpf_skb_event_output_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -6019,6 +6095,8 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_storage_get_proto;
case BPF_FUNC_sk_storage_delete:
return &bpf_sk_storage_delete_proto;
+ case BPF_FUNC_perf_event_output:
+ return &bpf_skb_event_output_proto;
#ifdef CONFIG_SOCK_CGROUP_DATA
case BPF_FUNC_skb_cgroup_id:
return &bpf_skb_cgroup_id_proto;
@@ -6135,6 +6213,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_tcp_check_syncookie_proto;
case BPF_FUNC_skb_ecn_set_ce:
return &bpf_skb_ecn_set_ce_proto;
+ case BPF_FUNC_tcp_gen_syncookie:
+ return &bpf_tcp_gen_syncookie_proto;
#endif
default:
return bpf_base_func_proto(func_id);
@@ -6174,6 +6254,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_xdp_skc_lookup_tcp_proto;
case BPF_FUNC_tcp_check_syncookie:
return &bpf_tcp_check_syncookie_proto;
+ case BPF_FUNC_tcp_gen_syncookie:
+ return &bpf_tcp_gen_syncookie_proto;
#endif
default:
return bpf_base_func_proto(func_id);
@@ -6267,6 +6349,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_redirect_map_proto;
case BPF_FUNC_sk_redirect_hash:
return &bpf_sk_redirect_hash_proto;
+ case BPF_FUNC_perf_event_output:
+ return &bpf_skb_event_output_proto;
#ifdef CONFIG_INET
case BPF_FUNC_sk_lookup_tcp:
return &bpf_sk_lookup_tcp_proto;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 2470b4b404e6..7c09d87d3269 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -737,6 +737,7 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
struct flow_dissector_key_basic *key_basic;
struct flow_dissector_key_addrs *key_addrs;
struct flow_dissector_key_ports *key_ports;
+ struct flow_dissector_key_tags *key_tags;
key_control = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_CONTROL,
@@ -781,10 +782,18 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
key_ports->src = flow_keys->sport;
key_ports->dst = flow_keys->dport;
}
+
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_FLOW_LABEL)) {
+ key_tags = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_FLOW_LABEL,
+ target_container);
+ key_tags->flow_label = ntohl(flow_keys->flow_label);
+ }
}
bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
- __be16 proto, int nhoff, int hlen)
+ __be16 proto, int nhoff, int hlen, unsigned int flags)
{
struct bpf_flow_keys *flow_keys = ctx->flow_keys;
u32 result;
@@ -795,6 +804,14 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
flow_keys->nhoff = nhoff;
flow_keys->thoff = flow_keys->nhoff;
+ BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG !=
+ (int)FLOW_DISSECTOR_F_PARSE_1ST_FRAG);
+ BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL !=
+ (int)FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+ BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP !=
+ (int)FLOW_DISSECTOR_F_STOP_AT_ENCAP);
+ flow_keys->flags = flags;
+
preempt_disable();
result = BPF_PROG_RUN(prog, ctx);
preempt_enable();
@@ -914,7 +931,7 @@ bool __skb_flow_dissect(const struct net *net,
}
ret = bpf_flow_dissect(attached, &ctx, n_proto, nhoff,
- hlen);
+ hlen, flags);
__skb_flow_bpf_to_target(&flow_keys, flow_dissector,
target_container);
rcu_read_unlock();
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index d63b970784dc..cf52d9c422fa 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -2,6 +2,8 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <net/flow_offload.h>
+#include <linux/rtnetlink.h>
+#include <linux/mutex.h>
struct flow_rule *flow_rule_alloc(unsigned int num_actions)
{
@@ -280,3 +282,242 @@ int flow_block_cb_setup_simple(struct flow_block_offload *f,
}
}
EXPORT_SYMBOL(flow_block_cb_setup_simple);
+
+static LIST_HEAD(block_ing_cb_list);
+
+static struct rhashtable indr_setup_block_ht;
+
+struct flow_indr_block_cb {
+ struct list_head list;
+ void *cb_priv;
+ flow_indr_block_bind_cb_t *cb;
+ void *cb_ident;
+};
+
+struct flow_indr_block_dev {
+ struct rhash_head ht_node;
+ struct net_device *dev;
+ unsigned int refcnt;
+ struct list_head cb_list;
+};
+
+static const struct rhashtable_params flow_indr_setup_block_ht_params = {
+ .key_offset = offsetof(struct flow_indr_block_dev, dev),
+ .head_offset = offsetof(struct flow_indr_block_dev, ht_node),
+ .key_len = sizeof(struct net_device *),
+};
+
+static struct flow_indr_block_dev *
+flow_indr_block_dev_lookup(struct net_device *dev)
+{
+ return rhashtable_lookup_fast(&indr_setup_block_ht, &dev,
+ flow_indr_setup_block_ht_params);
+}
+
+static struct flow_indr_block_dev *
+flow_indr_block_dev_get(struct net_device *dev)
+{
+ struct flow_indr_block_dev *indr_dev;
+
+ indr_dev = flow_indr_block_dev_lookup(dev);
+ if (indr_dev)
+ goto inc_ref;
+
+ indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL);
+ if (!indr_dev)
+ return NULL;
+
+ INIT_LIST_HEAD(&indr_dev->cb_list);
+ indr_dev->dev = dev;
+ if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node,
+ flow_indr_setup_block_ht_params)) {
+ kfree(indr_dev);
+ return NULL;
+ }
+
+inc_ref:
+ indr_dev->refcnt++;
+ return indr_dev;
+}
+
+static void flow_indr_block_dev_put(struct flow_indr_block_dev *indr_dev)
+{
+ if (--indr_dev->refcnt)
+ return;
+
+ rhashtable_remove_fast(&indr_setup_block_ht, &indr_dev->ht_node,
+ flow_indr_setup_block_ht_params);
+ kfree(indr_dev);
+}
+
+static struct flow_indr_block_cb *
+flow_indr_block_cb_lookup(struct flow_indr_block_dev *indr_dev,
+ flow_indr_block_bind_cb_t *cb, void *cb_ident)
+{
+ struct flow_indr_block_cb *indr_block_cb;
+
+ list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
+ if (indr_block_cb->cb == cb &&
+ indr_block_cb->cb_ident == cb_ident)
+ return indr_block_cb;
+ return NULL;
+}
+
+static struct flow_indr_block_cb *
+flow_indr_block_cb_add(struct flow_indr_block_dev *indr_dev, void *cb_priv,
+ flow_indr_block_bind_cb_t *cb, void *cb_ident)
+{
+ struct flow_indr_block_cb *indr_block_cb;
+
+ indr_block_cb = flow_indr_block_cb_lookup(indr_dev, cb, cb_ident);
+ if (indr_block_cb)
+ return ERR_PTR(-EEXIST);
+
+ indr_block_cb = kzalloc(sizeof(*indr_block_cb), GFP_KERNEL);
+ if (!indr_block_cb)
+ return ERR_PTR(-ENOMEM);
+
+ indr_block_cb->cb_priv = cb_priv;
+ indr_block_cb->cb = cb;
+ indr_block_cb->cb_ident = cb_ident;
+ list_add(&indr_block_cb->list, &indr_dev->cb_list);
+
+ return indr_block_cb;
+}
+
+static void flow_indr_block_cb_del(struct flow_indr_block_cb *indr_block_cb)
+{
+ list_del(&indr_block_cb->list);
+ kfree(indr_block_cb);
+}
+
+static DEFINE_MUTEX(flow_indr_block_ing_cb_lock);
+
+static void flow_block_ing_cmd(struct net_device *dev,
+ flow_indr_block_bind_cb_t *cb,
+ void *cb_priv,
+ enum flow_block_command command)
+{
+ struct flow_indr_block_ing_entry *entry;
+
+ mutex_lock(&flow_indr_block_ing_cb_lock);
+ list_for_each_entry(entry, &block_ing_cb_list, list) {
+ entry->cb(dev, cb, cb_priv, command);
+ }
+ mutex_unlock(&flow_indr_block_ing_cb_lock);
+}
+
+int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
+ flow_indr_block_bind_cb_t *cb,
+ void *cb_ident)
+{
+ struct flow_indr_block_cb *indr_block_cb;
+ struct flow_indr_block_dev *indr_dev;
+ int err;
+
+ indr_dev = flow_indr_block_dev_get(dev);
+ if (!indr_dev)
+ return -ENOMEM;
+
+ indr_block_cb = flow_indr_block_cb_add(indr_dev, cb_priv, cb, cb_ident);
+ err = PTR_ERR_OR_ZERO(indr_block_cb);
+ if (err)
+ goto err_dev_put;
+
+ flow_block_ing_cmd(dev, indr_block_cb->cb, indr_block_cb->cb_priv,
+ FLOW_BLOCK_BIND);
+
+ return 0;
+
+err_dev_put:
+ flow_indr_block_dev_put(indr_dev);
+ return err;
+}
+EXPORT_SYMBOL_GPL(__flow_indr_block_cb_register);
+
+int flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
+ flow_indr_block_bind_cb_t *cb,
+ void *cb_ident)
+{
+ int err;
+
+ rtnl_lock();
+ err = __flow_indr_block_cb_register(dev, cb_priv, cb, cb_ident);
+ rtnl_unlock();
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(flow_indr_block_cb_register);
+
+void __flow_indr_block_cb_unregister(struct net_device *dev,
+ flow_indr_block_bind_cb_t *cb,
+ void *cb_ident)
+{
+ struct flow_indr_block_cb *indr_block_cb;
+ struct flow_indr_block_dev *indr_dev;
+
+ indr_dev = flow_indr_block_dev_lookup(dev);
+ if (!indr_dev)
+ return;
+
+ indr_block_cb = flow_indr_block_cb_lookup(indr_dev, cb, cb_ident);
+ if (!indr_block_cb)
+ return;
+
+ flow_block_ing_cmd(dev, indr_block_cb->cb, indr_block_cb->cb_priv,
+ FLOW_BLOCK_UNBIND);
+
+ flow_indr_block_cb_del(indr_block_cb);
+ flow_indr_block_dev_put(indr_dev);
+}
+EXPORT_SYMBOL_GPL(__flow_indr_block_cb_unregister);
+
+void flow_indr_block_cb_unregister(struct net_device *dev,
+ flow_indr_block_bind_cb_t *cb,
+ void *cb_ident)
+{
+ rtnl_lock();
+ __flow_indr_block_cb_unregister(dev, cb, cb_ident);
+ rtnl_unlock();
+}
+EXPORT_SYMBOL_GPL(flow_indr_block_cb_unregister);
+
+void flow_indr_block_call(struct net_device *dev,
+ struct flow_block_offload *bo,
+ enum flow_block_command command)
+{
+ struct flow_indr_block_cb *indr_block_cb;
+ struct flow_indr_block_dev *indr_dev;
+
+ indr_dev = flow_indr_block_dev_lookup(dev);
+ if (!indr_dev)
+ return;
+
+ list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
+ indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
+ bo);
+}
+EXPORT_SYMBOL_GPL(flow_indr_block_call);
+
+void flow_indr_add_block_ing_cb(struct flow_indr_block_ing_entry *entry)
+{
+ mutex_lock(&flow_indr_block_ing_cb_lock);
+ list_add_tail(&entry->list, &block_ing_cb_list);
+ mutex_unlock(&flow_indr_block_ing_cb_lock);
+}
+EXPORT_SYMBOL_GPL(flow_indr_add_block_ing_cb);
+
+void flow_indr_del_block_ing_cb(struct flow_indr_block_ing_entry *entry)
+{
+ mutex_lock(&flow_indr_block_ing_cb_lock);
+ list_del(&entry->list);
+ mutex_unlock(&flow_indr_block_ing_cb_lock);
+}
+EXPORT_SYMBOL_GPL(flow_indr_del_block_ing_cb);
+
+static int __init init_flow_indr_rhashtable(void)
+{
+ return rhashtable_init(&indr_setup_block_ht,
+ &flow_indr_setup_block_ht_params);
+}
+subsys_initcall(init_flow_indr_rhashtable);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index f79e61c570ea..5480edff0c86 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -3033,7 +3033,7 @@ static struct neighbour *neigh_get_first(struct seq_file *seq)
struct net *net = seq_file_net(seq);
struct neigh_hash_table *nht = state->nht;
struct neighbour *n = NULL;
- int bucket = state->bucket;
+ int bucket;
state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 3272dc7a8c81..5bc65587f1c4 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -61,7 +61,7 @@ static int page_pool_init(struct page_pool *pool,
struct page_pool *page_pool_create(const struct page_pool_params *params)
{
struct page_pool *pool;
- int err = 0;
+ int err;
pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, params->nid);
if (!pool)
@@ -82,12 +82,9 @@ EXPORT_SYMBOL(page_pool_create);
static struct page *__page_pool_get_cached(struct page_pool *pool)
{
struct ptr_ring *r = &pool->ring;
+ bool refill = false;
struct page *page;
- /* Quicker fallback, avoid locks when ring is empty */
- if (__ptr_ring_empty(r))
- return NULL;
-
/* Test for safe-context, caller should provide this guarantee */
if (likely(in_serving_softirq())) {
if (likely(pool->alloc.count)) {
@@ -95,27 +92,23 @@ static struct page *__page_pool_get_cached(struct page_pool *pool)
page = pool->alloc.cache[--pool->alloc.count];
return page;
}
- /* Slower-path: Alloc array empty, time to refill
- *
- * Open-coded bulk ptr_ring consumer.
- *
- * Discussion: the ring consumer lock is not really
- * needed due to the softirq/NAPI protection, but
- * later need the ability to reclaim pages on the
- * ring. Thus, keeping the locks.
- */
- spin_lock(&r->consumer_lock);
- while ((page = __ptr_ring_consume(r))) {
- if (pool->alloc.count == PP_ALLOC_CACHE_REFILL)
- break;
- pool->alloc.cache[pool->alloc.count++] = page;
- }
- spin_unlock(&r->consumer_lock);
- return page;
+ refill = true;
}
- /* Slow-path: Get page from locked ring queue */
- page = ptr_ring_consume(&pool->ring);
+ /* Quicker fallback, avoid locks when ring is empty */
+ if (__ptr_ring_empty(r))
+ return NULL;
+
+ /* Slow-path: Get page from locked ring queue,
+ * refill alloc array if requested.
+ */
+ spin_lock(&r->consumer_lock);
+ page = __ptr_ring_consume(r);
+ if (refill)
+ pool->alloc.count = __ptr_ring_consume_batched(r,
+ pool->alloc.cache,
+ PP_ALLOC_CACHE_REFILL);
+ spin_unlock(&r->consumer_lock);
return page;
}
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 1d0c1b4886d7..48b1e429857c 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2650,7 +2650,7 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
}
get_page(pkt_dev->page);
skb_frag_set_page(skb, i, pkt_dev->page);
- skb_shinfo(skb)->frags[i].page_offset = 0;
+ skb_frag_off_set(&skb_shinfo(skb)->frags[i], 0);
/*last fragment, fill rest of data*/
if (i == (frags - 1))
skb_frag_size_set(&skb_shinfo(skb)->frags[i],
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 982d8d12830e..f12e8a050edb 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -785,7 +785,7 @@ void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt)
struct page *p;
u8 *vaddr;
- skb_frag_foreach_page(frag, frag->page_offset,
+ skb_frag_foreach_page(frag, skb_frag_off(frag),
skb_frag_size(frag), p, p_off, p_len,
copied) {
seg_len = min_t(int, p_len, len);
@@ -1375,7 +1375,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
struct page *p;
u8 *vaddr;
- skb_frag_foreach_page(f, f->page_offset, skb_frag_size(f),
+ skb_frag_foreach_page(f, skb_frag_off(f), skb_frag_size(f),
p, p_off, p_len, copied) {
u32 copy, done = 0;
vaddr = kmap_atomic(p);
@@ -2144,10 +2144,12 @@ pull_pages:
skb_frag_unref(skb, i);
eat -= size;
} else {
- skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[k];
+
+ *frag = skb_shinfo(skb)->frags[i];
if (eat) {
- skb_shinfo(skb)->frags[k].page_offset += eat;
- skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
+ skb_frag_off_add(frag, eat);
+ skb_frag_size_sub(frag, eat);
if (!i)
goto end;
eat = 0;
@@ -2219,7 +2221,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
copy = len;
skb_frag_foreach_page(f,
- f->page_offset + offset - start,
+ skb_frag_off(f) + offset - start,
copy, p, p_off, p_len, copied) {
vaddr = kmap_atomic(p);
memcpy(to + copied, vaddr + p_off, p_len);
@@ -2395,7 +2397,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
if (__splice_segment(skb_frag_page(f),
- f->page_offset, skb_frag_size(f),
+ skb_frag_off(f), skb_frag_size(f),
offset, len, spd, false, sk, pipe))
return true;
}
@@ -2485,20 +2487,20 @@ do_frag_list:
for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx];
- if (offset < frag->size)
+ if (offset < skb_frag_size(frag))
break;
- offset -= frag->size;
+ offset -= skb_frag_size(frag);
}
for (; len && fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx];
- slen = min_t(size_t, len, frag->size - offset);
+ slen = min_t(size_t, len, skb_frag_size(frag) - offset);
while (slen) {
- ret = kernel_sendpage_locked(sk, frag->page.p,
- frag->page_offset + offset,
+ ret = kernel_sendpage_locked(sk, skb_frag_page(frag),
+ skb_frag_off(frag) + offset,
slen, MSG_DONTWAIT);
if (ret <= 0)
goto error;
@@ -2580,7 +2582,7 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
copy = len;
skb_frag_foreach_page(frag,
- frag->page_offset + offset - start,
+ skb_frag_off(frag) + offset - start,
copy, p, p_off, p_len, copied) {
vaddr = kmap_atomic(p);
memcpy(vaddr + p_off, from + copied, p_len);
@@ -2660,7 +2662,7 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
copy = len;
skb_frag_foreach_page(frag,
- frag->page_offset + offset - start,
+ skb_frag_off(frag) + offset - start,
copy, p, p_off, p_len, copied) {
vaddr = kmap_atomic(p);
csum2 = INDIRECT_CALL_1(ops->update,
@@ -2759,7 +2761,7 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
copy = len;
skb_frag_foreach_page(frag,
- frag->page_offset + offset - start,
+ skb_frag_off(frag) + offset - start,
copy, p, p_off, p_len, copied) {
vaddr = kmap_atomic(p);
csum2 = csum_partial_copy_nocheck(vaddr + p_off,
@@ -2975,11 +2977,15 @@ skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen)
skb_zerocopy_clone(to, from, GFP_ATOMIC);
for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
+ int size;
+
if (!len)
break;
skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i];
- skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len);
- len -= skb_shinfo(to)->frags[j].size;
+ size = min_t(int, skb_frag_size(&skb_shinfo(to)->frags[j]),
+ len);
+ skb_frag_size_set(&skb_shinfo(to)->frags[j], size);
+ len -= size;
skb_frag_ref(to, j);
j++;
}
@@ -3230,7 +3236,7 @@ static inline void skb_split_no_header(struct sk_buff *skb,
* 2. Split is accurately. We make this.
*/
skb_frag_ref(skb, i);
- skb_shinfo(skb1)->frags[0].page_offset += len - pos;
+ skb_frag_off_add(&skb_shinfo(skb1)->frags[0], len - pos);
skb_frag_size_sub(&skb_shinfo(skb1)->frags[0], len - pos);
skb_frag_size_set(&skb_shinfo(skb)->frags[i], len - pos);
skb_shinfo(skb)->nr_frags++;
@@ -3293,7 +3299,7 @@ static int skb_prepare_for_shift(struct sk_buff *skb)
int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
{
int from, to, merge, todo;
- struct skb_frag_struct *fragfrom, *fragto;
+ skb_frag_t *fragfrom, *fragto;
BUG_ON(shiftlen > skb->len);
@@ -3312,7 +3318,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
*/
if (!to ||
!skb_can_coalesce(tgt, to, skb_frag_page(fragfrom),
- fragfrom->page_offset)) {
+ skb_frag_off(fragfrom))) {
merge = -1;
} else {
merge = to - 1;
@@ -3329,7 +3335,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
skb_frag_size_add(fragto, shiftlen);
skb_frag_size_sub(fragfrom, shiftlen);
- fragfrom->page_offset += shiftlen;
+ skb_frag_off_add(fragfrom, shiftlen);
goto onlymerged;
}
@@ -3360,11 +3366,11 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
} else {
__skb_frag_ref(fragfrom);
- fragto->page = fragfrom->page;
- fragto->page_offset = fragfrom->page_offset;
+ skb_frag_page_copy(fragto, fragfrom);
+ skb_frag_off_copy(fragto, fragfrom);
skb_frag_size_set(fragto, todo);
- fragfrom->page_offset += todo;
+ skb_frag_off_add(fragfrom, todo);
skb_frag_size_sub(fragfrom, todo);
todo = 0;
@@ -3489,7 +3495,7 @@ next_skb:
if (!st->frag_data)
st->frag_data = kmap_atomic(skb_frag_page(frag));
- *data = (u8 *) st->frag_data + frag->page_offset +
+ *data = (u8 *) st->frag_data + skb_frag_off(frag) +
(abs_offset - st->stepped_offset);
return block_limit - abs_offset;
@@ -3625,10 +3631,10 @@ static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb)
struct page *page;
page = virt_to_head_page(frag_skb->head);
- head_frag.page.p = page;
- head_frag.page_offset = frag_skb->data -
- (unsigned char *)page_address(page);
- head_frag.size = skb_headlen(frag_skb);
+ __skb_frag_set_page(&head_frag, page);
+ skb_frag_off_set(&head_frag, frag_skb->data -
+ (unsigned char *)page_address(page));
+ skb_frag_size_set(&head_frag, skb_headlen(frag_skb));
return head_frag;
}
@@ -3890,7 +3896,7 @@ normal:
size = skb_frag_size(nskb_frag);
if (pos < offset) {
- nskb_frag->page_offset += offset - pos;
+ skb_frag_off_add(nskb_frag, offset - pos);
skb_frag_size_sub(nskb_frag, offset - pos);
}
@@ -4011,7 +4017,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
*--frag = *--frag2;
} while (--i);
- frag->page_offset += offset;
+ skb_frag_off_add(frag, offset);
skb_frag_size_sub(frag, offset);
/* all fragments truesize : remove (head size + sk_buff) */
@@ -4040,8 +4046,8 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags;
- frag->page.p = page;
- frag->page_offset = first_offset;
+ __skb_frag_set_page(frag, page);
+ skb_frag_off_set(frag, first_offset);
skb_frag_size_set(frag, first_size);
memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags);
@@ -4057,7 +4063,7 @@ merge:
if (offset > headlen) {
unsigned int eat = offset - headlen;
- skbinfo->frags[0].page_offset += eat;
+ skb_frag_off_add(&skbinfo->frags[0], eat);
skb_frag_size_sub(&skbinfo->frags[0], eat);
skb->data_len -= eat;
skb->len -= eat;
@@ -4100,6 +4106,9 @@ static const u8 skb_ext_type_len[] = {
#ifdef CONFIG_XFRM
[SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path),
#endif
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ [TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext),
+#endif
};
static __always_inline unsigned int skb_ext_total_length(void)
@@ -4111,6 +4120,9 @@ static __always_inline unsigned int skb_ext_total_length(void)
#ifdef CONFIG_XFRM
skb_ext_type_len[SKB_EXT_SEC_PATH] +
#endif
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ skb_ext_type_len[TC_SKB_EXT] +
+#endif
0;
}
@@ -4182,7 +4194,7 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len,
if (copy > len)
copy = len;
sg_set_page(&sg[elt], skb_frag_page(frag), copy,
- frag->page_offset+offset-start);
+ skb_frag_off(frag) + offset - start);
elt++;
if (!(len -= copy))
return elt;
@@ -5853,7 +5865,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- shinfo->frags[0].page_offset += off - pos;
+ skb_frag_off_add(&shinfo->frags[0], off - pos);
skb_frag_size_sub(&shinfo->frags[0], off - pos);
}
skb_frag_ref(skb, i);
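The bulk of this file is a mechanical conversion from open-coded skb_frag_t field access (frag->page_offset, frag->size, frag->page.p) to the accessor helpers. A minimal sketch of the resulting style, using only the helpers appearing above:

#include <linux/skbuff.h>

static unsigned int skb_frags_total_len(const struct sk_buff *skb)
{
	unsigned int i, total = 0;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		pr_debug("frag %u: off %u len %u\n", i,
			 skb_frag_off(frag), skb_frag_size(frag));
		total += skb_frag_size(frag);
	}

	return total;
}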
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 6832eeb4b785..cf390e0aa73d 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -190,8 +190,7 @@ static int __sk_msg_free(struct sock *sk, struct sk_msg *msg, u32 i,
sk_msg_check_to_free(msg, i, msg->sg.size);
sge = sk_msg_elem(msg, i);
}
- if (msg->skb)
- consume_skb(msg->skb);
+ consume_skb(msg->skb);
sk_msg_init(msg);
return freed;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 545fac19a711..07863edbe6fc 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1851,9 +1851,12 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
goto out;
}
RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
-#ifdef CONFIG_BPF_SYSCALL
- RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);
-#endif
+
+ if (bpf_sk_storage_clone(sk, newsk)) {
+ sk_free_unlock_clone(newsk);
+ newsk = NULL;
+ goto out;
+ }
newsk->sk_err = 0;
newsk->sk_err_soft = 0;
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 50916f9bc4f2..eb114ee419b6 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -345,7 +345,7 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
return -EINVAL;
if (unlikely(idx >= map->max_entries))
return -E2BIG;
- if (unlikely(icsk->icsk_ulp_data))
+ if (unlikely(rcu_access_pointer(icsk->icsk_ulp_data)))
return -EINVAL;
link = sk_psock_init_link();
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 9408f9264d05..f3ceec93f392 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -295,8 +295,19 @@ struct sock *reuseport_select_sock(struct sock *sk,
select_by_hash:
/* no bpf or invalid bpf result: fall back to hash usage */
- if (!sk2)
- sk2 = reuse->socks[reciprocal_scale(hash, socks)];
+ if (!sk2) {
+ int i, j;
+
+ i = j = reciprocal_scale(hash, socks);
+ while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) {
+ i++;
+ if (i >= reuse->num_socks)
+ i = 0;
+ if (i == j)
+ goto out;
+ }
+ sk2 = reuse->socks[i];
+ }
}
out:
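Userspace sketch of the case the wrap-around scan handles: UDP sockets sharing a port through SO_REUSEPORT, one of which later connect()s. A connected UDP socket sits in TCP_ESTABLISHED state, and the loop above keeps it out of the hash-based fallback so it no longer absorbs traffic meant for the unconnected group members.

#include <netinet/in.h>
#include <sys/socket.h>

static int reuseport_udp_socket(unsigned short port)
{
	struct sockaddr_in addr = {
		.sin_family = AF_INET,
		.sin_port = htons(port),
		.sin_addr.s_addr = htonl(INADDR_ANY),
	};
	int one = 1;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
	bind(fd, (struct sockaddr *)&addr, sizeof(addr));

	return fd;	/* connect()ing one such socket triggers the new path */
}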
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 8da5b3a54dac..eb29e5adc84d 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -567,6 +567,14 @@ static struct ctl_table net_core_table[] = {
.mode = 0644,
.proc_handler = proc_do_static_key,
},
+ {
+ .procname = "gro_normal_batch",
+ .data = &gro_normal_batch,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ONE,
+ },
{ }
};
diff --git a/net/core/tso.c b/net/core/tso.c
index 43f4eba61933..d4d5c077ad72 100644
--- a/net/core/tso.c
+++ b/net/core/tso.c
@@ -55,8 +55,8 @@ void tso_build_data(struct sk_buff *skb, struct tso_t *tso, int size)
skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx];
/* Move to next segment */
- tso->size = frag->size;
- tso->data = page_address(frag->page.p) + frag->page_offset;
+ tso->size = skb_frag_size(frag);
+ tso->data = skb_frag_address(frag);
tso->next_frag_idx++;
}
}
@@ -79,8 +79,8 @@ void tso_start(struct sk_buff *skb, struct tso_t *tso)
skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx];
/* Move to next segment */
- tso->size = frag->size;
- tso->data = page_address(frag->page.p) + frag->page_offset;
+ tso->size = skb_frag_size(frag);
+ tso->data = skb_frag_address(frag);
tso->next_frag_idx++;
}
}
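The tso_t conversion above relies on skb_frag_address() being the same lowmem address the old open-coded expression computed; roughly (sketch, valid only for fragment pages that are not in highmem):

static inline void *skb_frag_address_sketch(const skb_frag_t *frag)
{
	return page_address(skb_frag_page(frag)) + skb_frag_off(frag);
}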
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 6e942dda1bcd..29e2bd5cc5af 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -73,23 +73,11 @@ config NET_DSA_TAG_MTK
Say Y or M if you want to enable support for tagging frames for
Mediatek switches.
-config NET_DSA_TAG_KSZ_COMMON
- tristate
- default n
-
config NET_DSA_TAG_KSZ
- tristate "Tag driver for Microchip 9893 family of switches"
- select NET_DSA_TAG_KSZ_COMMON
- help
- Say Y if you want to enable support for tagging frames for the
- Microchip 9893 family of switches.
-
-config NET_DSA_TAG_KSZ9477
- tristate "Tag driver for Microchip 9477 family of switches"
- select NET_DSA_TAG_KSZ_COMMON
+ tristate "Tag driver for Microchip 8795/9477/9893 families of switches"
help
Say Y if you want to enable support for tagging frames for the
- Microchip 9477 family of switches.
+ Microchip 8795/9477/9893 families of switches.
config NET_DSA_TAG_QCA
tristate "Tag driver for Qualcomm Atheros QCA8K switches"
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index c342f54715ba..2c6d286f0511 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_NET_DSA_TAG_BRCM_COMMON) += tag_brcm.o
obj-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o
obj-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
obj-$(CONFIG_NET_DSA_TAG_GSWIP) += tag_gswip.o
-obj-$(CONFIG_NET_DSA_TAG_KSZ_COMMON) += tag_ksz.o
+obj-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o
obj-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o
obj-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o
obj-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 3abd173ebacb..73002022c9d8 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -254,88 +254,109 @@ static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst)
static int dsa_port_setup(struct dsa_port *dp)
{
- enum devlink_port_flavour flavour;
struct dsa_switch *ds = dp->ds;
struct dsa_switch_tree *dst = ds->dst;
+ const unsigned char *id = (const unsigned char *)&dst->index;
+ const unsigned char len = sizeof(dst->index);
+ struct devlink_port *dlp = &dp->devlink_port;
+ bool dsa_port_link_registered = false;
+ bool devlink_port_registered = false;
+ struct devlink *dl = ds->devlink;
+ bool dsa_port_enabled = false;
int err = 0;
- if (dp->type == DSA_PORT_TYPE_UNUSED)
- return 0;
-
- memset(&dp->devlink_port, 0, sizeof(dp->devlink_port));
- dp->mac = of_get_mac_address(dp->dn);
-
- switch (dp->type) {
- case DSA_PORT_TYPE_CPU:
- flavour = DEVLINK_PORT_FLAVOUR_CPU;
- break;
- case DSA_PORT_TYPE_DSA:
- flavour = DEVLINK_PORT_FLAVOUR_DSA;
- break;
- case DSA_PORT_TYPE_USER: /* fall-through */
- default:
- flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
- break;
- }
-
- /* dp->index is used now as port_number. However
- * CPU and DSA ports should have separate numbering
- * independent from front panel port numbers.
- */
- devlink_port_attrs_set(&dp->devlink_port, flavour,
- dp->index, false, 0,
- (const char *) &dst->index, sizeof(dst->index));
- err = devlink_port_register(ds->devlink, &dp->devlink_port,
- dp->index);
- if (err)
- return err;
-
switch (dp->type) {
case DSA_PORT_TYPE_UNUSED:
+ dsa_port_disable(dp);
break;
case DSA_PORT_TYPE_CPU:
+ memset(dlp, 0, sizeof(*dlp));
+ devlink_port_attrs_set(dlp, DEVLINK_PORT_FLAVOUR_CPU,
+ dp->index, false, 0, id, len);
+ err = devlink_port_register(dl, dlp, dp->index);
+ if (err)
+ break;
+ devlink_port_registered = true;
+
err = dsa_port_link_register_of(dp);
if (err)
- dev_err(ds->dev, "failed to setup link for port %d.%d\n",
- ds->index, dp->index);
+ break;
+ dsa_port_link_registered = true;
+
+ err = dsa_port_enable(dp, NULL);
+ if (err)
+ break;
+ dsa_port_enabled = true;
+
break;
case DSA_PORT_TYPE_DSA:
+ memset(dlp, 0, sizeof(*dlp));
+ devlink_port_attrs_set(dlp, DEVLINK_PORT_FLAVOUR_DSA,
+ dp->index, false, 0, id, len);
+ err = devlink_port_register(dl, dlp, dp->index);
+ if (err)
+ break;
+ devlink_port_registered = true;
+
err = dsa_port_link_register_of(dp);
if (err)
- dev_err(ds->dev, "failed to setup link for port %d.%d\n",
- ds->index, dp->index);
+ break;
+ dsa_port_link_registered = true;
+
+ err = dsa_port_enable(dp, NULL);
+ if (err)
+ break;
+ dsa_port_enabled = true;
+
break;
case DSA_PORT_TYPE_USER:
+ memset(dlp, 0, sizeof(*dlp));
+ devlink_port_attrs_set(dlp, DEVLINK_PORT_FLAVOUR_PHYSICAL,
+ dp->index, false, 0, id, len);
+ err = devlink_port_register(dl, dlp, dp->index);
+ if (err)
+ break;
+ devlink_port_registered = true;
+
+ dp->mac = of_get_mac_address(dp->dn);
err = dsa_slave_create(dp);
if (err)
- dev_err(ds->dev, "failed to create slave for port %d.%d\n",
- ds->index, dp->index);
- else
- devlink_port_type_eth_set(&dp->devlink_port, dp->slave);
+ break;
+
+ devlink_port_type_eth_set(dlp, dp->slave);
break;
}
- if (err)
- devlink_port_unregister(&dp->devlink_port);
+ if (err && dsa_port_enabled)
+ dsa_port_disable(dp);
+ if (err && dsa_port_link_registered)
+ dsa_port_link_unregister_of(dp);
+ if (err && devlink_port_registered)
+ devlink_port_unregister(dlp);
return err;
}
static void dsa_port_teardown(struct dsa_port *dp)
{
- if (dp->type != DSA_PORT_TYPE_UNUSED)
- devlink_port_unregister(&dp->devlink_port);
+ struct devlink_port *dlp = &dp->devlink_port;
switch (dp->type) {
case DSA_PORT_TYPE_UNUSED:
break;
case DSA_PORT_TYPE_CPU:
+ dsa_port_disable(dp);
dsa_tag_driver_put(dp->tag_ops);
- /* fall-through */
+ devlink_port_unregister(dlp);
+ dsa_port_link_unregister_of(dp);
+ break;
case DSA_PORT_TYPE_DSA:
+ dsa_port_disable(dp);
+ devlink_port_unregister(dlp);
dsa_port_link_unregister_of(dp);
break;
case DSA_PORT_TYPE_USER:
+ devlink_port_unregister(dlp);
if (dp->slave) {
dsa_slave_destroy(dp->slave);
dp->slave = NULL;
@@ -623,6 +644,8 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master)
tag_protocol = ds->ops->get_tag_protocol(ds, dp->index);
tag_ops = dsa_tag_driver_get(tag_protocol);
if (IS_ERR(tag_ops)) {
+ if (PTR_ERR(tag_ops) == -ENOPROTOOPT)
+ return -EPROBE_DEFER;
dev_warn(ds->dev, "No tagger for this switch\n");
return PTR_ERR(tag_ops);
}
@@ -832,20 +855,6 @@ struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n)
if (!ds)
return NULL;
- /* We avoid allocating memory outside dsa_switch
- * if it is not needed.
- */
- if (n <= sizeof(ds->_bitmap) * 8) {
- ds->bitmap = &ds->_bitmap;
- } else {
- ds->bitmap = devm_kcalloc(dev,
- BITS_TO_LONGS(n),
- sizeof(unsigned long),
- GFP_KERNEL);
- if (unlikely(!ds->bitmap))
- return NULL;
- }
-
ds->dev = dev;
ds->num_ports = n;
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 4b52f8bac5e1..a8e52c9967f4 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -8,6 +8,70 @@
#include "dsa_priv.h"
+static int dsa_master_get_regs_len(struct net_device *dev)
+{
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+ const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops;
+ struct dsa_switch *ds = cpu_dp->ds;
+ int port = cpu_dp->index;
+ int ret = 0;
+ int len;
+
+ if (ops->get_regs_len) {
+ len = ops->get_regs_len(dev);
+ if (len < 0)
+ return len;
+ ret += len;
+ }
+
+ ret += sizeof(struct ethtool_drvinfo);
+ ret += sizeof(struct ethtool_regs);
+
+ if (ds->ops->get_regs_len) {
+ len = ds->ops->get_regs_len(ds, port);
+ if (len < 0)
+ return len;
+ ret += len;
+ }
+
+ return ret;
+}
+
+static void dsa_master_get_regs(struct net_device *dev,
+ struct ethtool_regs *regs, void *data)
+{
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+ const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops;
+ struct dsa_switch *ds = cpu_dp->ds;
+ struct ethtool_drvinfo *cpu_info;
+ struct ethtool_regs *cpu_regs;
+ int port = cpu_dp->index;
+ int len;
+
+ if (ops->get_regs_len && ops->get_regs) {
+ len = ops->get_regs_len(dev);
+ if (len < 0)
+ return;
+ regs->len = len;
+ ops->get_regs(dev, regs, data);
+ data += regs->len;
+ }
+
+ cpu_info = (struct ethtool_drvinfo *)data;
+ strlcpy(cpu_info->driver, "dsa", sizeof(cpu_info->driver));
+ data += sizeof(*cpu_info);
+ cpu_regs = (struct ethtool_regs *)data;
+ data += sizeof(*cpu_regs);
+
+ if (ds->ops->get_regs_len && ds->ops->get_regs) {
+ len = ds->ops->get_regs_len(ds, port);
+ if (len < 0)
+ return;
+ cpu_regs->len = len;
+ ds->ops->get_regs(ds, port, cpu_regs, data);
+ }
+}
+
static void dsa_master_get_ethtool_stats(struct net_device *dev,
struct ethtool_stats *stats,
uint64_t *data)
@@ -147,6 +211,8 @@ static int dsa_master_ethtool_setup(struct net_device *dev)
if (cpu_dp->orig_ethtool_ops)
memcpy(ops, cpu_dp->orig_ethtool_ops, sizeof(*ops));
+ ops->get_regs_len = dsa_master_get_regs_len;
+ ops->get_regs = dsa_master_get_regs;
ops->get_sset_count = dsa_master_get_sset_count;
ops->get_ethtool_stats = dsa_master_get_ethtool_stats;
ops->get_strings = dsa_master_get_strings;
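Sketch of the blob layout dsa_master_get_regs() produces, from the point of view of whoever parses the ETHTOOL_GREGS dump: the master's own registers (if any), a struct ethtool_drvinfo whose .driver reads "dsa" as a marker, a struct ethtool_regs header describing the CPU port dump, then the switch-provided registers. Offsets only, no validation:

#include <linux/ethtool.h>
#include <linux/printk.h>

static void dsa_master_regs_walk_sketch(void *data, int master_len)
{
	struct ethtool_drvinfo *info;
	struct ethtool_regs *cpu_regs;
	void *p = data;

	p += master_len;	/* registers of the master device itself */
	info = p;		/* marker: info->driver == "dsa" */
	p += sizeof(*info);
	cpu_regs = p;		/* header for the switch dump */
	p += sizeof(*cpu_regs);

	pr_debug("dsa regs marker %s, cpu port dump %u bytes at %p\n",
		 info->driver, cpu_regs->len, p);
}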
diff --git a/net/dsa/port.c b/net/dsa/port.c
index f071acf2842b..9b54e5a76297 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -348,10 +348,7 @@ int dsa_port_vlan_add(struct dsa_port *dp,
.vlan = vlan,
};
- if (!dp->bridge_dev || br_vlan_enabled(dp->bridge_dev))
- return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info);
-
- return 0;
+ return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info);
}
int dsa_port_vlan_del(struct dsa_port *dp,
@@ -363,10 +360,7 @@ int dsa_port_vlan_del(struct dsa_port *dp,
.vlan = vlan,
};
- if (!dp->bridge_dev || br_vlan_enabled(dp->bridge_dev))
- return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info);
-
- return 0;
+ return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info);
}
int dsa_port_vid_add(struct dsa_port *dp, u16 vid, u16 flags)
@@ -382,8 +376,8 @@ int dsa_port_vid_add(struct dsa_port *dp, u16 vid, u16 flags)
trans.ph_prepare = true;
err = dsa_port_vlan_add(dp, &vlan, &trans);
- if (err == -EOPNOTSUPP)
- return 0;
+ if (err)
+ return err;
trans.ph_prepare = false;
return dsa_port_vlan_add(dp, &vlan, &trans);
@@ -538,10 +532,6 @@ static int dsa_port_setup_phy_of(struct dsa_port *dp, bool enable)
return PTR_ERR(phydev);
if (enable) {
- err = genphy_config_init(phydev);
- if (err < 0)
- goto err_put_dev;
-
err = genphy_resume(phydev);
if (err < 0)
goto err_put_dev;
@@ -589,7 +579,6 @@ static int dsa_port_fixed_link_register_of(struct dsa_port *dp)
mode = PHY_INTERFACE_MODE_NA;
phydev->interface = mode;
- genphy_config_init(phydev);
genphy_read_status(phydev);
if (ds->ops->adjust_link)
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 33f41178afcc..75d58229a4bd 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -312,6 +312,39 @@ static int dsa_slave_port_attr_set(struct net_device *dev,
return ret;
}
+static int dsa_slave_vlan_add(struct net_device *dev,
+ const struct switchdev_obj *obj,
+ struct switchdev_trans *trans)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct switchdev_obj_port_vlan vlan;
+ int err;
+
+ if (obj->orig_dev != dev)
+ return -EOPNOTSUPP;
+
+ if (dp->bridge_dev && !br_vlan_enabled(dp->bridge_dev))
+ return 0;
+
+ vlan = *SWITCHDEV_OBJ_PORT_VLAN(obj);
+
+ err = dsa_port_vlan_add(dp, &vlan, trans);
+ if (err)
+ return err;
+
+ /* We need the dedicated CPU port to be a member of the VLAN as well.
+ * Even though drivers often handle CPU membership in special ways,
+ * it doesn't make sense to program a PVID, so clear this flag.
+ */
+ vlan.flags &= ~BRIDGE_VLAN_INFO_PVID;
+
+ err = dsa_port_vlan_add(dp->cpu_dp, &vlan, trans);
+ if (err)
+ return err;
+
+ return 0;
+}
+
static int dsa_slave_port_obj_add(struct net_device *dev,
const struct switchdev_obj *obj,
struct switchdev_trans *trans,
@@ -339,10 +372,7 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
trans);
break;
case SWITCHDEV_OBJ_ID_PORT_VLAN:
- if (obj->orig_dev != dev)
- return -EOPNOTSUPP;
- err = dsa_port_vlan_add(dp, SWITCHDEV_OBJ_PORT_VLAN(obj),
- trans);
+ err = dsa_slave_vlan_add(dev, obj, trans);
break;
default:
err = -EOPNOTSUPP;
@@ -352,6 +382,23 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
return err;
}
+static int dsa_slave_vlan_del(struct net_device *dev,
+ const struct switchdev_obj *obj)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+
+ if (obj->orig_dev != dev)
+ return -EOPNOTSUPP;
+
+ if (dp->bridge_dev && !br_vlan_enabled(dp->bridge_dev))
+ return 0;
+
+ /* Do not deprogram the CPU port as it may be shared with other user
+ * ports which can be members of this VLAN as well.
+ */
+ return dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj));
+}
+
static int dsa_slave_port_obj_del(struct net_device *dev,
const struct switchdev_obj *obj)
{
@@ -371,9 +418,7 @@ static int dsa_slave_port_obj_del(struct net_device *dev,
err = dsa_port_mdb_del(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj));
break;
case SWITCHDEV_OBJ_ID_PORT_VLAN:
- if (obj->orig_dev != dev)
- return -EOPNOTSUPP;
- err = dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj));
+ err = dsa_slave_vlan_del(dev, obj);
break;
default:
err = -EOPNOTSUPP;
@@ -990,12 +1035,16 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
void *type_data)
{
- switch (type) {
- case TC_SETUP_BLOCK:
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+
+ if (type == TC_SETUP_BLOCK)
return dsa_slave_setup_tc_block(dev, type_data);
- default:
+
+ if (!ds->ops->port_setup_tc)
return -EOPNOTSUPP;
- }
+
+ return ds->ops->port_setup_tc(ds, dp->index, type, type_data);
}
static void dsa_slave_get_stats64(struct net_device *dev,
@@ -1073,6 +1122,9 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
* need to emulate the switchdev prepare + commit phase.
*/
if (dp->bridge_dev) {
+ if (!br_vlan_enabled(dp->bridge_dev))
+ return 0;
+
/* br_vlan_get_info() returns -EINVAL or -ENOENT if the
* device, respectively the VID is not found, returning
* 0 means success, which is a failure for us here.
@@ -1082,8 +1134,15 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
return -EBUSY;
}
- /* This API only allows programming tagged, non-PVID VIDs */
- return dsa_port_vid_add(dp, vid, 0);
+ ret = dsa_port_vid_add(dp, vid, 0);
+ if (ret)
+ return ret;
+
+ ret = dsa_port_vid_add(dp->cpu_dp, vid, 0);
+ if (ret)
+ return ret;
+
+ return 0;
}
static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
@@ -1097,6 +1156,9 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
* need to emulate the switchdev prepare + commit phase.
*/
if (dp->bridge_dev) {
+ if (!br_vlan_enabled(dp->bridge_dev))
+ return 0;
+
/* br_vlan_get_info() returns -EINVAL or -ENOENT if the
* device, respectively the VID is not found, returning
* 0 means success, which is a failure for us here.
@@ -1106,11 +1168,10 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
return -EBUSY;
}
- ret = dsa_port_vid_del(dp, vid);
- if (ret == -EOPNOTSUPP)
- ret = 0;
-
- return ret;
+ /* Do not deprogram the CPU port as it may be shared with other user
+ * ports which can be members of this VLAN as well.
+ */
+ return dsa_port_vid_del(dp, vid);
}
static const struct ethtool_ops dsa_slave_ethtool_ops = {
@@ -1357,8 +1418,9 @@ int dsa_slave_create(struct dsa_port *port)
if (slave_dev == NULL)
return -ENOMEM;
- slave_dev->features = master->vlan_features | NETIF_F_HW_TC |
- NETIF_F_HW_VLAN_CTAG_FILTER;
+ slave_dev->features = master->vlan_features | NETIF_F_HW_TC;
+ if (ds->ops->port_vlan_add && ds->ops->port_vlan_del)
+ slave_dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
slave_dev->hw_features |= NETIF_F_HW_TC;
slave_dev->ethtool_ops = &dsa_slave_ethtool_ops;
if (!IS_ERR_OR_NULL(port->mac))
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 09d9286b27cc..6a9607518823 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -128,57 +128,51 @@ static int dsa_switch_fdb_del(struct dsa_switch *ds,
return ds->ops->port_fdb_del(ds, port, info->addr, info->vid);
}
-static int
-dsa_switch_mdb_prepare_bitmap(struct dsa_switch *ds,
- const struct switchdev_obj_port_mdb *mdb,
- const unsigned long *bitmap)
+static bool dsa_switch_mdb_match(struct dsa_switch *ds, int port,
+ struct dsa_notifier_mdb_info *info)
+{
+ if (ds->index == info->sw_index && port == info->port)
+ return true;
+
+ if (dsa_is_dsa_port(ds, port))
+ return true;
+
+ return false;
+}
+
+static int dsa_switch_mdb_prepare(struct dsa_switch *ds,
+ struct dsa_notifier_mdb_info *info)
{
int port, err;
if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add)
return -EOPNOTSUPP;
- for_each_set_bit(port, bitmap, ds->num_ports) {
- err = ds->ops->port_mdb_prepare(ds, port, mdb);
- if (err)
- return err;
+ for (port = 0; port < ds->num_ports; port++) {
+ if (dsa_switch_mdb_match(ds, port, info)) {
+ err = ds->ops->port_mdb_prepare(ds, port, info->mdb);
+ if (err)
+ return err;
+ }
}
return 0;
}
-static void dsa_switch_mdb_add_bitmap(struct dsa_switch *ds,
- const struct switchdev_obj_port_mdb *mdb,
- const unsigned long *bitmap)
-{
- int port;
-
- if (!ds->ops->port_mdb_add)
- return;
-
- for_each_set_bit(port, bitmap, ds->num_ports)
- ds->ops->port_mdb_add(ds, port, mdb);
-}
-
static int dsa_switch_mdb_add(struct dsa_switch *ds,
struct dsa_notifier_mdb_info *info)
{
- const struct switchdev_obj_port_mdb *mdb = info->mdb;
- struct switchdev_trans *trans = info->trans;
int port;
- /* Build a mask of Multicast group members */
- bitmap_zero(ds->bitmap, ds->num_ports);
- if (ds->index == info->sw_index)
- set_bit(info->port, ds->bitmap);
- for (port = 0; port < ds->num_ports; port++)
- if (dsa_is_dsa_port(ds, port))
- set_bit(port, ds->bitmap);
+ if (switchdev_trans_ph_prepare(info->trans))
+ return dsa_switch_mdb_prepare(ds, info);
- if (switchdev_trans_ph_prepare(trans))
- return dsa_switch_mdb_prepare_bitmap(ds, mdb, ds->bitmap);
+ if (!ds->ops->port_mdb_add)
+ return 0;
- dsa_switch_mdb_add_bitmap(ds, mdb, ds->bitmap);
+ for (port = 0; port < ds->num_ports; port++)
+ if (dsa_switch_mdb_match(ds, port, info))
+ ds->ops->port_mdb_add(ds, port, info->mdb);
return 0;
}
@@ -186,13 +180,11 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
static int dsa_switch_mdb_del(struct dsa_switch *ds,
struct dsa_notifier_mdb_info *info)
{
- const struct switchdev_obj_port_mdb *mdb = info->mdb;
-
if (!ds->ops->port_mdb_del)
return -EOPNOTSUPP;
if (ds->index == info->sw_index)
- return ds->ops->port_mdb_del(ds, info->port, mdb);
+ return ds->ops->port_mdb_del(ds, info->port, info->mdb);
return 0;
}
@@ -234,59 +226,55 @@ static int dsa_port_vlan_check(struct dsa_switch *ds, int port,
(void *)vlan);
}
-static int
-dsa_switch_vlan_prepare_bitmap(struct dsa_switch *ds,
- const struct switchdev_obj_port_vlan *vlan,
- const unsigned long *bitmap)
+static bool dsa_switch_vlan_match(struct dsa_switch *ds, int port,
+ struct dsa_notifier_vlan_info *info)
+{
+ if (ds->index == info->sw_index && port == info->port)
+ return true;
+
+ if (dsa_is_dsa_port(ds, port))
+ return true;
+
+ return false;
+}
+
+static int dsa_switch_vlan_prepare(struct dsa_switch *ds,
+ struct dsa_notifier_vlan_info *info)
{
int port, err;
if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add)
return -EOPNOTSUPP;
- for_each_set_bit(port, bitmap, ds->num_ports) {
- err = dsa_port_vlan_check(ds, port, vlan);
- if (err)
- return err;
+ for (port = 0; port < ds->num_ports; port++) {
+ if (dsa_switch_vlan_match(ds, port, info)) {
+ err = dsa_port_vlan_check(ds, port, info->vlan);
+ if (err)
+ return err;
- err = ds->ops->port_vlan_prepare(ds, port, vlan);
- if (err)
- return err;
+ err = ds->ops->port_vlan_prepare(ds, port, info->vlan);
+ if (err)
+ return err;
+ }
}
return 0;
}
-static void
-dsa_switch_vlan_add_bitmap(struct dsa_switch *ds,
- const struct switchdev_obj_port_vlan *vlan,
- const unsigned long *bitmap)
-{
- int port;
-
- for_each_set_bit(port, bitmap, ds->num_ports)
- ds->ops->port_vlan_add(ds, port, vlan);
-}
-
static int dsa_switch_vlan_add(struct dsa_switch *ds,
struct dsa_notifier_vlan_info *info)
{
- const struct switchdev_obj_port_vlan *vlan = info->vlan;
- struct switchdev_trans *trans = info->trans;
int port;
- /* Build a mask of VLAN members */
- bitmap_zero(ds->bitmap, ds->num_ports);
- if (ds->index == info->sw_index)
- set_bit(info->port, ds->bitmap);
- for (port = 0; port < ds->num_ports; port++)
- if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
- set_bit(port, ds->bitmap);
+ if (switchdev_trans_ph_prepare(info->trans))
+ return dsa_switch_vlan_prepare(ds, info);
- if (switchdev_trans_ph_prepare(trans))
- return dsa_switch_vlan_prepare_bitmap(ds, vlan, ds->bitmap);
+ if (!ds->ops->port_vlan_add)
+ return 0;
- dsa_switch_vlan_add_bitmap(ds, vlan, ds->bitmap);
+ for (port = 0; port < ds->num_ports; port++)
+ if (dsa_switch_vlan_match(ds, port, info))
+ ds->ops->port_vlan_add(ds, port, info->vlan);
return 0;
}
@@ -294,14 +282,15 @@ static int dsa_switch_vlan_add(struct dsa_switch *ds,
static int dsa_switch_vlan_del(struct dsa_switch *ds,
struct dsa_notifier_vlan_info *info)
{
- const struct switchdev_obj_port_vlan *vlan = info->vlan;
-
if (!ds->ops->port_vlan_del)
return -EOPNOTSUPP;
if (ds->index == info->sw_index)
- return ds->ops->port_vlan_del(ds, info->port, vlan);
+ return ds->ops->port_vlan_del(ds, info->port, info->vlan);
+ /* Do not deprogram the DSA links as they may be used as conduit
+ * for other VLAN members in the fabric.
+ */
return 0;
}
diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index 67a1bc635a7b..9c1cc2482b68 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -93,6 +93,79 @@ int dsa_8021q_rx_source_port(u16 vid)
}
EXPORT_SYMBOL_GPL(dsa_8021q_rx_source_port);
+static int dsa_8021q_restore_pvid(struct dsa_switch *ds, int port)
+{
+ struct bridge_vlan_info vinfo;
+ struct net_device *slave;
+ u16 pvid;
+ int err;
+
+ if (!dsa_is_user_port(ds, port))
+ return 0;
+
+ slave = ds->ports[port].slave;
+
+ err = br_vlan_get_pvid(slave, &pvid);
+ if (err < 0)
+ /* There is no pvid on the bridge for this port, which is
+ * perfectly valid. Nothing to restore, bye-bye!
+ */
+ return 0;
+
+ err = br_vlan_get_info(slave, pvid, &vinfo);
+ if (err < 0) {
+ dev_err(ds->dev, "Couldn't determine PVID attributes\n");
+ return err;
+ }
+
+ return dsa_port_vid_add(&ds->ports[port], pvid, vinfo.flags);
+}
+
+/* If @enabled is true, installs @vid with @flags into the switch port's HW

+ * filter.
+ * If @enabled is false, deletes @vid (ignoring @flags) from the port. If the
+ * user had explicitly configured this @vid through the bridge core, the @vid
+ * is installed again, but this time with the flags from the bridge layer.
+ */
+static int dsa_8021q_vid_apply(struct dsa_switch *ds, int port, u16 vid,
+ u16 flags, bool enabled)
+{
+ struct dsa_port *dp = &ds->ports[port];
+ struct bridge_vlan_info vinfo;
+ int err;
+
+ if (enabled)
+ return dsa_port_vid_add(dp, vid, flags);
+
+ err = dsa_port_vid_del(dp, vid);
+ if (err < 0)
+ return err;
+
+ /* Nothing to restore from the bridge for a non-user port.
+ * The CPU port VLANs are restored implicitly with the user ports,
+ * similar to how the bridge does in dsa_slave_vlan_add and
+ * dsa_slave_vlan_del.
+ */
+ if (!dsa_is_user_port(ds, port))
+ return 0;
+
+ err = br_vlan_get_info(dp->slave, vid, &vinfo);
+ /* Couldn't determine bridge attributes for this vid,
+ * it means the bridge had not configured it.
+ */
+ if (err < 0)
+ return 0;
+
+ /* Restore the VID from the bridge */
+ err = dsa_port_vid_add(dp, vid, vinfo.flags);
+ if (err < 0)
+ return err;
+
+ vinfo.flags &= ~BRIDGE_VLAN_INFO_PVID;
+
+ return dsa_port_vid_add(dp->cpu_dp, vid, vinfo.flags);
+}
+
/* RX VLAN tagging (left) and TX VLAN tagging (right) setup shown for a single
* front-panel switch port (here swp0).
*
@@ -148,8 +221,6 @@ EXPORT_SYMBOL_GPL(dsa_8021q_rx_source_port);
int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int port, bool enabled)
{
int upstream = dsa_upstream_port(ds, port);
- struct dsa_port *dp = &ds->ports[port];
- struct dsa_port *upstream_dp = &ds->ports[upstream];
u16 rx_vid = dsa_8021q_rx_vid(ds, port);
u16 tx_vid = dsa_8021q_tx_vid(ds, port);
int i, err;
@@ -166,7 +237,6 @@ int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int port, bool enabled)
* restrictions, so there are no concerns about leaking traffic.
*/
for (i = 0; i < ds->num_ports; i++) {
- struct dsa_port *other_dp = &ds->ports[i];
u16 flags;
if (i == upstream)
@@ -179,10 +249,7 @@ int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int port, bool enabled)
/* The RX VID is a regular VLAN on all others */
flags = BRIDGE_VLAN_INFO_UNTAGGED;
- if (enabled)
- err = dsa_port_vid_add(other_dp, rx_vid, flags);
- else
- err = dsa_port_vid_del(other_dp, rx_vid);
+ err = dsa_8021q_vid_apply(ds, i, rx_vid, flags, enabled);
if (err) {
dev_err(ds->dev, "Failed to apply RX VID %d to port %d: %d\n",
rx_vid, port, err);
@@ -193,10 +260,7 @@ int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int port, bool enabled)
/* CPU port needs to see this port's RX VID
* as tagged egress.
*/
- if (enabled)
- err = dsa_port_vid_add(upstream_dp, rx_vid, 0);
- else
- err = dsa_port_vid_del(upstream_dp, rx_vid);
+ err = dsa_8021q_vid_apply(ds, upstream, rx_vid, 0, enabled);
if (err) {
dev_err(ds->dev, "Failed to apply RX VID %d to port %d: %d\n",
rx_vid, port, err);
@@ -204,26 +268,24 @@ int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int port, bool enabled)
}
/* Finally apply the TX VID on this port and on the CPU port */
- if (enabled)
- err = dsa_port_vid_add(dp, tx_vid, BRIDGE_VLAN_INFO_UNTAGGED);
- else
- err = dsa_port_vid_del(dp, tx_vid);
+ err = dsa_8021q_vid_apply(ds, port, tx_vid, BRIDGE_VLAN_INFO_UNTAGGED,
+ enabled);
if (err) {
dev_err(ds->dev, "Failed to apply TX VID %d on port %d: %d\n",
tx_vid, port, err);
return err;
}
- if (enabled)
- err = dsa_port_vid_add(upstream_dp, tx_vid, 0);
- else
- err = dsa_port_vid_del(upstream_dp, tx_vid);
+ err = dsa_8021q_vid_apply(ds, upstream, tx_vid, 0, enabled);
if (err) {
dev_err(ds->dev, "Failed to apply TX VID %d on port %d: %d\n",
tx_vid, upstream, err);
return err;
}
- return 0;
+ if (!enabled)
+ err = dsa_8021q_restore_pvid(ds, port);
+
+ return err;
}
EXPORT_SYMBOL_GPL(dsa_port_setup_8021q_tagging);
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index b4872b87d4a6..73605bcbb385 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -70,6 +70,67 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb,
}
/*
+ * For Ingress (Host -> KSZ8795), 1 byte is added before FCS.
+ * ---------------------------------------------------------------------------
+ * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag(1byte)|FCS(4bytes)
+ * ---------------------------------------------------------------------------
+ * tag : each bit represents port (eg, 0x01=port1, 0x02=port2, 0x10=port5)
+ *
+ * For Egress (KSZ8795 -> Host), 1 byte is added before FCS.
+ * ---------------------------------------------------------------------------
+ * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|FCS(4bytes)
+ * ---------------------------------------------------------------------------
+ * tag0 : zero-based value represents port
+ * (eg, 0x00=port1, 0x02=port3, 0x06=port7)
+ */
+
+#define KSZ8795_INGRESS_TAG_LEN 1
+
+#define KSZ8795_TAIL_TAG_OVERRIDE BIT(6)
+#define KSZ8795_TAIL_TAG_LOOKUP BIT(7)
+
+static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct sk_buff *nskb;
+ u8 *tag;
+ u8 *addr;
+
+ nskb = ksz_common_xmit(skb, dev, KSZ8795_INGRESS_TAG_LEN);
+ if (!nskb)
+ return NULL;
+
+ /* Tag encoding */
+ tag = skb_put(nskb, KSZ8795_INGRESS_TAG_LEN);
+ addr = skb_mac_header(nskb);
+
+ *tag = 1 << dp->index;
+ if (is_link_local_ether_addr(addr))
+ *tag |= KSZ8795_TAIL_TAG_OVERRIDE;
+
+ return nskb;
+}
+
+static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt)
+{
+ u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
+
+ return ksz_common_rcv(skb, dev, tag[0] & 7, KSZ_EGRESS_TAG_LEN);
+}
+
+static const struct dsa_device_ops ksz8795_netdev_ops = {
+ .name = "ksz8795",
+ .proto = DSA_TAG_PROTO_KSZ8795,
+ .xmit = ksz8795_xmit,
+ .rcv = ksz8795_rcv,
+ .overhead = KSZ8795_INGRESS_TAG_LEN,
+};
+
+DSA_TAG_DRIVER(ksz8795_netdev_ops);
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ8795);
+
+/*
* For Ingress (Host -> KSZ9477), 2 bytes are added before FCS.
* ---------------------------------------------------------------------------
* DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|tag1(1byte)|FCS(4bytes)
@@ -183,6 +244,7 @@ DSA_TAG_DRIVER(ksz9893_netdev_ops);
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ9893);
static struct dsa_tag_driver *dsa_tag_driver_array[] = {
+ &DSA_TAG_DRIVER_NAME(ksz8795_netdev_ops),
&DSA_TAG_DRIVER_NAME(ksz9477_netdev_ops),
&DSA_TAG_DRIVER_NAME(ksz9893_netdev_ops),
};
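Worked example of the KSZ8795 tail-tag encoding documented above: towards the switch the single tag byte is a port bitmap, and from the switch it carries a zero-based source-port index in its low bits.

#include <linux/types.h>

static u8 ksz8795_tx_tag_example(int port_index)	/* dp->index */
{
	return 1 << port_index;		/* index 4 -> 0x10, i.e. "port5" */
}

static int ksz8795_rx_port_example(u8 tag0)
{
	return tag0 & 7;		/* 0x02 -> index 2, i.e. "port3" */
}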
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 47ee88163a9d..9c9aff3e52cf 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -89,7 +89,8 @@ static struct sk_buff *sja1105_xmit(struct sk_buff *skb,
struct dsa_port *dp = dsa_slave_to_port(netdev);
struct dsa_switch *ds = dp->ds;
u16 tx_vid = dsa_8021q_tx_vid(ds, dp->index);
- u8 pcp = skb->priority;
+ u16 queue_mapping = skb_get_queue_mapping(skb);
+ u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
/* Transmitting management traffic does not rely upon switch tagging,
* but instead SPI-installed management routes. Part 2 of this
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index ed2301ef872e..70f92aaca411 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1845,13 +1845,8 @@ static __net_init int inet_init_net(struct net *net)
return 0;
}
-static __net_exit void inet_exit_net(struct net *net)
-{
-}
-
static __net_initdata struct pernet_operations af_inet_ops = {
.init = inet_init_net,
- .exit = inet_exit_net,
};
static int __init init_inet_pernet_ops(void)
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 7bd29e694603..9a0fe0c2fa02 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -15,6 +15,7 @@
#include <net/sock.h>
#include <net/route.h>
#include <net/tcp_states.h>
+#include <net/sock_reuseport.h>
int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
@@ -69,6 +70,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
}
inet->inet_daddr = fl4->daddr;
inet->inet_dport = usin->sin_port;
+ reuseport_has_conns(sk, true);
sk->sk_state = TCP_ESTABLISHED;
sk_set_txhash(sk);
inet->inet_id = jiffies;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index cc7ef0d05bbd..5eb73775c3f7 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1266,6 +1266,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
cork->length = 0;
cork->ttl = ipc->ttl;
cork->tos = ipc->tos;
+ cork->mark = ipc->sockc.mark;
cork->priority = ipc->priority;
cork->transmit_time = ipc->sockc.transmit_time;
cork->tx_flags = 0;
@@ -1529,7 +1530,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
}
skb->priority = (cork->tos != -1) ? cork->priority: sk->sk_priority;
- skb->mark = sk->sk_mark;
+ skb->mark = cork->mark;
skb->tstamp = cork->transmit_time;
/*
* Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec
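Userspace sketch of what the new cork->mark plumbing enables, assuming the SOL_SOCKET/SO_MARK ancillary-message support already present in the socket cmsg layer (CAP_NET_ADMIN or CAP_NET_RAW required): a per-sendmsg() mark now ends up in skb->mark on corked output instead of being overridden by the socket-wide sk_mark. Destination setup and error handling omitted:

#include <stdint.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

static ssize_t sendmsg_with_mark(int fd, const void *buf, size_t len,
				 uint32_t mark)
{
	char cbuf[CMSG_SPACE(sizeof(mark))];
	struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = cbuf,
		.msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cmsg;

	memset(cbuf, 0, sizeof(cbuf));
	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SO_MARK;
	cmsg->cmsg_len = CMSG_LEN(sizeof(mark));
	memcpy(CMSG_DATA(cmsg), &mark, sizeof(mark));

	return sendmsg(fd, &msg, 0);
}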
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index c07bc82cbbe9..313470f6bb14 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1134,8 +1134,8 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
if (!found) {
/* Create a new entry if allowable */
- if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
- (c = ipmr_cache_alloc_unres()) == NULL) {
+ c = ipmr_cache_alloc_unres();
+ if (!c) {
spin_unlock_bh(&mfc_unres_lock);
kfree_skb(skb);
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 69e76d677f9e..f17b402111ce 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -272,7 +272,7 @@ config IP_NF_TARGET_CLUSTERIP
The CLUSTERIP target allows you to build load-balancing clusters of
network servers without having a dedicated load-balancing
router/server/switch.
-
+
To compile it as a module, choose M here. If unsure, say N.
config IP_NF_TARGET_ECN
@@ -281,7 +281,7 @@ config IP_NF_TARGET_ECN
depends on NETFILTER_ADVANCED
---help---
This option adds a `ECN' target, which can be used in the iptables mangle
- table.
+ table.
You can use this target to remove the ECN bits from the IPv4 header of
an IP packet. This is particularly useful, if you need to work around
@@ -306,7 +306,7 @@ config IP_NF_RAW
This option adds a `raw' table to iptables. This table is the very
first in the netfilter framework and hooks in at the PREROUTING
and OUTPUT chains.
-
+
If you want to compile it as a module, say M here and read
<file:Documentation/kbuild/modules.rst>. If unsure, say `N'.
@@ -318,7 +318,7 @@ config IP_NF_SECURITY
help
This option adds a `security' table to iptables, for use
with Mandatory Access Control (MAC) policy.
-
+
If unsure, say N.
endif # IP_NF_IPTABLES
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index c50e0ec095d2..7c497c78105f 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -31,7 +31,7 @@ obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
# flow table support
obj-$(CONFIG_NF_FLOW_TABLE_IPV4) += nf_flow_table_ipv4.o
-# generic IP tables
+# generic IP tables
obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
# the three instances of ip_tables
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index 0e70f3f65f6f..748dc3ce58d3 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -36,8 +36,8 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
opts.options |= XT_SYNPROXY_OPT_ECN;
opts.options &= info->options;
- opts.mss_encode = opts.mss;
- opts.mss = info->mss;
+ opts.mss_encode = opts.mss_option;
+ opts.mss_option = info->mss;
if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
synproxy_init_timestamp_cookie(info, &opts);
else
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 5fe5a3981d43..fc34fd1668d6 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -1151,7 +1151,7 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh,
.fc_encap_type = cfg->nh_encap_type,
};
u32 tb_id = l3mdev_fib_table(cfg->dev);
- int err = -EINVAL;
+ int err;
err = fib_nh_init(net, fib_nh, &fib_cfg, 1, extack);
if (err) {
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 9d24ef5c5d8f..535427292194 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -781,7 +781,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
} else if (!ipc.oif)
ipc.oif = inet->uc_index;
- flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
+ flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos,
RT_SCOPE_UNIVERSE, sk->sk_protocol,
inet_sk_flowi_flags(sk), faddr, saddr, 0, 0,
sk->sk_uid);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 40a6abbc9cf6..80da5a66d5d7 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -375,7 +375,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
skb_reserve(skb, hlen);
skb->priority = sk->sk_priority;
- skb->mark = sk->sk_mark;
+ skb->mark = sockc->mark;
skb->tstamp = sockc->transmit_time;
skb_dst_set(skb, &rt->dst);
*rtp = NULL;
@@ -623,7 +623,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
}
- flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
+ flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos,
RT_SCOPE_UNIVERSE,
hdrincl ? IPPROTO_RAW : sk->sk_protocol,
inet_sk_flowi_flags(sk) |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 0b980e841927..59ded25acd04 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -820,6 +820,15 @@ static struct ctl_table ipv4_net_table[] = {
.extra2 = &tcp_min_snd_mss_max,
},
{
+ .procname = "tcp_mtu_probe_floor",
+ .data = &init_net.ipv4.sysctl_tcp_mtu_probe_floor,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &tcp_min_snd_mss_min,
+ .extra2 = &tcp_min_snd_mss_max,
+ },
+ {
.procname = "tcp_probe_threshold",
.data = &init_net.ipv4.sysctl_tcp_probe_threshold,
.maxlen = sizeof(int),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 61082065b26a..79c325a07ba5 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1182,7 +1182,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
struct sockcm_cookie sockc;
int flags, err, copied = 0;
int mss_now = 0, size_goal, copied_syn = 0;
- bool process_backlog = false;
+ int process_backlog = 0;
bool zc = false;
long timeo;
@@ -1274,9 +1274,10 @@ new_segment:
if (!sk_stream_memory_free(sk))
goto wait_for_sndbuf;
- if (process_backlog && sk_flush_backlog(sk)) {
- process_backlog = false;
- goto restart;
+ if (unlikely(process_backlog >= 16)) {
+ process_backlog = 0;
+ if (sk_flush_backlog(sk))
+ goto restart;
}
first_skb = tcp_rtx_and_write_queues_empty(sk);
skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
@@ -1284,7 +1285,7 @@ new_segment:
if (!skb)
goto wait_for_memory;
- process_backlog = true;
+ process_backlog++;
skb->ip_summed = CHECKSUM_PARTIAL;
skb_entail(sk, skb);
@@ -1789,19 +1790,21 @@ static int tcp_zerocopy_receive(struct sock *sk,
break;
frags = skb_shinfo(skb)->frags;
while (offset) {
- if (frags->size > offset)
+ if (skb_frag_size(frags) > offset)
goto out;
- offset -= frags->size;
+ offset -= skb_frag_size(frags);
frags++;
}
}
- if (frags->size != PAGE_SIZE || frags->page_offset) {
+ if (skb_frag_size(frags) != PAGE_SIZE || skb_frag_off(frags)) {
int remaining = zc->recv_skip_hint;
+ int size = skb_frag_size(frags);
- while (remaining && (frags->size != PAGE_SIZE ||
- frags->page_offset)) {
- remaining -= frags->size;
+ while (remaining && (size != PAGE_SIZE ||
+ skb_frag_off(frags))) {
+ remaining -= size;
frags++;
+ size = skb_frag_size(frags);
}
zc->recv_skip_hint -= remaining;
break;
@@ -2650,6 +2653,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->rx_opt.saw_tstamp = 0;
tp->rx_opt.dsack = 0;
tp->rx_opt.num_sacks = 0;
+ tp->rcv_ooopack = 0;
/* Clean up fastopen related fields */
@@ -3292,6 +3296,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_bytes_retrans = tp->bytes_retrans;
info->tcpi_dsack_dups = tp->dsack_dups;
info->tcpi_reord_seen = tp->reord_seen;
+ info->tcpi_rcv_ooopack = tp->rcv_ooopack;
+ info->tcpi_snd_wnd = tp->snd_wnd;
unlock_sock_fast(sk, slow);
}
EXPORT_SYMBOL_GPL(tcp_get_info);
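Userspace sketch: both new counters surface through the regular TCP_INFO getsockopt(), assuming uapi headers new enough to carry tcpi_rcv_ooopack and tcpi_snd_wnd in struct tcp_info.

#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/tcp.h>	/* uapi struct tcp_info with the new fields */

static void print_new_tcp_info(int fd)
{
	struct tcp_info ti;
	socklen_t len = sizeof(ti);

	if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &ti, &len) == 0)
		printf("out-of-order packets: %u, send window: %u\n",
		       ti.tcpi_rcv_ooopack, ti.tcpi_snd_wnd);
}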
@@ -3794,8 +3800,8 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
return 1;
for (i = 0; i < shi->nr_frags; ++i) {
- const struct skb_frag_struct *f = &shi->frags[i];
- unsigned int offset = f->page_offset;
+ const skb_frag_t *f = &shi->frags[i];
+ unsigned int offset = skb_frag_off(f);
struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT);
sg_set_page(&sg, page, skb_frag_size(f),
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 56be7d27f208..95b59540eee1 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -346,7 +346,7 @@ static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
/* Calculate bdp based on min RTT and the estimated bottleneck bandwidth:
*
- * bdp = bw * min_rtt * gain
+ * bdp = ceil(bw * min_rtt * gain)
*
* The key factor, gain, controls the amount of queue. While a small gain
* builds a smaller queue, it becomes more vulnerable to noise in RTT
@@ -370,7 +370,9 @@ static u32 bbr_bdp(struct sock *sk, u32 bw, int gain)
w = (u64)bw * bbr->min_rtt_us;
- /* Apply a gain to the given value, then remove the BW_SCALE shift. */
+ /* Apply a gain to the given value, remove the BW_SCALE shift, and
+ * round the value up to avoid a negative feedback loop.
+ */
bdp = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT;
return bdp;
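The added "+ BW_UNIT - 1" is plain integer ceil(): for positive integers, ceil(a / b) == (a + b - 1) / b (what DIV_ROUND_UP() does), so the BDP estimate is never rounded down into a slightly-too-small cwnd/pacing target that would then feed back into the next bandwidth sample.

#include <linux/types.h>

static inline u64 bbr_div_round_up_example(u64 a, u64 b)
{
	return (a + b - 1) / b;	/* e.g. (10 + 4 - 1) / 4 == 3 == ceil(10/4) */
}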
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index a3a386236d93..81a8221d650a 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -81,13 +81,42 @@ static int tcp_diag_put_md5sig(struct sk_buff *skb,
}
#endif
+static int tcp_diag_put_ulp(struct sk_buff *skb, struct sock *sk,
+ const struct tcp_ulp_ops *ulp_ops)
+{
+ struct nlattr *nest;
+ int err;
+
+ nest = nla_nest_start_noflag(skb, INET_DIAG_ULP_INFO);
+ if (!nest)
+ return -EMSGSIZE;
+
+ err = nla_put_string(skb, INET_ULP_INFO_NAME, ulp_ops->name);
+ if (err)
+ goto nla_failure;
+
+ if (ulp_ops->get_info)
+ err = ulp_ops->get_info(sk, skb);
+ if (err)
+ goto nla_failure;
+
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_failure:
+ nla_nest_cancel(skb, nest);
+ return err;
+}
+
static int tcp_diag_get_aux(struct sock *sk, bool net_admin,
struct sk_buff *skb)
{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ int err = 0;
+
#ifdef CONFIG_TCP_MD5SIG
if (net_admin) {
struct tcp_md5sig_info *md5sig;
- int err = 0;
rcu_read_lock();
md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
@@ -99,11 +128,21 @@ static int tcp_diag_get_aux(struct sock *sk, bool net_admin,
}
#endif
+ if (net_admin) {
+ const struct tcp_ulp_ops *ulp_ops;
+
+ ulp_ops = icsk->icsk_ulp_ops;
+ if (ulp_ops)
+ err = tcp_diag_put_ulp(skb, sk, ulp_ops);
+ if (err)
+ return err;
+ }
return 0;
}
static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin)
{
+ struct inet_connection_sock *icsk = inet_csk(sk);
size_t size = 0;
#ifdef CONFIG_TCP_MD5SIG
@@ -124,6 +163,17 @@ static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin)
}
#endif
+ if (net_admin && sk_fullsock(sk)) {
+ const struct tcp_ulp_ops *ulp_ops;
+
+ ulp_ops = icsk->icsk_ulp_ops;
+ if (ulp_ops) {
+ size += nla_total_size(0) +
+ nla_total_size(TCP_ULP_NAME_MAX);
+ if (ulp_ops->get_info_size)
+ size += ulp_ops->get_info_size(sk);
+ }
+ }
return size;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8a1cd93dbb09..3578357abe30 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3782,6 +3782,49 @@ static void smc_parse_options(const struct tcphdr *th,
#endif
}
+/* Try to parse the MSS option from the TCP header. Return 0 on failure, clamped
+ * value on success.
+ */
+static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss)
+{
+ const unsigned char *ptr = (const unsigned char *)(th + 1);
+ int length = (th->doff * 4) - sizeof(struct tcphdr);
+ u16 mss = 0;
+
+ while (length > 0) {
+ int opcode = *ptr++;
+ int opsize;
+
+ switch (opcode) {
+ case TCPOPT_EOL:
+ return mss;
+ case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
+ length--;
+ continue;
+ default:
+ if (length < 2)
+ return mss;
+ opsize = *ptr++;
+ if (opsize < 2) /* "silly options" */
+ return mss;
+ if (opsize > length)
+ return mss; /* fail on partial options */
+ if (opcode == TCPOPT_MSS && opsize == TCPOLEN_MSS) {
+ u16 in_mss = get_unaligned_be16(ptr);
+
+ if (in_mss) {
+ if (user_mss && user_mss < in_mss)
+ in_mss = user_mss;
+ mss = in_mss;
+ }
+ }
+ ptr += opsize - 2;
+ length -= opsize;
+ }
+ }
+ return mss;
+}
+
/* Look for tcp options. Normally only called on SYN and SYNACK packets.
* But, this can also be called on packets in the established flow when
* the fast version below fails.
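Worked example for tcp_parse_mss_option(): the canonical MSS option on an Ethernet-sized SYN is the four bytes 02 04 05 b4 (kind TCPOPT_MSS, length 4, value 0x05b4 == 1460). With user_mss set to 1400 via TCP_MAXSEG the parsed value is clamped to 1400, and a SYN without any MSS option makes the function return 0, so callers fall back to af_ops->mss_clamp.

#include <asm/unaligned.h>
#include <net/tcp.h>

static u16 mss_option_value_example(void)
{
	static const u8 opt[] = { TCPOPT_MSS, TCPOLEN_MSS, 0x05, 0xb4 };

	return get_unaligned_be16(opt + 2);	/* 1460 */
}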
@@ -4512,6 +4555,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
tp->pred_flags = 0;
inet_csk_schedule_ack(sk);
+ tp->rcv_ooopack += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
seq = TCP_SKB_CB(skb)->seq;
end_seq = TCP_SKB_CB(skb)->end_seq;
@@ -6422,9 +6466,7 @@ EXPORT_SYMBOL(inet_reqsk_alloc);
/*
* Return true if a syncookie should be sent
*/
-static bool tcp_syn_flood_action(const struct sock *sk,
- const struct sk_buff *skb,
- const char *proto)
+static bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
{
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
const char *msg = "Dropping request";
@@ -6444,7 +6486,7 @@ static bool tcp_syn_flood_action(const struct sock *sk,
net->ipv4.sysctl_tcp_syncookies != 2 &&
xchg(&queue->synflood_warned, 1) == 0)
net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
- proto, ntohs(tcp_hdr(skb)->dest), msg);
+ proto, sk->sk_num, msg);
return want_cookie;
}
@@ -6466,6 +6508,36 @@ static void tcp_reqsk_record_syn(const struct sock *sk,
}
}
+/* If a SYN cookie is required and supported, returns a clamped MSS value to be
+ * used for SYN cookie generation.
+ */
+u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
+ const struct tcp_request_sock_ops *af_ops,
+ struct sock *sk, struct tcphdr *th)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ u16 mss;
+
+ if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 &&
+ !inet_csk_reqsk_queue_is_full(sk))
+ return 0;
+
+ if (!tcp_syn_flood_action(sk, rsk_ops->slab_name))
+ return 0;
+
+ if (sk_acceptq_is_full(sk)) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
+ return 0;
+ }
+
+ mss = tcp_parse_mss_option(th, tp->rx_opt.user_mss);
+ if (!mss)
+ mss = af_ops->mss_clamp;
+
+ return mss;
+}
+EXPORT_SYMBOL_GPL(tcp_get_syncookie_mss);
+
int tcp_conn_request(struct request_sock_ops *rsk_ops,
const struct tcp_request_sock_ops *af_ops,
struct sock *sk, struct sk_buff *skb)
@@ -6487,7 +6559,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
*/
if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
inet_csk_reqsk_queue_is_full(sk)) && !isn) {
- want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name);
+ want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name);
if (!want_cookie)
goto drop;
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d57641cb3477..fd394ad179a0 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1515,6 +1515,21 @@ static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
return sk;
}
+u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph,
+ struct tcphdr *th, u32 *cookie)
+{
+ u16 mss = 0;
+#ifdef CONFIG_SYN_COOKIES
+ mss = tcp_get_syncookie_mss(&tcp_request_sock_ops,
+ &tcp_request_sock_ipv4_ops, sk, th);
+ if (mss) {
+ *cookie = __cookie_v4_init_sequence(iph, th, &mss);
+ tcp_synq_overflow(sk);
+ }
+#endif
+ return mss;
+}
+
/* The socket must have it's spinlock held when we get
* here, unless it is a TCP_LISTEN socket.
*
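Sketch of the intended calling pattern for tcp_v4_get_syncookie() (hedged: the actual consumer is added elsewhere in this series): a non-zero return means the listener is operating in syncookie mode, and the caller can answer the SYN itself with the returned MSS encoded in the cookie ISN instead of allocating a request socket.

#include <linux/ip.h>
#include <net/tcp.h>

static bool want_cookie_reply_sketch(struct sock *sk, struct iphdr *iph,
				     struct tcphdr *th, u32 *isn, u16 *mss)
{
	*mss = tcp_v4_get_syncookie(sk, iph, th, isn);

	return *mss != 0;
}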
@@ -2637,6 +2652,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS;
net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
+ net->ipv4.sysctl_tcp_mtu_probe_floor = TCP_MIN_SND_MSS;
net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8a645f304e6c..fec6d67bfd14 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1050,11 +1050,22 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
tcb = TCP_SKB_CB(skb);
memset(&opts, 0, sizeof(opts));
- if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
+ if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) {
tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
- else
+ } else {
tcp_options_size = tcp_established_options(sk, skb, &opts,
&md5);
+ /* Force a PSH flag on all (GSO) packets to expedite GRO flush
+ * at the receiver: this slightly improves GRO performance.
+ * Note that we do not force the PSH flag for non GSO packets,
+ * because they might be sent under high congestion events,
+ * and in this case it is better to delay the delivery of 1-MSS
+ * packets and thus the corresponding ACK packet that would
+ * release the following packet.
+ */
+ if (tcp_skb_pcount(skb) > 1)
+ tcb->tcp_flags |= TCPHDR_PSH;
+ }
tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
/* if no packet is in qdisc/device queue, then allow XPS to select
@@ -1403,7 +1414,7 @@ static int __pskb_trim_head(struct sk_buff *skb, int len)
} else {
shinfo->frags[k] = shinfo->frags[i];
if (eat) {
- shinfo->frags[k].page_offset += eat;
+ skb_frag_off_add(&shinfo->frags[k], eat);
skb_frag_size_sub(&shinfo->frags[k], eat);
eat = 0;
}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index c801cd37cc2a..dbd9d2d0ee63 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -154,7 +154,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
} else {
mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
- mss = max(mss, 68 - tcp_sk(sk)->tcp_header_len);
+ mss = max(mss, net->ipv4.sysctl_tcp_mtu_probe_floor);
mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss);
icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
}
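
The new tcp_mtu_probe_floor sysctl (initialized to TCP_MIN_SND_MSS in tcp_ipv4.c above) replaces the hard-coded "68 minus TCP header length" minimum used when probing for ICMP black holes. A minimal sketch of the resulting clamping order; the function name and example numbers are illustrative only:

/* Illustrative only: blackhole-probe MSS selection after this change.
 * E.g. base_mss 1024 and a halved search_low MSS of 256 give
 * max(256, tcp_mtu_probe_floor, tcp_min_snd_mss).
 */
static int example_probe_mss(struct net *net, struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	int mss;

	mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
	mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
	mss = max(mss, net->ipv4.sysctl_tcp_mtu_probe_floor);	/* new floor */
	mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss);
	return mss;
}
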
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index d88821c794fb..cf755156a684 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -423,12 +423,13 @@ static struct sock *udp4_lib_lookup2(struct net *net,
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif);
if (score > badness) {
- if (sk->sk_reuseport) {
+ if (sk->sk_reuseport &&
+ sk->sk_state != TCP_ESTABLISHED) {
hash = udp_ehashfn(net, daddr, hnum,
saddr, sport);
result = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
- if (result)
+ if (result && !reuseport_has_conns(sk, false))
return result;
}
badness = score;
@@ -1130,7 +1131,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl4 = &fl4_stack;
- flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
+ flowi4_init_output(fl4, ipc.oif, ipc.sockc.mark, tos,
RT_SCOPE_UNIVERSE, sk->sk_protocol,
flow_flags,
faddr, saddr, dport, inet->inet_sport,
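
Together with reuseport_has_conns() (flagged from the datagram connect paths, see the ipv6/datagram.c hunk below), the lookup change above makes a reuseport group that contains any connected UDP socket fall back to score-based selection, so the connected socket wins for traffic from its peer. A hypothetical userspace illustration of the scenario being fixed; addresses, ports and the helper are made up and error handling is omitted:

/* Two UDP sockets share a port via SO_REUSEPORT; one of them connect()s
 * to a peer.  After this change, datagrams from that peer reach the
 * connected socket instead of being steered elsewhere by the reuseport
 * hash.
 */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

static int bound_udp_socket(const struct sockaddr_in *addr)
{
	int one = 1;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
	bind(fd, (const struct sockaddr *)addr, sizeof(*addr));
	return fd;
}

int main(void)
{
	struct sockaddr_in local = {
		.sin_family = AF_INET,
		.sin_port = htons(5000),
		.sin_addr = { .s_addr = htonl(INADDR_ANY) },
	};
	struct sockaddr_in peer = {
		.sin_family = AF_INET,
		.sin_port = htons(6000),
		.sin_addr = { .s_addr = htonl(INADDR_LOOPBACK) },
	};
	int plain = bound_udp_socket(&local);
	int connected = bound_udp_socket(&local);

	connect(connected, (const struct sockaddr *)&peer, sizeof(peer));
	/* Replies from @peer now reliably land on @connected. */

	close(plain);
	close(connected);
	return 0;
}
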
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 9ab897ded4df..96f939248d2f 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -27,6 +27,7 @@
#include <net/ip6_route.h>
#include <net/tcp_states.h>
#include <net/dsfield.h>
+#include <net/sock_reuseport.h>
#include <linux/errqueue.h>
#include <linux/uaccess.h>
@@ -254,6 +255,7 @@ ipv4_connected:
goto out;
}
+ reuseport_has_conns(sk, true);
sk->sk_state = TCP_ESTABLISHED;
sk_set_txhash(sk);
out:
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index b358f1a4dd08..da46c4284676 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -197,10 +197,8 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
struct ipv6hdr _ip6, *ip6;
ip6 = skb_header_pointer(skb, *offset, sizeof(_ip6), &_ip6);
- if (!ip6 || (ip6->version != 6)) {
- printk(KERN_ERR "IPv6 header not found\n");
+ if (!ip6 || (ip6->version != 6))
return -EBADMSG;
- }
start = *offset + sizeof(struct ipv6hdr);
nexthdr = ip6->nexthdr;
}
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index dd2d0b963260..d5779d6a6065 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -968,7 +968,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
if (unlikely(!tun_info ||
!(tun_info->mode & IP_TUNNEL_INFO_TX) ||
ip_tunnel_info_af(tun_info) != AF_INET6))
- return -EINVAL;
+ goto tx_err;
key = &tun_info->key;
memset(&fl6, 0, sizeof(fl6));
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index fa014d5f1732..d432d0011c16 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -80,8 +80,10 @@ static void ip6_sublist_rcv_finish(struct list_head *head)
{
struct sk_buff *skb, *next;
- list_for_each_entry_safe(skb, next, head, list)
+ list_for_each_entry_safe(skb, next, head, list) {
+ skb_list_del_init(skb);
dst_input(skb);
+ }
}
static void ip6_list_rcv_finish(struct net *net, struct sock *sk,
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 8e49fd62eea9..89a4c7c2e25d 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1294,6 +1294,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
cork->base.fragsize = mtu;
cork->base.gso_size = ipc6->gso_size;
cork->base.tx_flags = 0;
+ cork->base.mark = ipc6->sockc.mark;
sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
if (dst_allfrag(xfrm_dst_path(&rt->dst)))
@@ -1764,7 +1765,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
hdr->daddr = *final_dst;
skb->priority = sk->sk_priority;
- skb->mark = sk->sk_mark;
+ skb->mark = cork->base.mark;
skb->tstamp = cork->base.transmit_time;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index e80d36c5073d..857a89ad4d6c 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1148,8 +1148,8 @@ static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
* Create a new entry if allowable
*/
- if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
- (c = ip6mr_cache_alloc_unres()) == NULL) {
+ c = ip6mr_cache_alloc_unres();
+ if (!c) {
spin_unlock_bh(&mfc_unres_lock);
kfree_skb(skb);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 083cc1c94cd3..53caf59c591e 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -196,6 +196,7 @@ static inline int ndisc_is_useropt(const struct net_device *dev,
{
return opt->nd_opt_type == ND_OPT_RDNSS ||
opt->nd_opt_type == ND_OPT_DNSSL ||
+ opt->nd_opt_type == ND_OPT_CAPTIVE_PORTAL ||
ndisc_ops_is_useropt(dev, opt->nd_opt_type);
}
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 61819ed858b1..a9bff556d3b2 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -113,9 +113,9 @@ int __nf_ip6_route(struct net *net, struct dst_entry **dst,
EXPORT_SYMBOL_GPL(__nf_ip6_route);
int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
- struct nf_ct_bridge_frag_data *data,
+ struct nf_bridge_frag_data *data,
int (*output)(struct net *, struct sock *sk,
- const struct nf_ct_bridge_frag_data *data,
+ const struct nf_bridge_frag_data *data,
struct sk_buff *))
{
int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 5cdb4a69d277..fd1f52a21bf1 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -36,8 +36,8 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
opts.options |= XT_SYNPROXY_OPT_ECN;
opts.options &= info->options;
- opts.mss_encode = opts.mss;
- opts.mss = info->mss;
+ opts.mss_encode = opts.mss_option;
+ opts.mss_option = info->mss;
if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
synproxy_init_timestamp_cookie(info, &opts);
else
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 0fc6326ef499..c52ff929c93b 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -16,7 +16,7 @@
#include <net/ipv6.h>
#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_ipv6/ip6t_ipv6header.h>
MODULE_LICENSE("GPL");
@@ -42,7 +42,7 @@ ipv6header_mt6(const struct sk_buff *skb, struct xt_action_param *par)
len = skb->len - ptr;
temp = 0;
- while (ip6t_ext_hdr(nexthdr)) {
+ while (nf_ip6_ext_hdr(nexthdr)) {
const struct ipv6_opt_hdr *hp;
struct ipv6_opt_hdr _hdr;
int hdrlen;
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
index f53bd8f01219..22b80db6d882 100644
--- a/net/ipv6/netfilter/nf_log_ipv6.c
+++ b/net/ipv6/netfilter/nf_log_ipv6.c
@@ -18,7 +18,7 @@
#include <net/route.h>
#include <linux/netfilter.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_ipv6.h>
#include <linux/netfilter/xt_LOG.h>
#include <net/netfilter/nf_log.h>
@@ -70,7 +70,7 @@ static void dump_ipv6_packet(struct net *net, struct nf_log_buf *m,
fragment = 0;
ptr = ip6hoff + sizeof(struct ipv6hdr);
currenthdr = ih->nexthdr;
- while (currenthdr != NEXTHDR_NONE && ip6t_ext_hdr(currenthdr)) {
+ while (currenthdr != NEXTHDR_NONE && nf_ip6_ext_hdr(currenthdr)) {
struct ipv6_opt_hdr _hdr;
const struct ipv6_opt_hdr *hp;
diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c
index 437d95545c31..b9df879c48d3 100644
--- a/net/ipv6/netfilter/nf_socket_ipv6.c
+++ b/net/ipv6/netfilter/nf_socket_ipv6.c
@@ -12,7 +12,6 @@
#include <net/sock.h>
#include <net/inet_sock.h>
#include <net/inet6_hashtables.h>
-#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#include <net/netfilter/nf_socket.h>
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_conntrack.h>
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 8a6131991e38..6e1888ee4036 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -646,7 +646,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
skb->protocol = htons(ETH_P_IPV6);
skb->priority = sk->sk_priority;
- skb->mark = sk->sk_mark;
+ skb->mark = sockc->mark;
skb->tstamp = sockc->transmit_time;
skb_put(skb, length);
@@ -810,6 +810,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipcm6_init(&ipc6);
ipc6.sockc.tsflags = sk->sk_tsflags;
+ ipc6.sockc.mark = sk->sk_mark;
if (sin6) {
if (addr_len < SIN6_LEN_RFC2133)
@@ -891,6 +892,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
opt = ipv6_fixup_options(&opt_space, opt);
fl6.flowi6_proto = proto;
+ fl6.flowi6_mark = ipc6.sockc.mark;
if (!hdrincl) {
rfv.msg = msg;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 546088e50815..a63ff85fe141 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -227,7 +227,7 @@ static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
struct net_device *dev = dst->dev;
struct rt6_info *rt = (struct rt6_info *)dst;
- daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
+ daddr = choose_neigh_daddr(rt6_nexthop(rt, &in6addr_any), NULL, daddr);
if (!daddr)
return;
if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
@@ -2725,10 +2725,9 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
rcu_read_lock();
res.f6i = rcu_dereference(rt6->from);
- if (!res.f6i) {
- rcu_read_unlock();
- return;
- }
+ if (!res.f6i)
+ goto out_unlock;
+
res.fib6_flags = res.f6i->fib6_flags;
res.fib6_type = res.f6i->fib6_type;
@@ -2744,10 +2743,8 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
/* fib6_info uses a nexthop that does not have fib6_nh
* using the dst->dev + gw. Should be impossible.
*/
- if (!arg.match) {
- rcu_read_unlock();
- return;
- }
+ if (!arg.match)
+ goto out_unlock;
res.nh = arg.match;
} else {
@@ -2760,6 +2757,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
if (rt6_insert_exception(nrt6, &res))
dst_release_immediate(&nrt6->dst);
}
+out_unlock:
rcu_read_unlock();
}
}
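
The __ip6_rt_update_pmtu() hunks above are a routine consolidation: early exits jump to a single out_unlock label rather than each repeating rcu_read_unlock(). A generic sketch of the shape; the lookup/use helpers are hypothetical stubs added only to keep the sketch self-contained:

/* Hypothetical stubs, standing in for real lookup/consumer code. */
static void *example_lookup(void) { return NULL; }
static void example_use(void *obj) { (void)obj; }

static void example_single_exit(void)
{
	void *obj;

	rcu_read_lock();

	obj = example_lookup();
	if (!obj)
		goto out_unlock;	/* no duplicated unlock here */

	example_use(obj);

out_unlock:
	rcu_read_unlock();
}
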
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5da069e91cac..87f44d3250ee 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1063,6 +1063,21 @@ static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
return sk;
}
+u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
+ struct tcphdr *th, u32 *cookie)
+{
+ u16 mss = 0;
+#ifdef CONFIG_SYN_COOKIES
+ mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
+ &tcp_request_sock_ipv6_ops, sk, th);
+ if (mss) {
+ *cookie = __cookie_v6_init_sequence(iph, th, &mss);
+ tcp_synq_overflow(sk);
+ }
+#endif
+ return mss;
+}
+
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
if (skb->protocol == htons(ETH_P_IP))
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 827fe7385078..aae4938f3dea 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -158,13 +158,14 @@ static struct sock *udp6_lib_lookup2(struct net *net,
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif);
if (score > badness) {
- if (sk->sk_reuseport) {
+ if (sk->sk_reuseport &&
+ sk->sk_state != TCP_ESTABLISHED) {
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);
result = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
- if (result)
+ if (result && !reuseport_has_conns(sk, false))
return result;
}
result = sk;
@@ -1230,6 +1231,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipcm6_init(&ipc6);
ipc6.gso_size = up->gso_size;
ipc6.sockc.tsflags = sk->sk_tsflags;
+ ipc6.sockc.mark = sk->sk_mark;
/* destination address check */
if (sin6) {
@@ -1352,7 +1354,7 @@ do_udp_sendmsg:
if (!fl6.flowi6_oif)
fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
- fl6.flowi6_mark = sk->sk_mark;
+ fl6.flowi6_mark = ipc6.sockc.mark;
fl6.flowi6_uid = sk->sk_uid;
if (msg->msg_controllen) {
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 5dbc0c48f8cb..8f12f5c6ab87 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -379,7 +379,7 @@ static int kcm_parse_func_strparser(struct strparser *strp, struct sk_buff *skb)
struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
struct bpf_prog *prog = psock->bpf_prog;
- return (*prog->bpf_func)(skb, prog->insnsi);
+ return BPF_PROG_RUN(prog, skb);
}
static int kcm_read_sock_done(struct strparser *strp, int err)
@@ -635,15 +635,15 @@ do_frag_list:
frag_offset = 0;
do_frag:
frag = &skb_shinfo(skb)->frags[fragidx];
- if (WARN_ON(!frag->size)) {
+ if (WARN_ON(!skb_frag_size(frag))) {
ret = -EINVAL;
goto out;
}
ret = kernel_sendpage(psock->sk->sk_socket,
- frag->page.p,
- frag->page_offset + frag_offset,
- frag->size - frag_offset,
+ skb_frag_page(frag),
+ skb_frag_off(frag) + frag_offset,
+ skb_frag_size(frag) - frag_offset,
MSG_DONTWAIT);
if (ret <= 0) {
if (ret == -EAGAIN) {
@@ -678,7 +678,7 @@ do_frag:
sent += ret;
frag_offset += ret;
KCM_STATS_ADD(psock->stats.tx_bytes, ret);
- if (frag_offset < frag->size) {
+ if (frag_offset < skb_frag_size(frag)) {
/* Not finished with this frag */
goto do_frag;
}
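
The kcm changes above are part of the tree-wide move from poking at skb_frag_t fields (page.p, page_offset, size) to the skb_frag_page()/skb_frag_off()/skb_frag_size() accessors. A minimal sketch of the accessor-based pattern; the function itself is illustrative, not from this patch:

static size_t example_frag_bytes(const struct sk_buff *skb)
{
	const struct skb_shared_info *shinfo = skb_shinfo(skb);
	size_t total = 0;
	int i;

	for (i = 0; i < shinfo->nr_frags; i++) {
		const skb_frag_t *frag = &shinfo->frags[i];

		/* skb_frag_page(frag), skb_frag_off(frag) and
		 * skb_frag_size(frag) replace direct access to
		 * frag->page.p, frag->page_offset and frag->size.
		 */
		total += skb_frag_size(frag);
	}

	return total;
}
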
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 01b0dad24500..4d1c335e06e5 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -178,17 +178,54 @@ static void sta_rx_agg_reorder_timer_expired(struct timer_list *t)
rcu_read_unlock();
}
-static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid,
+static void ieee80211_add_addbaext(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb,
+ const struct ieee80211_addba_ext_ie *req)
+{
+ struct ieee80211_supported_band *sband;
+ struct ieee80211_addba_ext_ie *resp;
+ const struct ieee80211_sta_he_cap *he_cap;
+ u8 frag_level, cap_frag_level;
+ u8 *pos;
+
+ sband = ieee80211_get_sband(sdata);
+ if (!sband)
+ return;
+ he_cap = ieee80211_get_he_iftype_cap(sband, sdata->vif.type);
+ if (!he_cap)
+ return;
+
+ pos = skb_put_zero(skb, 2 + sizeof(struct ieee80211_addba_ext_ie));
+ *pos++ = WLAN_EID_ADDBA_EXT;
+ *pos++ = sizeof(struct ieee80211_addba_ext_ie);
+ resp = (struct ieee80211_addba_ext_ie *)pos;
+ resp->data = req->data & IEEE80211_ADDBA_EXT_NO_FRAG;
+
+ frag_level = u32_get_bits(req->data,
+ IEEE80211_ADDBA_EXT_FRAG_LEVEL_MASK);
+ cap_frag_level = u32_get_bits(he_cap->he_cap_elem.mac_cap_info[0],
+ IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_MASK);
+ if (frag_level > cap_frag_level)
+ frag_level = cap_frag_level;
+ resp->data |= u8_encode_bits(frag_level,
+ IEEE80211_ADDBA_EXT_FRAG_LEVEL_MASK);
+}
+
+static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid,
u8 dialog_token, u16 status, u16 policy,
- u16 buf_size, u16 timeout)
+ u16 buf_size, u16 timeout,
+ const struct ieee80211_addba_ext_ie *addbaext)
{
+ struct ieee80211_sub_if_data *sdata = sta->sdata;
struct ieee80211_local *local = sdata->local;
struct sk_buff *skb;
struct ieee80211_mgmt *mgmt;
bool amsdu = ieee80211_hw_check(&local->hw, SUPPORTS_AMSDU_IN_AMPDU);
u16 capab;
- skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom);
+ skb = dev_alloc_skb(sizeof(*mgmt) +
+ 2 + sizeof(struct ieee80211_addba_ext_ie) +
+ local->hw.extra_tx_headroom);
if (!skb)
return;
@@ -222,13 +259,17 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d
mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout);
mgmt->u.action.u.addba_resp.status = cpu_to_le16(status);
+ if (sta->sta.he_cap.has_he && addbaext)
+ ieee80211_add_addbaext(sdata, skb, addbaext);
+
ieee80211_tx_skb(sdata, skb);
}
void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
u8 dialog_token, u16 timeout,
u16 start_seq_num, u16 ba_policy, u16 tid,
- u16 buf_size, bool tx, bool auto_seq)
+ u16 buf_size, bool tx, bool auto_seq,
+ const struct ieee80211_addba_ext_ie *addbaext)
{
struct ieee80211_local *local = sta->sdata->local;
struct tid_ampdu_rx *tid_agg_rx;
@@ -410,21 +451,22 @@ end:
}
if (tx)
- ieee80211_send_addba_resp(sta->sdata, sta->sta.addr, tid,
+ ieee80211_send_addba_resp(sta, sta->sta.addr, tid,
dialog_token, status, 1, buf_size,
- timeout);
+ timeout, addbaext);
}
static void __ieee80211_start_rx_ba_session(struct sta_info *sta,
u8 dialog_token, u16 timeout,
u16 start_seq_num, u16 ba_policy,
u16 tid, u16 buf_size, bool tx,
- bool auto_seq)
+ bool auto_seq,
+ const struct ieee80211_addba_ext_ie *addbaext)
{
mutex_lock(&sta->ampdu_mlme.mtx);
___ieee80211_start_rx_ba_session(sta, dialog_token, timeout,
start_seq_num, ba_policy, tid,
- buf_size, tx, auto_seq);
+ buf_size, tx, auto_seq, addbaext);
mutex_unlock(&sta->ampdu_mlme.mtx);
}
@@ -434,7 +476,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
size_t len)
{
u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num;
+ struct ieee802_11_elems elems = { 0 };
u8 dialog_token;
+ int ies_len;
/* extract session parameters from addba request frame */
dialog_token = mgmt->u.action.u.addba_req.dialog_token;
@@ -447,9 +491,19 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
+ ies_len = len - offsetof(struct ieee80211_mgmt,
+ u.action.u.addba_req.variable);
+ if (ies_len) {
+ ieee802_11_parse_elems(mgmt->u.action.u.addba_req.variable,
+ ies_len, true, &elems, mgmt->bssid, NULL);
+ if (elems.parse_error)
+ return;
+ }
+
__ieee80211_start_rx_ba_session(sta, dialog_token, timeout,
start_seq_num, ba_policy, tid,
- buf_size, true, false);
+ buf_size, true, false,
+ elems.addba_ext_ie);
}
void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif,
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 4105c97c7ba1..70739e746c13 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -980,7 +980,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
BSS_CHANGED_SSID |
BSS_CHANGED_P2P_PS |
BSS_CHANGED_TXPOWER |
- BSS_CHANGED_TWT;
+ BSS_CHANGED_TWT |
+ BSS_CHANGED_HE_OBSS_PD;
int err;
int prev_beacon_int;
@@ -1051,6 +1052,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
sdata->vif.bss_conf.enable_beacon = true;
sdata->vif.bss_conf.allow_p2p_go_ps = sdata->vif.p2p;
sdata->vif.bss_conf.twt_responder = params->twt_responder;
+ memcpy(&sdata->vif.bss_conf.he_obss_pd, &params->he_obss_pd,
+ sizeof(struct ieee80211_he_obss_pd));
sdata->vif.bss_conf.ssid_len = params->ssid_len;
if (params->ssid_len)
@@ -1542,7 +1545,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
if (ether_addr_equal(mac, sdata->vif.addr))
return -EINVAL;
- if (is_multicast_ether_addr(mac))
+ if (!is_valid_ether_addr(mac))
return -EINVAL;
if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER) &&
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 2e7f75938c51..568b3b276931 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -271,8 +271,7 @@ static const char *hw_flag_names[] = {
FLAG(TX_STATUS_NO_AMPDU_LEN),
FLAG(SUPPORTS_MULTI_BSSID),
FLAG(SUPPORTS_ONLY_HE_MULTI_BSSID),
- FLAG(EXT_KEY_ID_NATIVE),
- FLAG(NO_AMPDU_KEYBORDER_SUPPORT),
+ FLAG(AMPDU_KEYBORDER_SUPPORT),
#undef FLAG
};
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index c2d8b5451a5e..2c9b3eb8b652 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -692,14 +692,16 @@ static inline int drv_remain_on_channel(struct ieee80211_local *local,
return ret;
}
-static inline int drv_cancel_remain_on_channel(struct ieee80211_local *local)
+static inline int
+drv_cancel_remain_on_channel(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata)
{
int ret;
might_sleep();
- trace_drv_cancel_remain_on_channel(local);
- ret = local->ops->cancel_remain_on_channel(&local->hw);
+ trace_drv_cancel_remain_on_channel(local, sdata);
+ ret = local->ops->cancel_remain_on_channel(&local->hw, &sdata->vif);
trace_drv_return_int(local, ret);
return ret;
diff --git a/net/mac80211/he.c b/net/mac80211/he.c
index 219650591c79..736da0035135 100644
--- a/net/mac80211/he.c
+++ b/net/mac80211/he.c
@@ -50,3 +50,43 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
he_cap->has_he = true;
}
+
+void
+ieee80211_he_op_ie_to_bss_conf(struct ieee80211_vif *vif,
+ const struct ieee80211_he_operation *he_op_ie_elem)
+{
+ struct ieee80211_he_operation *he_operation =
+ &vif->bss_conf.he_operation;
+
+ if (!he_op_ie_elem) {
+ memset(he_operation, 0, sizeof(*he_operation));
+ return;
+ }
+
+ vif->bss_conf.he_operation = *he_op_ie_elem;
+}
+
+void
+ieee80211_he_spr_ie_to_bss_conf(struct ieee80211_vif *vif,
+ const struct ieee80211_he_spr *he_spr_ie_elem)
+{
+ struct ieee80211_he_obss_pd *he_obss_pd =
+ &vif->bss_conf.he_obss_pd;
+ const u8 *data;
+
+ memset(he_obss_pd, 0, sizeof(*he_obss_pd));
+
+ if (!he_spr_ie_elem)
+ return;
+ data = he_spr_ie_elem->optional;
+
+ if (he_spr_ie_elem->he_sr_control &
+ IEEE80211_HE_SPR_NON_SRG_OFFSET_PRESENT)
+ data++;
+ if (he_spr_ie_elem->he_sr_control &
+ IEEE80211_HE_SPR_SRG_INFORMATION_PRESENT) {
+ he_obss_pd->max_offset = *data++;
+ he_obss_pd->min_offset = *data++;
+ he_obss_pd->enable = true;
+ }
+}
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index d5a500b2a448..a2e4d6b8fd98 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -359,7 +359,7 @@ void ieee80211_ba_session_work(struct work_struct *work)
sta->ampdu_mlme.tid_rx_manage_offl))
___ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid,
IEEE80211_MAX_AMPDU_BUF_HT,
- false, true);
+ false, true, NULL);
if (test_and_clear_bit(tid + IEEE80211_NUM_TIDS,
sta->ampdu_mlme.tid_rx_manage_offl))
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index f00dca056295..0a6ff01c68a9 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -1252,6 +1252,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata,
static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata)
{
+ struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
struct ieee80211_local *local = sdata->local;
struct sta_info *sta, *tmp;
unsigned long exp_time = IEEE80211_IBSS_INACTIVITY_LIMIT;
@@ -1268,10 +1269,17 @@ static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata)
if (time_is_before_jiffies(last_active + exp_time) ||
(time_is_before_jiffies(last_active + exp_rsn) &&
sta->sta_state != IEEE80211_STA_AUTHORIZED)) {
+ u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
+
sta_dbg(sta->sdata, "expiring inactive %sSTA %pM\n",
sta->sta_state != IEEE80211_STA_AUTHORIZED ?
"not authorized " : "", sta->sta.addr);
+ ieee80211_send_deauth_disassoc(sdata, sta->sta.addr,
+ ifibss->bssid,
+ IEEE80211_STYPE_DEAUTH,
+ WLAN_REASON_DEAUTH_LEAVING,
+ true, frame_buf);
WARN_ON(__sta_info_destroy(sta));
}
}
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 004e2e3adb88..05406e9c05b3 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1480,6 +1480,7 @@ struct ieee802_11_elems {
const struct ieee80211_meshconf_ie *mesh_config;
const u8 *he_cap;
const struct ieee80211_he_operation *he_operation;
+ const struct ieee80211_he_spr *he_spr;
const struct ieee80211_mu_edca_param_set *mu_edca_param_set;
const u8 *uora_element;
const u8 *mesh_id;
@@ -1506,6 +1507,7 @@ struct ieee802_11_elems {
u8 max_bssid_indicator;
u8 dtim_count;
u8 dtim_period;
+ const struct ieee80211_addba_ext_ie *addba_ext_ie;
/* length of them, respectively */
u8 ext_capab_len;
@@ -1767,7 +1769,8 @@ ieee80211_build_data_template(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, u32 info_flags);
void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb,
struct ieee80211_supported_band *sband,
- int retry_count, int shift, bool send_to_cooked);
+ int retry_count, int shift, bool send_to_cooked,
+ struct ieee80211_tx_status *status);
void ieee80211_check_fast_xmit(struct sta_info *sta);
void ieee80211_check_fast_xmit_all(struct ieee80211_local *local);
@@ -1804,7 +1807,8 @@ void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
u8 dialog_token, u16 timeout,
u16 start_seq_num, u16 ba_policy, u16 tid,
- u16 buf_size, bool tx, bool auto_seq);
+ u16 buf_size, bool tx, bool auto_seq,
+ const struct ieee80211_addba_ext_ie *addbaext);
void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
enum ieee80211_agg_stop_reason reason);
void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata,
@@ -1869,6 +1873,13 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
struct ieee80211_supported_band *sband,
const u8 *he_cap_ie, u8 he_cap_len,
struct sta_info *sta);
+void
+ieee80211_he_spr_ie_to_bss_conf(struct ieee80211_vif *vif,
+ const struct ieee80211_he_spr *he_spr_ie_elem);
+
+void
+ieee80211_he_op_ie_to_bss_conf(struct ieee80211_vif *vif,
+ const struct ieee80211_he_operation *he_op_ie_elem);
/* Spectrum management */
void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
@@ -2088,7 +2099,8 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
const u8 *da, const u8 *key, u8 key_len, u8 key_idx,
u32 tx_flags);
void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
- const u8 *bssid, u16 stype, u16 reason,
+ const u8 *da, const u8 *bssid,
+ u16 stype, u16 reason,
bool send_frame, u8 *frame_buf);
enum {
@@ -2133,9 +2145,11 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
u32 cap);
u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
const struct cfg80211_chan_def *chandef);
+u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype);
u8 *ieee80211_ie_build_he_cap(u8 *pos,
const struct ieee80211_sta_he_cap *he_cap,
u8 *end);
+u8 *ieee80211_ie_build_he_oper(u8 *pos);
int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef,
const struct ieee80211_supported_band *sband,
const u8 *srates, int srates_len, u32 *rates);
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 8dc6580e1787..af8b09214786 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1876,7 +1876,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
/* MTU range: 256 - 2304 */
ndev->min_mtu = 256;
- ndev->max_mtu = IEEE80211_MAX_DATA_LEN;
+ ndev->max_mtu = local->hw.max_mtu;
ret = register_netdevice(ndev);
if (ret) {
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index dd60f6428049..0f889b919b06 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -6,6 +6,7 @@
* Copyright 2007-2008 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
+ * Copyright 2018-2019 Intel Corporation
*/
#include <linux/if_ether.h>
@@ -270,7 +271,7 @@ int ieee80211_set_tx_key(struct ieee80211_key *key)
sta->ptk_idx = key->conf.keyidx;
- if (ieee80211_hw_check(&local->hw, NO_AMPDU_KEYBORDER_SUPPORT))
+ if (!ieee80211_hw_check(&local->hw, AMPDU_KEYBORDER_SUPPORT))
clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
ieee80211_check_fast_xmit(sta);
@@ -290,15 +291,15 @@ static void ieee80211_pairwise_rekey(struct ieee80211_key *old,
/* Extended Key ID key install, initial one or rekey */
if (sta->ptk_idx != INVALID_PTK_KEYIDX &&
- ieee80211_hw_check(&local->hw,
- NO_AMPDU_KEYBORDER_SUPPORT)) {
+ !ieee80211_hw_check(&local->hw, AMPDU_KEYBORDER_SUPPORT)) {
/* Aggregation Sessions with Extended Key ID must not
* mix MPDUs with different keyIDs within one A-MPDU.
- * Tear down any running Tx aggregation and all new
- * Rx/Tx aggregation request during rekey if the driver
- * asks us to do so. (Blocking Tx only would be
- * sufficient but WLAN_STA_BLOCK_BA gets the job done
- * for the few ms we need it.)
+ * Tear down running Tx aggregation sessions and block
+ * new Rx/Tx aggregation requests during rekey to
+ * ensure there are no A-MPDUs when the driver does not
+ * support A-MPDU key borders. (Blocking Tx only
+ * would be sufficient but WLAN_STA_BLOCK_BA gets the
+ * job done for the few ms we need it.)
*/
set_sta_flag(sta, WLAN_STA_BLOCK_BA);
mutex_lock(&sta->ampdu_mlme.mtx);
@@ -781,9 +782,8 @@ int ieee80211_key_link(struct ieee80211_key *key,
/* The rekey code assumes that the old and new key are using
* the same cipher. Enforce the assumption for pairwise keys.
*/
- if (key &&
- ((alt_key && alt_key->conf.cipher != key->conf.cipher) ||
- (old_key && old_key->conf.cipher != key->conf.cipher)))
+ if ((alt_key && alt_key->conf.cipher != key->conf.cipher) ||
+ (old_key && old_key->conf.cipher != key->conf.cipher))
goto out;
} else if (sta) {
old_key = key_mtx_dereference(sdata->local, sta->gtk[idx]);
@@ -793,7 +793,7 @@ int ieee80211_key_link(struct ieee80211_key *key,
/* Non-pairwise keys must also not switch the cipher on rekey */
if (!pairwise) {
- if (key && old_key && old_key->conf.cipher != key->conf.cipher)
+ if (old_key && old_key->conf.cipher != key->conf.cipher)
goto out;
}
@@ -843,46 +843,30 @@ void ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom)
ieee80211_key_destroy(key, delay_tailroom);
}
-void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata)
+void ieee80211_reenable_keys(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_key *key;
struct ieee80211_sub_if_data *vlan;
ASSERT_RTNL();
- if (WARN_ON(!ieee80211_sdata_running(sdata)))
- return;
-
- mutex_lock(&sdata->local->key_mtx);
-
- WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt ||
- sdata->crypto_tx_tailroom_pending_dec);
-
- if (sdata->vif.type == NL80211_IFTYPE_AP) {
- list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
- WARN_ON_ONCE(vlan->crypto_tx_tailroom_needed_cnt ||
- vlan->crypto_tx_tailroom_pending_dec);
- }
-
- list_for_each_entry(key, &sdata->key_list, list) {
- increment_tailroom_need_count(sdata);
- ieee80211_key_enable_hw_accel(key);
- }
-
- mutex_unlock(&sdata->local->key_mtx);
-}
-
-void ieee80211_reset_crypto_tx_tailroom(struct ieee80211_sub_if_data *sdata)
-{
- struct ieee80211_sub_if_data *vlan;
-
mutex_lock(&sdata->local->key_mtx);
sdata->crypto_tx_tailroom_needed_cnt = 0;
+ sdata->crypto_tx_tailroom_pending_dec = 0;
if (sdata->vif.type == NL80211_IFTYPE_AP) {
- list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
+ list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) {
vlan->crypto_tx_tailroom_needed_cnt = 0;
+ vlan->crypto_tx_tailroom_pending_dec = 0;
+ }
+ }
+
+ if (ieee80211_sdata_running(sdata)) {
+ list_for_each_entry(key, &sdata->key_list, list) {
+ increment_tailroom_need_count(sdata);
+ ieee80211_key_enable_hw_accel(key);
+ }
}
mutex_unlock(&sdata->local->key_mtx);
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index b8b9cd743bf4..d6d6e89cf7dd 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -2,6 +2,7 @@
/*
* Copyright 2002-2004, Instant802 Networks, Inc.
* Copyright 2005, Devicescape Software, Inc.
+ * Copyright (C) 2019 Intel Corporation
*/
#ifndef IEEE80211_KEY_H
@@ -156,8 +157,7 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata,
bool force_synchronize);
void ieee80211_free_sta_keys(struct ieee80211_local *local,
struct sta_info *sta);
-void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata);
-void ieee80211_reset_crypto_tx_tailroom(struct ieee80211_sub_if_data *sdata);
+void ieee80211_reenable_keys(struct ieee80211_sub_if_data *sdata);
#define key_mtx_dereference(local, ref) \
rcu_dereference_protected(ref, lockdep_is_held(&((local)->key_mtx)))
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 4c2702f128f3..aba094b4ccfc 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -639,6 +639,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH;
local->hw.uapsd_queues = IEEE80211_DEFAULT_UAPSD_QUEUES;
local->hw.uapsd_max_sp_len = IEEE80211_DEFAULT_MAX_SP_LEN;
+ local->hw.max_mtu = IEEE80211_MAX_DATA_LEN;
local->user_power_level = IEEE80211_UNSET_POWER_LEVEL;
wiphy->ht_capa_mod_mask = &mac80211_ht_capa_mod_mask;
wiphy->vht_capa_mod_mask = &mac80211_vht_capa_mod_mask;
@@ -1048,21 +1049,15 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
}
}
- /* Enable Extended Key IDs when driver allowed it, or when it
- * supports neither HW crypto nor A-MPDUs
+ /* Mac80211 and therefore all drivers using SW crypto only
+ * are able to handle PTK rekeys and Extended Key ID.
*/
- if ((!local->ops->set_key &&
- !ieee80211_hw_check(hw, AMPDU_AGGREGATION)) ||
- ieee80211_hw_check(&local->hw, EXT_KEY_ID_NATIVE))
- wiphy_ext_feature_set(local->hw.wiphy,
- NL80211_EXT_FEATURE_EXT_KEY_ID);
-
- /* Mac80211 and therefore all cards only using SW crypto are able to
- * handle PTK rekeys correctly
- */
- if (!local->ops->set_key)
+ if (!local->ops->set_key) {
wiphy_ext_feature_set(local->hw.wiphy,
NL80211_EXT_FEATURE_CAN_REPLACE_PTK0);
+ wiphy_ext_feature_set(local->hw.wiphy,
+ NL80211_EXT_FEATURE_EXT_KEY_ID);
+ }
/*
* Calculate scan IE length -- we need this to alloc
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 2e7fa743c892..d09b3c789314 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -532,6 +532,61 @@ int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata,
return 0;
}
+int mesh_add_he_cap_ie(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb, u8 ie_len)
+{
+ const struct ieee80211_sta_he_cap *he_cap;
+ struct ieee80211_supported_band *sband;
+ u8 *pos;
+
+ sband = ieee80211_get_sband(sdata);
+ if (!sband)
+ return -EINVAL;
+
+ he_cap = ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT);
+
+ if (!he_cap ||
+ sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
+ sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
+ sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10)
+ return 0;
+
+ if (skb_tailroom(skb) < ie_len)
+ return -ENOMEM;
+
+ pos = skb_put(skb, ie_len);
+ ieee80211_ie_build_he_cap(pos, he_cap, pos + ie_len);
+
+ return 0;
+}
+
+int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb)
+{
+ const struct ieee80211_sta_he_cap *he_cap;
+ struct ieee80211_supported_band *sband;
+ u8 *pos;
+
+ sband = ieee80211_get_sband(sdata);
+ if (!sband)
+ return -EINVAL;
+
+ he_cap = ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT);
+ if (!he_cap ||
+ sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
+ sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
+ sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10)
+ return 0;
+
+ if (skb_tailroom(skb) < 2 + 1 + sizeof(struct ieee80211_he_operation))
+ return -ENOMEM;
+
+ pos = skb_put(skb, 2 + 1 + sizeof(struct ieee80211_he_operation));
+ ieee80211_ie_build_he_oper(pos);
+
+ return 0;
+}
+
static void ieee80211_mesh_path_timer(struct timer_list *t)
{
struct ieee80211_sub_if_data *sdata =
@@ -677,6 +732,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
struct ieee80211_chanctx_conf *chanctx_conf;
struct mesh_csa_settings *csa;
enum nl80211_band band;
+ u8 ie_len_he_cap;
u8 *pos;
struct ieee80211_sub_if_data *sdata;
int hdr_len = offsetofend(struct ieee80211_mgmt, u.beacon);
@@ -687,6 +743,8 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
band = chanctx_conf->def.chan->band;
rcu_read_unlock();
+ ie_len_he_cap = ieee80211_ie_len_he_cap(sdata,
+ NL80211_IFTYPE_MESH_POINT);
head_len = hdr_len +
2 + /* NULL SSID */
/* Channel Switch Announcement */
@@ -706,6 +764,8 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
2 + sizeof(__le16) + /* awake window */
2 + sizeof(struct ieee80211_vht_cap) +
2 + sizeof(struct ieee80211_vht_operation) +
+ ie_len_he_cap +
+ 2 + 1 + sizeof(struct ieee80211_he_operation) +
ifmsh->ie_len;
bcn = kzalloc(sizeof(*bcn) + head_len + tail_len, GFP_KERNEL);
@@ -823,6 +883,8 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
mesh_add_awake_window_ie(sdata, skb) ||
mesh_add_vht_cap_ie(sdata, skb) ||
mesh_add_vht_oper_ie(sdata, skb) ||
+ mesh_add_he_cap_ie(sdata, skb, ie_len_he_cap) ||
+ mesh_add_he_oper_ie(sdata, skb) ||
mesh_add_vendor_ies(sdata, skb))
goto out_free;
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 94d57cce70da..953f720754e8 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -218,6 +218,10 @@ int mesh_add_vht_cap_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb);
int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb);
+int mesh_add_he_cap_ie(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb, u8 ie_len);
+int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb);
void mesh_rmc_free(struct ieee80211_sub_if_data *sdata);
int mesh_rmc_init(struct ieee80211_sub_if_data *sdata);
void ieee80211s_init(void);
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index dd3aefd052a9..737c5f4dbf52 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -218,9 +218,12 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
bool include_plid = false;
u16 peering_proto = 0;
u8 *pos, ie_len = 4;
+ u8 ie_len_he_cap;
int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.self_prot);
int err = -ENOMEM;
+ ie_len_he_cap = ieee80211_ie_len_he_cap(sdata,
+ NL80211_IFTYPE_MESH_POINT);
skb = dev_alloc_skb(local->tx_headroom +
hdr_len +
2 + /* capability info */
@@ -233,6 +236,8 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
2 + sizeof(struct ieee80211_ht_operation) +
2 + sizeof(struct ieee80211_vht_cap) +
2 + sizeof(struct ieee80211_vht_operation) +
+ ie_len_he_cap +
+ 2 + 1 + sizeof(struct ieee80211_he_operation) +
2 + 8 + /* peering IE */
sdata->u.mesh.ie_len);
if (!skb)
@@ -321,7 +326,9 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
if (mesh_add_ht_cap_ie(sdata, skb) ||
mesh_add_ht_oper_ie(sdata, skb) ||
mesh_add_vht_cap_ie(sdata, skb) ||
- mesh_add_vht_oper_ie(sdata, skb))
+ mesh_add_vht_oper_ie(sdata, skb) ||
+ mesh_add_he_cap_ie(sdata, skb, ie_len_he_cap) ||
+ mesh_add_he_oper_ie(sdata, skb))
goto free;
}
@@ -433,6 +440,9 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
elems->vht_cap_elem, sta);
+ ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband, elems->he_cap,
+ elems->he_cap_len, sta);
+
if (bw != sta->sta.bandwidth)
changed |= IEEE80211_RC_BW_CHANGED;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 4c888dc9bd81..26a2f49208b6 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -158,10 +158,10 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap));
ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap);
+ memset(chandef, 0, sizeof(struct cfg80211_chan_def));
chandef->chan = channel;
chandef->width = NL80211_CHAN_WIDTH_20_NOHT;
chandef->center_freq1 = channel->center_freq;
- chandef->center_freq2 = 0;
if (!ht_oper || !sta_ht_cap.ht_supported) {
ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT;
@@ -2278,8 +2278,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
!ifmgd->have_beacon)
drv_mgd_prepare_tx(sdata->local, sdata, 0);
- ieee80211_send_deauth_disassoc(sdata, ifmgd->bssid, stype,
- reason, tx, frame_buf);
+ ieee80211_send_deauth_disassoc(sdata, ifmgd->bssid,
+ ifmgd->bssid, stype, reason,
+ tx, frame_buf);
}
/* flush out frame - make sure the deauth was actually sent */
@@ -2522,7 +2523,10 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
if (ieee80211_hw_check(&sdata->local->hw, REPORTS_TX_ACK_STATUS)) {
ifmgd->nullfunc_failed = false;
- ieee80211_send_nullfunc(sdata->local, sdata, false);
+ if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE))
+ ifmgd->probe_send_count--;
+ else
+ ieee80211_send_nullfunc(sdata->local, sdata, false);
} else {
int ssid_len;
@@ -3391,6 +3395,8 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
if (elems.uora_element)
bss_conf->uora_ocw_range = elems.uora_element[0];
+ ieee80211_he_op_ie_to_bss_conf(&sdata->vif, elems.he_operation);
+ ieee80211_he_spr_ie_to_bss_conf(&sdata->vif, elems.he_spr);
/* TODO: OPEN: what happens if BSS color disable is set? */
}
@@ -4504,7 +4510,7 @@ void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata)
* cfg80211 won't know and won't actually abort those attempts,
* thus we need to do that ourselves.
*/
- ieee80211_send_deauth_disassoc(sdata, bssid,
+ ieee80211_send_deauth_disassoc(sdata, bssid, bssid,
IEEE80211_STYPE_DEAUTH,
WLAN_REASON_DEAUTH_LEAVING,
false, frame_buf);
@@ -5291,7 +5297,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
netdev_info(sdata->dev,
- "disabling HE/HT/VHT due to WEP/TKIP use\n");
+ "disabling HT/VHT/HE due to WEP/TKIP use\n");
}
}
@@ -5545,7 +5551,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
ieee80211_get_reason_code_string(req->reason_code));
drv_mgd_prepare_tx(sdata->local, sdata, 0);
- ieee80211_send_deauth_disassoc(sdata, req->bssid,
+ ieee80211_send_deauth_disassoc(sdata, req->bssid, req->bssid,
IEEE80211_STYPE_DEAUTH,
req->reason_code, tx,
frame_buf);
@@ -5565,7 +5571,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
ieee80211_get_reason_code_string(req->reason_code));
drv_mgd_prepare_tx(sdata->local, sdata, 0);
- ieee80211_send_deauth_disassoc(sdata, req->bssid,
+ ieee80211_send_deauth_disassoc(sdata, req->bssid, req->bssid,
IEEE80211_STYPE_DEAUTH,
req->reason_code, tx,
frame_buf);
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index 60ef8972b254..c710504ccf1a 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -8,6 +8,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright (C) 2019 Intel Corporation
*/
#include <linux/export.h>
#include <net/mac80211.h>
@@ -732,7 +733,7 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local,
}
if (local->ops->remain_on_channel) {
- ret = drv_cancel_remain_on_channel(local);
+ ret = drv_cancel_remain_on_channel(local, roc->sdata);
if (WARN_ON_ONCE(ret)) {
mutex_unlock(&local->mtx);
return ret;
@@ -991,7 +992,7 @@ void ieee80211_roc_purge(struct ieee80211_local *local,
if (roc->started) {
if (local->ops->remain_on_channel) {
/* can race, so ignore return value */
- drv_cancel_remain_on_channel(local);
+ drv_cancel_remain_on_channel(local, sdata);
ieee80211_roc_notify_destroy(roc);
} else {
roc->abort = true;
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index 5d5348bc41ec..5397c6dad056 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -60,15 +60,6 @@ static inline void rate_control_add_sta_debugfs(struct sta_info *sta)
#endif
}
-static inline void rate_control_remove_sta_debugfs(struct sta_info *sta)
-{
-#ifdef CONFIG_MAC80211_DEBUGFS
- struct rate_control_ref *ref = sta->rate_ctrl;
- if (ref && ref->ops->remove_sta_debugfs)
- ref->ops->remove_sta_debugfs(ref->priv, sta->rate_ctrl_priv);
-#endif
-}
-
void ieee80211_check_rate_mask(struct ieee80211_sub_if_data *sdata);
/* Get a reference to the rate control algorithm. If `name' is NULL, get the
diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h
index 3c96a853adbd..51d8b2c846e7 100644
--- a/net/mac80211/rc80211_minstrel.h
+++ b/net/mac80211/rc80211_minstrel.h
@@ -95,6 +95,7 @@ struct minstrel_sta_info {
struct minstrel_priv {
struct ieee80211_hw *hw;
bool has_mrr;
+ u32 sample_switch;
unsigned int cw_min;
unsigned int cw_max;
unsigned int max_retry;
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 5a882da82f0e..0ef2633349b5 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -18,6 +18,8 @@
#define AVG_AMPDU_SIZE 16
#define AVG_PKT_SIZE 1200
+#define SAMPLE_SWITCH_THR 100
+
/* Number of bits for an average sized packet */
#define MCS_NBITS ((AVG_PKT_SIZE * AVG_AMPDU_SIZE) << 3)
@@ -58,6 +60,7 @@
[GROUP_IDX(_streams, _sgi, _ht40)] = { \
.streams = _streams, \
.shift = _s, \
+ .bw = _ht40, \
.flags = \
IEEE80211_TX_RC_MCS | \
(_sgi ? IEEE80211_TX_RC_SHORT_GI : 0) | \
@@ -94,6 +97,7 @@
[VHT_GROUP_IDX(_streams, _sgi, _bw)] = { \
.streams = _streams, \
.shift = _s, \
+ .bw = _bw, \
.flags = \
IEEE80211_TX_RC_VHT_MCS | \
(_sgi ? IEEE80211_TX_RC_SHORT_GI : 0) | \
@@ -486,7 +490,7 @@ minstrel_ht_assign_best_tp_rates(struct minstrel_ht_sta *mi,
tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma;
tmp_mcs_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob);
- if (tmp_cck_tp > tmp_mcs_tp) {
+ if (tmp_cck_tp_rate && tmp_cck_tp > tmp_mcs_tp) {
for(i = 0; i < MAX_THR_RATES; i++) {
minstrel_ht_sort_best_tp_rates(mi, tmp_cck_tp_rate[i],
tmp_mcs_tp_rate);
@@ -526,6 +530,133 @@ minstrel_ht_prob_rate_reduce_streams(struct minstrel_ht_sta *mi)
}
}
+static inline int
+minstrel_get_duration(int index)
+{
+ const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES];
+ unsigned int duration = group->duration[index % MCS_GROUP_RATES];
+ return duration << group->shift;
+}
+
+static bool
+minstrel_ht_probe_group(struct minstrel_ht_sta *mi, const struct mcs_group *tp_group,
+ int tp_idx, const struct mcs_group *group)
+{
+ if (group->bw < tp_group->bw)
+ return false;
+
+ if (group->streams == tp_group->streams)
+ return true;
+
+ if (tp_idx < 4 && group->streams == tp_group->streams - 1)
+ return true;
+
+ return group->streams == tp_group->streams + 1;
+}
+
+static void
+minstrel_ht_find_probe_rates(struct minstrel_ht_sta *mi, u16 *rates, int *n_rates,
+ bool faster_rate)
+{
+ const struct mcs_group *group, *tp_group;
+ int i, g, max_dur;
+ int tp_idx;
+
+ tp_group = &minstrel_mcs_groups[mi->max_tp_rate[0] / MCS_GROUP_RATES];
+ tp_idx = mi->max_tp_rate[0] % MCS_GROUP_RATES;
+
+ max_dur = minstrel_get_duration(mi->max_tp_rate[0]);
+ if (faster_rate)
+ max_dur -= max_dur / 16;
+
+ for (g = 0; g < MINSTREL_GROUPS_NB; g++) {
+ u16 supported = mi->supported[g];
+
+ if (!supported)
+ continue;
+
+ group = &minstrel_mcs_groups[g];
+ if (!minstrel_ht_probe_group(mi, tp_group, tp_idx, group))
+ continue;
+
+ for (i = 0; supported; supported >>= 1, i++) {
+ int idx;
+
+ if (!(supported & 1))
+ continue;
+
+ if ((group->duration[i] << group->shift) > max_dur)
+ continue;
+
+ idx = g * MCS_GROUP_RATES + i;
+ if (idx == mi->max_tp_rate[0])
+ continue;
+
+ rates[(*n_rates)++] = idx;
+ break;
+ }
+ }
+}
+
+static void
+minstrel_ht_rate_sample_switch(struct minstrel_priv *mp,
+ struct minstrel_ht_sta *mi)
+{
+ struct minstrel_rate_stats *mrs;
+ u16 rates[MINSTREL_GROUPS_NB];
+ int n_rates = 0;
+ int probe_rate = 0;
+ bool faster_rate;
+ int i;
+ u8 random;
+
+ /*
+ * Use rate switching instead of probing packets for devices with
+ * little control over retry fallback behavior
+ */
+ if (mp->hw->max_rates > 1)
+ return;
+
+ /*
+ * If the current EWMA prob is >75%, look for a rate that's 6.25%
+ * faster than the max tp rate.
+ * If that fails, look again for a rate that is at least as fast
+ */
+ mrs = minstrel_get_ratestats(mi, mi->max_tp_rate[0]);
+ faster_rate = mrs->prob_ewma > MINSTREL_FRAC(75, 100);
+ minstrel_ht_find_probe_rates(mi, rates, &n_rates, faster_rate);
+ if (!n_rates && faster_rate)
+ minstrel_ht_find_probe_rates(mi, rates, &n_rates, false);
+
+ /* If no suitable rate was found, try to pick the next one in the group */
+ if (!n_rates) {
+ int g_idx = mi->max_tp_rate[0] / MCS_GROUP_RATES;
+ u16 supported = mi->supported[g_idx];
+
+ supported >>= mi->max_tp_rate[0] % MCS_GROUP_RATES;
+ for (i = 0; supported; supported >>= 1, i++) {
+ if (!(supported & 1))
+ continue;
+
+ probe_rate = mi->max_tp_rate[0] + i;
+ goto out;
+ }
+
+ return;
+ }
+
+ i = 0;
+ if (n_rates > 1) {
+ random = prandom_u32();
+ i = random % n_rates;
+ }
+ probe_rate = rates[i];
+
+out:
+ mi->sample_rate = probe_rate;
+ mi->sample_mode = MINSTREL_SAMPLE_ACTIVE;
+}
+
/*
* Update rate statistics and select new primary rates
*
@@ -536,7 +667,8 @@ minstrel_ht_prob_rate_reduce_streams(struct minstrel_ht_sta *mi)
* higher throughput rates, even if the probability is a bit lower
*/
static void
-minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
+minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
+ bool sample)
{
struct minstrel_mcs_group_data *mg;
struct minstrel_rate_stats *mrs;
@@ -544,6 +676,18 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
u16 tmp_mcs_tp_rate[MAX_THR_RATES], tmp_group_tp_rate[MAX_THR_RATES];
u16 tmp_cck_tp_rate[MAX_THR_RATES], index;
+ mi->sample_mode = MINSTREL_SAMPLE_IDLE;
+
+ if (sample) {
+ mi->total_packets_cur = mi->total_packets -
+ mi->total_packets_last;
+ mi->total_packets_last = mi->total_packets;
+ }
+ if (!mp->sample_switch)
+ sample = false;
+ if (mi->total_packets_cur < SAMPLE_SWITCH_THR && mp->sample_switch != 1)
+ sample = false;
+
if (mi->ampdu_packets > 0) {
if (!ieee80211_hw_check(mp->hw, TX_STATUS_NO_AMPDU_LEN))
mi->avg_ampdu_len = minstrel_ewma(mi->avg_ampdu_len,
@@ -558,11 +702,19 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
mi->sample_slow = 0;
mi->sample_count = 0;
- /* Initialize global rate indexes */
- for(j = 0; j < MAX_THR_RATES; j++){
- tmp_mcs_tp_rate[j] = 0;
- tmp_cck_tp_rate[j] = 0;
- }
+ memset(tmp_mcs_tp_rate, 0, sizeof(tmp_mcs_tp_rate));
+ memset(tmp_cck_tp_rate, 0, sizeof(tmp_cck_tp_rate));
+ if (mi->supported[MINSTREL_CCK_GROUP])
+ for (j = 0; j < ARRAY_SIZE(tmp_cck_tp_rate); j++)
+ tmp_cck_tp_rate[j] = MINSTREL_CCK_GROUP * MCS_GROUP_RATES;
+
+ if (mi->supported[MINSTREL_VHT_GROUP_0])
+ index = MINSTREL_VHT_GROUP_0 * MCS_GROUP_RATES;
+ else
+ index = MINSTREL_HT_GROUP_0 * MCS_GROUP_RATES;
+
+ for (j = 0; j < ARRAY_SIZE(tmp_mcs_tp_rate); j++)
+ tmp_mcs_tp_rate[j] = index;
/* Find best rate sets within all MCS groups*/
for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) {
@@ -575,7 +727,7 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
/* (re)Initialize group rate indexes */
for(j = 0; j < MAX_THR_RATES; j++)
- tmp_group_tp_rate[j] = group;
+ tmp_group_tp_rate[j] = MCS_GROUP_RATES * group;
for (i = 0; i < MCS_GROUP_RATES; i++) {
if (!(mi->supported[group] & BIT(i)))
@@ -622,12 +774,16 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
/* try to sample all available rates during each interval */
mi->sample_count *= 8;
+ if (sample)
+ minstrel_ht_rate_sample_switch(mp, mi);
+
#ifdef CONFIG_MAC80211_DEBUGFS
/* use fixed index if set */
if (mp->fixed_rate_idx != -1) {
for (i = 0; i < 4; i++)
mi->max_tp_rate[i] = mp->fixed_rate_idx;
mi->max_prob_rate = mp->fixed_rate_idx;
+ mi->sample_mode = MINSTREL_SAMPLE_IDLE;
}
#endif
@@ -731,15 +887,17 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
struct minstrel_ht_sta_priv *msp = priv_sta;
struct minstrel_ht_sta *mi = &msp->ht;
struct ieee80211_tx_rate *ar = info->status.rates;
- struct minstrel_rate_stats *rate, *rate2;
+ struct minstrel_rate_stats *rate, *rate2, *rate_sample = NULL;
struct minstrel_priv *mp = priv;
bool last, update = false;
+ bool sample_status = false;
int i;
if (!msp->is_ht)
return mac80211_minstrel.tx_status_ext(priv, sband,
&msp->legacy, st);
+
/* This packet was aggregated but doesn't carry status info */
if ((info->flags & IEEE80211_TX_CTL_AMPDU) &&
!(info->flags & IEEE80211_TX_STAT_AMPDU))
@@ -765,12 +923,17 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE)
mi->sample_packets += info->status.ampdu_len;
+ if (mi->sample_mode != MINSTREL_SAMPLE_IDLE)
+ rate_sample = minstrel_get_ratestats(mi, mi->sample_rate);
+
last = !minstrel_ht_txstat_valid(mp, &ar[0]);
for (i = 0; !last; i++) {
last = (i == IEEE80211_TX_MAX_RATES - 1) ||
!minstrel_ht_txstat_valid(mp, &ar[i + 1]);
rate = minstrel_ht_get_stats(mp, mi, &ar[i]);
+ if (rate == rate_sample)
+ sample_status = true;
if (last)
rate->success += info->status.ampdu_ack_len;
@@ -778,44 +941,60 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
rate->attempts += ar[i].count * info->status.ampdu_len;
}
- /*
- * check for sudden death of spatial multiplexing,
- * downgrade to a lower number of streams if necessary.
- */
- rate = minstrel_get_ratestats(mi, mi->max_tp_rate[0]);
- if (rate->attempts > 30 &&
- MINSTREL_FRAC(rate->success, rate->attempts) <
- MINSTREL_FRAC(20, 100)) {
- minstrel_downgrade_rate(mi, &mi->max_tp_rate[0], true);
+ switch (mi->sample_mode) {
+ case MINSTREL_SAMPLE_IDLE:
+ break;
+
+ case MINSTREL_SAMPLE_ACTIVE:
+ if (!sample_status)
+ break;
+
+ mi->sample_mode = MINSTREL_SAMPLE_PENDING;
update = true;
- }
+ break;
+
+ case MINSTREL_SAMPLE_PENDING:
+ if (sample_status)
+ break;
- rate2 = minstrel_get_ratestats(mi, mi->max_tp_rate[1]);
- if (rate2->attempts > 30 &&
- MINSTREL_FRAC(rate2->success, rate2->attempts) <
- MINSTREL_FRAC(20, 100)) {
- minstrel_downgrade_rate(mi, &mi->max_tp_rate[1], false);
update = true;
+ minstrel_ht_update_stats(mp, mi, false);
+ break;
+ }
+
+
+ if (mp->hw->max_rates > 1) {
+ /*
+ * check for sudden death of spatial multiplexing,
+ * downgrade to a lower number of streams if necessary.
+ */
+ rate = minstrel_get_ratestats(mi, mi->max_tp_rate[0]);
+ if (rate->attempts > 30 &&
+ MINSTREL_FRAC(rate->success, rate->attempts) <
+ MINSTREL_FRAC(20, 100)) {
+ minstrel_downgrade_rate(mi, &mi->max_tp_rate[0], true);
+ update = true;
+ }
+
+ rate2 = minstrel_get_ratestats(mi, mi->max_tp_rate[1]);
+ if (rate2->attempts > 30 &&
+ MINSTREL_FRAC(rate2->success, rate2->attempts) <
+ MINSTREL_FRAC(20, 100)) {
+ minstrel_downgrade_rate(mi, &mi->max_tp_rate[1], false);
+ update = true;
+ }
}
if (time_after(jiffies, mi->last_stats_update +
(mp->update_interval / 2 * HZ) / 1000)) {
update = true;
- minstrel_ht_update_stats(mp, mi);
+ minstrel_ht_update_stats(mp, mi, true);
}
if (update)
minstrel_ht_update_rates(mp, mi);
}
-static inline int
-minstrel_get_duration(int index)
-{
- const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES];
- unsigned int duration = group->duration[index % MCS_GROUP_RATES];
- return duration << group->shift;
-}
-
static void
minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
int index)
@@ -980,14 +1159,18 @@ static void
minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
{
struct ieee80211_sta_rates *rates;
+ u16 first_rate = mi->max_tp_rate[0];
int i = 0;
+ if (mi->sample_mode == MINSTREL_SAMPLE_ACTIVE)
+ first_rate = mi->sample_rate;
+
rates = kzalloc(sizeof(*rates), GFP_ATOMIC);
if (!rates)
return;
/* Start with max_tp_rate[0] */
- minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_tp_rate[0]);
+ minstrel_ht_set_rate(mp, mi, rates, i++, first_rate);
if (mp->hw->max_rates >= 3) {
/* At least 3 tx rates supported, use max_tp_rate[1] next */
@@ -1012,6 +1195,11 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
int tp_rate1, tp_rate2;
int sample_idx = 0;
+ if (mp->hw->max_rates == 1 && mp->sample_switch &&
+ (mi->total_packets_cur >= SAMPLE_SWITCH_THR ||
+ mp->sample_switch == 1))
+ return -1;
+
if (mi->sample_wait > 0) {
mi->sample_wait--;
return -1;
@@ -1059,6 +1247,21 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
minstrel_get_duration(mi->max_prob_rate) * 3 < sample_dur)
return -1;
+
+ /*
+ * For devices with no configurable multi-rate retry, skip sampling
+ * below the per-group max throughput rate, and only use one sampling
+ * attempt per rate
+ */
+ if (mp->hw->max_rates == 1 &&
+ (minstrel_get_duration(mg->max_group_tp_rate[0]) < sample_dur ||
+ mrs->attempts))
+ return -1;
+
+ /* Skip already sampled slow rates */
+ if (sample_dur >= minstrel_get_duration(tp_rate1) && mrs->attempts)
+ return -1;
+
/*
* Make sure that lower rates get sampled only occasionally,
* if the link is working perfectly.
@@ -1318,7 +1521,7 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
mi->supported[MINSTREL_CCK_GROUP] |= mi->cck_supported_short << 4;
/* create an initial rate table with the lowest supported rates */
- minstrel_ht_update_stats(mp, mi);
+ minstrel_ht_update_stats(mp, mi, true);
minstrel_ht_update_rates(mp, mi);
return;
@@ -1436,6 +1639,8 @@ minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
if (!mp)
return NULL;
+ mp->sample_switch = -1;
+
/* contention window settings
* Just an approximation. Using the per-queue values would complicate
* the calculations and is probably unnecessary */
@@ -1467,6 +1672,8 @@ minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
mp->fixed_rate_idx = (u32) -1;
debugfs_create_u32("fixed_rate_idx", S_IRUGO | S_IWUGO, debugfsdir,
&mp->fixed_rate_idx);
+ debugfs_create_u32("sample_switch", S_IRUGO | S_IWUSR, debugfsdir,
+ &mp->sample_switch);
#endif
minstrel_ht_init_cck_rates(mp);
diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h
index 80296268c778..f938701e7ab7 100644
--- a/net/mac80211/rc80211_minstrel_ht.h
+++ b/net/mac80211/rc80211_minstrel_ht.h
@@ -33,6 +33,7 @@ struct mcs_group {
u16 flags;
u8 streams;
u8 shift;
+ u8 bw;
u16 duration[MCS_GROUP_RATES];
};
@@ -50,6 +51,12 @@ struct minstrel_mcs_group_data {
struct minstrel_rate_stats rates[MCS_GROUP_RATES];
};
+enum minstrel_sample_mode {
+ MINSTREL_SAMPLE_IDLE,
+ MINSTREL_SAMPLE_ACTIVE,
+ MINSTREL_SAMPLE_PENDING,
+};
+
struct minstrel_ht_sta {
struct ieee80211_sta *sta;
@@ -71,6 +78,8 @@ struct minstrel_ht_sta {
unsigned int overhead;
unsigned int overhead_rtscts;
+ unsigned int total_packets_last;
+ unsigned int total_packets_cur;
unsigned int total_packets;
unsigned int sample_packets;
@@ -82,6 +91,9 @@ struct minstrel_ht_sta {
u8 sample_count;
u8 sample_slow;
+ enum minstrel_sample_mode sample_mode;
+ u16 sample_rate;
+
/* current MCS group to be sampled */
u8 sample_group;
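/*
 * Illustrative sketch, not part of the patch: the sample_mode handling added
 * in rc80211_minstrel_ht.c above behaves like a small three-state machine
 * over the enum defined in this header. The standalone function below is a
 * hypothetical model of those transitions; only the branch structure is taken
 * from the hunk, the helper name and the bool-based interface are assumptions.
 */
#include <stdbool.h>

enum sample_mode { SAMPLE_IDLE, SAMPLE_ACTIVE, SAMPLE_PENDING };

/* Returns true when the caller should refresh its rate table. */
static bool sample_mode_step(enum sample_mode *mode, bool sample_status)
{
	switch (*mode) {
	case SAMPLE_IDLE:
		break;
	case SAMPLE_ACTIVE:
		if (!sample_status)
			break;
		/* status flag set: park the sampled rate as pending */
		*mode = SAMPLE_PENDING;
		return true;
	case SAMPLE_PENDING:
		if (sample_status)
			break;
		/* status flag cleared again: ask for a stats refresh */
		return true;
	}
	return false;
}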
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 5fb368cc2633..bd11fef2139f 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1065,7 +1065,6 @@ static void __sta_info_destroy_part2(struct sta_info *sta)
cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL);
kfree(sinfo);
- rate_control_remove_sta_debugfs(sta);
ieee80211_sta_debugfs_remove(sta);
cleanup_single_sta(sta);
@@ -1962,6 +1961,7 @@ int sta_info_move_state(struct sta_info *sta,
case IEEE80211_STA_ASSOC:
if (sta->sta_state == IEEE80211_STA_AUTH) {
set_bit(WLAN_STA_ASSOC, &sta->_flags);
+ sta->assoc_at = ktime_get_boottime_ns();
ieee80211_recalc_min_chandef(sta->sdata);
if (!sta->sta.support_p2p_ps)
ieee80211_recalc_p2p_go_ps_allowed(sta->sdata);
@@ -2195,6 +2195,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
BIT_ULL(NL80211_STA_INFO_STA_FLAGS) |
BIT_ULL(NL80211_STA_INFO_BSS_PARAM) |
BIT_ULL(NL80211_STA_INFO_CONNECTED_TIME) |
+ BIT_ULL(NL80211_STA_INFO_ASSOC_AT_BOOTTIME) |
BIT_ULL(NL80211_STA_INFO_RX_DROP_MISC);
if (sdata->vif.type == NL80211_IFTYPE_STATION) {
@@ -2203,6 +2204,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
}
sinfo->connected_time = ktime_get_seconds() - sta->last_connected;
+ sinfo->assoc_at = sta->assoc_at;
sinfo->inactive_time =
jiffies_to_msecs(jiffies - ieee80211_sta_last_active(sta));
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 3260d4234920..369c2dddce52 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -466,6 +466,7 @@ struct ieee80211_sta_rx_stats {
* the station when it leaves powersave or polls for frames
* @driver_buffered_tids: bitmap of TIDs the driver has data buffered on
* @txq_buffered_tids: bitmap of TIDs that mac80211 has txq data buffered on
+ * @assoc_at: clock boottime (in ns) of last association
* @last_connected: time (in seconds) when a station got connected
* @last_seq_ctrl: last received seq/frag number from this STA (per TID
* plus one for non-QoS frames)
@@ -562,6 +563,7 @@ struct sta_info {
unsigned long driver_buffered_tids;
unsigned long txq_buffered_tids;
+ u64 assoc_at;
long last_connected;
/* Updated from RX path only, no locking requirements */
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index a88e3bf17e9d..ab8ba5835ca0 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -254,14 +254,22 @@ static void ieee80211_set_bar_pending(struct sta_info *sta, u8 tid, u16 ssn)
tid_tx->bar_pending = true;
}
-static int ieee80211_tx_radiotap_len(struct ieee80211_tx_info *info)
+static int ieee80211_tx_radiotap_len(struct ieee80211_tx_info *info,
+ struct ieee80211_tx_status *status)
{
int len = sizeof(struct ieee80211_radiotap_header);
/* IEEE80211_RADIOTAP_RATE rate */
- if (info->status.rates[0].idx >= 0 &&
- !(info->status.rates[0].flags & (IEEE80211_TX_RC_MCS |
- IEEE80211_TX_RC_VHT_MCS)))
+ if (status && status->rate && !(status->rate->flags &
+ (RATE_INFO_FLAGS_MCS |
+ RATE_INFO_FLAGS_DMG |
+ RATE_INFO_FLAGS_EDMG |
+ RATE_INFO_FLAGS_VHT_MCS |
+ RATE_INFO_FLAGS_HE_MCS)))
+ len += 2;
+ else if (info->status.rates[0].idx >= 0 &&
+ !(info->status.rates[0].flags &
+ (IEEE80211_TX_RC_MCS | IEEE80211_TX_RC_VHT_MCS)))
len += 2;
/* IEEE80211_RADIOTAP_TX_FLAGS */
@@ -272,7 +280,14 @@ static int ieee80211_tx_radiotap_len(struct ieee80211_tx_info *info)
/* IEEE80211_RADIOTAP_MCS
* IEEE80211_RADIOTAP_VHT */
- if (info->status.rates[0].idx >= 0) {
+ if (status && status->rate) {
+ if (status->rate->flags & RATE_INFO_FLAGS_MCS)
+ len += 3;
+ else if (status->rate->flags & RATE_INFO_FLAGS_VHT_MCS)
+ len = ALIGN(len, 2) + 12;
+ else if (status->rate->flags & RATE_INFO_FLAGS_HE_MCS)
+ len = ALIGN(len, 2) + 12;
+ } else if (info->status.rates[0].idx >= 0) {
if (info->status.rates[0].flags & IEEE80211_TX_RC_MCS)
len += 3;
else if (info->status.rates[0].flags & IEEE80211_TX_RC_VHT_MCS)
@@ -286,12 +301,14 @@ static void
ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
struct ieee80211_supported_band *sband,
struct sk_buff *skb, int retry_count,
- int rtap_len, int shift)
+ int rtap_len, int shift,
+ struct ieee80211_tx_status *status)
{
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
struct ieee80211_radiotap_header *rthdr;
unsigned char *pos;
+ u16 legacy_rate = 0;
u16 txflags;
rthdr = skb_push(skb, rtap_len);
@@ -310,14 +327,23 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
*/
/* IEEE80211_RADIOTAP_RATE */
- if (info->status.rates[0].idx >= 0 &&
- !(info->status.rates[0].flags & (IEEE80211_TX_RC_MCS |
- IEEE80211_TX_RC_VHT_MCS))) {
- u16 rate;
+ if (status && status->rate) {
+ if (!(status->rate->flags & (RATE_INFO_FLAGS_MCS |
+ RATE_INFO_FLAGS_DMG |
+ RATE_INFO_FLAGS_EDMG |
+ RATE_INFO_FLAGS_VHT_MCS |
+ RATE_INFO_FLAGS_HE_MCS)))
+ legacy_rate = status->rate->legacy;
+ } else if (info->status.rates[0].idx >= 0 &&
+ !(info->status.rates[0].flags & (IEEE80211_TX_RC_MCS |
+ IEEE80211_TX_RC_VHT_MCS)))
+ legacy_rate =
+ sband->bitrates[info->status.rates[0].idx].bitrate;
+
+ if (legacy_rate) {
rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE);
- rate = sband->bitrates[info->status.rates[0].idx].bitrate;
- *pos = DIV_ROUND_UP(rate, 5 * (1 << shift));
+ *pos = DIV_ROUND_UP(legacy_rate, 5 * (1 << shift));
/* padding for tx flags */
pos += 2;
}
@@ -341,7 +367,140 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
*pos = retry_count;
pos++;
- if (info->status.rates[0].idx < 0)
+ if (status && status->rate &&
+ (status->rate->flags & RATE_INFO_FLAGS_MCS)) {
+ rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS);
+ pos[0] = IEEE80211_RADIOTAP_MCS_HAVE_MCS |
+ IEEE80211_RADIOTAP_MCS_HAVE_GI |
+ IEEE80211_RADIOTAP_MCS_HAVE_BW;
+ if (status->rate->flags & RATE_INFO_FLAGS_SHORT_GI)
+ pos[1] |= IEEE80211_RADIOTAP_MCS_SGI;
+ if (status->rate->bw == RATE_INFO_BW_40)
+ pos[1] |= IEEE80211_RADIOTAP_MCS_BW_40;
+ pos[2] = status->rate->mcs;
+ pos += 3;
+ } else if (status && status->rate &&
+ (status->rate->flags & RATE_INFO_FLAGS_VHT_MCS)) {
+ u16 known = local->hw.radiotap_vht_details &
+ (IEEE80211_RADIOTAP_VHT_KNOWN_GI |
+ IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH);
+
+ rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_VHT);
+
+ /* required alignment from rthdr */
+ pos = (u8 *)rthdr + ALIGN(pos - (u8 *)rthdr, 2);
+
+ /* u16 known - IEEE80211_RADIOTAP_VHT_KNOWN_* */
+ put_unaligned_le16(known, pos);
+ pos += 2;
+
+ /* u8 flags - IEEE80211_RADIOTAP_VHT_FLAG_* */
+ if (status->rate->flags & RATE_INFO_FLAGS_SHORT_GI)
+ *pos |= IEEE80211_RADIOTAP_VHT_FLAG_SGI;
+ pos++;
+
+ /* u8 bandwidth */
+ switch (status->rate->bw) {
+ case RATE_INFO_BW_160:
+ *pos = 11;
+ break;
+ case RATE_INFO_BW_80:
+ *pos = 4;
+ break;
+ case RATE_INFO_BW_40:
+ *pos = 1;
+ break;
+ default:
+ *pos = 0;
+ break;
+ }
+ pos++;
+
+ /* u8 mcs_nss[4] */
+ *pos = (status->rate->mcs << 4) | status->rate->nss;
+ pos += 4;
+
+ /* u8 coding */
+ pos++;
+ /* u8 group_id */
+ pos++;
+ /* u16 partial_aid */
+ pos += 2;
+ } else if (status && status->rate &&
+ (status->rate->flags & RATE_INFO_FLAGS_HE_MCS)) {
+ struct ieee80211_radiotap_he *he;
+
+ rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_HE);
+
+ /* required alignment from rthdr */
+ pos = (u8 *)rthdr + ALIGN(pos - (u8 *)rthdr, 2);
+ he = (struct ieee80211_radiotap_he *)pos;
+
+ he->data1 = cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA1_FORMAT_SU |
+ IEEE80211_RADIOTAP_HE_DATA1_DATA_MCS_KNOWN |
+ IEEE80211_RADIOTAP_HE_DATA1_DATA_DCM_KNOWN |
+ IEEE80211_RADIOTAP_HE_DATA1_BW_RU_ALLOC_KNOWN);
+
+ he->data2 = cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA2_GI_KNOWN);
+
+#define HE_PREP(f, val) le16_encode_bits(val, IEEE80211_RADIOTAP_HE_##f)
+
+ he->data6 |= HE_PREP(DATA6_NSTS, status->rate->nss);
+
+#define CHECK_GI(s) \
+ BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_DATA5_GI_##s != \
+ (int)NL80211_RATE_INFO_HE_GI_##s)
+
+ CHECK_GI(0_8);
+ CHECK_GI(1_6);
+ CHECK_GI(3_2);
+
+ he->data3 |= HE_PREP(DATA3_DATA_MCS, status->rate->mcs);
+ he->data3 |= HE_PREP(DATA3_DATA_DCM, status->rate->he_dcm);
+
+ he->data5 |= HE_PREP(DATA5_GI, status->rate->he_gi);
+
+ switch (status->rate->bw) {
+ case RATE_INFO_BW_20:
+ he->data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC,
+ IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_20MHZ);
+ break;
+ case RATE_INFO_BW_40:
+ he->data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC,
+ IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_40MHZ);
+ break;
+ case RATE_INFO_BW_80:
+ he->data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC,
+ IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_80MHZ);
+ break;
+ case RATE_INFO_BW_160:
+ he->data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC,
+ IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_160MHZ);
+ break;
+ case RATE_INFO_BW_HE_RU:
+#define CHECK_RU_ALLOC(s) \
+ BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_##s##T != \
+ NL80211_RATE_INFO_HE_RU_ALLOC_##s + 4)
+
+ CHECK_RU_ALLOC(26);
+ CHECK_RU_ALLOC(52);
+ CHECK_RU_ALLOC(106);
+ CHECK_RU_ALLOC(242);
+ CHECK_RU_ALLOC(484);
+ CHECK_RU_ALLOC(996);
+ CHECK_RU_ALLOC(2x996);
+
+ he->data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC,
+ status->rate->he_ru_alloc + 4);
+ break;
+ default:
+ WARN_ONCE(1, "Invalid SU BW %d\n", status->rate->bw);
+ }
+
+ pos += sizeof(struct ieee80211_radiotap_he);
+ }
+
+ if ((status && status->rate) || info->status.rates[0].idx < 0)
return;
/* IEEE80211_RADIOTAP_MCS
@@ -645,7 +804,8 @@ static int ieee80211_tx_get_rates(struct ieee80211_hw *hw,
void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb,
struct ieee80211_supported_band *sband,
- int retry_count, int shift, bool send_to_cooked)
+ int retry_count, int shift, bool send_to_cooked,
+ struct ieee80211_tx_status *status)
{
struct sk_buff *skb2;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
@@ -654,14 +814,14 @@ void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb,
int rtap_len;
/* send frame to monitor interfaces now */
- rtap_len = ieee80211_tx_radiotap_len(info);
+ rtap_len = ieee80211_tx_radiotap_len(info, status);
if (WARN_ON_ONCE(skb_headroom(skb) < rtap_len)) {
pr_err("ieee80211_tx_status: headroom too small\n");
dev_kfree_skb(skb);
return;
}
ieee80211_add_tx_radiotap_header(local, sband, skb, retry_count,
- rtap_len, shift);
+ rtap_len, shift, status);
/* XXX: is this sufficient for BPF? */
skb_reset_mac_header(skb);
@@ -901,7 +1061,8 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw,
}
/* send to monitor interfaces */
- ieee80211_tx_monitor(local, skb, sband, retry_count, shift, send_to_cooked);
+ ieee80211_tx_monitor(local, skb, sband, retry_count, shift,
+ send_to_cooked, status);
}
void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
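/*
 * Illustrative sketch, not part of the patch: the VHT branch added to
 * ieee80211_add_tx_radiotap_header() above writes the radiotap bandwidth
 * field as a fixed index (0/1/4/11). The helper below only restates that
 * mapping with hypothetical names; the constants are taken straight from
 * the hunk.
 */
enum bw_kind { BW_20, BW_40, BW_80, BW_160 };

static unsigned char radiotap_vht_bw_index(enum bw_kind bw)
{
	switch (bw) {
	case BW_160:
		return 11;
	case BW_80:
		return 4;
	case BW_40:
		return 1;
	default:
		return 0;	/* 20 MHz and anything unrecognised */
	}
}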
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 3bb4459b52c7..4768322dc202 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -1242,9 +1242,10 @@ TRACE_EVENT(drv_remain_on_channel,
)
);
-DEFINE_EVENT(local_only_evt, drv_cancel_remain_on_channel,
- TP_PROTO(struct ieee80211_local *local),
- TP_ARGS(local)
+DEFINE_EVENT(local_sdata_evt, drv_cancel_remain_on_channel,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata),
+ TP_ARGS(local, sdata)
);
TRACE_EVENT(drv_set_ringparam,
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index f13eb2f61ccf..1fa422782905 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -162,6 +162,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
break;
}
case NL80211_BAND_5GHZ:
+ case NL80211_BAND_6GHZ:
if (r->flags & IEEE80211_RATE_MANDATORY_A)
mrate = r->bitrate;
break;
@@ -3546,6 +3547,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
ieee80211_tx_result r;
struct ieee80211_vif *vif = txq->vif;
+ WARN_ON_ONCE(softirq_count() == 0);
+
begin:
spin_lock_bh(&fq->lock);
@@ -4647,7 +4650,8 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
if (!sband)
return bcn;
- ieee80211_tx_monitor(hw_to_local(hw), copy, sband, 1, shift, false);
+ ieee80211_tx_monitor(hw_to_local(hw), copy, sband, 1, shift, false,
+ NULL);
return bcn;
}
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index ad1e58184c4e..051a02ddcb85 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1200,6 +1200,13 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
elems->cisco_dtpc_elem = pos;
break;
+ case WLAN_EID_ADDBA_EXT:
+ if (elen != sizeof(struct ieee80211_addba_ext_ie)) {
+ elem_parse_failed = true;
+ break;
+ }
+ elems->addba_ext_ie = (void *)pos;
+ break;
case WLAN_EID_TIMEOUT_INTERVAL:
if (elen >= sizeof(struct ieee80211_timeout_interval_ie))
elems->timeout_int = (void *)pos;
@@ -1233,6 +1240,10 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION &&
elen == 3) {
elems->mbssid_config_ie = (void *)&pos[1];
+ } else if (pos[0] == WLAN_EID_EXT_HE_SPR &&
+ elen >= sizeof(*elems->he_spr) &&
+ elen >= ieee80211_he_spr_size(&pos[1])) {
+ elems->he_spr = (void *)&pos[1];
}
break;
default:
@@ -1572,7 +1583,8 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
}
void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
- const u8 *bssid, u16 stype, u16 reason,
+ const u8 *da, const u8 *bssid,
+ u16 stype, u16 reason,
bool send_frame, u8 *frame_buf)
{
struct ieee80211_local *local = sdata->local;
@@ -1583,7 +1595,7 @@ void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | stype);
mgmt->duration = 0; /* initialize only */
mgmt->seq_ctrl = 0; /* initialize only */
- memcpy(mgmt->da, bssid, ETH_ALEN);
+ memcpy(mgmt->da, da, ETH_ALEN);
memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
memcpy(mgmt->bssid, bssid, ETH_ALEN);
/* u.deauth.reason_code == u.disassoc.reason_code */
@@ -2409,11 +2421,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
/* add back keys */
list_for_each_entry(sdata, &local->interfaces, list)
- ieee80211_reset_crypto_tx_tailroom(sdata);
-
- list_for_each_entry(sdata, &local->interfaces, list)
- if (ieee80211_sdata_running(sdata))
- ieee80211_enable_keys(sdata);
+ ieee80211_reenable_keys(sdata);
/* Reconfigure sched scan if it was interrupted by FW restart */
mutex_lock(&local->mtx);
@@ -2702,6 +2710,27 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
return pos;
}
+u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype)
+{
+ const struct ieee80211_sta_he_cap *he_cap;
+ struct ieee80211_supported_band *sband;
+ u8 n;
+
+ sband = ieee80211_get_sband(sdata);
+ if (!sband)
+ return 0;
+
+ he_cap = ieee80211_get_he_iftype_cap(sband, iftype);
+ if (!he_cap)
+ return 0;
+
+ n = ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem);
+ return 2 + 1 +
+ sizeof(he_cap->he_cap_elem) + n +
+ ieee80211_he_ppe_size(he_cap->ppe_thres[0],
+ he_cap->he_cap_elem.phy_cap_info);
+}
+
u8 *ieee80211_ie_build_he_cap(u8 *pos,
const struct ieee80211_sta_he_cap *he_cap,
u8 *end)
@@ -2891,6 +2920,34 @@ u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
return pos + sizeof(struct ieee80211_vht_operation);
}
+u8 *ieee80211_ie_build_he_oper(u8 *pos)
+{
+ struct ieee80211_he_operation *he_oper;
+ u32 he_oper_params;
+
+ *pos++ = WLAN_EID_EXTENSION;
+ *pos++ = 1 + sizeof(struct ieee80211_he_operation);
+ *pos++ = WLAN_EID_EXT_HE_OPERATION;
+
+ he_oper_params = 0;
+ he_oper_params |= u32_encode_bits(1023, /* disabled */
+ IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK);
+ he_oper_params |= u32_encode_bits(1,
+ IEEE80211_HE_OPERATION_ER_SU_DISABLE);
+ he_oper_params |= u32_encode_bits(1,
+ IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED);
+
+ he_oper = (struct ieee80211_he_operation *)pos;
+ he_oper->he_oper_params = cpu_to_le32(he_oper_params);
+
+ /* don't require special HE peer rates */
+ he_oper->he_mcs_nss_set = cpu_to_le16(0xffff);
+
+ /* TODO add VHT operational and 6GHz operational subelement? */
+
+ return pos + sizeof(struct ieee80211_he_operation);
+}
+
bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper,
struct cfg80211_chan_def *chandef)
{
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index b20ff28d9f30..ccdcb9ad9ac7 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -4,7 +4,7 @@
*
* Portions of this file
* Copyright(c) 2015 - 2016 Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018 - 2019 Intel Corporation
*/
#include <linux/ieee80211.h>
@@ -349,6 +349,14 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta)
cap_width == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ)
return IEEE80211_STA_RX_BW_160;
+ /*
+ * If this is non-zero, then it does support 160 MHz after all,
+ * in one form or the other. We don't distinguish here (or even
+ * above) between 160 and 80+80 yet.
+ */
+ if (vht_cap->cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK)
+ return IEEE80211_STA_RX_BW_160;
+
return IEEE80211_STA_RX_BW_80;
}
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index ee72779729e5..91bf32af55e9 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -946,7 +946,8 @@ ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx)
info = IEEE80211_SKB_CB(skb);
- if (info->control.hw_key)
+ if (info->control.hw_key &&
+ !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIE))
return TX_CONTINUE;
if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie)))
@@ -962,6 +963,9 @@ ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx)
bip_ipn_set64(mmie->sequence_number, pn64);
+ if (info->control.hw_key)
+ return TX_CONTINUE;
+
bip_aad(skb, aad);
/*
diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h
index a8e9def593f2..80938b338fee 100644
--- a/net/ncsi/ncsi-pkt.h
+++ b/net/ncsi/ncsi-pkt.h
@@ -387,6 +387,9 @@ struct ncsi_aen_hncdsc_pkt {
#define NCSI_PKT_CMD_OEM 0x50 /* OEM */
#define NCSI_PKT_CMD_PLDM 0x51 /* PLDM request over NCSI over RBT */
#define NCSI_PKT_CMD_GPUUID 0x52 /* Get package UUID */
+#define NCSI_PKT_CMD_QPNPR 0x56 /* Query Pending NC PLDM request */
+#define NCSI_PKT_CMD_SNPR 0x57 /* Send NC PLDM Reply */
+
/* NCSI packet responses */
#define NCSI_PKT_RSP_CIS (NCSI_PKT_CMD_CIS + 0x80)
@@ -419,6 +422,8 @@ struct ncsi_aen_hncdsc_pkt {
#define NCSI_PKT_RSP_OEM (NCSI_PKT_CMD_OEM + 0x80)
#define NCSI_PKT_RSP_PLDM (NCSI_PKT_CMD_PLDM + 0x80)
#define NCSI_PKT_RSP_GPUUID (NCSI_PKT_CMD_GPUUID + 0x80)
+#define NCSI_PKT_RSP_QPNPR (NCSI_PKT_CMD_QPNPR + 0x80)
+#define NCSI_PKT_RSP_SNPR (NCSI_PKT_CMD_SNPR + 0x80)
/* NCSI response code/reason */
#define NCSI_PKT_RSP_C_COMPLETED 0x0000 /* Command Completed */
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index d876bd55f356..d5611f04926d 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -1038,6 +1038,11 @@ static int ncsi_rsp_handler_gpuuid(struct ncsi_request *nr)
return 0;
}
+static int ncsi_rsp_handler_pldm(struct ncsi_request *nr)
+{
+ return 0;
+}
+
static int ncsi_rsp_handler_netlink(struct ncsi_request *nr)
{
struct ncsi_dev_priv *ndp = nr->ndp;
@@ -1086,13 +1091,15 @@ static struct ncsi_rsp_handler {
{ NCSI_PKT_RSP_GVI, 40, ncsi_rsp_handler_gvi },
{ NCSI_PKT_RSP_GC, 32, ncsi_rsp_handler_gc },
{ NCSI_PKT_RSP_GP, -1, ncsi_rsp_handler_gp },
- { NCSI_PKT_RSP_GCPS, 172, ncsi_rsp_handler_gcps },
- { NCSI_PKT_RSP_GNS, 172, ncsi_rsp_handler_gns },
- { NCSI_PKT_RSP_GNPTS, 172, ncsi_rsp_handler_gnpts },
+ { NCSI_PKT_RSP_GCPS, 204, ncsi_rsp_handler_gcps },
+ { NCSI_PKT_RSP_GNS, 32, ncsi_rsp_handler_gns },
+ { NCSI_PKT_RSP_GNPTS, 48, ncsi_rsp_handler_gnpts },
{ NCSI_PKT_RSP_GPS, 8, ncsi_rsp_handler_gps },
{ NCSI_PKT_RSP_OEM, -1, ncsi_rsp_handler_oem },
- { NCSI_PKT_RSP_PLDM, 0, NULL },
- { NCSI_PKT_RSP_GPUUID, 20, ncsi_rsp_handler_gpuuid }
+ { NCSI_PKT_RSP_PLDM, -1, ncsi_rsp_handler_pldm },
+ { NCSI_PKT_RSP_GPUUID, 20, ncsi_rsp_handler_gpuuid },
+ { NCSI_PKT_RSP_QPNPR, -1, ncsi_rsp_handler_pldm },
+ { NCSI_PKT_RSP_SNPR, -1, ncsi_rsp_handler_pldm }
};
int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev,
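/*
 * Illustrative sketch, not part of the patch: the ncsi_rsp_handler table
 * above pairs a response type with an expected payload length (-1 meaning
 * "variable, do not check") and a callback, so the new PLDM responses only
 * need a table entry plus a (currently no-op) handler. The lookup below
 * models that dispatch with hypothetical names and types.
 */
#include <stddef.h>

struct rsp_handler {
	unsigned char type;
	int payload;			/* expected length, -1 = variable */
	int (*handler)(void *request);
};

static const struct rsp_handler *
rsp_find_handler(const struct rsp_handler *tbl, size_t n, unsigned char type)
{
	size_t i;

	for (i = 0; i < n; i++)
		if (tbl[i].type == type && tbl[i].handler)
			return &tbl[i];
	return NULL;			/* unknown or unhandled response */
}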
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 0d65f4d39494..34ec7afec116 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -20,7 +20,7 @@ config NETFILTER_FAMILY_ARP
bool
config NETFILTER_NETLINK_ACCT
-tristate "Netfilter NFACCT over NFNETLINK interface"
+ tristate "Netfilter NFACCT over NFNETLINK interface"
depends on NETFILTER_ADVANCED
select NETFILTER_NETLINK
help
@@ -34,7 +34,7 @@ config NETFILTER_NETLINK_QUEUE
help
If this option is enabled, the kernel will include support
for queueing packets via NFNETLINK.
-
+
config NETFILTER_NETLINK_LOG
tristate "Netfilter LOG over NFNETLINK interface"
default m if NETFILTER_ADVANCED=n
@@ -1502,7 +1502,7 @@ config NETFILTER_XT_MATCH_REALM
This option adds a `realm' match, which allows you to use the realm
key from the routing subsystem inside iptables.
- This match pretty much resembles the CONFIG_NET_CLS_ROUTE4 option
+ This match pretty much resembles the CONFIG_NET_CLS_ROUTE4 option
in tc world.
If you want to compile it as a module, say M here and read
@@ -1523,7 +1523,7 @@ config NETFILTER_XT_MATCH_SCTP
depends on NETFILTER_ADVANCED
default IP_SCTP
help
- With this option enabled, you will be able to use the
+ With this option enabled, you will be able to use the
`sctp' match in order to match on SCTP source/destination ports
and SCTP chunk types.
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 9270a7fae484..4fc075b612fe 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -124,7 +124,7 @@ nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o
obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
-# generic X tables
+# generic X tables
obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
# combos
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 0feb77fa9edc..d098d87bc331 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -7,7 +7,7 @@
#include <linux/rcupdate.h>
#include <linux/jhash.h>
#include <linux/types.h>
-#include <linux/netfilter/ipset/ip_set_timeout.h>
+#include <linux/netfilter/ipset/ip_set.h>
#define __ipset_dereference_protected(p, c) rcu_dereference_protected(p, c)
#define ipset_dereference_protected(p, set) \
@@ -953,7 +953,7 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]));
#endif
key = HKEY(d, h->initval, t->htable_bits);
- n = rcu_dereference_bh(hbucket(t, key));
+ n = rcu_dereference_bh(hbucket(t, key));
if (!n)
continue;
for (i = 0; i < n->pos; i++) {
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 6f9ead6319e0..67ac50104e6f 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -288,7 +288,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
if (n &&
!(SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(n, set))))
- n = NULL;
+ n = NULL;
e = kzalloc(set->dsize, GFP_ATOMIC);
if (!e)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 46f06f92ab8f..8b80ab794a92 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -617,7 +617,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
iph->protocol == IPPROTO_UDP) ?
IP_VS_CONN_F_ONE_PACKET : 0;
- union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } };
+ union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } };
/* create a new connection entry */
IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 060565e7d227..8b48e7ce1c2c 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -262,7 +262,7 @@ static inline unsigned int
ip_vs_svc_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto,
const union nf_inet_addr *addr, __be16 port)
{
- register unsigned int porth = ntohs(port);
+ unsigned int porth = ntohs(port);
__be32 addr_fold = addr->ip;
__u32 ahash;
@@ -493,7 +493,7 @@ static inline unsigned int ip_vs_rs_hashkey(int af,
const union nf_inet_addr *addr,
__be16 port)
{
- register unsigned int porth = ntohs(port);
+ unsigned int porth = ntohs(port);
__be32 addr_fold = addr->ip;
#ifdef CONFIG_IP_VS_IPV6
@@ -1737,12 +1737,18 @@ proc_do_defense_mode(struct ctl_table *table, int write,
int val = *valp;
int rc;
- rc = proc_dointvec(table, write, buffer, lenp, ppos);
+ struct ctl_table tmp = {
+ .data = &val,
+ .maxlen = sizeof(int),
+ .mode = table->mode,
+ };
+
+ rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
if (write && (*valp != val)) {
- if ((*valp < 0) || (*valp > 3)) {
- /* Restore the correct value */
- *valp = val;
+ if (val < 0 || val > 3) {
+ rc = -EINVAL;
} else {
+ *valp = val;
update_defense_level(ipvs);
}
}
@@ -1756,33 +1762,20 @@ proc_do_sync_threshold(struct ctl_table *table, int write,
int *valp = table->data;
int val[2];
int rc;
+ struct ctl_table tmp = {
+ .data = &val,
+ .maxlen = table->maxlen,
+ .mode = table->mode,
+ };
- /* backup the value first */
memcpy(val, valp, sizeof(val));
-
- rc = proc_dointvec(table, write, buffer, lenp, ppos);
- if (write && (valp[0] < 0 || valp[1] < 0 ||
- (valp[0] >= valp[1] && valp[1]))) {
- /* Restore the correct value */
- memcpy(valp, val, sizeof(val));
- }
- return rc;
-}
-
-static int
-proc_do_sync_mode(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- int *valp = table->data;
- int val = *valp;
- int rc;
-
- rc = proc_dointvec(table, write, buffer, lenp, ppos);
- if (write && (*valp != val)) {
- if ((*valp < 0) || (*valp > 1)) {
- /* Restore the correct value */
- *valp = val;
- }
+ rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
+ if (write) {
+ if (val[0] < 0 || val[1] < 0 ||
+ (val[0] >= val[1] && val[1]))
+ rc = -EINVAL;
+ else
+ memcpy(valp, val, sizeof(val));
}
return rc;
}
@@ -1795,12 +1788,18 @@ proc_do_sync_ports(struct ctl_table *table, int write,
int val = *valp;
int rc;
- rc = proc_dointvec(table, write, buffer, lenp, ppos);
+ struct ctl_table tmp = {
+ .data = &val,
+ .maxlen = sizeof(int),
+ .mode = table->mode,
+ };
+
+ rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
if (write && (*valp != val)) {
- if (*valp < 1 || !is_power_of_2(*valp)) {
- /* Restore the correct value */
+ if (val < 1 || !is_power_of_2(val))
+ rc = -EINVAL;
+ else
*valp = val;
- }
}
return rc;
}
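/*
 * Illustrative sketch, not part of the patch: the reworked proc_do_* handlers
 * above all follow one pattern - let proc_dointvec() parse into a local copy
 * via a temporary ctl_table, validate the parsed value, and either commit it
 * to the live variable or return -EINVAL without touching it. The helper
 * below shows that commit-or-reject step in isolation; the names are
 * assumptions.
 */
#include <errno.h>
#include <stdbool.h>

static int commit_if_valid(int *live, int parsed, bool (*valid)(int))
{
	if (!valid(parsed))
		return -EINVAL;		/* live value stays untouched */
	*live = parsed;
	return 0;
}

/* range accepted by proc_do_defense_mode in the hunk above */
static bool defense_mode_ok(int v)
{
	return v >= 0 && v <= 3;
}

/* usage: commit_if_valid(&live_defense_mode, val, defense_mode_ok); */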
@@ -1860,7 +1859,9 @@ static struct ctl_table vs_vars[] = {
.procname = "sync_version",
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_do_sync_mode,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{
.procname = "sync_ports",
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index c8b5a504476c..77c323c36a88 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -160,7 +160,7 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
/* get weighted least-connection node in the destination set */
static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
{
- register struct ip_vs_dest_set_elem *e;
+ struct ip_vs_dest_set_elem *e;
struct ip_vs_dest *dest, *least;
int loh, doh;
@@ -209,7 +209,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
/* get weighted most-connection node in the destination set */
static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
{
- register struct ip_vs_dest_set_elem *e;
+ struct ip_vs_dest_set_elem *e;
struct ip_vs_dest *dest, *most;
int moh, doh;
diff --git a/net/netfilter/ipvs/ip_vs_mh.c b/net/netfilter/ipvs/ip_vs_mh.c
index 94d9d349ebb0..da0280cec506 100644
--- a/net/netfilter/ipvs/ip_vs_mh.c
+++ b/net/netfilter/ipvs/ip_vs_mh.c
@@ -174,8 +174,8 @@ static int ip_vs_mh_populate(struct ip_vs_mh_state *s,
return 0;
}
- table = kcalloc(BITS_TO_LONGS(IP_VS_MH_TAB_SIZE),
- sizeof(unsigned long), GFP_KERNEL);
+ table = kcalloc(BITS_TO_LONGS(IP_VS_MH_TAB_SIZE),
+ sizeof(unsigned long), GFP_KERNEL);
if (!table)
return -ENOMEM;
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 000d961b97e4..32b028853a7c 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -710,7 +710,7 @@ static int __ip_vs_tcp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd
sizeof(tcp_timeouts));
if (!pd->timeout_table)
return -ENOMEM;
- pd->tcp_state_table = tcp_states;
+ pd->tcp_state_table = tcp_states;
return 0;
}
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 81a8ef42b88d..0c63120b2db2 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -73,8 +73,7 @@ struct conntrack_gc_work {
};
static __read_mostly struct kmem_cache *nf_conntrack_cachep;
-static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
-static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
+static DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
static __read_mostly bool nf_conntrack_locks_all;
/* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 5e2812ee2149..6fba74b5aaf7 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -24,6 +24,7 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_extend.h>
static DEFINE_MUTEX(nf_ct_ecache_mutex);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 65364de915d1..42557d2b6a90 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -25,8 +25,10 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_zones.h>
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 8d96738b7dfd..9eca90414bb7 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -162,7 +162,7 @@ static int try_rfc959(const char *data, size_t dlen,
if (length == 0)
return 0;
- cmd->u3.ip = htonl((array[0] << 24) | (array[1] << 16) |
+ cmd->u3.ip = htonl((array[0] << 24) | (array[1] << 16) |
(array[2] << 8) | array[3]);
cmd->u.tcp.port = htons((array[4] << 8) | array[5]);
return length;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 8d729e7c36ff..118f415928ae 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -21,10 +21,11 @@
#include <linux/rtnetlink.h>
#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_l4proto.h>
-#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_log.h>
static DEFINE_MUTEX(nf_ct_helper_mutex);
diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c
index 74b8113f7aeb..522792556632 100644
--- a/net/netfilter/nf_conntrack_labels.c
+++ b/net/netfilter/nf_conntrack_labels.c
@@ -11,7 +11,7 @@
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_labels.h>
-static spinlock_t nf_connlabels_lock;
+static DEFINE_SPINLOCK(nf_connlabels_lock);
static int replace_u32(u32 *address, u32 mask, u32 new)
{
@@ -89,7 +89,6 @@ int nf_conntrack_labels_init(void)
{
BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE / sizeof(long) >= U8_MAX);
- spin_lock_init(&nf_connlabels_lock);
return nf_ct_extend_register(&labels_extend);
}
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c
index 7e317e6698ba..6f9144e1f1c1 100644
--- a/net/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
@@ -22,7 +22,6 @@
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_timeout.h>
#include <net/netfilter/nf_conntrack_zones.h>
-#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
#include <net/netfilter/nf_log.h>
static const unsigned int nf_ct_icmpv6_timeout = 30*HZ;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 85c1f8c213b0..1926fd56df56 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1227,7 +1227,7 @@ static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
[CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NLA_U8 },
[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .len = sizeof(struct nf_ct_tcp_flags) },
- [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) },
+ [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) },
};
#define TCP_NLATTR_SIZE ( \
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 0006503d2da9..410809c669e1 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -511,8 +511,6 @@ static void nf_conntrack_standalone_fini_proc(struct net *net)
/* Log invalid packets of a given protocol */
static int log_invalid_proto_min __read_mostly;
static int log_invalid_proto_max __read_mostly = 255;
-static int zero;
-static int one = 1;
/* size the user *wants to set */
static unsigned int nf_conntrack_htable_size_user __read_mostly;
@@ -629,8 +627,8 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
[NF_SYSCTL_CT_LOG_INVALID] = {
.procname = "nf_conntrack_log_invalid",
@@ -654,8 +652,8 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
[NF_SYSCTL_CT_HELPER] = {
.procname = "nf_conntrack_helper",
@@ -663,8 +661,8 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
#ifdef CONFIG_NF_CONNTRACK_EVENTS
[NF_SYSCTL_CT_EVENTS] = {
@@ -673,8 +671,8 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
#endif
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
@@ -684,8 +682,8 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
#endif
[NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC] = {
@@ -759,16 +757,16 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
[NF_SYSCTL_CT_PROTO_TCP_LIBERAL] = {
.procname = "nf_conntrack_tcp_be_liberal",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
[NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS] = {
.procname = "nf_conntrack_tcp_max_retrans",
@@ -904,8 +902,8 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
#endif
#ifdef CONFIG_NF_CT_PROTO_GRE
@@ -1169,7 +1167,6 @@ static int __init nf_conntrack_standalone_init(void)
if (ret < 0)
goto out_start;
- BUILD_BUG_ON(SKB_NFCT_PTRMASK != NFCT_PTRMASK);
BUILD_BUG_ON(NFCT_INFOMASK <= IP_CT_NUMBER);
#ifdef CONFIG_SYSCTL
diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c
index 13d0f4a92647..14387e0b8008 100644
--- a/net/netfilter/nf_conntrack_timeout.c
+++ b/net/netfilter/nf_conntrack_timeout.c
@@ -19,6 +19,7 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_timeout.h>
struct nf_ct_timeout *
diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c
index 5a35ef08c3cb..f108a76925dd 100644
--- a/net/netfilter/nf_dup_netdev.c
+++ b/net/netfilter/nf_dup_netdev.c
@@ -10,6 +10,7 @@
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_offload.h>
#include <net/netfilter/nf_dup_netdev.h>
static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev)
@@ -50,5 +51,25 @@ void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif)
}
EXPORT_SYMBOL_GPL(nf_dup_netdev_egress);
+int nft_fwd_dup_netdev_offload(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ enum flow_action_id id, int oif)
+{
+ struct flow_action_entry *entry;
+ struct net_device *dev;
+
+ /* nft_flow_rule_destroy() releases the reference on this device. */
+ dev = dev_get_by_index(ctx->net, oif);
+ if (!dev)
+ return -EOPNOTSUPP;
+
+ entry = &flow->rule->action.entries[ctx->num_actions++];
+ entry->id = id;
+ entry->dev = dev;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_fwd_dup_netdev_offload);
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
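/*
 * Illustrative sketch, not part of the patch: nft_fwd_dup_netdev_offload()
 * above takes a device reference with dev_get_by_index() that is only
 * released later by the dev_put() calls added to nft_flow_rule_destroy().
 * The pair of helpers below restates that "producer grabs, destructor drops"
 * ownership rule with a plain counter; all names here are hypothetical.
 */
struct ref_device {
	int refcnt;
};

struct fwd_action {
	struct ref_device *dev;
};

static int fwd_action_set_dev(struct fwd_action *a, struct ref_device *dev)
{
	if (!dev)
		return -1;
	dev->refcnt++;			/* reference taken here ...        */
	a->dev = dev;
	return 0;
}

static void fwd_action_destroy(struct fwd_action *a)
{
	if (a->dev)
		a->dev->refcnt--;	/* ... and dropped only on destroy */
}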
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index a0b4bf654de2..132f5228b431 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -11,6 +11,7 @@
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_tuple.h>
struct flow_offload_entry {
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 3f6023ed4966..bfc555fcbc72 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -18,12 +18,12 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_zones.h>
-#include <linux/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <uapi/linux/netfilter/nf_nat.h>
#include "nf_internals.h"
diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c
index 7ac733ebd060..0a59c14b5177 100644
--- a/net/netfilter/nf_nat_proto.c
+++ b/net/netfilter/nf_nat_proto.c
@@ -722,7 +722,7 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
return ret;
}
-const struct nf_hook_ops nf_nat_ipv4_ops[] = {
+static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
/* Before packet filtering, change destination */
{
.hook = nf_nat_ipv4_in,
@@ -961,7 +961,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
return ret;
}
-const struct nf_hook_ops nf_nat_ipv6_ops[] = {
+static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
/* Before packet filtering, change destination */
{
.hook = nf_nat_ipv6_in,
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index c769462a839e..b0930d4aba22 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -56,7 +56,7 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
switch (opcode) {
case TCPOPT_MSS:
if (opsize == TCPOLEN_MSS) {
- opts->mss = get_unaligned_be16(ptr);
+ opts->mss_option = get_unaligned_be16(ptr);
opts->options |= NF_SYNPROXY_OPT_MSS;
}
break;
@@ -115,7 +115,7 @@ synproxy_build_options(struct tcphdr *th, const struct synproxy_options *opts)
if (options & NF_SYNPROXY_OPT_MSS)
*ptr++ = htonl((TCPOPT_MSS << 24) |
(TCPOLEN_MSS << 16) |
- opts->mss);
+ opts->mss_option);
if (options & NF_SYNPROXY_OPT_TIMESTAMP) {
if (options & NF_SYNPROXY_OPT_SACK_PERM)
@@ -642,7 +642,7 @@ synproxy_recv_client_ack(struct net *net,
}
this_cpu_inc(snet->stats->cookie_valid);
- opts->mss = mss;
+ opts->mss_option = mss;
opts->options |= NF_SYNPROXY_OPT_MSS;
if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP)
@@ -1060,7 +1060,7 @@ synproxy_recv_client_ack_ipv6(struct net *net,
}
this_cpu_inc(snet->stats->cookie_valid);
- opts->mss = mss;
+ opts->mss_option = mss;
opts->options |= NF_SYNPROXY_OPT_MSS;
if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index d47469f824a1..e4a68dc42694 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2853,7 +2853,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
return nft_table_validate(net, table);
if (chain->flags & NFT_CHAIN_HW_OFFLOAD) {
- flow = nft_flow_rule_create(rule);
+ flow = nft_flow_rule_create(net, rule);
if (IS_ERR(flow))
return PTR_ERR(flow);
@@ -5131,6 +5131,41 @@ nft_obj_type_get(struct net *net, u32 objtype)
return ERR_PTR(-ENOENT);
}
+static int nf_tables_updobj(const struct nft_ctx *ctx,
+ const struct nft_object_type *type,
+ const struct nlattr *attr,
+ struct nft_object *obj)
+{
+ struct nft_object *newobj;
+ struct nft_trans *trans;
+ int err;
+
+ if (!obj->ops->update)
+ return -EOPNOTSUPP;
+
+ trans = nft_trans_alloc(ctx, NFT_MSG_NEWOBJ,
+ sizeof(struct nft_trans_obj));
+ if (!trans)
+ return -ENOMEM;
+
+ newobj = nft_obj_init(ctx, type, attr);
+ if (IS_ERR(newobj)) {
+ err = PTR_ERR(newobj);
+ goto err_free_trans;
+ }
+
+ nft_trans_obj(trans) = obj;
+ nft_trans_obj_update(trans) = true;
+ nft_trans_obj_newobj(trans) = newobj;
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+
+ return 0;
+
+err_free_trans:
+ kfree(trans);
+ return err;
+}
+
static int nf_tables_newobj(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[],
@@ -5170,7 +5205,13 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]);
return -EEXIST;
}
- return 0;
+ if (nlh->nlmsg_flags & NLM_F_REPLACE)
+ return -EOPNOTSUPP;
+
+ type = nft_obj_type_get(net, objtype);
+ nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+
+ return nf_tables_updobj(&ctx, type, nla[NFTA_OBJ_DATA], obj);
}
nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
@@ -6431,6 +6472,19 @@ static void nft_chain_commit_update(struct nft_trans *trans)
}
}
+static void nft_obj_commit_update(struct nft_trans *trans)
+{
+ struct nft_object *newobj;
+ struct nft_object *obj;
+
+ obj = nft_trans_obj(trans);
+ newobj = nft_trans_obj_newobj(trans);
+
+ obj->ops->update(obj, newobj);
+
+ kfree(newobj);
+}
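/*
 * Illustrative sketch, not part of the patch: the object-update transaction
 * added above stages a freshly built object next to the live one, then either
 * applies it on commit (ops->update() followed by freeing the staged copy) or
 * just frees it on abort. The structures below are hypothetical stand-ins for
 * the nft_trans/nft_object machinery.
 */
#include <stdlib.h>

struct staged_obj {
	void (*update)(struct staged_obj *live, const struct staged_obj *staged);
};

struct obj_update_trans {
	struct staged_obj *live;
	struct staged_obj *staged;
};

static void obj_update_commit(struct obj_update_trans *t)
{
	t->live->update(t->live, t->staged);	/* fold new state into live object */
	free(t->staged);
}

static void obj_update_abort(struct obj_update_trans *t)
{
	free(t->staged);			/* live object never modified */
}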
+
static void nft_commit_release(struct nft_trans *trans)
{
switch (trans->msg_type) {
@@ -6795,10 +6849,18 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
te->set->ndeact--;
break;
case NFT_MSG_NEWOBJ:
- nft_clear(net, nft_trans_obj(trans));
- nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans),
- NFT_MSG_NEWOBJ);
- nft_trans_destroy(trans);
+ if (nft_trans_obj_update(trans)) {
+ nft_obj_commit_update(trans);
+ nf_tables_obj_notify(&trans->ctx,
+ nft_trans_obj(trans),
+ NFT_MSG_NEWOBJ);
+ } else {
+ nft_clear(net, nft_trans_obj(trans));
+ nf_tables_obj_notify(&trans->ctx,
+ nft_trans_obj(trans),
+ NFT_MSG_NEWOBJ);
+ nft_trans_destroy(trans);
+ }
break;
case NFT_MSG_DELOBJ:
nft_obj_del(nft_trans_obj(trans));
@@ -6945,8 +7007,13 @@ static int __nf_tables_abort(struct net *net)
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWOBJ:
- trans->ctx.table->use--;
- nft_obj_del(nft_trans_obj(trans));
+ if (nft_trans_obj_update(trans)) {
+ kfree(nft_trans_obj_newobj(trans));
+ nft_trans_destroy(trans);
+ } else {
+ trans->ctx.table->use--;
+ nft_obj_del(nft_trans_obj(trans));
+ }
break;
case NFT_MSG_DELOBJ:
trans->ctx.table->use++;
@@ -7627,13 +7694,20 @@ static int __init nf_tables_module_init(void)
if (err < 0)
goto err4;
+ err = nft_offload_init();
+ if (err < 0)
+ goto err5;
+
/* must be last */
err = nfnetlink_subsys_register(&nf_tables_subsys);
if (err < 0)
- goto err5;
+ goto err6;
nft_chain_route_init();
+
return err;
+err6:
+ nft_offload_exit();
err5:
rhltable_destroy(&nft_objname_ht);
err4:
@@ -7650,6 +7724,7 @@ err1:
static void __exit nf_tables_module_exit(void)
{
nfnetlink_subsys_unregister(&nf_tables_subsys);
+ nft_offload_exit();
unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
nft_chain_filter_fini();
nft_chain_route_fini();
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index c0d18c1d77ac..21bb772cb4b7 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -28,13 +28,10 @@ static struct nft_flow_rule *nft_flow_rule_alloc(int num_actions)
return flow;
}
-struct nft_flow_rule *nft_flow_rule_create(const struct nft_rule *rule)
+struct nft_flow_rule *nft_flow_rule_create(struct net *net,
+ const struct nft_rule *rule)
{
- struct nft_offload_ctx ctx = {
- .dep = {
- .type = NFT_OFFLOAD_DEP_UNSPEC,
- },
- };
+ struct nft_offload_ctx *ctx;
struct nft_flow_rule *flow;
int num_actions = 0, err;
struct nft_expr *expr;
@@ -52,21 +49,32 @@ struct nft_flow_rule *nft_flow_rule_create(const struct nft_rule *rule)
return ERR_PTR(-ENOMEM);
expr = nft_expr_first(rule);
+
+ ctx = kzalloc(sizeof(struct nft_offload_ctx), GFP_KERNEL);
+ if (!ctx) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+ ctx->net = net;
+ ctx->dep.type = NFT_OFFLOAD_DEP_UNSPEC;
+
while (expr->ops && expr != nft_expr_last(rule)) {
if (!expr->ops->offload) {
err = -EOPNOTSUPP;
goto err_out;
}
- err = expr->ops->offload(&ctx, flow, expr);
+ err = expr->ops->offload(ctx, flow, expr);
if (err < 0)
goto err_out;
expr = nft_expr_next(expr);
}
- flow->proto = ctx.dep.l3num;
+ flow->proto = ctx->dep.l3num;
+ kfree(ctx);
return flow;
err_out:
+ kfree(ctx);
nft_flow_rule_destroy(flow);
return ERR_PTR(err);
@@ -74,6 +82,19 @@ err_out:
void nft_flow_rule_destroy(struct nft_flow_rule *flow)
{
+ struct flow_action_entry *entry;
+ int i;
+
+ flow_action_for_each(i, entry, &flow->rule->action) {
+ switch (entry->id) {
+ case FLOW_ACTION_REDIRECT:
+ case FLOW_ACTION_MIRRED:
+ dev_put(entry->dev);
+ break;
+ default:
+ break;
+ }
+ }
kfree(flow->rule);
kfree(flow);
}
@@ -134,20 +155,20 @@ int nft_chain_offload_priority(struct nft_base_chain *basechain)
return 0;
}
-static int nft_flow_offload_rule(struct nft_trans *trans,
+static int nft_flow_offload_rule(struct nft_chain *chain,
+ struct nft_rule *rule,
+ struct nft_flow_rule *flow,
enum flow_cls_command command)
{
- struct nft_flow_rule *flow = nft_trans_flow_rule(trans);
- struct nft_rule *rule = nft_trans_rule(trans);
struct flow_cls_offload cls_flow = {};
struct nft_base_chain *basechain;
struct netlink_ext_ack extack;
__be16 proto = ETH_P_ALL;
- if (!nft_is_base_chain(trans->ctx.chain))
+ if (!nft_is_base_chain(chain))
return -EOPNOTSUPP;
- basechain = nft_base_chain(trans->ctx.chain);
+ basechain = nft_base_chain(chain);
if (flow)
proto = flow->proto;
@@ -182,58 +203,130 @@ static int nft_flow_offload_unbind(struct flow_block_offload *bo,
return 0;
}
-#define FLOW_SETUP_BLOCK TC_SETUP_BLOCK
+static int nft_block_setup(struct nft_base_chain *basechain,
+ struct flow_block_offload *bo,
+ enum flow_block_command cmd)
+{
+ int err;
-static int nft_flow_offload_chain(struct nft_trans *trans,
- enum flow_block_command cmd)
+ switch (cmd) {
+ case FLOW_BLOCK_BIND:
+ err = nft_flow_offload_bind(bo, basechain);
+ break;
+ case FLOW_BLOCK_UNBIND:
+ err = nft_flow_offload_unbind(bo, basechain);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ err = -EOPNOTSUPP;
+ }
+
+ return err;
+}
+
+static int nft_block_offload_cmd(struct nft_base_chain *chain,
+ struct net_device *dev,
+ enum flow_block_command cmd)
+{
+ struct netlink_ext_ack extack = {};
+ struct flow_block_offload bo = {};
+ int err;
+
+ bo.net = dev_net(dev);
+ bo.block = &chain->flow_block;
+ bo.command = cmd;
+ bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+ bo.extack = &extack;
+ INIT_LIST_HEAD(&bo.cb_list);
+
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
+ if (err < 0)
+ return err;
+
+ return nft_block_setup(chain, &bo, cmd);
+}
+
+static void nft_indr_block_ing_cmd(struct net_device *dev,
+ struct nft_base_chain *chain,
+ flow_indr_block_bind_cb_t *cb,
+ void *cb_priv,
+ enum flow_block_command cmd)
{
- struct nft_chain *chain = trans->ctx.chain;
struct netlink_ext_ack extack = {};
struct flow_block_offload bo = {};
+
+ if (!chain)
+ return;
+
+ bo.net = dev_net(dev);
+ bo.block = &chain->flow_block;
+ bo.command = cmd;
+ bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+ bo.extack = &extack;
+ INIT_LIST_HEAD(&bo.cb_list);
+
+ cb(dev, cb_priv, TC_SETUP_BLOCK, &bo);
+
+ nft_block_setup(chain, &bo, cmd);
+}
+
+static int nft_indr_block_offload_cmd(struct nft_base_chain *chain,
+ struct net_device *dev,
+ enum flow_block_command cmd)
+{
+ struct flow_block_offload bo = {};
+ struct netlink_ext_ack extack = {};
+
+ bo.net = dev_net(dev);
+ bo.block = &chain->flow_block;
+ bo.command = cmd;
+ bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+ bo.extack = &extack;
+ INIT_LIST_HEAD(&bo.cb_list);
+
+ flow_indr_block_call(dev, &bo, cmd);
+
+ if (list_empty(&bo.cb_list))
+ return -EOPNOTSUPP;
+
+ return nft_block_setup(chain, &bo, cmd);
+}
+
+#define FLOW_SETUP_BLOCK TC_SETUP_BLOCK
+
+static int nft_flow_offload_chain(struct nft_chain *chain,
+ u8 *ppolicy,
+ enum flow_block_command cmd)
+{
struct nft_base_chain *basechain;
struct net_device *dev;
- int err;
+ u8 policy;
if (!nft_is_base_chain(chain))
return -EOPNOTSUPP;
basechain = nft_base_chain(chain);
dev = basechain->ops.dev;
- if (!dev || !dev->netdev_ops->ndo_setup_tc)
+ if (!dev)
return -EOPNOTSUPP;
+ policy = ppolicy ? *ppolicy : basechain->policy;
+
/* Only default policy to accept is supported for now. */
- if (cmd == FLOW_BLOCK_BIND &&
- nft_trans_chain_policy(trans) != -1 &&
- nft_trans_chain_policy(trans) != NF_ACCEPT)
+ if (cmd == FLOW_BLOCK_BIND && policy != (u8)-1 && policy != NF_ACCEPT)
return -EOPNOTSUPP;
- bo.command = cmd;
- bo.block = &basechain->flow_block;
- bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
- bo.extack = &extack;
- INIT_LIST_HEAD(&bo.cb_list);
-
- err = dev->netdev_ops->ndo_setup_tc(dev, FLOW_SETUP_BLOCK, &bo);
- if (err < 0)
- return err;
-
- switch (cmd) {
- case FLOW_BLOCK_BIND:
- err = nft_flow_offload_bind(&bo, basechain);
- break;
- case FLOW_BLOCK_UNBIND:
- err = nft_flow_offload_unbind(&bo, basechain);
- break;
- }
-
- return err;
+ if (dev->netdev_ops->ndo_setup_tc)
+ return nft_block_offload_cmd(basechain, dev, cmd);
+ else
+ return nft_indr_block_offload_cmd(basechain, dev, cmd);
}
int nft_flow_rule_offload_commit(struct net *net)
{
struct nft_trans *trans;
int err = 0;
+ u8 policy;
list_for_each_entry(trans, &net->nft.commit_list, list) {
if (trans->ctx.family != NFPROTO_NETDEV)
@@ -244,13 +337,17 @@ int nft_flow_rule_offload_commit(struct net *net)
if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
continue;
- err = nft_flow_offload_chain(trans, FLOW_BLOCK_BIND);
+ policy = nft_trans_chain_policy(trans);
+ err = nft_flow_offload_chain(trans->ctx.chain, &policy,
+ FLOW_BLOCK_BIND);
break;
case NFT_MSG_DELCHAIN:
if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
continue;
- err = nft_flow_offload_chain(trans, FLOW_BLOCK_UNBIND);
+ policy = nft_trans_chain_policy(trans);
+ err = nft_flow_offload_chain(trans->ctx.chain, &policy,
+ FLOW_BLOCK_UNBIND);
break;
case NFT_MSG_NEWRULE:
if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
@@ -260,14 +357,20 @@ int nft_flow_rule_offload_commit(struct net *net)
!(trans->ctx.flags & NLM_F_APPEND))
return -EOPNOTSUPP;
- err = nft_flow_offload_rule(trans, FLOW_CLS_REPLACE);
+ err = nft_flow_offload_rule(trans->ctx.chain,
+ nft_trans_rule(trans),
+ nft_trans_flow_rule(trans),
+ FLOW_CLS_REPLACE);
nft_flow_rule_destroy(nft_trans_flow_rule(trans));
break;
case NFT_MSG_DELRULE:
if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
continue;
- err = nft_flow_offload_rule(trans, FLOW_CLS_DESTROY);
+ err = nft_flow_offload_rule(trans->ctx.chain,
+ nft_trans_rule(trans),
+ nft_trans_flow_rule(trans),
+ FLOW_CLS_DESTROY);
break;
}
@@ -277,3 +380,104 @@ int nft_flow_rule_offload_commit(struct net *net)
return err;
}
+
+static struct nft_chain *__nft_offload_get_chain(struct net_device *dev)
+{
+ struct nft_base_chain *basechain;
+ struct net *net = dev_net(dev);
+ const struct nft_table *table;
+ struct nft_chain *chain;
+
+ list_for_each_entry(table, &net->nft.tables, list) {
+ if (table->family != NFPROTO_NETDEV)
+ continue;
+
+ list_for_each_entry(chain, &table->chains, list) {
+ if (!nft_is_base_chain(chain) ||
+ !(chain->flags & NFT_CHAIN_HW_OFFLOAD))
+ continue;
+
+ basechain = nft_base_chain(chain);
+ if (strncmp(basechain->dev_name, dev->name, IFNAMSIZ))
+ continue;
+
+ return chain;
+ }
+ }
+
+ return NULL;
+}
+
+static void nft_indr_block_cb(struct net_device *dev,
+ flow_indr_block_bind_cb_t *cb, void *cb_priv,
+ enum flow_block_command cmd)
+{
+ struct net *net = dev_net(dev);
+ struct nft_chain *chain;
+
+ mutex_lock(&net->nft.commit_mutex);
+ chain = __nft_offload_get_chain(dev);
+ if (chain) {
+ struct nft_base_chain *basechain;
+
+ basechain = nft_base_chain(chain);
+ nft_indr_block_ing_cmd(dev, basechain, cb, cb_priv, cmd);
+ }
+ mutex_unlock(&net->nft.commit_mutex);
+}
+
+static void nft_offload_chain_clean(struct nft_chain *chain)
+{
+ struct nft_rule *rule;
+
+ list_for_each_entry(rule, &chain->rules, list) {
+ nft_flow_offload_rule(chain, rule,
+ NULL, FLOW_CLS_DESTROY);
+ }
+
+ nft_flow_offload_chain(chain, NULL, FLOW_BLOCK_UNBIND);
+}
+
+static int nft_offload_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct net *net = dev_net(dev);
+ struct nft_chain *chain;
+
+ mutex_lock(&net->nft.commit_mutex);
+ chain = __nft_offload_get_chain(dev);
+ if (chain)
+ nft_offload_chain_clean(chain);
+ mutex_unlock(&net->nft.commit_mutex);
+
+ return NOTIFY_DONE;
+}
+
+static struct flow_indr_block_ing_entry block_ing_entry = {
+ .cb = nft_indr_block_cb,
+ .list = LIST_HEAD_INIT(block_ing_entry.list),
+};
+
+static struct notifier_block nft_offload_netdev_notifier = {
+ .notifier_call = nft_offload_netdev_event,
+};
+
+int nft_offload_init(void)
+{
+ int err;
+
+ err = register_netdevice_notifier(&nft_offload_netdev_notifier);
+ if (err < 0)
+ return err;
+
+ flow_indr_add_block_ing_cb(&block_ing_entry);
+
+ return 0;
+}
+
+void nft_offload_exit(void)
+{
+ flow_indr_del_block_ing_cb(&block_ing_entry);
+ unregister_netdevice_notifier(&nft_offload_netdev_notifier);
+}
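
nft_offload_init() and nft_offload_exit() are defined above, but their callers sit outside this hunk; elsewhere in the series they are presumably invoked from the nf_tables module init/exit path. A rough sketch of that wiring, with illustrative error handling rather than the actual nf_tables_api.c code:

	/* Illustrative wiring only -- the real call sites live in
	 * nf_tables_api.c, outside this hunk; the error unwinding here
	 * is a sketch, not the upstream sequence.
	 */
	static int __init nf_tables_module_init(void)
	{
		int err;

		err = nft_offload_init();
		if (err < 0)
			return err;

		err = nfnetlink_subsys_register(&nf_tables_subsys);
		if (err < 0) {
			nft_offload_exit();
			return err;
		}

		return 0;
	}

	static void __exit nf_tables_module_exit(void)
	{
		nfnetlink_subsys_unregister(&nf_tables_subsys);
		nft_offload_exit();
	}
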
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 6dee4f9a944c..0ba020ca38e6 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -385,6 +385,57 @@ nfulnl_timer(struct timer_list *t)
instance_put(inst);
}
+static u32 nfulnl_get_bridge_size(const struct sk_buff *skb)
+{
+ u32 size = 0;
+
+ if (!skb_mac_header_was_set(skb))
+ return 0;
+
+ if (skb_vlan_tag_present(skb)) {
+ size += nla_total_size(0); /* nested */
+ size += nla_total_size(sizeof(u16)); /* id */
+ size += nla_total_size(sizeof(u16)); /* tag */
+ }
+
+ if (skb->network_header > skb->mac_header)
+ size += nla_total_size(skb->network_header - skb->mac_header);
+
+ return size;
+}
+
+static int nfulnl_put_bridge(struct nfulnl_instance *inst, const struct sk_buff *skb)
+{
+ if (!skb_mac_header_was_set(skb))
+ return 0;
+
+ if (skb_vlan_tag_present(skb)) {
+ struct nlattr *nest;
+
+ nest = nla_nest_start(inst->skb, NFULA_VLAN);
+ if (!nest)
+ goto nla_put_failure;
+
+ if (nla_put_be16(inst->skb, NFULA_VLAN_TCI, htons(skb->vlan_tci)) ||
+ nla_put_be16(inst->skb, NFULA_VLAN_PROTO, skb->vlan_proto))
+ goto nla_put_failure;
+
+ nla_nest_end(inst->skb, nest);
+ }
+
+ if (skb->mac_header < skb->network_header) {
+ int len = (int)(skb->network_header - skb->mac_header);
+
+ if (nla_put(inst->skb, NFULA_L2HDR, len, skb_mac_header(skb)))
+ goto nla_put_failure;
+ }
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
/* This is an inline function, we don't really care about a long
* list of arguments */
static inline int
@@ -580,6 +631,10 @@ __build_packet_message(struct nfnl_log_net *log,
NFULA_CT, NFULA_CT_INFO) < 0)
goto nla_put_failure;
+ if ((pf == NFPROTO_NETDEV || pf == NFPROTO_BRIDGE) &&
+ nfulnl_put_bridge(inst, skb) < 0)
+ goto nla_put_failure;
+
if (data_len) {
struct nlattr *nla;
int size = nla_attr_size(data_len);
@@ -651,7 +706,7 @@ nfulnl_log_packet(struct net *net,
/* FIXME: do we want to make the size calculation conditional based on
* what is actually present? way more branches and checks, but more
* memory efficient... */
- size = nlmsg_total_size(sizeof(struct nfgenmsg))
+ size = nlmsg_total_size(sizeof(struct nfgenmsg))
+ nla_total_size(sizeof(struct nfulnl_msg_packet_hdr))
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
@@ -668,7 +723,7 @@ nfulnl_log_packet(struct net *net,
+ nla_total_size(sizeof(struct nfgenmsg)); /* NLMSG_DONE */
if (in && skb_mac_header_was_set(skb)) {
- size += nla_total_size(skb->dev->hard_header_len)
+ size += nla_total_size(skb->dev->hard_header_len)
+ nla_total_size(sizeof(u_int16_t)) /* hwtype */
+ nla_total_size(sizeof(u_int16_t)); /* hwlen */
}
@@ -687,6 +742,8 @@ nfulnl_log_packet(struct net *net,
size += nfnl_ct->build_size(ct);
}
}
+ if (pf == NFPROTO_NETDEV || pf == NFPROTO_BRIDGE)
+ size += nfulnl_get_bridge_size(skb);
qthreshold = inst->qthreshold;
/* per-rule qthreshold overrides per-instance */
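
The new attributes surface VLAN and layer-2 header data to nfnetlink_log listeners: a nested NFULA_VLAN carrying NFULA_VLAN_PROTO and NFULA_VLAN_TCI (both big-endian u16), plus the raw MAC header in NFULA_L2HDR. A hypothetical userspace consumer, assuming libmnl and the uapi enum additions made elsewhere in this series, might parse the nested attribute along these lines:

	/* Hypothetical listener code, not part of this patch: walk the
	 * nested NFULA_VLAN attribute of an NFULNL_MSG_PACKET message.
	 */
	#include <stdio.h>
	#include <arpa/inet.h>
	#include <libmnl/libmnl.h>
	#include <linux/netfilter/nfnetlink_log.h>

	static int vlan_attr_cb(const struct nlattr *attr, void *data)
	{
		switch (mnl_attr_get_type(attr)) {
		case NFULA_VLAN_PROTO:	/* encapsulating proto, e.g. 0x8100 */
			printf("vlan proto 0x%04x\n", ntohs(mnl_attr_get_u16(attr)));
			break;
		case NFULA_VLAN_TCI:	/* PCP/DEI/VID */
			printf("vlan tci 0x%04x\n", ntohs(mnl_attr_get_u16(attr)));
			break;
		}
		return MNL_CB_OK;
	}

	static void print_vlan(const struct nlattr *nfula_vlan)
	{
		mnl_attr_parse_nested(nfula_vlan, vlan_attr_cb, NULL);
	}
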
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index b6a7ce622c72..feabdfb22920 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -394,7 +394,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
char *secdata = NULL;
u32 seclen = 0;
- size = nlmsg_total_size(sizeof(struct nfgenmsg))
+ size = nlmsg_total_size(sizeof(struct nfgenmsg))
+ nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
@@ -453,7 +453,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
}
if (queue->flags & NFQA_CFG_F_UID_GID) {
- size += (nla_total_size(sizeof(u_int32_t)) /* uid */
+ size += (nla_total_size(sizeof(u_int32_t)) /* uid */
+ nla_total_size(sizeof(u_int32_t))); /* gid */
}
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index b310b637b550..974300178fa9 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -13,6 +13,7 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_offload.h>
struct nft_bitwise {
enum nft_registers sreg:8;
@@ -126,12 +127,30 @@ nla_put_failure:
return -1;
}
+static struct nft_data zero;
+
+static int nft_bitwise_offload(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ const struct nft_expr *expr)
+{
+ const struct nft_bitwise *priv = nft_expr_priv(expr);
+
+ if (memcmp(&priv->xor, &zero, sizeof(priv->xor)) ||
+ priv->sreg != priv->dreg)
+ return -EOPNOTSUPP;
+
+ memcpy(&ctx->regs[priv->dreg].mask, &priv->mask, sizeof(priv->mask));
+
+ return 0;
+}
+
static const struct nft_expr_ops nft_bitwise_ops = {
.type = &nft_bitwise_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise)),
.eval = nft_bitwise_eval,
.init = nft_bitwise_init,
.dump = nft_bitwise_dump,
+ .offload = nft_bitwise_offload,
};
struct nft_expr_type nft_bitwise_type __read_mostly = {
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index e06318428ea0..12bed3f7bbc6 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -43,14 +43,15 @@ void nft_byteorder_eval(const struct nft_expr *expr,
switch (priv->op) {
case NFT_BYTEORDER_NTOH:
for (i = 0; i < priv->len / 8; i++) {
- src64 = get_unaligned((u64 *)&src[i]);
- put_unaligned_be64(src64, &dst[i]);
+ src64 = nft_reg_load64(&src[i]);
+ nft_reg_store64(&dst[i], be64_to_cpu(src64));
}
break;
case NFT_BYTEORDER_HTON:
for (i = 0; i < priv->len / 8; i++) {
- src64 = get_unaligned_be64(&src[i]);
- put_unaligned(src64, (u64 *)&dst[i]);
+ src64 = (__force __u64)
+ cpu_to_be64(nft_reg_load64(&src[i]));
+ nft_reg_store64(&dst[i], src64);
}
break;
}
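
The byteorder conversion now goes through nft_reg_load64()/nft_reg_store64() instead of open-coded get_unaligned()/put_unaligned() pairs: nft registers are an array of u32, so a 64-bit value spans two registers and may not be 8-byte aligned. The helpers themselves are added outside this hunk; they presumably boil down to something like the following (an assumed shape, not the literal nf_tables.h definitions):

	/* Assumed shape of the 64-bit register accessors -- a sketch,
	 * not the actual include/net/netfilter/nf_tables.h contents.
	 */
	#include <linux/types.h>
	#include <asm/unaligned.h>

	static inline void nft_reg_store64(u32 *dreg, u64 val)
	{
		put_unaligned(val, (u64 *)dreg);	/* dreg is only u32-aligned */
	}

	static inline u64 nft_reg_load64(const u32 *sreg)
	{
		return get_unaligned((const u64 *)sreg);
	}
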
diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c
index c6052fdd2c40..c2e78c160fd7 100644
--- a/net/netfilter/nft_dup_netdev.c
+++ b/net/netfilter/nft_dup_netdev.c
@@ -10,6 +10,7 @@
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_offload.h>
#include <net/netfilter/nf_dup_netdev.h>
struct nft_dup_netdev {
@@ -56,6 +57,16 @@ nla_put_failure:
return -1;
}
+static int nft_dup_netdev_offload(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ const struct nft_expr *expr)
+{
+ const struct nft_dup_netdev *priv = nft_expr_priv(expr);
+ int oif = ctx->regs[priv->sreg_dev].data.data[0];
+
+ return nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_MIRRED, oif);
+}
+
static struct nft_expr_type nft_dup_netdev_type;
static const struct nft_expr_ops nft_dup_netdev_ops = {
.type = &nft_dup_netdev_type,
@@ -63,6 +74,7 @@ static const struct nft_expr_ops nft_dup_netdev_ops = {
.eval = nft_dup_netdev_eval,
.init = nft_dup_netdev_init,
.dump = nft_dup_netdev_dump,
+ .offload = nft_dup_netdev_offload,
};
static struct nft_expr_type nft_dup_netdev_type __read_mostly = {
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 33833a0cb989..8887295414dc 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -84,6 +84,11 @@ void nft_dynset_eval(const struct nft_expr *expr,
const struct nft_expr *sexpr;
u64 timeout;
+ if (priv->op == NFT_DYNSET_OP_DELETE) {
+ set->ops->delete(set, &regs->data[priv->sreg_key]);
+ return;
+ }
+
if (set->ops->update(set, &regs->data[priv->sreg_key], nft_dynset_new,
expr, regs, &ext)) {
sexpr = NULL;
@@ -161,6 +166,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
priv->op = ntohl(nla_get_be32(tb[NFTA_DYNSET_OP]));
switch (priv->op) {
case NFT_DYNSET_OP_ADD:
+ case NFT_DYNSET_OP_DELETE:
break;
case NFT_DYNSET_OP_UPDATE:
if (!(set->flags & NFT_SET_TIMEOUT))
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 01705ad74a9a..22cf236eb5d5 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -6,12 +6,13 @@
#include <linux/netfilter.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
+#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/netfilter/nf_tables.h>
#include <net/ip.h> /* for ipv4 options. */
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_conntrack_core.h>
-#include <linux/netfilter/nf_conntrack_common.h>
+#include <net/netfilter/nf_conntrack_extend.h>
#include <net/netfilter/nf_flow_table.h>
struct nft_flow_offload {
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
index 61b7f93ac681..aba11c2333f3 100644
--- a/net/netfilter/nft_fwd_netdev.c
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -12,6 +12,7 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_offload.h>
#include <net/netfilter/nf_dup_netdev.h>
#include <net/neighbour.h>
#include <net/ip.h>
@@ -63,6 +64,16 @@ nla_put_failure:
return -1;
}
+static int nft_fwd_netdev_offload(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ const struct nft_expr *expr)
+{
+ const struct nft_fwd_netdev *priv = nft_expr_priv(expr);
+ int oif = ctx->regs[priv->sreg_dev].data.data[0];
+
+ return nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_REDIRECT, oif);
+}
+
struct nft_fwd_neigh {
enum nft_registers sreg_dev:8;
enum nft_registers sreg_addr:8;
@@ -194,6 +205,7 @@ static const struct nft_expr_ops nft_fwd_netdev_ops = {
.eval = nft_fwd_netdev_eval,
.init = nft_fwd_netdev_init,
.dump = nft_fwd_netdev_dump,
+ .offload = nft_fwd_netdev_offload,
};
static const struct nft_expr_ops *
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index ca2ae4b95a8d..c7f0ef73d939 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -125,17 +125,13 @@ static int nft_immediate_validate(const struct nft_ctx *ctx,
return 0;
}
-static int nft_immediate_offload(struct nft_offload_ctx *ctx,
- struct nft_flow_rule *flow,
- const struct nft_expr *expr)
+static int nft_immediate_offload_verdict(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ const struct nft_immediate_expr *priv)
{
- const struct nft_immediate_expr *priv = nft_expr_priv(expr);
struct flow_action_entry *entry;
const struct nft_data *data;
- if (priv->dreg != NFT_REG_VERDICT)
- return -EOPNOTSUPP;
-
entry = &flow->rule->action.entries[ctx->num_actions++];
data = &priv->data;
@@ -153,6 +149,20 @@ static int nft_immediate_offload(struct nft_offload_ctx *ctx,
return 0;
}
+static int nft_immediate_offload(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ const struct nft_expr *expr)
+{
+ const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+ if (priv->dreg == NFT_REG_VERDICT)
+ return nft_immediate_offload_verdict(ctx, flow, priv);
+
+ memcpy(&ctx->regs[priv->dreg].data, &priv->data, sizeof(priv->data));
+
+ return 0;
+}
+
static const struct nft_expr_ops nft_imm_ops = {
.type = &nft_imm_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)),
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index f69afb9ff3cb..317e3a9e8c5b 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -26,8 +26,36 @@
#include <uapi/linux/netfilter_bridge.h> /* NF_BR_PRE_ROUTING */
+#define NFT_META_SECS_PER_MINUTE 60
+#define NFT_META_SECS_PER_HOUR 3600
+#define NFT_META_SECS_PER_DAY 86400
+#define NFT_META_DAYS_PER_WEEK 7
+
static DEFINE_PER_CPU(struct rnd_state, nft_prandom_state);
+static u8 nft_meta_weekday(unsigned long secs)
+{
+ unsigned int dse;
+ u8 wday;
+
+ secs -= NFT_META_SECS_PER_MINUTE * sys_tz.tz_minuteswest;
+ dse = secs / NFT_META_SECS_PER_DAY;
+ wday = (4 + dse) % NFT_META_DAYS_PER_WEEK;
+
+ return wday;
+}
+
+static u32 nft_meta_hour(unsigned long secs)
+{
+ struct tm tm;
+
+ time64_to_tm(secs, 0, &tm);
+
+ return tm.tm_hour * NFT_META_SECS_PER_HOUR
+ + tm.tm_min * NFT_META_SECS_PER_MINUTE
+ + tm.tm_sec;
+}
+
void nft_meta_get_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -218,6 +246,15 @@ void nft_meta_get_eval(const struct nft_expr *expr,
goto err;
strncpy((char *)dest, out->rtnl_link_ops->kind, IFNAMSIZ);
break;
+ case NFT_META_TIME_NS:
+ nft_reg_store64(dest, ktime_get_real_ns());
+ break;
+ case NFT_META_TIME_DAY:
+ nft_reg_store8(dest, nft_meta_weekday(get_seconds()));
+ break;
+ case NFT_META_TIME_HOUR:
+ *dest = nft_meta_hour(get_seconds());
+ break;
default:
WARN_ON(1);
goto err;
@@ -330,6 +367,15 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
len = sizeof(u8);
break;
#endif
+ case NFT_META_TIME_NS:
+ len = sizeof(u64);
+ break;
+ case NFT_META_TIME_DAY:
+ len = sizeof(u8);
+ break;
+ case NFT_META_TIME_HOUR:
+ len = sizeof(u32);
+ break;
default:
return -EOPNOTSUPP;
}
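
nft_meta_weekday() relies on the Unix epoch (1 January 1970) having been a Thursday: with Sunday as day 0, Thursday is index 4, so days-since-epoch plus 4 modulo 7 gives the weekday once the seconds have been shifted to local time via sys_tz. A standalone userspace illustration of the same arithmetic:

	/* Standalone illustration of the weekday computation above;
	 * userspace sketch, not kernel code.
	 */
	#include <stdio.h>

	#define SECS_PER_MINUTE	60
	#define SECS_PER_DAY	86400
	#define DAYS_PER_WEEK	7

	static unsigned int weekday(unsigned long secs, int tz_minuteswest)
	{
		unsigned int dse;

		secs -= SECS_PER_MINUTE * tz_minuteswest;	/* UTC -> local */
		dse = secs / SECS_PER_DAY;			/* days since epoch */

		/* 1 Jan 1970 was a Thursday: index 4 when Sunday is 0. */
		return (4 + dse) % DAYS_PER_WEEK;
	}

	int main(void)
	{
		printf("%u\n", weekday(0, 0));			/* 4 -> Thursday, 1 Jan 1970 */
		printf("%u\n", weekday(3 * 86400, 0));		/* 0 -> Sunday, 4 Jan 1970 */
		return 0;
	}
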
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index c8745d454bf8..4413690591f2 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -13,7 +13,7 @@
#include <net/netfilter/nf_tables.h>
struct nft_quota {
- u64 quota;
+ atomic64_t quota;
unsigned long flags;
atomic64_t consumed;
};
@@ -21,7 +21,8 @@ struct nft_quota {
static inline bool nft_overquota(struct nft_quota *priv,
const struct sk_buff *skb)
{
- return atomic64_add_return(skb->len, &priv->consumed) >= priv->quota;
+ return atomic64_add_return(skb->len, &priv->consumed) >=
+ atomic64_read(&priv->quota);
}
static inline bool nft_quota_invert(struct nft_quota *priv)
@@ -89,7 +90,7 @@ static int nft_quota_do_init(const struct nlattr * const tb[],
return -EOPNOTSUPP;
}
- priv->quota = quota;
+ atomic64_set(&priv->quota, quota);
priv->flags = flags;
atomic64_set(&priv->consumed, consumed);
@@ -105,10 +106,22 @@ static int nft_quota_obj_init(const struct nft_ctx *ctx,
return nft_quota_do_init(tb, priv);
}
+static void nft_quota_obj_update(struct nft_object *obj,
+ struct nft_object *newobj)
+{
+ struct nft_quota *newpriv = nft_obj_data(newobj);
+ struct nft_quota *priv = nft_obj_data(obj);
+ u64 newquota;
+
+ newquota = atomic64_read(&newpriv->quota);
+ atomic64_set(&priv->quota, newquota);
+ priv->flags = newpriv->flags;
+}
+
static int nft_quota_do_dump(struct sk_buff *skb, struct nft_quota *priv,
bool reset)
{
- u64 consumed, consumed_cap;
+ u64 consumed, consumed_cap, quota;
u32 flags = priv->flags;
/* Since we inconditionally increment consumed quota for each packet
@@ -116,14 +129,15 @@ static int nft_quota_do_dump(struct sk_buff *skb, struct nft_quota *priv,
* userspace.
*/
consumed = atomic64_read(&priv->consumed);
- if (consumed >= priv->quota) {
- consumed_cap = priv->quota;
+ quota = atomic64_read(&priv->quota);
+ if (consumed >= quota) {
+ consumed_cap = quota;
flags |= NFT_QUOTA_F_DEPLETED;
} else {
consumed_cap = consumed;
}
- if (nla_put_be64(skb, NFTA_QUOTA_BYTES, cpu_to_be64(priv->quota),
+ if (nla_put_be64(skb, NFTA_QUOTA_BYTES, cpu_to_be64(quota),
NFTA_QUOTA_PAD) ||
nla_put_be64(skb, NFTA_QUOTA_CONSUMED, cpu_to_be64(consumed_cap),
NFTA_QUOTA_PAD) ||
@@ -155,6 +169,7 @@ static const struct nft_object_ops nft_quota_obj_ops = {
.init = nft_quota_obj_init,
.eval = nft_quota_obj_eval,
.dump = nft_quota_obj_dump,
+ .update = nft_quota_obj_update,
};
static struct nft_object_type nft_quota_obj_type __read_mostly = {
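
Turning the quota limit into an atomic64_t is what lets nft_quota_obj_update() swap in a new limit while the packet path keeps comparing against it lock-free in nft_overquota(). A userspace analogue of that pattern, sketched with C11 atomics (not kernel code):

	/* Userspace analogue of the runtime quota update: the limit is
	 * read and replaced atomically, so a concurrent "packet" never
	 * observes a torn 64-bit value. C11 atomics sketch only.
	 */
	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdint.h>

	struct quota {
		_Atomic uint64_t limit;
		_Atomic uint64_t consumed;
	};

	static bool over_quota(struct quota *q, uint64_t pkt_len)
	{
		/* mirrors atomic64_add_return(): account, then compare */
		uint64_t used = atomic_fetch_add(&q->consumed, pkt_len) + pkt_len;

		return used >= atomic_load(&q->limit);
	}

	static void update_quota(struct quota *q, uint64_t new_limit)
	{
		atomic_store(&q->limit, new_limit);	/* no lock needed */
	}
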
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index b5aeccdddb22..087a056e34d1 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -10,7 +10,7 @@
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
struct nft_bitmap_elem {
struct list_head head;
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 6e8d20c03e3d..b331a3c9a3a8 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -16,7 +16,7 @@
#include <linux/rhashtable.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
/* We target a hash table size of 4, element hint is 75% of final size */
#define NFT_RHASH_ELEMENT_HINT 3
@@ -234,6 +234,24 @@ static void nft_rhash_remove(const struct net *net,
rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
}
+static bool nft_rhash_delete(const struct nft_set *set,
+ const u32 *key)
+{
+ struct nft_rhash *priv = nft_set_priv(set);
+ struct nft_rhash_cmp_arg arg = {
+ .genmask = NFT_GENMASK_ANY,
+ .set = set,
+ .key = key,
+ };
+ struct nft_rhash_elem *he;
+
+ he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
+ if (he == NULL)
+ return false;
+
+ return rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params) == 0;
+}
+
static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_iter *iter)
{
@@ -662,6 +680,7 @@ struct nft_set_type nft_set_rhash_type __read_mostly = {
.remove = nft_rhash_remove,
.lookup = nft_rhash_lookup,
.update = nft_rhash_update,
+ .delete = nft_rhash_delete,
.walk = nft_rhash_walk,
.get = nft_rhash_get,
},
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 419d58ef802b..57123259452f 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -13,7 +13,7 @@
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
struct nft_rbtree {
struct rb_root root;
diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
index 928e661d1517..e2c1fc608841 100644
--- a/net/netfilter/nft_synproxy.c
+++ b/net/netfilter/nft_synproxy.c
@@ -24,15 +24,15 @@ static void nft_synproxy_tcp_options(struct synproxy_options *opts,
const struct tcphdr *tcp,
struct synproxy_net *snet,
struct nf_synproxy_info *info,
- struct nft_synproxy *priv)
+ const struct nft_synproxy *priv)
{
this_cpu_inc(snet->stats->syn_received);
if (tcp->ece && tcp->cwr)
opts->options |= NF_SYNPROXY_OPT_ECN;
opts->options &= priv->info.options;
- opts->mss_encode = opts->mss;
- opts->mss = info->mss;
+ opts->mss_encode = opts->mss_option;
+ opts->mss_option = info->mss;
if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP)
synproxy_init_timestamp_cookie(info, opts);
else
@@ -41,14 +41,13 @@ static void nft_synproxy_tcp_options(struct synproxy_options *opts,
NF_SYNPROXY_OPT_ECN);
}
-static void nft_synproxy_eval_v4(const struct nft_expr *expr,
+static void nft_synproxy_eval_v4(const struct nft_synproxy *priv,
struct nft_regs *regs,
const struct nft_pktinfo *pkt,
const struct tcphdr *tcp,
struct tcphdr *_tcph,
struct synproxy_options *opts)
{
- struct nft_synproxy *priv = nft_expr_priv(expr);
struct nf_synproxy_info info = priv->info;
struct net *net = nft_net(pkt);
struct synproxy_net *snet = synproxy_pernet(net);
@@ -73,14 +72,13 @@ static void nft_synproxy_eval_v4(const struct nft_expr *expr,
}
#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
-static void nft_synproxy_eval_v6(const struct nft_expr *expr,
+static void nft_synproxy_eval_v6(const struct nft_synproxy *priv,
struct nft_regs *regs,
const struct nft_pktinfo *pkt,
const struct tcphdr *tcp,
struct tcphdr *_tcph,
struct synproxy_options *opts)
{
- struct nft_synproxy *priv = nft_expr_priv(expr);
struct nf_synproxy_info info = priv->info;
struct net *net = nft_net(pkt);
struct synproxy_net *snet = synproxy_pernet(net);
@@ -105,9 +103,9 @@ static void nft_synproxy_eval_v6(const struct nft_expr *expr,
}
#endif /* CONFIG_NF_TABLES_IPV6*/
-static void nft_synproxy_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+static void nft_synproxy_do_eval(const struct nft_synproxy *priv,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
{
struct synproxy_options opts = {};
struct sk_buff *skb = pkt->skb;
@@ -140,23 +138,22 @@ static void nft_synproxy_eval(const struct nft_expr *expr,
switch (skb->protocol) {
case htons(ETH_P_IP):
- nft_synproxy_eval_v4(expr, regs, pkt, tcp, &_tcph, &opts);
+ nft_synproxy_eval_v4(priv, regs, pkt, tcp, &_tcph, &opts);
return;
#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
case htons(ETH_P_IPV6):
- nft_synproxy_eval_v6(expr, regs, pkt, tcp, &_tcph, &opts);
+ nft_synproxy_eval_v6(priv, regs, pkt, tcp, &_tcph, &opts);
return;
#endif
}
regs->verdict.code = NFT_BREAK;
}
-static int nft_synproxy_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
+static int nft_synproxy_do_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
+ struct nft_synproxy *priv)
{
struct synproxy_net *snet = synproxy_pernet(ctx->net);
- struct nft_synproxy *priv = nft_expr_priv(expr);
u32 flags;
int err;
@@ -206,8 +203,7 @@ nf_ct_failure:
return err;
}
-static void nft_synproxy_destroy(const struct nft_ctx *ctx,
- const struct nft_expr *expr)
+static void nft_synproxy_do_destroy(const struct nft_ctx *ctx)
{
struct synproxy_net *snet = synproxy_pernet(ctx->net);
@@ -229,10 +225,8 @@ static void nft_synproxy_destroy(const struct nft_ctx *ctx,
nf_ct_netns_put(ctx->net, ctx->family);
}
-static int nft_synproxy_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static int nft_synproxy_do_dump(struct sk_buff *skb, struct nft_synproxy *priv)
{
- const struct nft_synproxy *priv = nft_expr_priv(expr);
-
if (nla_put_be16(skb, NFTA_SYNPROXY_MSS, htons(priv->info.mss)) ||
nla_put_u8(skb, NFTA_SYNPROXY_WSCALE, priv->info.wscale) ||
nla_put_be32(skb, NFTA_SYNPROXY_FLAGS, htonl(priv->info.options)))
@@ -244,6 +238,15 @@ nla_put_failure:
return -1;
}
+static void nft_synproxy_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_synproxy *priv = nft_expr_priv(expr);
+
+ nft_synproxy_do_eval(priv, regs, pkt);
+}
+
static int nft_synproxy_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data)
@@ -252,6 +255,28 @@ static int nft_synproxy_validate(const struct nft_ctx *ctx,
(1 << NF_INET_FORWARD));
}
+static int nft_synproxy_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_synproxy *priv = nft_expr_priv(expr);
+
+ return nft_synproxy_do_init(ctx, tb, priv);
+}
+
+static void nft_synproxy_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ nft_synproxy_do_destroy(ctx);
+}
+
+static int nft_synproxy_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_synproxy *priv = nft_expr_priv(expr);
+
+ return nft_synproxy_do_dump(skb, priv);
+}
+
static struct nft_expr_type nft_synproxy_type;
static const struct nft_expr_ops nft_synproxy_ops = {
.eval = nft_synproxy_eval,
@@ -271,14 +296,89 @@ static struct nft_expr_type nft_synproxy_type __read_mostly = {
.maxattr = NFTA_SYNPROXY_MAX,
};
+static int nft_synproxy_obj_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
+ struct nft_object *obj)
+{
+ struct nft_synproxy *priv = nft_obj_data(obj);
+
+ return nft_synproxy_do_init(ctx, tb, priv);
+}
+
+static void nft_synproxy_obj_destroy(const struct nft_ctx *ctx,
+ struct nft_object *obj)
+{
+ nft_synproxy_do_destroy(ctx);
+}
+
+static int nft_synproxy_obj_dump(struct sk_buff *skb,
+ struct nft_object *obj, bool reset)
+{
+ struct nft_synproxy *priv = nft_obj_data(obj);
+
+ return nft_synproxy_do_dump(skb, priv);
+}
+
+static void nft_synproxy_obj_eval(struct nft_object *obj,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_synproxy *priv = nft_obj_data(obj);
+
+ nft_synproxy_do_eval(priv, regs, pkt);
+}
+
+static void nft_synproxy_obj_update(struct nft_object *obj,
+ struct nft_object *newobj)
+{
+ struct nft_synproxy *newpriv = nft_obj_data(newobj);
+ struct nft_synproxy *priv = nft_obj_data(obj);
+
+ priv->info = newpriv->info;
+}
+
+static struct nft_object_type nft_synproxy_obj_type;
+static const struct nft_object_ops nft_synproxy_obj_ops = {
+ .type = &nft_synproxy_obj_type,
+ .size = sizeof(struct nft_synproxy),
+ .init = nft_synproxy_obj_init,
+ .destroy = nft_synproxy_obj_destroy,
+ .dump = nft_synproxy_obj_dump,
+ .eval = nft_synproxy_obj_eval,
+ .update = nft_synproxy_obj_update,
+};
+
+static struct nft_object_type nft_synproxy_obj_type __read_mostly = {
+ .type = NFT_OBJECT_SYNPROXY,
+ .ops = &nft_synproxy_obj_ops,
+ .maxattr = NFTA_SYNPROXY_MAX,
+ .policy = nft_synproxy_policy,
+ .owner = THIS_MODULE,
+};
+
static int __init nft_synproxy_module_init(void)
{
- return nft_register_expr(&nft_synproxy_type);
+ int err;
+
+ err = nft_register_obj(&nft_synproxy_obj_type);
+ if (err < 0)
+ return err;
+
+ err = nft_register_expr(&nft_synproxy_type);
+ if (err < 0)
+ goto err;
+
+ return 0;
+
+err:
+ nft_unregister_obj(&nft_synproxy_obj_type);
+ return err;
}
static void __exit nft_synproxy_module_exit(void)
{
- return nft_unregister_expr(&nft_synproxy_type);
+ nft_unregister_expr(&nft_synproxy_type);
+ nft_unregister_obj(&nft_synproxy_obj_type);
}
module_init(nft_synproxy_module_init);
@@ -287,3 +387,4 @@ module_exit(nft_synproxy_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Fernando Fernandez <ffmancera@riseup.net>");
MODULE_ALIAS_NFT_EXPR("synproxy");
+MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_SYNPROXY);
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index 9cec9eae556a..f56d3ed93e56 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -283,7 +283,7 @@ static int __init idletimer_tg_init(void)
idletimer_tg_kobj = &idletimer_tg_device->kobj;
- err = xt_register_target(&idletimer_tg);
+ err = xt_register_target(&idletimer_tg);
if (err < 0) {
pr_debug("couldn't register xt target\n");
goto out_dev;
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index bc6c8ab0fa62..46fcac75f726 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -13,6 +13,8 @@
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/ip.h>
+#include <linux/ipv6.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netfilter/x_tables.h>
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 2d2691dd51e0..ced3fc8fad7c 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -34,9 +34,14 @@
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
-#include <linux/netfilter/xt_hashlimit.h>
#include <linux/mutex.h>
#include <linux/kernel.h>
+#include <uapi/linux/netfilter/xt_hashlimit.h>
+
+#define XT_HASHLIMIT_ALL (XT_HASHLIMIT_HASH_DIP | XT_HASHLIMIT_HASH_DPT | \
+ XT_HASHLIMIT_HASH_SIP | XT_HASHLIMIT_HASH_SPT | \
+ XT_HASHLIMIT_INVERT | XT_HASHLIMIT_BYTES |\
+ XT_HASHLIMIT_RATE_MATCH)
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index b92b22ce8abd..ec6ed6fda96c 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -5,12 +5,13 @@
/* (C) 2001-2003 Bart De Schuymer <bdschuym@pandora.be>
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/if.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netfilter_bridge.h>
-#include <linux/netfilter/xt_physdev.h>
#include <linux/netfilter/x_tables.h>
-#include <net/netfilter/br_netfilter.h>
+#include <uapi/linux/netfilter/xt_physdev.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index ecbfa291fb70..731bc2cafae4 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -14,7 +14,6 @@
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/ipset/ip_set.h>
-#include <linux/netfilter/ipset/ip_set_timeout.h>
#include <uapi/linux/netfilter/xt_set.h>
MODULE_LICENSE("GPL");
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 2b0ef55cf89e..409a3ae47ce2 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -607,7 +607,7 @@ catmap_getnode_alloc:
*/
int netlbl_catmap_walk(struct netlbl_lsm_catmap *catmap, u32 offset)
{
- struct netlbl_lsm_catmap *iter = catmap;
+ struct netlbl_lsm_catmap *iter;
u32 idx;
u32 bit;
NETLBL_CATMAP_MAPTYPE bitmap;
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index ea64c90b14e8..17e6ca62f1be 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -970,7 +970,8 @@ static int nfc_genl_dep_link_down(struct sk_buff *skb, struct genl_info *info)
int rc;
u32 idx;
- if (!info->attrs[NFC_ATTR_DEVICE_INDEX])
+ if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
+ !info->attrs[NFC_ATTR_TARGET_INDEX])
return -EINVAL;
idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
@@ -1018,7 +1019,8 @@ static int nfc_genl_llc_get_params(struct sk_buff *skb, struct genl_info *info)
struct sk_buff *msg = NULL;
u32 idx;
- if (!info->attrs[NFC_ATTR_DEVICE_INDEX])
+ if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
+ !info->attrs[NFC_ATTR_FIRMWARE_NAME])
return -EINVAL;
idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index d01410e52097..dde9d762edee 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -222,6 +222,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
struct dp_stats_percpu *stats;
u64 *stats_counter;
u32 n_mask_hit;
+ int error;
stats = this_cpu_ptr(dp->stats_percpu);
@@ -229,7 +230,6 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit);
if (unlikely(!flow)) {
struct dp_upcall_info upcall;
- int error;
memset(&upcall, 0, sizeof(upcall));
upcall.cmd = OVS_PACKET_CMD_MISS;
@@ -246,7 +246,10 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
ovs_flow_stats_update(flow, key->tp.flags, skb);
sf_acts = rcu_dereference(flow->sf_acts);
- ovs_execute_actions(dp, skb, sf_acts, key);
+ error = ovs_execute_actions(dp, skb, sf_acts, key);
+ if (unlikely(error))
+ net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n",
+ ovs_dp_name(dp), error);
stats_counter = &stats->n_hit;
@@ -1542,10 +1545,34 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *in
dp->user_features = 0;
}
-static void ovs_dp_change(struct datapath *dp, struct nlattr *a[])
+DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
+
+static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
{
- if (a[OVS_DP_ATTR_USER_FEATURES])
- dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
+ u32 user_features = 0;
+
+ if (a[OVS_DP_ATTR_USER_FEATURES]) {
+ user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
+
+ if (user_features & ~(OVS_DP_F_VPORT_PIDS |
+ OVS_DP_F_UNALIGNED |
+ OVS_DP_F_TC_RECIRC_SHARING))
+ return -EOPNOTSUPP;
+
+#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ if (user_features & OVS_DP_F_TC_RECIRC_SHARING)
+ return -EOPNOTSUPP;
+#endif
+ }
+
+ dp->user_features = user_features;
+
+ if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
+ static_branch_enable(&tc_recirc_sharing_support);
+ else
+ static_branch_disable(&tc_recirc_sharing_support);
+
+ return 0;
}
static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
@@ -1607,7 +1634,9 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
parms.port_no = OVSP_LOCAL;
parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
- ovs_dp_change(dp, a);
+ err = ovs_dp_change(dp, a);
+ if (err)
+ goto err_destroy_meters;
/* So far only local changes have been made, now need the lock. */
ovs_lock();
@@ -1733,7 +1762,9 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
if (IS_ERR(dp))
goto err_unlock_free;
- ovs_dp_change(dp, info->attrs);
+ err = ovs_dp_change(dp, info->attrs);
+ if (err)
+ goto err_unlock_free;
err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
info->snd_seq, 0, OVS_DP_CMD_SET);
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 751d34accdf9..81e85dde8217 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -218,6 +218,8 @@ static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
extern struct notifier_block ovs_dp_device_notifier;
extern struct genl_family dp_vport_genl_family;
+DECLARE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
+
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key);
void ovs_dp_detach_port(struct vport *);
int ovs_dp_upcall(struct datapath *, struct sk_buff *,
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 9d81d2c7bf82..38147e6a20f5 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -842,6 +842,9 @@ static int key_extract_mac_proto(struct sk_buff *skb)
int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
struct sk_buff *skb, struct sw_flow_key *key)
{
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ struct tc_skb_ext *tc_ext;
+#endif
int res, err;
/* Extract metadata from packet. */
@@ -874,7 +877,17 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
if (res < 0)
return res;
key->mac_proto = res;
+
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ if (static_branch_unlikely(&tc_recirc_sharing_support)) {
+ tc_ext = skb_ext_find(skb, TC_SKB_EXT);
+ key->recirc_id = tc_ext ? tc_ext->chain : 0;
+ } else {
+ key->recirc_id = 0;
+ }
+#else
key->recirc_id = 0;
+#endif
err = key_extract(skb, key);
if (!err)
diff --git a/net/psample/psample.c b/net/psample/psample.c
index 66e4b61a350d..a6ceb0533b5b 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -73,7 +73,7 @@ static int psample_nl_cmd_get_group_dumpit(struct sk_buff *msg,
int idx = 0;
int err;
- spin_lock(&psample_groups_lock);
+ spin_lock_bh(&psample_groups_lock);
list_for_each_entry(group, &psample_groups_list, list) {
if (!net_eq(group->net, sock_net(msg->sk)))
continue;
@@ -89,7 +89,7 @@ static int psample_nl_cmd_get_group_dumpit(struct sk_buff *msg,
idx++;
}
- spin_unlock(&psample_groups_lock);
+ spin_unlock_bh(&psample_groups_lock);
cb->args[0] = idx;
return msg->len;
}
@@ -172,7 +172,7 @@ struct psample_group *psample_group_get(struct net *net, u32 group_num)
{
struct psample_group *group;
- spin_lock(&psample_groups_lock);
+ spin_lock_bh(&psample_groups_lock);
group = psample_group_lookup(net, group_num);
if (!group) {
@@ -183,19 +183,27 @@ struct psample_group *psample_group_get(struct net *net, u32 group_num)
group->refcount++;
out:
- spin_unlock(&psample_groups_lock);
+ spin_unlock_bh(&psample_groups_lock);
return group;
}
EXPORT_SYMBOL_GPL(psample_group_get);
+void psample_group_take(struct psample_group *group)
+{
+ spin_lock_bh(&psample_groups_lock);
+ group->refcount++;
+ spin_unlock_bh(&psample_groups_lock);
+}
+EXPORT_SYMBOL_GPL(psample_group_take);
+
void psample_group_put(struct psample_group *group)
{
- spin_lock(&psample_groups_lock);
+ spin_lock_bh(&psample_groups_lock);
if (--group->refcount == 0)
psample_group_destroy(group);
- spin_unlock(&psample_groups_lock);
+ spin_unlock_bh(&psample_groups_lock);
}
EXPORT_SYMBOL_GPL(psample_group_put);
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 2b969f99ef13..1a5bf3fa4578 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -559,7 +559,7 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
ret = -EDESTADDRREQ;
break;
}
- if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) ||
+ if (ipv4_is_multicast(sin->sin_addr.s_addr) ||
sin->sin_addr.s_addr == htonl(INADDR_BROADCAST)) {
ret = -EINVAL;
break;
@@ -593,7 +593,7 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
addr4 = sin6->sin6_addr.s6_addr32[3];
if (addr4 == htonl(INADDR_ANY) ||
addr4 == htonl(INADDR_BROADCAST) ||
- IN_MULTICAST(ntohl(addr4))) {
+ ipv4_is_multicast(addr4)) {
ret = -EPROTOTYPE;
break;
}
@@ -705,7 +705,7 @@ static int rds_create(struct net *net, struct socket *sock, int protocol,
if (sock->type != SOCK_SEQPACKET || protocol)
return -ESOCKTNOSUPPORT;
- sk = sk_alloc(net, AF_RDS, GFP_ATOMIC, &rds_proto, kern);
+ sk = sk_alloc(net, AF_RDS, GFP_KERNEL, &rds_proto, kern);
if (!sk)
return -ENOMEM;
@@ -741,6 +741,10 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len,
spin_lock_bh(&rds_sock_lock);
list_for_each_entry(rs, &rds_sock_list, rs_item) {
+ /* This option only supports IPv4 sockets. */
+ if (!ipv6_addr_v4mapped(&rs->rs_bound_addr))
+ continue;
+
read_lock(&rs->rs_recv_lock);
/* XXX too lazy to maintain counts.. */
@@ -762,21 +766,60 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len,
lens->each = sizeof(struct rds_info_message);
}
+#if IS_ENABLED(CONFIG_IPV6)
+static void rds6_sock_inc_info(struct socket *sock, unsigned int len,
+ struct rds_info_iterator *iter,
+ struct rds_info_lengths *lens)
+{
+ struct rds_incoming *inc;
+ unsigned int total = 0;
+ struct rds_sock *rs;
+
+ len /= sizeof(struct rds6_info_message);
+
+ spin_lock_bh(&rds_sock_lock);
+
+ list_for_each_entry(rs, &rds_sock_list, rs_item) {
+ read_lock(&rs->rs_recv_lock);
+
+ list_for_each_entry(inc, &rs->rs_recv_queue, i_item) {
+ total++;
+ if (total <= len)
+ rds6_inc_info_copy(inc, iter, &inc->i_saddr,
+ &rs->rs_bound_addr, 1);
+ }
+
+ read_unlock(&rs->rs_recv_lock);
+ }
+
+ spin_unlock_bh(&rds_sock_lock);
+
+ lens->nr = total;
+ lens->each = sizeof(struct rds6_info_message);
+}
+#endif
+
static void rds_sock_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens)
{
struct rds_info_socket sinfo;
+ unsigned int cnt = 0;
struct rds_sock *rs;
len /= sizeof(struct rds_info_socket);
spin_lock_bh(&rds_sock_lock);
- if (len < rds_sock_count)
+ if (len < rds_sock_count) {
+ cnt = rds_sock_count;
goto out;
+ }
list_for_each_entry(rs, &rds_sock_list, rs_item) {
+ /* This option only supports IPv4 sockets. */
+ if (!ipv6_addr_v4mapped(&rs->rs_bound_addr))
+ continue;
sinfo.sndbuf = rds_sk_sndbuf(rs);
sinfo.rcvbuf = rds_sk_rcvbuf(rs);
sinfo.bound_addr = rs->rs_bound_addr_v4;
@@ -786,15 +829,51 @@ static void rds_sock_info(struct socket *sock, unsigned int len,
sinfo.inum = sock_i_ino(rds_rs_to_sk(rs));
rds_info_copy(iter, &sinfo, sizeof(sinfo));
+ cnt++;
}
out:
- lens->nr = rds_sock_count;
+ lens->nr = cnt;
lens->each = sizeof(struct rds_info_socket);
spin_unlock_bh(&rds_sock_lock);
}
+#if IS_ENABLED(CONFIG_IPV6)
+static void rds6_sock_info(struct socket *sock, unsigned int len,
+ struct rds_info_iterator *iter,
+ struct rds_info_lengths *lens)
+{
+ struct rds6_info_socket sinfo6;
+ struct rds_sock *rs;
+
+ len /= sizeof(struct rds6_info_socket);
+
+ spin_lock_bh(&rds_sock_lock);
+
+ if (len < rds_sock_count)
+ goto out;
+
+ list_for_each_entry(rs, &rds_sock_list, rs_item) {
+ sinfo6.sndbuf = rds_sk_sndbuf(rs);
+ sinfo6.rcvbuf = rds_sk_rcvbuf(rs);
+ sinfo6.bound_addr = rs->rs_bound_addr;
+ sinfo6.connected_addr = rs->rs_conn_addr;
+ sinfo6.bound_port = rs->rs_bound_port;
+ sinfo6.connected_port = rs->rs_conn_port;
+ sinfo6.inum = sock_i_ino(rds_rs_to_sk(rs));
+
+ rds_info_copy(iter, &sinfo6, sizeof(sinfo6));
+ }
+
+ out:
+ lens->nr = rds_sock_count;
+ lens->each = sizeof(struct rds6_info_socket);
+
+ spin_unlock_bh(&rds_sock_lock);
+}
+#endif
+
static void rds_exit(void)
{
sock_unregister(rds_family_ops.family);
@@ -808,6 +887,10 @@ static void rds_exit(void)
rds_bind_lock_destroy();
rds_info_deregister_func(RDS_INFO_SOCKETS, rds_sock_info);
rds_info_deregister_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info);
+#if IS_ENABLED(CONFIG_IPV6)
+ rds_info_deregister_func(RDS6_INFO_SOCKETS, rds6_sock_info);
+ rds_info_deregister_func(RDS6_INFO_RECV_MESSAGES, rds6_sock_inc_info);
+#endif
}
module_exit(rds_exit);
@@ -845,6 +928,10 @@ static int rds_init(void)
rds_info_register_func(RDS_INFO_SOCKETS, rds_sock_info);
rds_info_register_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info);
+#if IS_ENABLED(CONFIG_IPV6)
+ rds_info_register_func(RDS6_INFO_SOCKETS, rds6_sock_info);
+ rds_info_register_func(RDS6_INFO_RECV_MESSAGES, rds6_sock_inc_info);
+#endif
goto out;
diff --git a/net/rds/bind.c b/net/rds/bind.c
index 05464fd7c17a..20c156a73e73 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -181,7 +181,7 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (addr_len < sizeof(struct sockaddr_in) ||
sin->sin_addr.s_addr == htonl(INADDR_ANY) ||
sin->sin_addr.s_addr == htonl(INADDR_BROADCAST) ||
- IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
+ ipv4_is_multicast(sin->sin_addr.s_addr))
return -EINVAL;
ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &v6addr);
binding_addr = &v6addr;
@@ -206,7 +206,7 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
addr4 = sin6->sin6_addr.s6_addr32[3];
if (addr4 == htonl(INADDR_ANY) ||
addr4 == htonl(INADDR_BROADCAST) ||
- IN_MULTICAST(ntohl(addr4)))
+ ipv4_is_multicast(addr4))
return -EINVAL;
}
/* The scope ID must be specified for link local address. */
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 3cae88cbdaa0..a0f99bbf362c 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -385,6 +385,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
unsigned int posted = 0;
int ret = 0;
bool can_wait = !!(gfp & __GFP_DIRECT_RECLAIM);
+ bool must_wake = false;
u32 pos;
/* the goal here is to just make sure that someone, somewhere
@@ -405,6 +406,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
recv = &ic->i_recvs[pos];
ret = rds_ib_recv_refill_one(conn, recv, gfp);
if (ret) {
+ must_wake = true;
break;
}
@@ -423,6 +425,11 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
}
posted++;
+
+ if ((posted > 128 && need_resched()) || posted > 8192) {
+ must_wake = true;
+ break;
+ }
}
/* We're doing flow control - update the window. */
@@ -445,10 +452,13 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
* if we should requeue.
*/
if (rds_conn_up(conn) &&
- ((can_wait && rds_ib_ring_low(&ic->i_recv_ring)) ||
+ (must_wake ||
+ (can_wait && rds_ib_ring_low(&ic->i_recv_ring)) ||
rds_ib_ring_empty(&ic->i_recv_ring))) {
queue_delayed_work(rds_wq, &conn->c_recv_w, 1);
}
+ if (can_wait)
+ cond_resched();
}
/*
@@ -1038,9 +1048,14 @@ int rds_ib_recv_init(void)
si_meminfo(&si);
rds_ib_sysctl_max_recv_allocation = si.totalram / 3 * PAGE_SIZE / RDS_FRAG_SIZE;
- rds_ib_incoming_slab = kmem_cache_create("rds_ib_incoming",
- sizeof(struct rds_ib_incoming),
- 0, SLAB_HWCACHE_ALIGN, NULL);
+ rds_ib_incoming_slab =
+ kmem_cache_create_usercopy("rds_ib_incoming",
+ sizeof(struct rds_ib_incoming),
+ 0, SLAB_HWCACHE_ALIGN,
+ offsetof(struct rds_ib_incoming,
+ ii_inc.i_usercopy),
+ sizeof(struct rds_inc_usercopy),
+ NULL);
if (!rds_ib_incoming_slab)
goto out;
diff --git a/net/rds/ib_stats.c b/net/rds/ib_stats.c
index 9252ad126335..ac46d8961b61 100644
--- a/net/rds/ib_stats.c
+++ b/net/rds/ib_stats.c
@@ -42,7 +42,7 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_ib_statistics, rds_ib_stats);
static const char *const rds_ib_stat_names[] = {
"ib_connect_raced",
"ib_listen_closed_stale",
- "s_ib_evt_handler_call",
+ "ib_evt_handler_call",
"ib_tasklet_call",
"ib_tx_cq_event",
"ib_tx_ring_full",
diff --git a/net/rds/rds.h b/net/rds/rds.h
index f0066d168499..53e86911773a 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -271,6 +271,12 @@ struct rds_ext_header_rdma_dest {
#define RDS_MSG_RX_END 2
#define RDS_MSG_RX_CMSG 3
+/* The following values are whitelisted for usercopy */
+struct rds_inc_usercopy {
+ rds_rdma_cookie_t rdma_cookie;
+ ktime_t rx_tstamp;
+};
+
struct rds_incoming {
refcount_t i_refcount;
struct list_head i_item;
@@ -280,8 +286,7 @@ struct rds_incoming {
unsigned long i_rx_jiffies;
struct in6_addr i_saddr;
- rds_rdma_cookie_t i_rdma_cookie;
- ktime_t i_rx_tstamp;
+ struct rds_inc_usercopy i_usercopy;
u64 i_rx_lat_trace[RDS_RX_MAX_TRACES];
};
@@ -717,7 +722,7 @@ struct rds_statistics {
uint64_t s_cong_send_blocked;
uint64_t s_recv_bytes_added_to_socket;
uint64_t s_recv_bytes_removed_from_socket;
-
+ uint64_t s_send_stuck_rm;
};
/* af_rds.c */
diff --git a/net/rds/recv.c b/net/rds/recv.c
index a42ba7fa06d5..c8404971d5ab 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -47,8 +47,8 @@ void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
INIT_LIST_HEAD(&inc->i_item);
inc->i_conn = conn;
inc->i_saddr = *saddr;
- inc->i_rdma_cookie = 0;
- inc->i_rx_tstamp = ktime_set(0, 0);
+ inc->i_usercopy.rdma_cookie = 0;
+ inc->i_usercopy.rx_tstamp = ktime_set(0, 0);
memset(inc->i_rx_lat_trace, 0, sizeof(inc->i_rx_lat_trace));
}
@@ -62,8 +62,8 @@ void rds_inc_path_init(struct rds_incoming *inc, struct rds_conn_path *cp,
inc->i_conn = cp->cp_conn;
inc->i_conn_path = cp;
inc->i_saddr = *saddr;
- inc->i_rdma_cookie = 0;
- inc->i_rx_tstamp = ktime_set(0, 0);
+ inc->i_usercopy.rdma_cookie = 0;
+ inc->i_usercopy.rx_tstamp = ktime_set(0, 0);
}
EXPORT_SYMBOL_GPL(rds_inc_path_init);
@@ -186,7 +186,7 @@ static void rds_recv_incoming_exthdrs(struct rds_incoming *inc, struct rds_sock
case RDS_EXTHDR_RDMA_DEST:
/* We ignore the size for now. We could stash it
* somewhere and use it for error checking. */
- inc->i_rdma_cookie = rds_rdma_make_cookie(
+ inc->i_usercopy.rdma_cookie = rds_rdma_make_cookie(
be32_to_cpu(buffer.rdma_dest.h_rdma_rkey),
be32_to_cpu(buffer.rdma_dest.h_rdma_offset));
@@ -380,7 +380,7 @@ void rds_recv_incoming(struct rds_connection *conn, struct in6_addr *saddr,
be32_to_cpu(inc->i_hdr.h_len),
inc->i_hdr.h_dport);
if (sock_flag(sk, SOCK_RCVTSTAMP))
- inc->i_rx_tstamp = ktime_get_real();
+ inc->i_usercopy.rx_tstamp = ktime_get_real();
rds_inc_addref(inc);
inc->i_rx_lat_trace[RDS_MSG_RX_END] = local_clock();
list_add_tail(&inc->i_item, &rs->rs_recv_queue);
@@ -540,16 +540,18 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg,
{
int ret = 0;
- if (inc->i_rdma_cookie) {
+ if (inc->i_usercopy.rdma_cookie) {
ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RDMA_DEST,
- sizeof(inc->i_rdma_cookie), &inc->i_rdma_cookie);
+ sizeof(inc->i_usercopy.rdma_cookie),
+ &inc->i_usercopy.rdma_cookie);
if (ret)
goto out;
}
- if ((inc->i_rx_tstamp != 0) &&
+ if ((inc->i_usercopy.rx_tstamp != 0) &&
sock_flag(rds_rs_to_sk(rs), SOCK_RCVTSTAMP)) {
- struct __kernel_old_timeval tv = ns_to_kernel_old_timeval(inc->i_rx_tstamp);
+ struct __kernel_old_timeval tv =
+ ns_to_kernel_old_timeval(inc->i_usercopy.rx_tstamp);
if (!sock_flag(rds_rs_to_sk(rs), SOCK_TSTAMP_NEW)) {
ret = put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
diff --git a/net/rds/send.c b/net/rds/send.c
index 031b1e97a466..82dcd8b84fe7 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -145,6 +145,7 @@ int rds_send_xmit(struct rds_conn_path *cp)
LIST_HEAD(to_be_dropped);
int batch_count;
unsigned long send_gen = 0;
+ int same_rm = 0;
restart:
batch_count = 0;
@@ -200,6 +201,17 @@ restart:
rm = cp->cp_xmit_rm;
+ if (!rm) {
+ same_rm = 0;
+ } else {
+ same_rm++;
+ if (same_rm >= 4096) {
+ rds_stats_inc(s_send_stuck_rm);
+ ret = -EAGAIN;
+ break;
+ }
+ }
+
/*
* If between sending messages, we can send a pending congestion
* map update.
@@ -1132,7 +1144,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
case AF_INET:
if (usin->sin_addr.s_addr == htonl(INADDR_ANY) ||
usin->sin_addr.s_addr == htonl(INADDR_BROADCAST) ||
- IN_MULTICAST(ntohl(usin->sin_addr.s_addr))) {
+ ipv4_is_multicast(usin->sin_addr.s_addr)) {
ret = -EINVAL;
goto out;
}
@@ -1163,7 +1175,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
addr4 = sin6->sin6_addr.s6_addr32[3];
if (addr4 == htonl(INADDR_ANY) ||
addr4 == htonl(INADDR_BROADCAST) ||
- IN_MULTICAST(ntohl(addr4))) {
+ ipv4_is_multicast(addr4)) {
ret = -EINVAL;
goto out;
}
diff --git a/net/rds/stats.c b/net/rds/stats.c
index 73be187d389e..9e87da43c004 100644
--- a/net/rds/stats.c
+++ b/net/rds/stats.c
@@ -76,6 +76,9 @@ static const char *const rds_stat_names[] = {
"cong_update_received",
"cong_send_error",
"cong_send_blocked",
+ "recv_bytes_added_to_sock",
+ "recv_bytes_freed_fromsock",
+ "send_stuck_rm",
};
void rds_stats_info_copy(struct rds_info_iterator *iter,
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 8051dfdcf26d..1091bf35a199 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -232,6 +232,9 @@ struct rxrpc_security {
int (*verify_packet)(struct rxrpc_call *, struct sk_buff *,
unsigned int, unsigned int, rxrpc_seq_t, u16);
+ /* Free crypto request on a call */
+ void (*free_call_crypto)(struct rxrpc_call *);
+
/* Locate the data in a received packet that has been verified. */
void (*locate_data)(struct rxrpc_call *, struct sk_buff *,
unsigned int *, unsigned int *);
@@ -564,6 +567,7 @@ struct rxrpc_call {
unsigned long expect_term_by; /* When we expect call termination by */
u32 next_rx_timo; /* Timeout for next Rx packet (jif) */
u32 next_req_timo; /* Timeout for next Rx request packet (jif) */
+ struct skcipher_request *cipher_req; /* Packet cipher request buffer */
struct timer_list timer; /* Combined event timer */
struct work_struct processor; /* Event processor */
rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 014548c259ce..32d8dc677142 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -488,8 +488,10 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
_debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);
- if (conn)
+ if (conn) {
rxrpc_disconnect_call(call);
+ conn->security->free_call_crypto(call);
+ }
rxrpc_cleanup_ring(call);
_leave("");
diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c
index a29d26c273b5..f6c59f5fae9d 100644
--- a/net/rxrpc/insecure.c
+++ b/net/rxrpc/insecure.c
@@ -33,6 +33,10 @@ static int none_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
return 0;
}
+static void none_free_call_crypto(struct rxrpc_call *call)
+{
+}
+
static void none_locate_data(struct rxrpc_call *call, struct sk_buff *skb,
unsigned int *_offset, unsigned int *_len)
{
@@ -83,6 +87,7 @@ const struct rxrpc_security rxrpc_no_security = {
.exit = none_exit,
.init_connection_security = none_init_connection_security,
.prime_packet_security = none_prime_packet_security,
+ .free_call_crypto = none_free_call_crypto,
.secure_packet = none_secure_packet,
.verify_packet = none_verify_packet,
.locate_data = none_locate_data,
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index c60c520fde7c..8d8aa3c230b5 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -43,6 +43,7 @@ struct rxkad_level2_hdr {
* packets
*/
static struct crypto_sync_skcipher *rxkad_ci;
+static struct skcipher_request *rxkad_ci_req;
static DEFINE_MUTEX(rxkad_ci_mutex);
/*
@@ -99,8 +100,8 @@ error:
*/
static int rxkad_prime_packet_security(struct rxrpc_connection *conn)
{
+ struct skcipher_request *req;
struct rxrpc_key_token *token;
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, conn->cipher);
struct scatterlist sg;
struct rxrpc_crypt iv;
__be32 *tmpbuf;
@@ -115,6 +116,12 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn)
if (!tmpbuf)
return -ENOMEM;
+ req = skcipher_request_alloc(&conn->cipher->base, GFP_NOFS);
+ if (!req) {
+ kfree(tmpbuf);
+ return -ENOMEM;
+ }
+
token = conn->params.key->payload.data[0];
memcpy(&iv, token->kad->session_key, sizeof(iv));
@@ -128,7 +135,7 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn)
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, &sg, &sg, tmpsize, iv.x);
crypto_skcipher_encrypt(req);
- skcipher_request_zero(req);
+ skcipher_request_free(req);
memcpy(&conn->csum_iv, tmpbuf + 2, sizeof(conn->csum_iv));
kfree(tmpbuf);
@@ -137,6 +144,35 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn)
}
/*
+ * Allocate and prepare the crypto request on a call. For any particular call,
+ * this is called serially for the packets, so no lock should be necessary.
+ */
+static struct skcipher_request *rxkad_get_call_crypto(struct rxrpc_call *call)
+{
+ struct crypto_skcipher *tfm = &call->conn->cipher->base;
+ struct skcipher_request *cipher_req = call->cipher_req;
+
+ if (!cipher_req) {
+ cipher_req = skcipher_request_alloc(tfm, GFP_NOFS);
+ if (!cipher_req)
+ return NULL;
+ call->cipher_req = cipher_req;
+ }
+
+ return cipher_req;
+}
+
+/*
+ * Clean up the crypto on a call.
+ */
+static void rxkad_free_call_crypto(struct rxrpc_call *call)
+{
+ if (call->cipher_req)
+ skcipher_request_free(call->cipher_req);
+ call->cipher_req = NULL;
+}
+
+/*
* partially encrypt a packet (level 1 security)
*/
static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
@@ -243,7 +279,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call,
void *sechdr)
{
struct rxrpc_skb_priv *sp;
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
+ struct skcipher_request *req;
struct rxrpc_crypt iv;
struct scatterlist sg;
u32 x, y;
@@ -262,6 +298,10 @@ static int rxkad_secure_packet(struct rxrpc_call *call,
if (ret < 0)
return ret;
+ req = rxkad_get_call_crypto(call);
+ if (!req)
+ return -ENOMEM;
+
/* continue encrypting from where we left off */
memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
@@ -488,7 +528,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
unsigned int offset, unsigned int len,
rxrpc_seq_t seq, u16 expected_cksum)
{
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
+ struct skcipher_request *req;
struct rxrpc_crypt iv;
struct scatterlist sg;
bool aborted;
@@ -501,6 +541,10 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
if (!call->conn->cipher)
return 0;
+ req = rxkad_get_call_crypto(call);
+ if (!req)
+ return -ENOMEM;
+
/* continue encrypting from where we left off */
memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
@@ -733,14 +777,18 @@ static void rxkad_calc_response_checksum(struct rxkad_response *response)
/*
* encrypt the response packet
*/
-static void rxkad_encrypt_response(struct rxrpc_connection *conn,
- struct rxkad_response *resp,
- const struct rxkad_key *s2)
+static int rxkad_encrypt_response(struct rxrpc_connection *conn,
+ struct rxkad_response *resp,
+ const struct rxkad_key *s2)
{
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, conn->cipher);
+ struct skcipher_request *req;
struct rxrpc_crypt iv;
struct scatterlist sg[1];
+ req = skcipher_request_alloc(&conn->cipher->base, GFP_NOFS);
+ if (!req)
+ return -ENOMEM;
+
/* continue encrypting from where we left off */
memcpy(&iv, s2->session_key, sizeof(iv));
@@ -750,7 +798,8 @@ static void rxkad_encrypt_response(struct rxrpc_connection *conn,
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x);
crypto_skcipher_encrypt(req);
- skcipher_request_zero(req);
+ skcipher_request_free(req);
+ return 0;
}
/*
@@ -825,8 +874,9 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
/* calculate the response checksum and then do the encryption */
rxkad_calc_response_checksum(resp);
- rxkad_encrypt_response(conn, resp, token->kad);
- ret = rxkad_send_response(conn, &sp->hdr, resp, token->kad);
+ ret = rxkad_encrypt_response(conn, resp, token->kad);
+ if (ret == 0)
+ ret = rxkad_send_response(conn, &sp->hdr, resp, token->kad);
kfree(resp);
return ret;
@@ -1003,18 +1053,16 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn,
struct rxkad_response *resp,
const struct rxrpc_crypt *session_key)
{
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, rxkad_ci);
+ struct skcipher_request *req = rxkad_ci_req;
struct scatterlist sg[1];
struct rxrpc_crypt iv;
_enter(",,%08x%08x",
ntohl(session_key->n[0]), ntohl(session_key->n[1]));
- ASSERT(rxkad_ci != NULL);
-
mutex_lock(&rxkad_ci_mutex);
if (crypto_sync_skcipher_setkey(rxkad_ci, session_key->x,
- sizeof(*session_key)) < 0)
+ sizeof(*session_key)) < 0)
BUG();
memcpy(&iv, session_key, sizeof(iv));
@@ -1208,10 +1256,26 @@ static void rxkad_clear(struct rxrpc_connection *conn)
*/
static int rxkad_init(void)
{
+ struct crypto_sync_skcipher *tfm;
+ struct skcipher_request *req;
+
/* pin the cipher we need so that the crypto layer doesn't invoke
* keventd to go get it */
- rxkad_ci = crypto_alloc_sync_skcipher("pcbc(fcrypt)", 0, 0);
- return PTR_ERR_OR_ZERO(rxkad_ci);
+ tfm = crypto_alloc_sync_skcipher("pcbc(fcrypt)", 0, 0);
+ if (IS_ERR(tfm))
+ return PTR_ERR(tfm);
+
+ req = skcipher_request_alloc(&tfm->base, GFP_KERNEL);
+ if (!req)
+ goto nomem_tfm;
+
+ rxkad_ci_req = req;
+ rxkad_ci = tfm;
+ return 0;
+
+nomem_tfm:
+ crypto_free_sync_skcipher(tfm);
+ return -ENOMEM;
}
/*
@@ -1219,8 +1283,8 @@ static int rxkad_init(void)
*/
static void rxkad_exit(void)
{
- if (rxkad_ci)
- crypto_free_sync_skcipher(rxkad_ci);
+ crypto_free_sync_skcipher(rxkad_ci);
+ skcipher_request_free(rxkad_ci_req);
}
/*
@@ -1235,6 +1299,7 @@ const struct rxrpc_security rxkad = {
.prime_packet_security = rxkad_prime_packet_security,
.secure_packet = rxkad_secure_packet,
.verify_packet = rxkad_verify_packet,
+ .free_call_crypto = rxkad_free_call_crypto,
.locate_data = rxkad_locate_data,
.issue_challenge = rxkad_issue_challenge,
.respond_to_challenge = rxkad_respond_to_challenge,
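
The hunks above replace the on-stack SYNC_SKCIPHER_REQUEST_ON_STACK requests with a request cached on the call. A minimal sketch of the lazy-allocation helper these call sites rely on follows; the real rxkad_get_call_crypto() appears earlier in this patch, and the field and function names below are inferred from the usage visible here, so treat this as illustrative rather than the committed implementation.

static struct skcipher_request *rxkad_get_call_crypto(struct rxrpc_call *call)
{
	struct skcipher_request *req = call->cipher_req;

	/* Allocate lazily on first use and cache the request on the call;
	 * callers translate a NULL return into -ENOMEM.
	 */
	if (!req) {
		req = skcipher_request_alloc(&call->conn->cipher->base, GFP_NOFS);
		if (!req)
			return NULL;
		call->cipher_req = req;
	}
	return req;
}

The paired rxkad_free_call_crypto() at the top of this section releases the cached request once per call via the new free_call_crypto security op.
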
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index afd2ba157a13..b3faafeafab9 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -963,6 +963,19 @@ config NET_IFE_SKBTCINDEX
tristate "Support to encoding decoding skb tcindex on IFE action"
depends on NET_ACT_IFE
+config NET_TC_SKB_EXT
+ bool "TC recirculation support"
+ depends on NET_CLS_ACT
+ default y if NET_CLS_ACT
+ select SKB_EXTENSIONS
+
+ help
+ Say Y here to allow tc chain misses to continue in the OvS datapath
+ with the correct recirc_id, and hardware chain misses to continue in
+ the correct chain in the tc software datapath.
+
+ Say N here if you won't be using tc<->ovs offload or tc chains offload.
+
endif # NET_SCHED
config NET_SCH_FIFO
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index cdd6f3818097..fcc46025e790 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -24,12 +24,12 @@
#include <uapi/linux/tc_act/tc_ct.h>
#include <net/tc_act/tc_ct.h>
-#include <linux/netfilter/nf_nat.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#include <uapi/linux/netfilter/nf_nat.h>
static struct tc_action_ops act_ct_ops;
static unsigned int ct_net_id;
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 9d1bf508075a..9ce073a05414 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -408,25 +408,31 @@ static struct notifier_block mirred_device_notifier = {
.notifier_call = mirred_device_event,
};
-static struct net_device *tcf_mirred_get_dev(const struct tc_action *a)
+static void tcf_mirred_dev_put(void *priv)
+{
+ struct net_device *dev = priv;
+
+ dev_put(dev);
+}
+
+static struct net_device *
+tcf_mirred_get_dev(const struct tc_action *a,
+ tc_action_priv_destructor *destructor)
{
struct tcf_mirred *m = to_mirred(a);
struct net_device *dev;
rcu_read_lock();
dev = rcu_dereference(m->tcfm_dev);
- if (dev)
+ if (dev) {
dev_hold(dev);
+ *destructor = tcf_mirred_dev_put;
+ }
rcu_read_unlock();
return dev;
}
-static void tcf_mirred_put_dev(struct net_device *dev)
-{
- dev_put(dev);
-}
-
static size_t tcf_mirred_get_fill_size(const struct tc_action *act)
{
return nla_total_size(sizeof(struct tc_mirred));
@@ -446,7 +452,6 @@ static struct tc_action_ops act_mirred_ops = {
.get_fill_size = tcf_mirred_get_fill_size,
.size = sizeof(struct tcf_mirred),
.get_dev = tcf_mirred_get_dev,
- .put_dev = tcf_mirred_put_dev,
};
static __net_init int mirred_init_net(struct net *net)
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 6315e0f8d26e..89c04c52af3d 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -40,6 +40,8 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
[TCA_POLICE_PEAKRATE] = { .len = TC_RTAB_SIZE },
[TCA_POLICE_AVRATE] = { .type = NLA_U32 },
[TCA_POLICE_RESULT] = { .type = NLA_U32 },
+ [TCA_POLICE_RATE64] = { .type = NLA_U64 },
+ [TCA_POLICE_PEAKRATE64] = { .type = NLA_U64 },
};
static int tcf_police_init(struct net *net, struct nlattr *nla,
@@ -58,6 +60,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
struct tcf_police_params *new;
bool exists = false;
u32 index;
+ u64 rate64, prate64;
if (nla == NULL)
return -EINVAL;
@@ -155,14 +158,18 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
}
if (R_tab) {
new->rate_present = true;
- psched_ratecfg_precompute(&new->rate, &R_tab->rate, 0);
+ rate64 = tb[TCA_POLICE_RATE64] ?
+ nla_get_u64(tb[TCA_POLICE_RATE64]) : 0;
+ psched_ratecfg_precompute(&new->rate, &R_tab->rate, rate64);
qdisc_put_rtab(R_tab);
} else {
new->rate_present = false;
}
if (P_tab) {
new->peak_present = true;
- psched_ratecfg_precompute(&new->peak, &P_tab->rate, 0);
+ prate64 = tb[TCA_POLICE_PEAKRATE64] ?
+ nla_get_u64(tb[TCA_POLICE_PEAKRATE64]) : 0;
+ psched_ratecfg_precompute(&new->peak, &P_tab->rate, prate64);
qdisc_put_rtab(P_tab);
} else {
new->peak_present = false;
@@ -313,10 +320,22 @@ static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a,
lockdep_is_held(&police->tcf_lock));
opt.mtu = p->tcfp_mtu;
opt.burst = PSCHED_NS2TICKS(p->tcfp_burst);
- if (p->rate_present)
+ if (p->rate_present) {
psched_ratecfg_getrate(&opt.rate, &p->rate);
- if (p->peak_present)
+ if ((police->params->rate.rate_bytes_ps >= (1ULL << 32)) &&
+ nla_put_u64_64bit(skb, TCA_POLICE_RATE64,
+ police->params->rate.rate_bytes_ps,
+ TCA_POLICE_PAD))
+ goto nla_put_failure;
+ }
+ if (p->peak_present) {
psched_ratecfg_getrate(&opt.peakrate, &p->peak);
+ if ((police->params->peak.rate_bytes_ps >= (1ULL << 32)) &&
+ nla_put_u64_64bit(skb, TCA_POLICE_PEAKRATE64,
+ police->params->peak.rate_bytes_ps,
+ TCA_POLICE_PAD))
+ goto nla_put_failure;
+ }
if (nla_put(skb, TCA_POLICE_TBF, sizeof(opt), &opt))
goto nla_put_failure;
if (p->tcfp_result &&
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 10229124a992..692c4c9040fd 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -252,6 +252,32 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index)
return tcf_idr_search(tn, a, index);
}
+static void tcf_psample_group_put(void *priv)
+{
+ struct psample_group *group = priv;
+
+ psample_group_put(group);
+}
+
+static struct psample_group *
+tcf_sample_get_group(const struct tc_action *a,
+ tc_action_priv_destructor *destructor)
+{
+ struct tcf_sample *s = to_sample(a);
+ struct psample_group *group;
+
+ spin_lock_bh(&s->tcf_lock);
+ group = rcu_dereference_protected(s->psample_group,
+ lockdep_is_held(&s->tcf_lock));
+ if (group) {
+ psample_group_take(group);
+ *destructor = tcf_psample_group_put;
+ }
+ spin_unlock_bh(&s->tcf_lock);
+
+ return group;
+}
+
static struct tc_action_ops act_sample_ops = {
.kind = "sample",
.id = TCA_ID_SAMPLE,
@@ -262,6 +288,7 @@ static struct tc_action_ops act_sample_ops = {
.cleanup = tcf_sample_cleanup,
.walk = tcf_sample_walker,
.lookup = tcf_sample_search,
+ .get_psample_group = tcf_sample_get_group,
.size = sizeof(struct tcf_sample),
};
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 287a30bf8930..08aaf719a70f 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -301,6 +301,19 @@ static int tcf_vlan_walker(struct net *net, struct sk_buff *skb,
return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
+static void tcf_vlan_stats_update(struct tc_action *a, u64 bytes, u32 packets,
+ u64 lastuse, bool hw)
+{
+ struct tcf_vlan *v = to_vlan(a);
+ struct tcf_t *tm = &v->tcf_tm;
+
+ _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
+ if (hw)
+ _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
+ bytes, packets);
+ tm->lastuse = max_t(u64, tm->lastuse, lastuse);
+}
+
static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
@@ -325,6 +338,7 @@ static struct tc_action_ops act_vlan_ops = {
.init = tcf_vlan_init,
.cleanup = tcf_vlan_cleanup,
.walk = tcf_vlan_walker,
+ .stats_update = tcf_vlan_stats_update,
.get_fill_size = tcf_vlan_get_fill_size,
.lookup = tcf_vlan_search,
.size = sizeof(struct tcf_vlan),
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index efd3cfb80a2a..32577c248968 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -36,6 +36,8 @@
#include <net/tc_act/tc_sample.h>
#include <net/tc_act/tc_skbedit.h>
#include <net/tc_act/tc_ct.h>
+#include <net/tc_act/tc_mpls.h>
+#include <net/flow_offload.h>
extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
@@ -544,235 +546,73 @@ static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held)
}
}
-static struct tcf_block *tc_dev_ingress_block(struct net_device *dev)
-{
- const struct Qdisc_class_ops *cops;
- struct Qdisc *qdisc;
-
- if (!dev_ingress_queue(dev))
- return NULL;
-
- qdisc = dev_ingress_queue(dev)->qdisc_sleeping;
- if (!qdisc)
- return NULL;
-
- cops = qdisc->ops->cl_ops;
- if (!cops)
- return NULL;
-
- if (!cops->tcf_block)
- return NULL;
-
- return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL);
-}
-
-static struct rhashtable indr_setup_block_ht;
-
-struct tc_indr_block_dev {
- struct rhash_head ht_node;
- struct net_device *dev;
- unsigned int refcnt;
- struct list_head cb_list;
- struct tcf_block *block;
-};
-
-struct tc_indr_block_cb {
- struct list_head list;
- void *cb_priv;
- tc_indr_block_bind_cb_t *cb;
- void *cb_ident;
-};
-
-static const struct rhashtable_params tc_indr_setup_block_ht_params = {
- .key_offset = offsetof(struct tc_indr_block_dev, dev),
- .head_offset = offsetof(struct tc_indr_block_dev, ht_node),
- .key_len = sizeof(struct net_device *),
-};
-
-static struct tc_indr_block_dev *
-tc_indr_block_dev_lookup(struct net_device *dev)
-{
- return rhashtable_lookup_fast(&indr_setup_block_ht, &dev,
- tc_indr_setup_block_ht_params);
-}
-
-static struct tc_indr_block_dev *tc_indr_block_dev_get(struct net_device *dev)
-{
- struct tc_indr_block_dev *indr_dev;
-
- indr_dev = tc_indr_block_dev_lookup(dev);
- if (indr_dev)
- goto inc_ref;
-
- indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL);
- if (!indr_dev)
- return NULL;
-
- INIT_LIST_HEAD(&indr_dev->cb_list);
- indr_dev->dev = dev;
- indr_dev->block = tc_dev_ingress_block(dev);
- if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node,
- tc_indr_setup_block_ht_params)) {
- kfree(indr_dev);
- return NULL;
- }
-
-inc_ref:
- indr_dev->refcnt++;
- return indr_dev;
-}
-
-static void tc_indr_block_dev_put(struct tc_indr_block_dev *indr_dev)
-{
- if (--indr_dev->refcnt)
- return;
-
- rhashtable_remove_fast(&indr_setup_block_ht, &indr_dev->ht_node,
- tc_indr_setup_block_ht_params);
- kfree(indr_dev);
-}
-
-static struct tc_indr_block_cb *
-tc_indr_block_cb_lookup(struct tc_indr_block_dev *indr_dev,
- tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
- struct tc_indr_block_cb *indr_block_cb;
-
- list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
- if (indr_block_cb->cb == cb &&
- indr_block_cb->cb_ident == cb_ident)
- return indr_block_cb;
- return NULL;
-}
-
-static struct tc_indr_block_cb *
-tc_indr_block_cb_add(struct tc_indr_block_dev *indr_dev, void *cb_priv,
- tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
- struct tc_indr_block_cb *indr_block_cb;
-
- indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident);
- if (indr_block_cb)
- return ERR_PTR(-EEXIST);
-
- indr_block_cb = kzalloc(sizeof(*indr_block_cb), GFP_KERNEL);
- if (!indr_block_cb)
- return ERR_PTR(-ENOMEM);
-
- indr_block_cb->cb_priv = cb_priv;
- indr_block_cb->cb = cb;
- indr_block_cb->cb_ident = cb_ident;
- list_add(&indr_block_cb->list, &indr_dev->cb_list);
-
- return indr_block_cb;
-}
-
-static void tc_indr_block_cb_del(struct tc_indr_block_cb *indr_block_cb)
-{
- list_del(&indr_block_cb->list);
- kfree(indr_block_cb);
-}
-
static int tcf_block_setup(struct tcf_block *block,
struct flow_block_offload *bo);
-static void tc_indr_block_ing_cmd(struct tc_indr_block_dev *indr_dev,
- struct tc_indr_block_cb *indr_block_cb,
+static void tc_indr_block_ing_cmd(struct net_device *dev,
+ struct tcf_block *block,
+ flow_indr_block_bind_cb_t *cb,
+ void *cb_priv,
enum flow_block_command command)
{
struct flow_block_offload bo = {
.command = command,
.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS,
- .net = dev_net(indr_dev->dev),
- .block_shared = tcf_block_non_null_shared(indr_dev->block),
+ .net = dev_net(dev),
+ .block_shared = tcf_block_non_null_shared(block),
};
INIT_LIST_HEAD(&bo.cb_list);
- if (!indr_dev->block)
+ if (!block)
return;
- bo.block = &indr_dev->block->flow_block;
-
- indr_block_cb->cb(indr_dev->dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
- &bo);
- tcf_block_setup(indr_dev->block, &bo);
-}
-
-int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
- tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
- struct tc_indr_block_cb *indr_block_cb;
- struct tc_indr_block_dev *indr_dev;
- int err;
-
- indr_dev = tc_indr_block_dev_get(dev);
- if (!indr_dev)
- return -ENOMEM;
-
- indr_block_cb = tc_indr_block_cb_add(indr_dev, cb_priv, cb, cb_ident);
- err = PTR_ERR_OR_ZERO(indr_block_cb);
- if (err)
- goto err_dev_put;
+ bo.block = &block->flow_block;
- tc_indr_block_ing_cmd(indr_dev, indr_block_cb, FLOW_BLOCK_BIND);
- return 0;
+ down_write(&block->cb_lock);
+ cb(dev, cb_priv, TC_SETUP_BLOCK, &bo);
-err_dev_put:
- tc_indr_block_dev_put(indr_dev);
- return err;
+ tcf_block_setup(block, &bo);
+ up_write(&block->cb_lock);
}
-EXPORT_SYMBOL_GPL(__tc_indr_block_cb_register);
-int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
- tc_indr_block_bind_cb_t *cb, void *cb_ident)
+static struct tcf_block *tc_dev_ingress_block(struct net_device *dev)
{
- int err;
-
- rtnl_lock();
- err = __tc_indr_block_cb_register(dev, cb_priv, cb, cb_ident);
- rtnl_unlock();
+ const struct Qdisc_class_ops *cops;
+ struct Qdisc *qdisc;
- return err;
-}
-EXPORT_SYMBOL_GPL(tc_indr_block_cb_register);
+ if (!dev_ingress_queue(dev))
+ return NULL;
-void __tc_indr_block_cb_unregister(struct net_device *dev,
- tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
- struct tc_indr_block_cb *indr_block_cb;
- struct tc_indr_block_dev *indr_dev;
+ qdisc = dev_ingress_queue(dev)->qdisc_sleeping;
+ if (!qdisc)
+ return NULL;
- indr_dev = tc_indr_block_dev_lookup(dev);
- if (!indr_dev)
- return;
+ cops = qdisc->ops->cl_ops;
+ if (!cops)
+ return NULL;
- indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident);
- if (!indr_block_cb)
- return;
+ if (!cops->tcf_block)
+ return NULL;
- /* Send unbind message if required to free any block cbs. */
- tc_indr_block_ing_cmd(indr_dev, indr_block_cb, FLOW_BLOCK_UNBIND);
- tc_indr_block_cb_del(indr_block_cb);
- tc_indr_block_dev_put(indr_dev);
+ return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL);
}
-EXPORT_SYMBOL_GPL(__tc_indr_block_cb_unregister);
-void tc_indr_block_cb_unregister(struct net_device *dev,
- tc_indr_block_bind_cb_t *cb, void *cb_ident)
+static void tc_indr_block_get_and_ing_cmd(struct net_device *dev,
+ flow_indr_block_bind_cb_t *cb,
+ void *cb_priv,
+ enum flow_block_command command)
{
- rtnl_lock();
- __tc_indr_block_cb_unregister(dev, cb, cb_ident);
- rtnl_unlock();
+ struct tcf_block *block = tc_dev_ingress_block(dev);
+
+ tc_indr_block_ing_cmd(dev, block, cb, cb_priv, command);
}
-EXPORT_SYMBOL_GPL(tc_indr_block_cb_unregister);
-static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
+static void tc_indr_block_call(struct tcf_block *block,
+ struct net_device *dev,
struct tcf_block_ext_info *ei,
enum flow_block_command command,
struct netlink_ext_ack *extack)
{
- struct tc_indr_block_cb *indr_block_cb;
- struct tc_indr_block_dev *indr_dev;
struct flow_block_offload bo = {
.command = command,
.binder_type = ei->binder_type,
@@ -783,22 +623,13 @@ static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
};
INIT_LIST_HEAD(&bo.cb_list);
- indr_dev = tc_indr_block_dev_lookup(dev);
- if (!indr_dev)
- return;
-
- indr_dev->block = command == FLOW_BLOCK_BIND ? block : NULL;
-
- list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
- indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
- &bo);
-
+ flow_indr_block_call(dev, &bo, command);
tcf_block_setup(block, &bo);
}
static bool tcf_block_offload_in_use(struct tcf_block *block)
{
- return block->offloadcnt;
+ return atomic_read(&block->offloadcnt);
}
static int tcf_block_offload_cmd(struct tcf_block *block,
@@ -832,6 +663,7 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
struct net_device *dev = q->dev_queue->dev;
int err;
+ down_write(&block->cb_lock);
if (!dev->netdev_ops->ndo_setup_tc)
goto no_offload_dev_inc;
@@ -840,24 +672,31 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
*/
if (!tc_can_offload(dev) && tcf_block_offload_in_use(block)) {
NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled");
- return -EOPNOTSUPP;
+ err = -EOPNOTSUPP;
+ goto err_unlock;
}
err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_BIND, extack);
if (err == -EOPNOTSUPP)
goto no_offload_dev_inc;
if (err)
- return err;
+ goto err_unlock;
tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack);
+ up_write(&block->cb_lock);
return 0;
no_offload_dev_inc:
- if (tcf_block_offload_in_use(block))
- return -EOPNOTSUPP;
+ if (tcf_block_offload_in_use(block)) {
+ err = -EOPNOTSUPP;
+ goto err_unlock;
+ }
+ err = 0;
block->nooffloaddevcnt++;
tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack);
- return 0;
+err_unlock:
+ up_write(&block->cb_lock);
+ return err;
}
static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
@@ -866,6 +705,7 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
struct net_device *dev = q->dev_queue->dev;
int err;
+ down_write(&block->cb_lock);
tc_indr_block_call(block, dev, ei, FLOW_BLOCK_UNBIND, NULL);
if (!dev->netdev_ops->ndo_setup_tc)
@@ -873,10 +713,12 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_UNBIND, NULL);
if (err == -EOPNOTSUPP)
goto no_offload_dev_dec;
+ up_write(&block->cb_lock);
return;
no_offload_dev_dec:
WARN_ON(block->nooffloaddevcnt-- == 0);
+ up_write(&block->cb_lock);
}
static int
@@ -991,6 +833,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
return ERR_PTR(-ENOMEM);
}
mutex_init(&block->lock);
+ init_rwsem(&block->cb_lock);
flow_block_init(&block->flow_block);
INIT_LIST_HEAD(&block->chain_list);
INIT_LIST_HEAD(&block->owner_list);
@@ -1526,6 +1369,8 @@ tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb,
struct tcf_proto *tp, *tp_prev;
int err;
+ lockdep_assert_held(&block->cb_lock);
+
for (chain = __tcf_get_next_chain(block, NULL);
chain;
chain_prev = chain,
@@ -1564,6 +1409,8 @@ static int tcf_block_bind(struct tcf_block *block,
struct flow_block_cb *block_cb, *next;
int err, i = 0;
+ lockdep_assert_held(&block->cb_lock);
+
list_for_each_entry(block_cb, &bo->cb_list, list) {
err = tcf_block_playback_offloads(block, block_cb->cb,
block_cb->cb_priv, true,
@@ -1571,6 +1418,8 @@ static int tcf_block_bind(struct tcf_block *block,
bo->extack);
if (err)
goto err_unroll;
+ if (!bo->unlocked_driver_cb)
+ block->lockeddevcnt++;
i++;
}
@@ -1586,6 +1435,8 @@ err_unroll:
block_cb->cb_priv, false,
tcf_block_offload_in_use(block),
NULL);
+ if (!bo->unlocked_driver_cb)
+ block->lockeddevcnt--;
}
flow_block_cb_free(block_cb);
}
@@ -1598,6 +1449,8 @@ static void tcf_block_unbind(struct tcf_block *block,
{
struct flow_block_cb *block_cb, *next;
+ lockdep_assert_held(&block->cb_lock);
+
list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
tcf_block_playback_offloads(block, block_cb->cb,
block_cb->cb_priv, false,
@@ -1605,6 +1458,8 @@ static void tcf_block_unbind(struct tcf_block *block,
NULL);
list_del(&block_cb->list);
flow_block_cb_free(block_cb);
+ if (!bo->unlocked_driver_cb)
+ block->lockeddevcnt--;
}
}
@@ -1659,6 +1514,18 @@ reclassify:
goto reset;
} else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
first_tp = res->goto_tp;
+
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ {
+ struct tc_skb_ext *ext;
+
+ ext = skb_ext_add(skb, TC_SKB_EXT);
+ if (WARN_ON_ONCE(!ext))
+ return TC_ACT_SHOT;
+
+ ext->chain = err & TC_ACT_EXT_VAL_MASK;
+ }
+#endif
goto reset;
}
#endif
@@ -3151,17 +3018,61 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
}
EXPORT_SYMBOL(tcf_exts_dump_stats);
-int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
- void *type_data, bool err_stop)
+static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
+{
+ if (*flags & TCA_CLS_FLAGS_IN_HW)
+ return;
+ *flags |= TCA_CLS_FLAGS_IN_HW;
+ atomic_inc(&block->offloadcnt);
+}
+
+static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
+{
+ if (!(*flags & TCA_CLS_FLAGS_IN_HW))
+ return;
+ *flags &= ~TCA_CLS_FLAGS_IN_HW;
+ atomic_dec(&block->offloadcnt);
+}
+
+static void tc_cls_offload_cnt_update(struct tcf_block *block,
+ struct tcf_proto *tp, u32 *cnt,
+ u32 *flags, u32 diff, bool add)
+{
+ lockdep_assert_held(&block->cb_lock);
+
+ spin_lock(&tp->lock);
+ if (add) {
+ if (!*cnt)
+ tcf_block_offload_inc(block, flags);
+ *cnt += diff;
+ } else {
+ *cnt -= diff;
+ if (!*cnt)
+ tcf_block_offload_dec(block, flags);
+ }
+ spin_unlock(&tp->lock);
+}
+
+static void
+tc_cls_offload_cnt_reset(struct tcf_block *block, struct tcf_proto *tp,
+ u32 *cnt, u32 *flags)
+{
+ lockdep_assert_held(&block->cb_lock);
+
+ spin_lock(&tp->lock);
+ tcf_block_offload_dec(block, flags);
+ *cnt = 0;
+ spin_unlock(&tp->lock);
+}
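
A standalone sketch (userspace C, simplified and illustrative names) of the counting scheme these helpers implement: each filter tracks how many hardware callbacks accepted it (in_hw_count) plus an IN_HW flag, while the block keeps an atomic count of filters that are offloaded at all.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define FLAG_IN_HW 0x1u

struct block { atomic_uint offloadcnt; };
struct filter { unsigned int in_hw_count; unsigned int flags; };

static void offload_cnt_update(struct block *b, struct filter *f,
			       unsigned int diff, bool add)
{
	if (add) {
		/* first driver to accept the filter marks it IN_HW */
		if (!f->in_hw_count) {
			f->flags |= FLAG_IN_HW;
			atomic_fetch_add(&b->offloadcnt, 1);
		}
		f->in_hw_count += diff;
	} else {
		f->in_hw_count -= diff;
		/* last driver to drop it clears IN_HW and the block count */
		if (!f->in_hw_count) {
			f->flags &= ~FLAG_IN_HW;
			atomic_fetch_sub(&b->offloadcnt, 1);
		}
	}
}

int main(void)
{
	struct block b = { 0 };
	struct filter f = { 0 };

	offload_cnt_update(&b, &f, 2, true);   /* two drivers accepted the filter */
	offload_cnt_update(&b, &f, 2, false);  /* both dropped it again */
	printf("offloadcnt=%u in_hw=%u\n",
	       atomic_load(&b.offloadcnt), f.flags & FLAG_IN_HW);
	return 0;
}
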
+
+static int
+__tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
+ void *type_data, bool err_stop)
{
struct flow_block_cb *block_cb;
int ok_count = 0;
int err;
- /* Make sure all netdevs sharing this block are offload-capable. */
- if (block->nooffloaddevcnt && err_stop)
- return -EOPNOTSUPP;
-
list_for_each_entry(block_cb, &block->flow_block.cb_list, list) {
err = block_cb->cb(type, type_data, block_cb->cb_priv);
if (err) {
@@ -3173,17 +3084,261 @@ int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
}
return ok_count;
}
+
+int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
+ void *type_data, bool err_stop, bool rtnl_held)
+{
+ bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
+ int ok_count;
+
+retry:
+ if (take_rtnl)
+ rtnl_lock();
+ down_read(&block->cb_lock);
+ /* Need to obtain rtnl lock if block is bound to devs that require it.
+ * In block bind code cb_lock is obtained while holding rtnl, so we must
+ * obtain the locks in the same order here.
+ */
+ if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
+ up_read(&block->cb_lock);
+ take_rtnl = true;
+ goto retry;
+ }
+
+ ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
+
+ up_read(&block->cb_lock);
+ if (take_rtnl)
+ rtnl_unlock();
+ return ok_count;
+}
EXPORT_SYMBOL(tc_setup_cb_call);
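
The retry dance above (take cb_lock read-side, but back off and grab rtnl first if any bound device still requires it) keeps the lock order identical to the bind path. Below is a self-contained userspace sketch of the same pattern, assuming a pthread mutex and rwlock as stand-ins for rtnl and block->cb_lock; the names are illustrative, not kernel APIs.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER;   /* rtnl stand-in */
static pthread_rwlock_t block_lock = PTHREAD_RWLOCK_INITIALIZER;  /* cb_lock stand-in */
static atomic_uint locked_dev_cnt;                                /* lockeddevcnt stand-in */

static int run_callbacks(void)
{
	return 0;	/* placeholder for __tc_setup_cb_call() */
}

static int setup_cb_call(bool global_held)
{
	bool take_global = atomic_load(&locked_dev_cnt) && !global_held;
	int ok_count;

retry:
	if (take_global)
		pthread_mutex_lock(&global_lock);
	pthread_rwlock_rdlock(&block_lock);
	/* Bind code takes block_lock while holding global_lock, so if a
	 * locked device appeared after the first check, drop block_lock and
	 * retry with the global lock held to preserve the same order.
	 */
	if (!global_held && !take_global && atomic_load(&locked_dev_cnt)) {
		pthread_rwlock_unlock(&block_lock);
		take_global = true;
		goto retry;
	}

	ok_count = run_callbacks();

	pthread_rwlock_unlock(&block_lock);
	if (take_global)
		pthread_mutex_unlock(&global_lock);
	return ok_count;
}

int main(void)
{
	printf("ok_count=%d\n", setup_cb_call(false));
	return 0;
}
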
+/* Non-destructive filter add. If a filter that wasn't already in hardware is
+ * successfully offloaded, increment the block offloads counter. On failure,
+ * the previously offloaded filter is considered to be intact and the offloads
+ * counter is not decremented.
+ */
+
+int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp,
+ enum tc_setup_type type, void *type_data, bool err_stop,
+ u32 *flags, unsigned int *in_hw_count, bool rtnl_held)
+{
+ bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
+ int ok_count;
+
+retry:
+ if (take_rtnl)
+ rtnl_lock();
+ down_read(&block->cb_lock);
+ /* Need to obtain rtnl lock if block is bound to devs that require it.
+ * In block bind code cb_lock is obtained while holding rtnl, so we must
+ * obtain the locks in the same order here.
+ */
+ if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
+ up_read(&block->cb_lock);
+ take_rtnl = true;
+ goto retry;
+ }
+
+ /* Make sure all netdevs sharing this block are offload-capable. */
+ if (block->nooffloaddevcnt && err_stop) {
+ ok_count = -EOPNOTSUPP;
+ goto err_unlock;
+ }
+
+ ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
+ if (ok_count < 0)
+ goto err_unlock;
+
+ if (tp->ops->hw_add)
+ tp->ops->hw_add(tp, type_data);
+ if (ok_count > 0)
+ tc_cls_offload_cnt_update(block, tp, in_hw_count, flags,
+ ok_count, true);
+err_unlock:
+ up_read(&block->cb_lock);
+ if (take_rtnl)
+ rtnl_unlock();
+ return ok_count < 0 ? ok_count : 0;
+}
+EXPORT_SYMBOL(tc_setup_cb_add);
+
+/* Destructive filter replace. If a filter that wasn't already in hardware is
+ * successfully offloaded, increment the block offload counter. On failure,
+ * the previously offloaded filter is considered to be destroyed and the
+ * offload counter is decremented.
+ */
+
+int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp,
+ enum tc_setup_type type, void *type_data, bool err_stop,
+ u32 *old_flags, unsigned int *old_in_hw_count,
+ u32 *new_flags, unsigned int *new_in_hw_count,
+ bool rtnl_held)
+{
+ bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
+ int ok_count;
+
+retry:
+ if (take_rtnl)
+ rtnl_lock();
+ down_read(&block->cb_lock);
+ /* Need to obtain rtnl lock if block is bound to devs that require it.
+ * In block bind code cb_lock is obtained while holding rtnl, so we must
+ * obtain the locks in the same order here.
+ */
+ if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
+ up_read(&block->cb_lock);
+ take_rtnl = true;
+ goto retry;
+ }
+
+ /* Make sure all netdevs sharing this block are offload-capable. */
+ if (block->nooffloaddevcnt && err_stop) {
+ ok_count = -EOPNOTSUPP;
+ goto err_unlock;
+ }
+
+ tc_cls_offload_cnt_reset(block, tp, old_in_hw_count, old_flags);
+ if (tp->ops->hw_del)
+ tp->ops->hw_del(tp, type_data);
+
+ ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
+ if (ok_count < 0)
+ goto err_unlock;
+
+ if (tp->ops->hw_add)
+ tp->ops->hw_add(tp, type_data);
+ if (ok_count > 0)
+ tc_cls_offload_cnt_update(block, tp, new_in_hw_count,
+ new_flags, ok_count, true);
+err_unlock:
+ up_read(&block->cb_lock);
+ if (take_rtnl)
+ rtnl_unlock();
+ return ok_count < 0 ? ok_count : 0;
+}
+EXPORT_SYMBOL(tc_setup_cb_replace);
+
+/* Destroy the filter and decrement the block offload counter if the filter
+ * was previously offloaded.
+ */
+
+int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp,
+ enum tc_setup_type type, void *type_data, bool err_stop,
+ u32 *flags, unsigned int *in_hw_count, bool rtnl_held)
+{
+ bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
+ int ok_count;
+
+retry:
+ if (take_rtnl)
+ rtnl_lock();
+ down_read(&block->cb_lock);
+ /* Need to obtain rtnl lock if block is bound to devs that require it.
+ * In block bind code cb_lock is obtained while holding rtnl, so we must
+ * obtain the locks in the same order here.
+ */
+ if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
+ up_read(&block->cb_lock);
+ take_rtnl = true;
+ goto retry;
+ }
+
+ ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
+
+ tc_cls_offload_cnt_reset(block, tp, in_hw_count, flags);
+ if (tp->ops->hw_del)
+ tp->ops->hw_del(tp, type_data);
+
+ up_read(&block->cb_lock);
+ if (take_rtnl)
+ rtnl_unlock();
+ return ok_count < 0 ? ok_count : 0;
+}
+EXPORT_SYMBOL(tc_setup_cb_destroy);
+
+int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp,
+ bool add, flow_setup_cb_t *cb,
+ enum tc_setup_type type, void *type_data,
+ void *cb_priv, u32 *flags, unsigned int *in_hw_count)
+{
+ int err = cb(type, type_data, cb_priv);
+
+ if (err) {
+ if (add && tc_skip_sw(*flags))
+ return err;
+ } else {
+ tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, 1,
+ add);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(tc_setup_cb_reoffload);
+
+void tc_cleanup_flow_action(struct flow_action *flow_action)
+{
+ struct flow_action_entry *entry;
+ int i;
+
+ flow_action_for_each(i, entry, flow_action)
+ if (entry->destructor)
+ entry->destructor(entry->destructor_priv);
+}
+EXPORT_SYMBOL(tc_cleanup_flow_action);
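
tc_cleanup_flow_action() works because every helper below that takes a reference (net_device, tunnel copy, psample group) also records a destructor and its private data on the entry. A standalone sketch of that pattern with illustrative, non-kernel types:

#include <stdio.h>
#include <stdlib.h>

typedef void (*priv_destructor_t)(void *priv);

struct action_entry {
	void *resource;
	priv_destructor_t destructor;
	void *destructor_priv;
};

static void put_buffer(void *priv)
{
	free(priv);	/* release the reference taken at "get" time */
}

static int entry_get_buffer(struct action_entry *entry, size_t len)
{
	entry->resource = malloc(len);
	if (!entry->resource)
		return -1;
	entry->destructor = put_buffer;
	entry->destructor_priv = entry->resource;
	return 0;
}

static void cleanup_entries(struct action_entry *entries, int n)
{
	/* mirror of tc_cleanup_flow_action(): call each recorded destructor */
	for (int i = 0; i < n; i++)
		if (entries[i].destructor)
			entries[i].destructor(entries[i].destructor_priv);
}

int main(void)
{
	struct action_entry entries[2] = { 0 };

	if (entry_get_buffer(&entries[0], 64) == 0 &&
	    entry_get_buffer(&entries[1], 128) == 0)
		printf("resources acquired\n");
	cleanup_entries(entries, 2);
	return 0;
}
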
+
+static void tcf_mirred_get_dev(struct flow_action_entry *entry,
+ const struct tc_action *act)
+{
+#ifdef CONFIG_NET_CLS_ACT
+ entry->dev = act->ops->get_dev(act, &entry->destructor);
+ if (!entry->dev)
+ return;
+ entry->destructor_priv = entry->dev;
+#endif
+}
+
+static void tcf_tunnel_encap_put_tunnel(void *priv)
+{
+ struct ip_tunnel_info *tunnel = priv;
+
+ kfree(tunnel);
+}
+
+static int tcf_tunnel_encap_get_tunnel(struct flow_action_entry *entry,
+ const struct tc_action *act)
+{
+ entry->tunnel = tcf_tunnel_info_copy(act);
+ if (!entry->tunnel)
+ return -ENOMEM;
+ entry->destructor = tcf_tunnel_encap_put_tunnel;
+ entry->destructor_priv = entry->tunnel;
+ return 0;
+}
+
+static void tcf_sample_get_group(struct flow_action_entry *entry,
+ const struct tc_action *act)
+{
+#ifdef CONFIG_NET_CLS_ACT
+ entry->sample.psample_group =
+ act->ops->get_psample_group(act, &entry->destructor);
+ entry->destructor_priv = entry->sample.psample_group;
+#endif
+}
+
int tc_setup_flow_action(struct flow_action *flow_action,
- const struct tcf_exts *exts)
+ const struct tcf_exts *exts, bool rtnl_held)
{
const struct tc_action *act;
- int i, j, k;
+ int i, j, k, err = 0;
if (!exts)
return 0;
+ if (!rtnl_held)
+ rtnl_lock();
+
j = 0;
tcf_exts_for_each_action(i, act, exts) {
struct flow_action_entry *entry;
@@ -3200,10 +3355,16 @@ int tc_setup_flow_action(struct flow_action *flow_action,
entry->chain_index = tcf_gact_goto_chain_index(act);
} else if (is_tcf_mirred_egress_redirect(act)) {
entry->id = FLOW_ACTION_REDIRECT;
- entry->dev = tcf_mirred_dev(act);
+ tcf_mirred_get_dev(entry, act);
} else if (is_tcf_mirred_egress_mirror(act)) {
entry->id = FLOW_ACTION_MIRRED;
- entry->dev = tcf_mirred_dev(act);
+ tcf_mirred_get_dev(entry, act);
+ } else if (is_tcf_mirred_ingress_redirect(act)) {
+ entry->id = FLOW_ACTION_REDIRECT_INGRESS;
+ tcf_mirred_get_dev(entry, act);
+ } else if (is_tcf_mirred_ingress_mirror(act)) {
+ entry->id = FLOW_ACTION_MIRRED_INGRESS;
+ tcf_mirred_get_dev(entry, act);
} else if (is_tcf_vlan(act)) {
switch (tcf_vlan_action(act)) {
case TCA_VLAN_ACT_PUSH:
@@ -3222,11 +3383,14 @@ int tc_setup_flow_action(struct flow_action *flow_action,
entry->vlan.prio = tcf_vlan_push_prio(act);
break;
default:
+ err = -EOPNOTSUPP;
goto err_out;
}
} else if (is_tcf_tunnel_set(act)) {
entry->id = FLOW_ACTION_TUNNEL_ENCAP;
- entry->tunnel = tcf_tunnel_info(act);
+ err = tcf_tunnel_encap_get_tunnel(entry, act);
+ if (err)
+ goto err_out;
} else if (is_tcf_tunnel_release(act)) {
entry->id = FLOW_ACTION_TUNNEL_DECAP;
} else if (is_tcf_pedit(act)) {
@@ -3239,6 +3403,7 @@ int tc_setup_flow_action(struct flow_action *flow_action,
entry->id = FLOW_ACTION_ADD;
break;
default:
+ err = -EOPNOTSUPP;
goto err_out;
}
entry->mangle.htype = tcf_pedit_htype(act, k);
@@ -3255,11 +3420,10 @@ int tc_setup_flow_action(struct flow_action *flow_action,
entry->mark = tcf_skbedit_mark(act);
} else if (is_tcf_sample(act)) {
entry->id = FLOW_ACTION_SAMPLE;
- entry->sample.psample_group =
- tcf_sample_psample_group(act);
entry->sample.trunc_size = tcf_sample_trunc_size(act);
entry->sample.truncate = tcf_sample_truncate(act);
entry->sample.rate = tcf_sample_rate(act);
+ tcf_sample_get_group(entry, act);
} else if (is_tcf_police(act)) {
entry->id = FLOW_ACTION_POLICE;
entry->police.burst = tcf_police_tcfp_burst(act);
@@ -3269,16 +3433,50 @@ int tc_setup_flow_action(struct flow_action *flow_action,
entry->id = FLOW_ACTION_CT;
entry->ct.action = tcf_ct_action(act);
entry->ct.zone = tcf_ct_zone(act);
+ } else if (is_tcf_mpls(act)) {
+ switch (tcf_mpls_action(act)) {
+ case TCA_MPLS_ACT_PUSH:
+ entry->id = FLOW_ACTION_MPLS_PUSH;
+ entry->mpls_push.proto = tcf_mpls_proto(act);
+ entry->mpls_push.label = tcf_mpls_label(act);
+ entry->mpls_push.tc = tcf_mpls_tc(act);
+ entry->mpls_push.bos = tcf_mpls_bos(act);
+ entry->mpls_push.ttl = tcf_mpls_ttl(act);
+ break;
+ case TCA_MPLS_ACT_POP:
+ entry->id = FLOW_ACTION_MPLS_POP;
+ entry->mpls_pop.proto = tcf_mpls_proto(act);
+ break;
+ case TCA_MPLS_ACT_MODIFY:
+ entry->id = FLOW_ACTION_MPLS_MANGLE;
+ entry->mpls_mangle.label = tcf_mpls_label(act);
+ entry->mpls_mangle.tc = tcf_mpls_tc(act);
+ entry->mpls_mangle.bos = tcf_mpls_bos(act);
+ entry->mpls_mangle.ttl = tcf_mpls_ttl(act);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
+ } else if (is_tcf_skbedit_ptype(act)) {
+ entry->id = FLOW_ACTION_PTYPE;
+ entry->ptype = tcf_skbedit_ptype(act);
} else {
+ err = -EOPNOTSUPP;
goto err_out;
}
if (!is_tcf_pedit(act))
j++;
}
- return 0;
+
err_out:
- return -EOPNOTSUPP;
+ if (!rtnl_held)
+ rtnl_unlock();
+
+ if (err)
+ tc_cleanup_flow_action(flow_action);
+
+ return err;
}
EXPORT_SYMBOL(tc_setup_flow_action);
@@ -3321,6 +3519,11 @@ static struct pernet_operations tcf_net_ops = {
.size = sizeof(struct tcf_net),
};
+static struct flow_indr_block_ing_entry block_ing_entry = {
+ .cb = tc_indr_block_get_and_ing_cmd,
+ .list = LIST_HEAD_INIT(block_ing_entry.list),
+};
+
static int __init tc_filter_init(void)
{
int err;
@@ -3333,10 +3536,7 @@ static int __init tc_filter_init(void)
if (err)
goto err_register_pernet_subsys;
- err = rhashtable_init(&indr_setup_block_ht,
- &tc_indr_setup_block_ht_params);
- if (err)
- goto err_rhash_setup_block_ht;
+ flow_indr_add_block_ing_cb(&block_ing_entry);
rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
RTNL_FLAG_DOIT_UNLOCKED);
@@ -3351,8 +3551,6 @@ static int __init tc_filter_init(void)
return 0;
-err_rhash_setup_block_ht:
- unregister_pernet_subsys(&tcf_net_ops);
err_register_pernet_subsys:
destroy_workqueue(tc_filter_wq);
return err;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 3f7a9c02b70c..bf10bdaf5012 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -163,17 +163,19 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
cls_bpf.exts_integrated = obj->exts_integrated;
if (oldprog)
- tcf_block_offload_dec(block, &oldprog->gen_flags);
+ err = tc_setup_cb_replace(block, tp, TC_SETUP_CLSBPF, &cls_bpf,
+ skip_sw, &oldprog->gen_flags,
+ &oldprog->in_hw_count,
+ &prog->gen_flags, &prog->in_hw_count,
+ true);
+ else
+ err = tc_setup_cb_add(block, tp, TC_SETUP_CLSBPF, &cls_bpf,
+ skip_sw, &prog->gen_flags,
+ &prog->in_hw_count, true);
- err = tc_setup_cb_call(block, TC_SETUP_CLSBPF, &cls_bpf, skip_sw);
- if (prog) {
- if (err < 0) {
- cls_bpf_offload_cmd(tp, oldprog, prog, extack);
- return err;
- } else if (err > 0) {
- prog->in_hw_count = err;
- tcf_block_offload_inc(block, &prog->gen_flags);
- }
+ if (prog && err) {
+ cls_bpf_offload_cmd(tp, oldprog, prog, extack);
+ return err;
}
if (prog && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
@@ -230,7 +232,7 @@ static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
cls_bpf.name = prog->bpf_name;
cls_bpf.exts_integrated = prog->exts_integrated;
- tc_setup_cb_call(block, TC_SETUP_CLSBPF, &cls_bpf, false);
+ tc_setup_cb_call(block, TC_SETUP_CLSBPF, &cls_bpf, false, true);
}
static int cls_bpf_init(struct tcf_proto *tp)
@@ -673,15 +675,11 @@ static int cls_bpf_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb
cls_bpf.name = prog->bpf_name;
cls_bpf.exts_integrated = prog->exts_integrated;
- err = cb(TC_SETUP_CLSBPF, &cls_bpf, cb_priv);
- if (err) {
- if (add && tc_skip_sw(prog->gen_flags))
- return err;
- continue;
- }
-
- tc_cls_offload_cnt_update(block, &prog->in_hw_count,
- &prog->gen_flags, add);
+ err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSBPF,
+ &cls_bpf, cb_priv, &prog->gen_flags,
+ &prog->in_hw_count);
+ if (err)
+ return err;
}
return 0;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 054123742e32..74221e3351c3 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -412,41 +412,27 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f,
struct tcf_block *block = tp->chain->block;
struct flow_cls_offload cls_flower = {};
- if (!rtnl_held)
- rtnl_lock();
-
tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
cls_flower.command = FLOW_CLS_DESTROY;
cls_flower.cookie = (unsigned long) f;
- tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
- spin_lock(&tp->lock);
- list_del_init(&f->hw_list);
- tcf_block_offload_dec(block, &f->flags);
- spin_unlock(&tp->lock);
+ tc_setup_cb_destroy(block, tp, TC_SETUP_CLSFLOWER, &cls_flower, false,
+ &f->flags, &f->in_hw_count, rtnl_held);
- if (!rtnl_held)
- rtnl_unlock();
}
static int fl_hw_replace_filter(struct tcf_proto *tp,
struct cls_fl_filter *f, bool rtnl_held,
struct netlink_ext_ack *extack)
{
- struct cls_fl_head *head = fl_head_dereference(tp);
struct tcf_block *block = tp->chain->block;
struct flow_cls_offload cls_flower = {};
bool skip_sw = tc_skip_sw(f->flags);
int err = 0;
- if (!rtnl_held)
- rtnl_lock();
-
cls_flower.rule = flow_rule_alloc(tcf_exts_num_actions(&f->exts));
- if (!cls_flower.rule) {
- err = -ENOMEM;
- goto errout;
- }
+ if (!cls_flower.rule)
+ return -ENOMEM;
tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
cls_flower.command = FLOW_CLS_REPLACE;
@@ -456,43 +442,31 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
cls_flower.rule->match.key = &f->mkey;
cls_flower.classid = f->res.classid;
- err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts);
+ err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts,
+ rtnl_held);
if (err) {
kfree(cls_flower.rule);
- if (skip_sw)
+ if (skip_sw) {
NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
- else
- err = 0;
- goto errout;
+ return err;
+ }
+ return 0;
}
- err = tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw);
+ err = tc_setup_cb_add(block, tp, TC_SETUP_CLSFLOWER, &cls_flower,
+ skip_sw, &f->flags, &f->in_hw_count, rtnl_held);
+ tc_cleanup_flow_action(&cls_flower.rule->action);
kfree(cls_flower.rule);
- if (err < 0) {
- fl_hw_destroy_filter(tp, f, true, NULL);
- goto errout;
- } else if (err > 0) {
- f->in_hw_count = err;
- err = 0;
- spin_lock(&tp->lock);
- tcf_block_offload_inc(block, &f->flags);
- spin_unlock(&tp->lock);
- }
-
- if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW)) {
- err = -EINVAL;
- goto errout;
+ if (err) {
+ fl_hw_destroy_filter(tp, f, rtnl_held, NULL);
+ return err;
}
- spin_lock(&tp->lock);
- list_add(&f->hw_list, &head->hw_filters);
- spin_unlock(&tp->lock);
-errout:
- if (!rtnl_held)
- rtnl_unlock();
+ if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW))
+ return -EINVAL;
- return err;
+ return 0;
}
static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f,
@@ -501,22 +475,17 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f,
struct tcf_block *block = tp->chain->block;
struct flow_cls_offload cls_flower = {};
- if (!rtnl_held)
- rtnl_lock();
-
tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL);
cls_flower.command = FLOW_CLS_STATS;
cls_flower.cookie = (unsigned long) f;
cls_flower.classid = f->res.classid;
- tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
+ tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false,
+ rtnl_held);
tcf_exts_stats_update(&f->exts, cls_flower.stats.bytes,
cls_flower.stats.pkts,
cls_flower.stats.lastused);
-
- if (!rtnl_held)
- rtnl_unlock();
}
static void __fl_put(struct cls_fl_filter *f)
@@ -1831,7 +1800,8 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
cls_flower.rule->match.mask = &f->mask->key;
cls_flower.rule->match.key = &f->mkey;
- err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts);
+ err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts,
+ true);
if (err) {
kfree(cls_flower.rule);
if (tc_skip_sw(f->flags)) {
@@ -1844,21 +1814,17 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
cls_flower.classid = f->res.classid;
- err = cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv);
+ err = tc_setup_cb_reoffload(block, tp, add, cb,
+ TC_SETUP_CLSFLOWER, &cls_flower,
+ cb_priv, &f->flags,
+ &f->in_hw_count);
+ tc_cleanup_flow_action(&cls_flower.rule->action);
kfree(cls_flower.rule);
if (err) {
- if (add && tc_skip_sw(f->flags)) {
- __fl_put(f);
- return err;
- }
- goto next_flow;
+ __fl_put(f);
+ return err;
}
-
- spin_lock(&tp->lock);
- tc_cls_offload_cnt_update(block, &f->in_hw_count, &f->flags,
- add);
- spin_unlock(&tp->lock);
next_flow:
__fl_put(f);
}
@@ -1866,6 +1832,30 @@ next_flow:
return 0;
}
+static void fl_hw_add(struct tcf_proto *tp, void *type_data)
+{
+ struct flow_cls_offload *cls_flower = type_data;
+ struct cls_fl_filter *f =
+ (struct cls_fl_filter *) cls_flower->cookie;
+ struct cls_fl_head *head = fl_head_dereference(tp);
+
+ spin_lock(&tp->lock);
+ list_add(&f->hw_list, &head->hw_filters);
+ spin_unlock(&tp->lock);
+}
+
+static void fl_hw_del(struct tcf_proto *tp, void *type_data)
+{
+ struct flow_cls_offload *cls_flower = type_data;
+ struct cls_fl_filter *f =
+ (struct cls_fl_filter *) cls_flower->cookie;
+
+ spin_lock(&tp->lock);
+ if (!list_empty(&f->hw_list))
+ list_del_init(&f->hw_list);
+ spin_unlock(&tp->lock);
+}
+
static int fl_hw_create_tmplt(struct tcf_chain *chain,
struct fl_flow_tmplt *tmplt)
{
@@ -1886,7 +1876,7 @@ static int fl_hw_create_tmplt(struct tcf_chain *chain,
/* We don't care if driver (any of them) fails to handle this
* call. It serves just as a hint for it.
*/
- tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
+ tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false, true);
kfree(cls_flower.rule);
return 0;
@@ -1902,7 +1892,7 @@ static void fl_hw_destroy_tmplt(struct tcf_chain *chain,
cls_flower.command = FLOW_CLS_TMPLT_DESTROY;
cls_flower.cookie = (unsigned long) tmplt;
- tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
+ tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false, true);
}
static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain,
@@ -2526,6 +2516,8 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
.delete = fl_delete,
.walk = fl_walk,
.reoffload = fl_reoffload,
+ .hw_add = fl_hw_add,
+ .hw_del = fl_hw_del,
.dump = fl_dump,
.bind_class = fl_bind_class,
.tmplt_create = fl_tmplt_create,
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 455ea2793f9b..7fc2eb62aa98 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -75,8 +75,8 @@ static void mall_destroy_hw_filter(struct tcf_proto *tp,
cls_mall.command = TC_CLSMATCHALL_DESTROY;
cls_mall.cookie = cookie;
- tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false);
- tcf_block_offload_dec(block, &head->flags);
+ tc_setup_cb_destroy(block, tp, TC_SETUP_CLSMATCHALL, &cls_mall, false,
+ &head->flags, &head->in_hw_count, true);
}
static int mall_replace_hw_filter(struct tcf_proto *tp,
@@ -97,7 +97,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
cls_mall.command = TC_CLSMATCHALL_REPLACE;
cls_mall.cookie = cookie;
- err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts);
+ err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts, true);
if (err) {
kfree(cls_mall.rule);
mall_destroy_hw_filter(tp, head, cookie, NULL);
@@ -109,15 +109,14 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
return err;
}
- err = tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, skip_sw);
+ err = tc_setup_cb_add(block, tp, TC_SETUP_CLSMATCHALL, &cls_mall,
+ skip_sw, &head->flags, &head->in_hw_count, true);
+ tc_cleanup_flow_action(&cls_mall.rule->action);
kfree(cls_mall.rule);
- if (err < 0) {
+ if (err) {
mall_destroy_hw_filter(tp, head, cookie, NULL);
return err;
- } else if (err > 0) {
- head->in_hw_count = err;
- tcf_block_offload_inc(block, &head->flags);
}
if (skip_sw && !(head->flags & TCA_CLS_FLAGS_IN_HW))
@@ -302,7 +301,7 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
TC_CLSMATCHALL_REPLACE : TC_CLSMATCHALL_DESTROY;
cls_mall.cookie = (unsigned long)head;
- err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts);
+ err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts, true);
if (err) {
kfree(cls_mall.rule);
if (add && tc_skip_sw(head->flags)) {
@@ -312,16 +311,14 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
return 0;
}
- err = cb(TC_SETUP_CLSMATCHALL, &cls_mall, cb_priv);
+ err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSMATCHALL,
+ &cls_mall, cb_priv, &head->flags,
+ &head->in_hw_count);
+ tc_cleanup_flow_action(&cls_mall.rule->action);
kfree(cls_mall.rule);
- if (err) {
- if (add && tc_skip_sw(head->flags))
- return err;
- return 0;
- }
-
- tc_cls_offload_cnt_update(block, &head->in_hw_count, &head->flags, add);
+ if (err)
+ return err;
return 0;
}
@@ -337,7 +334,7 @@ static void mall_stats_hw_filter(struct tcf_proto *tp,
cls_mall.command = TC_CLSMATCHALL_STATS;
cls_mall.cookie = cookie;
- tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false);
+ tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false, true);
tcf_exts_stats_update(&head->exts, cls_mall.stats.bytes,
cls_mall.stats.pkts, cls_mall.stats.lastused);
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 8614088edd1b..a0e6fac613de 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -480,7 +480,7 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
cls_u32.hnode.handle = h->handle;
cls_u32.hnode.prio = h->prio;
- tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false);
+ tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false, true);
}
static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
@@ -498,7 +498,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
cls_u32.hnode.handle = h->handle;
cls_u32.hnode.prio = h->prio;
- err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw);
+ err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw, true);
if (err < 0) {
u32_clear_hw_hnode(tp, h, NULL);
return err;
@@ -522,8 +522,8 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
cls_u32.command = TC_CLSU32_DELETE_KNODE;
cls_u32.knode.handle = n->handle;
- tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false);
- tcf_block_offload_dec(block, &n->flags);
+ tc_setup_cb_destroy(block, tp, TC_SETUP_CLSU32, &cls_u32, false,
+ &n->flags, &n->in_hw_count, true);
}
static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
@@ -552,13 +552,11 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
if (n->ht_down)
cls_u32.knode.link_handle = ht->handle;
- err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw);
- if (err < 0) {
+ err = tc_setup_cb_add(block, tp, TC_SETUP_CLSU32, &cls_u32, skip_sw,
+ &n->flags, &n->in_hw_count, true);
+ if (err) {
u32_remove_hw_knode(tp, n, NULL);
return err;
- } else if (err > 0) {
- n->in_hw_count = err;
- tcf_block_offload_inc(block, &n->flags);
}
if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW))
@@ -1201,14 +1199,11 @@ static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n,
cls_u32.knode.link_handle = ht->handle;
}
- err = cb(TC_SETUP_CLSU32, &cls_u32, cb_priv);
- if (err) {
- if (add && tc_skip_sw(n->flags))
- return err;
- return 0;
- }
-
- tc_cls_offload_cnt_update(block, &n->in_hw_count, &n->flags, add);
+ err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSU32,
+ &cls_u32, cb_priv, &n->flags,
+ &n->in_hw_count);
+ if (err)
+ return err;
return 0;
}
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index 810645b5c086..93b58fde99b7 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -299,7 +299,7 @@ static void cbs_set_port_rate(struct net_device *dev, struct cbs_sched_data *q)
{
struct ethtool_link_ksettings ecmd;
int speed = SPEED_10;
- int port_rate = -1;
+ int port_rate;
int err;
err = __ethtool_get_link_ksettings(dev, &ecmd);
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index d59fbcc745d1..c261c0a18868 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -45,7 +45,6 @@ struct fq_codel_flow {
struct sk_buff *tail;
struct list_head flowchain;
int deficit;
- u32 dropped; /* number of drops (or ECN marks) on this flow */
struct codel_vars cvars;
}; /* please try to keep this structure <= 64 bytes */
@@ -173,7 +172,8 @@ static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets,
__qdisc_drop(skb, to_free);
} while (++i < max_packets && len < threshold);
- flow->dropped += i;
+ /* Tell codel to increase its signal strength also */
+ flow->cvars.count += i;
q->backlogs[idx] -= len;
q->memory_usage -= mem;
sch->qstats.drops += i;
@@ -211,7 +211,6 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch,
list_add_tail(&flow->flowchain, &q->new_flows);
q->new_flow_count++;
flow->deficit = q->quantum;
- flow->dropped = 0;
}
get_codel_cb(skb)->mem_usage = skb->truesize;
q->memory_usage += get_codel_cb(skb)->mem_usage;
@@ -286,7 +285,6 @@ static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch)
struct sk_buff *skb;
struct fq_codel_flow *flow;
struct list_head *head;
- u32 prev_drop_count, prev_ecn_mark;
begin:
head = &q->new_flows;
@@ -303,16 +301,10 @@ begin:
goto begin;
}
- prev_drop_count = q->cstats.drop_count;
- prev_ecn_mark = q->cstats.ecn_mark;
-
skb = codel_dequeue(sch, &sch->qstats.backlog, &q->cparams,
&flow->cvars, &q->cstats, qdisc_pkt_len,
codel_get_enqueue_time, drop_func, dequeue_func);
- flow->dropped += q->cstats.drop_count - prev_drop_count;
- flow->dropped += q->cstats.ecn_mark - prev_ecn_mark;
-
if (!skb) {
/* force a pass through old_flows to prevent starvation */
if ((head == &q->new_flows) && !list_empty(&q->old_flows))
@@ -658,7 +650,7 @@ static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl,
sch_tree_unlock(sch);
}
qs.backlog = q->backlogs[idx];
- qs.drops = flow->dropped;
+ qs.drops = 0;
}
if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0)
return -1;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index ac28f6a5d70e..17bd8f539bc7 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -985,6 +985,9 @@ static void qdisc_destroy(struct Qdisc *qdisc)
void qdisc_put(struct Qdisc *qdisc)
{
+ if (!qdisc)
+ return;
+
if (qdisc->flags & TCQ_F_BUILTIN ||
!refcount_dec_and_test(&qdisc->refcnt))
return;
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 8d8bc2ec5cd6..2f7b34205c82 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -29,8 +29,8 @@ static DEFINE_SPINLOCK(taprio_list_lock);
#define TAPRIO_ALL_GATES_OPEN -1
-#define FLAGS_VALID(flags) (!((flags) & ~TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST))
#define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST)
+#define FULL_OFFLOAD_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)
struct sched_entry {
struct list_head list;
@@ -75,9 +75,16 @@ struct taprio_sched {
struct sched_gate_list __rcu *admin_sched;
struct hrtimer advance_timer;
struct list_head taprio_list;
+ struct sk_buff *(*dequeue)(struct Qdisc *sch);
+ struct sk_buff *(*peek)(struct Qdisc *sch);
u32 txtime_delay;
};
+struct __tc_taprio_qopt_offload {
+ refcount_t users;
+ struct tc_taprio_qopt_offload offload;
+};
+
static ktime_t sched_base_time(const struct sched_gate_list *sched)
{
if (!sched)
@@ -268,6 +275,19 @@ static bool is_valid_interval(struct sk_buff *skb, struct Qdisc *sch)
return entry;
}
+static bool taprio_flags_valid(u32 flags)
+{
+ /* Make sure no other flag bits are set. */
+ if (flags & ~(TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST |
+ TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD))
+ return false;
+ /* txtime-assist and full offload are mutually exclusive */
+ if ((flags & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) &&
+ (flags & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD))
+ return false;
+ return true;
+}
+
/* This returns the tstamp value set by TCP in terms of the set clock. */
static ktime_t get_tcp_tstamp(struct taprio_sched *q, struct sk_buff *skb)
{
@@ -417,7 +437,7 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return qdisc_enqueue(skb, child, to_free);
}
-static struct sk_buff *taprio_peek(struct Qdisc *sch)
+static struct sk_buff *taprio_peek_soft(struct Qdisc *sch)
{
struct taprio_sched *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
@@ -461,6 +481,36 @@ static struct sk_buff *taprio_peek(struct Qdisc *sch)
return NULL;
}
+static struct sk_buff *taprio_peek_offload(struct Qdisc *sch)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct sk_buff *skb;
+ int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct Qdisc *child = q->qdiscs[i];
+
+ if (unlikely(!child))
+ continue;
+
+ skb = child->ops->peek(child);
+ if (!skb)
+ continue;
+
+ return skb;
+ }
+
+ return NULL;
+}
+
+static struct sk_buff *taprio_peek(struct Qdisc *sch)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+
+ return q->peek(sch);
+}
+
static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry)
{
atomic_set(&entry->budget,
@@ -468,7 +518,7 @@ static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry)
atomic64_read(&q->picos_per_byte)));
}
-static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
+static struct sk_buff *taprio_dequeue_soft(struct Qdisc *sch)
{
struct taprio_sched *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
@@ -550,6 +600,40 @@ done:
return skb;
}
+static struct sk_buff *taprio_dequeue_offload(struct Qdisc *sch)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct sk_buff *skb;
+ int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct Qdisc *child = q->qdiscs[i];
+
+ if (unlikely(!child))
+ continue;
+
+ skb = child->ops->dequeue(child);
+ if (unlikely(!skb))
+ continue;
+
+ qdisc_bstats_update(sch, skb);
+ qdisc_qstats_backlog_dec(sch, skb);
+ sch->q.qlen--;
+
+ return skb;
+ }
+
+ return NULL;
+}
+
+static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+
+ return q->dequeue(sch);
+}
+
static bool should_restart_cycle(const struct sched_gate_list *oper,
const struct sched_entry *entry)
{
@@ -672,10 +756,6 @@ static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = {
[TCA_TAPRIO_SCHED_ENTRY_INTERVAL] = { .type = NLA_U32 },
};
-static const struct nla_policy entry_list_policy[TCA_TAPRIO_SCHED_MAX + 1] = {
- [TCA_TAPRIO_SCHED_ENTRY] = { .type = NLA_NESTED },
-};
-
static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
[TCA_TAPRIO_ATTR_PRIOMAP] = {
.len = sizeof(struct tc_mqprio_qopt)
@@ -936,6 +1016,9 @@ static void taprio_start_sched(struct Qdisc *sch,
struct taprio_sched *q = qdisc_priv(sch);
ktime_t expires;
+ if (FULL_OFFLOAD_IS_ENABLED(q->flags))
+ return;
+
expires = hrtimer_get_expires(&q->advance_timer);
if (expires == 0)
expires = KTIME_MAX;
@@ -1015,6 +1098,254 @@ static void setup_txtime(struct taprio_sched *q,
}
}
+static struct tc_taprio_qopt_offload *taprio_offload_alloc(int num_entries)
+{
+ size_t size = sizeof(struct tc_taprio_sched_entry) * num_entries +
+ sizeof(struct __tc_taprio_qopt_offload);
+ struct __tc_taprio_qopt_offload *__offload;
+
+ __offload = kzalloc(size, GFP_KERNEL);
+ if (!__offload)
+ return NULL;
+
+ refcount_set(&__offload->users, 1);
+
+ return &__offload->offload;
+}
+
+struct tc_taprio_qopt_offload *taprio_offload_get(struct tc_taprio_qopt_offload
+ *offload)
+{
+ struct __tc_taprio_qopt_offload *__offload;
+
+ __offload = container_of(offload, struct __tc_taprio_qopt_offload,
+ offload);
+
+ refcount_inc(&__offload->users);
+
+ return offload;
+}
+EXPORT_SYMBOL_GPL(taprio_offload_get);
+
+void taprio_offload_free(struct tc_taprio_qopt_offload *offload)
+{
+ struct __tc_taprio_qopt_offload *__offload;
+
+ __offload = container_of(offload, struct __tc_taprio_qopt_offload,
+ offload);
+
+ if (!refcount_dec_and_test(&__offload->users))
+ return;
+
+ kfree(__offload);
+}
+EXPORT_SYMBOL_GPL(taprio_offload_free);
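
taprio_offload_get()/taprio_offload_free() implement a refcounted container: the public struct tc_taprio_qopt_offload is embedded in a private wrapper that owns the refcount, so a driver can keep the schedule alive past ndo_setup_tc(). A standalone sketch of the same idea with simplified, illustrative types:

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct qopt_offload { int enable; int num_entries; };

struct __qopt_offload {
	atomic_uint users;
	struct qopt_offload offload;
};

static struct qopt_offload *offload_alloc(void)
{
	struct __qopt_offload *w = calloc(1, sizeof(*w));

	if (!w)
		return NULL;
	atomic_init(&w->users, 1);
	return &w->offload;	/* callers only ever see the embedded struct */
}

static struct qopt_offload *offload_get(struct qopt_offload *o)
{
	struct __qopt_offload *w = container_of(o, struct __qopt_offload, offload);

	atomic_fetch_add(&w->users, 1);
	return o;
}

static void offload_free(struct qopt_offload *o)
{
	struct __qopt_offload *w = container_of(o, struct __qopt_offload, offload);

	if (atomic_fetch_sub(&w->users, 1) == 1)
		free(w);	/* last reference dropped */
}

int main(void)
{
	struct qopt_offload *o = offload_alloc();

	if (!o)
		return 1;
	offload_get(o);		/* e.g. a driver keeping the schedule */
	offload_free(o);	/* qdisc drops its reference */
	offload_free(o);	/* driver drops the last reference; memory freed */
	printf("done\n");
	return 0;
}
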
+
+/* The function will only serve to keep the pointers to the "oper" and "admin"
+ * schedules valid in relation to their base times, so when calling dump() the
+ * user looks at the right schedules.
+ * When using full offload, the admin configuration is promoted to oper at the
+ * base_time in the PHC time domain. But because the system time is not
+ * necessarily in sync with that, we can't just trigger a hrtimer to call
+ * switch_schedules at the right hardware time.
+ * At the moment we call this by hand right away from taprio, but in the future
+ * it will be useful to create a mechanism for drivers to notify taprio of the
+ * offload state (PENDING, ACTIVE, INACTIVE) so it can be visible in dump().
+ * This is left as TODO.
+ */
+void taprio_offload_config_changed(struct taprio_sched *q)
+{
+ struct sched_gate_list *oper, *admin;
+
+ spin_lock(&q->current_entry_lock);
+
+ oper = rcu_dereference_protected(q->oper_sched,
+ lockdep_is_held(&q->current_entry_lock));
+ admin = rcu_dereference_protected(q->admin_sched,
+ lockdep_is_held(&q->current_entry_lock));
+
+ switch_schedules(q, &admin, &oper);
+
+ spin_unlock(&q->current_entry_lock);
+}
+
+static void taprio_sched_to_offload(struct taprio_sched *q,
+ struct sched_gate_list *sched,
+ const struct tc_mqprio_qopt *mqprio,
+ struct tc_taprio_qopt_offload *offload)
+{
+ struct sched_entry *entry;
+ int i = 0;
+
+ offload->base_time = sched->base_time;
+ offload->cycle_time = sched->cycle_time;
+ offload->cycle_time_extension = sched->cycle_time_extension;
+
+ list_for_each_entry(entry, &sched->entries, list) {
+ struct tc_taprio_sched_entry *e = &offload->entries[i];
+
+ e->command = entry->command;
+ e->interval = entry->interval;
+ e->gate_mask = entry->gate_mask;
+ i++;
+ }
+
+ offload->num_entries = i;
+}
+
+static int taprio_enable_offload(struct net_device *dev,
+ struct tc_mqprio_qopt *mqprio,
+ struct taprio_sched *q,
+ struct sched_gate_list *sched,
+ struct netlink_ext_ack *extack)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+ struct tc_taprio_qopt_offload *offload;
+ int err = 0;
+
+ if (!ops->ndo_setup_tc) {
+ NL_SET_ERR_MSG(extack,
+ "Device does not support taprio offload");
+ return -EOPNOTSUPP;
+ }
+
+ offload = taprio_offload_alloc(sched->num_entries);
+ if (!offload) {
+ NL_SET_ERR_MSG(extack,
+ "Not enough memory for enabling offload mode");
+ return -ENOMEM;
+ }
+ offload->enable = 1;
+ taprio_sched_to_offload(q, sched, mqprio, offload);
+
+ err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
+ if (err < 0) {
+ NL_SET_ERR_MSG(extack,
+ "Device failed to setup taprio offload");
+ goto done;
+ }
+
+ taprio_offload_config_changed(q);
+
+done:
+ taprio_offload_free(offload);
+
+ return err;
+}
+
+static int taprio_disable_offload(struct net_device *dev,
+ struct taprio_sched *q,
+ struct netlink_ext_ack *extack)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+ struct tc_taprio_qopt_offload *offload;
+ int err;
+
+ if (!FULL_OFFLOAD_IS_ENABLED(q->flags))
+ return 0;
+
+ if (!ops->ndo_setup_tc)
+ return -EOPNOTSUPP;
+
+ offload = taprio_offload_alloc(0);
+ if (!offload) {
+ NL_SET_ERR_MSG(extack,
+ "Not enough memory to disable offload mode");
+ return -ENOMEM;
+ }
+ offload->enable = 0;
+
+ err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
+ if (err < 0) {
+ NL_SET_ERR_MSG(extack,
+ "Device failed to disable offload");
+ goto out;
+ }
+
+out:
+ taprio_offload_free(offload);
+
+ return err;
+}
+
+/* If full offload is enabled, the only possible clockid is the net device's
+ * PHC. For that reason, specifying a clockid through netlink is incorrect.
+ * For txtime-assist, it is implicitly assumed that the device's PHC is kept
+ * in sync with the specified clockid via a user space daemon such as phc2sys.
+ * For both software taprio and txtime-assist, the clockid is used for the
+ * hrtimer that advances the schedule, and is hence mandatory.
+ */
+static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb,
+ struct netlink_ext_ack *extack)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ int err = -EINVAL;
+
+ if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct ethtool_ts_info info = {
+ .cmd = ETHTOOL_GET_TS_INFO,
+ .phc_index = -1,
+ };
+
+ if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
+ NL_SET_ERR_MSG(extack,
+ "The 'clockid' cannot be specified for full offload");
+ goto out;
+ }
+
+ if (ops && ops->get_ts_info)
+ err = ops->get_ts_info(dev, &info);
+
+ if (err || info.phc_index < 0) {
+ NL_SET_ERR_MSG(extack,
+ "Device does not have a PTP clock");
+ err = -ENOTSUPP;
+ goto out;
+ }
+ } else if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
+ int clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]);
+
+ /* We only support static clockids and we don't allow
+ * them to be modified after the first init.
+ */
+ if (clockid < 0 ||
+ (q->clockid != -1 && q->clockid != clockid)) {
+ NL_SET_ERR_MSG(extack,
+ "Changing the 'clockid' of a running schedule is not supported");
+ err = -ENOTSUPP;
+ goto out;
+ }
+
+ switch (clockid) {
+ case CLOCK_REALTIME:
+ q->tk_offset = TK_OFFS_REAL;
+ break;
+ case CLOCK_MONOTONIC:
+ q->tk_offset = TK_OFFS_MAX;
+ break;
+ case CLOCK_BOOTTIME:
+ q->tk_offset = TK_OFFS_BOOT;
+ break;
+ case CLOCK_TAI:
+ q->tk_offset = TK_OFFS_TAI;
+ break;
+ default:
+ NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
+ err = -EINVAL;
+ goto out;
+ }
+
+ q->clockid = clockid;
+ } else {
+ NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory");
+ goto out;
+ }
+out:
+ return err;
+}
+
static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
@@ -1024,9 +1355,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
struct net_device *dev = qdisc_dev(sch);
struct tc_mqprio_qopt *mqprio = NULL;
u32 taprio_flags = 0;
- int i, err, clockid;
unsigned long flags;
ktime_t start;
+ int i, err;
err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_ATTR_MAX, opt,
taprio_policy, extack);
@@ -1042,7 +1373,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
if (q->flags != 0 && q->flags != taprio_flags) {
NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported");
return -EOPNOTSUPP;
- } else if (!FLAGS_VALID(taprio_flags)) {
+ } else if (!taprio_flags_valid(taprio_flags)) {
NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid");
return -EINVAL;
}
@@ -1082,30 +1413,19 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
goto free_sched;
}
- if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
- clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]);
-
- /* We only support static clockids and we don't allow
- * for it to be modified after the first init.
- */
- if (clockid < 0 ||
- (q->clockid != -1 && q->clockid != clockid)) {
- NL_SET_ERR_MSG(extack, "Changing the 'clockid' of a running schedule is not supported");
- err = -ENOTSUPP;
- goto free_sched;
- }
-
- q->clockid = clockid;
- }
-
- if (q->clockid == -1 && !tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
- NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory");
- err = -EINVAL;
+ err = taprio_parse_clockid(sch, tb, extack);
+ if (err < 0)
goto free_sched;
- }
taprio_set_picos_per_byte(dev, q);
+ if (FULL_OFFLOAD_IS_ENABLED(taprio_flags))
+ err = taprio_enable_offload(dev, mqprio, q, new_admin, extack);
+ else
+ err = taprio_disable_offload(dev, q, extack);
+ if (err)
+ goto free_sched;
+
/* Protects against enqueue()/dequeue() */
spin_lock_bh(qdisc_lock(sch));
@@ -1120,6 +1440,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
}
if (!TXTIME_ASSIST_IS_ENABLED(taprio_flags) &&
+ !FULL_OFFLOAD_IS_ENABLED(taprio_flags) &&
!hrtimer_active(&q->advance_timer)) {
hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS);
q->advance_timer.function = advance_sched;
@@ -1138,23 +1459,15 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
mqprio->prio_tc_map[i]);
}
- switch (q->clockid) {
- case CLOCK_REALTIME:
- q->tk_offset = TK_OFFS_REAL;
- break;
- case CLOCK_MONOTONIC:
- q->tk_offset = TK_OFFS_MAX;
- break;
- case CLOCK_BOOTTIME:
- q->tk_offset = TK_OFFS_BOOT;
- break;
- case CLOCK_TAI:
- q->tk_offset = TK_OFFS_TAI;
- break;
- default:
- NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
- err = -EINVAL;
- goto unlock;
+ if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) {
+ q->dequeue = taprio_dequeue_offload;
+ q->peek = taprio_peek_offload;
+ } else {
+ /* Be sure to always keep the function pointers
+ * in a consistent state.
+ */
+ q->dequeue = taprio_dequeue_soft;
+ q->peek = taprio_peek_soft;
}
err = taprio_get_start_time(sch, new_admin, &start);
@@ -1216,6 +1529,8 @@ static void taprio_destroy(struct Qdisc *sch)
hrtimer_cancel(&q->advance_timer);
+ taprio_disable_offload(dev, q, NULL);
+
if (q->qdiscs) {
for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++)
qdisc_put(q->qdiscs[i]);
@@ -1245,6 +1560,9 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS);
q->advance_timer.function = advance_sched;
+ q->dequeue = taprio_dequeue_soft;
+ q->peek = taprio_peek_soft;
+
q->root = sch;
/* We only support static clockids. Use an invalid value as default
@@ -1427,7 +1745,8 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(opt), &opt))
goto options_error;
- if (nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid))
+ if (!FULL_OFFLOAD_IS_ENABLED(q->flags) &&
+ nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid))
goto options_error;
if (q->flags && nla_put_u32(skb, TCA_TAPRIO_ATTR_FLAGS, q->flags))
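As a rough illustration of the driver side of this interface, the sketch below shows how an ndo_setup_tc() implementation might consume the refcounted descriptor built by taprio_enable_offload() above. The foo_* driver structure and hardware helpers are hypothetical, and the header locations are assumed to follow the rest of this series; taprio_offload_get()/taprio_offload_free(), TC_SETUP_QDISC_TAPRIO and the entry fields are the ones introduced in the hunks above.

	#include <linux/netdevice.h>
	#include <net/pkt_sched.h>

	/* Hypothetical driver state and hardware helpers, illustration only. */
	struct foo_priv {
		struct tc_taprio_qopt_offload *taprio;
	};

	void foo_hw_taprio_disable(struct foo_priv *priv);
	void foo_hw_write_gcl(struct foo_priv *priv, int idx, u8 command,
			      u32 gate_mask, u32 interval);
	void foo_hw_set_base_time(struct foo_priv *priv, ktime_t base_time);
	void foo_hw_set_cycle_time(struct foo_priv *priv, u64 cycle, u64 ext);

	static int foo_setup_taprio(struct net_device *ndev,
				    struct tc_taprio_qopt_offload *offload)
	{
		struct foo_priv *priv = netdev_priv(ndev);
		size_t i;

		if (!offload->enable) {
			foo_hw_taprio_disable(priv);
			return 0;
		}

		/* Keep the schedule alive while the hardware is programmed;
		 * the reference is dropped later with taprio_offload_free().
		 */
		priv->taprio = taprio_offload_get(offload);

		for (i = 0; i < offload->num_entries; i++) {
			struct tc_taprio_sched_entry *e = &offload->entries[i];

			/* Each entry maps to one gate control list slot of an
			 * 802.1Qbv-capable MAC.
			 */
			foo_hw_write_gcl(priv, i, e->command, e->gate_mask,
					 e->interval);
		}

		foo_hw_set_base_time(priv, offload->base_time);
		foo_hw_set_cycle_time(priv, offload->cycle_time,
				      offload->cycle_time_extension);
		return 0;
	}

	static int foo_ndo_setup_tc(struct net_device *ndev,
				    enum tc_setup_type type, void *type_data)
	{
		if (type != TC_SETUP_QDISC_TAPRIO)
			return -EOPNOTSUPP;

		return foo_setup_taprio(ndev, type_data);
	}

A real driver would additionally report the PENDING/ACTIVE/INACTIVE state back to taprio once such a mechanism exists, as noted in the TODO comment above.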
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 5010cce52c93..d2ffc9a0ba3a 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -54,7 +54,6 @@ static struct sctp_association *sctp_association_init(
const struct sock *sk,
enum sctp_scope scope, gfp_t gfp)
{
- struct net *net = sock_net(sk);
struct sctp_sock *sp;
struct sctp_paramhdr *p;
int i;
@@ -214,14 +213,6 @@ static struct sctp_association *sctp_association_init(
asoc->peer.sack_needed = 1;
asoc->peer.sack_generation = 1;
- /* Assume that the peer will tell us if he recognizes ASCONF
- * as part of INIT exchange.
- * The sctp_addip_noauth option is there for backward compatibility
- * and will revert old behavior.
- */
- if (net->sctp.addip_noauth)
- asoc->peer.asconf_capable = 1;
-
/* Create an input queue. */
sctp_inq_init(&asoc->base.inqueue);
sctp_inq_set_th_handler(&asoc->base.inqueue, sctp_assoc_bh_rcv);
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index de4c78d4a21e..4278764d82b8 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -389,7 +389,7 @@ int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp)
/* If we don't support AUTH, or peer is not capable
* we don't need to do anything.
*/
- if (!asoc->ep->auth_enable || !asoc->peer.auth_capable)
+ if (!asoc->peer.auth_capable)
return 0;
/* If the key_id is non-zero and we couldn't find an
@@ -675,7 +675,7 @@ int sctp_auth_send_cid(enum sctp_cid chunk, const struct sctp_association *asoc)
if (!asoc)
return 0;
- if (!asoc->ep->auth_enable || !asoc->peer.auth_capable)
+ if (!asoc->peer.auth_capable)
return 0;
return __sctp_auth_cid(chunk, asoc->peer.peer_chunks);
@@ -687,7 +687,7 @@ int sctp_auth_recv_cid(enum sctp_cid chunk, const struct sctp_association *asoc)
if (!asoc)
return 0;
- if (!asoc->ep->auth_enable)
+ if (!asoc->peer.auth_capable)
return 0;
return __sctp_auth_cid(chunk,
@@ -831,10 +831,15 @@ int sctp_auth_set_key(struct sctp_endpoint *ep,
/* Try to find the given key id to see if
* we are doing a replace, or adding a new key
*/
- if (asoc)
+ if (asoc) {
+ if (!asoc->peer.auth_capable)
+ return -EACCES;
sh_keys = &asoc->endpoint_shared_keys;
- else
+ } else {
+ if (!ep->auth_enable)
+ return -EACCES;
sh_keys = &ep->endpoint_shared_keys;
+ }
key_for_each(shkey, sh_keys) {
if (shkey->key_id == auth_key->sca_keynumber) {
@@ -875,10 +880,15 @@ int sctp_auth_set_active_key(struct sctp_endpoint *ep,
int found = 0;
/* The key identifier MUST correspond to an existing key */
- if (asoc)
+ if (asoc) {
+ if (!asoc->peer.auth_capable)
+ return -EACCES;
sh_keys = &asoc->endpoint_shared_keys;
- else
+ } else {
+ if (!ep->auth_enable)
+ return -EACCES;
sh_keys = &ep->endpoint_shared_keys;
+ }
key_for_each(key, sh_keys) {
if (key->key_id == key_id) {
@@ -911,11 +921,15 @@ int sctp_auth_del_key_id(struct sctp_endpoint *ep,
* The key identifier MUST correspond to an existing key
*/
if (asoc) {
+ if (!asoc->peer.auth_capable)
+ return -EACCES;
if (asoc->active_key_id == key_id)
return -EINVAL;
sh_keys = &asoc->endpoint_shared_keys;
} else {
+ if (!ep->auth_enable)
+ return -EACCES;
if (ep->active_key_id == key_id)
return -EINVAL;
@@ -950,11 +964,15 @@ int sctp_auth_deact_key_id(struct sctp_endpoint *ep,
* The key identifier MUST correspond to an existing key
*/
if (asoc) {
+ if (!asoc->peer.auth_capable)
+ return -EACCES;
if (asoc->active_key_id == key_id)
return -EINVAL;
sh_keys = &asoc->endpoint_shared_keys;
} else {
+ if (!ep->auth_enable)
+ return -EACCES;
if (ep->active_key_id == key_id)
return -EINVAL;
@@ -989,3 +1007,72 @@ int sctp_auth_deact_key_id(struct sctp_endpoint *ep,
return 0;
}
+
+int sctp_auth_init(struct sctp_endpoint *ep, gfp_t gfp)
+{
+ int err = -ENOMEM;
+
+ /* Allocate space for HMACS and CHUNKS authentication
+ * variables. There are arrays that we encode directly
+ * into parameters to make the rest of the operations easier.
+ */
+ if (!ep->auth_hmacs_list) {
+ struct sctp_hmac_algo_param *auth_hmacs;
+
+ auth_hmacs = kzalloc(struct_size(auth_hmacs, hmac_ids,
+ SCTP_AUTH_NUM_HMACS), gfp);
+ if (!auth_hmacs)
+ goto nomem;
+ /* Initialize the HMACS parameter.
+ * SCTP-AUTH: Section 3.3
+ * Every endpoint supporting SCTP chunk authentication MUST
+ * support the HMAC based on the SHA-1 algorithm.
+ */
+ auth_hmacs->param_hdr.type = SCTP_PARAM_HMAC_ALGO;
+ auth_hmacs->param_hdr.length =
+ htons(sizeof(struct sctp_paramhdr) + 2);
+ auth_hmacs->hmac_ids[0] = htons(SCTP_AUTH_HMAC_ID_SHA1);
+ ep->auth_hmacs_list = auth_hmacs;
+ }
+
+ if (!ep->auth_chunk_list) {
+ struct sctp_chunks_param *auth_chunks;
+
+ auth_chunks = kzalloc(sizeof(*auth_chunks) +
+ SCTP_NUM_CHUNK_TYPES, gfp);
+ if (!auth_chunks)
+ goto nomem;
+ /* Initialize the CHUNKS parameter */
+ auth_chunks->param_hdr.type = SCTP_PARAM_CHUNKS;
+ auth_chunks->param_hdr.length =
+ htons(sizeof(struct sctp_paramhdr));
+ ep->auth_chunk_list = auth_chunks;
+ }
+
+ /* Allocate and initialize transform arrays for supported
+ * HMACs.
+ */
+ err = sctp_auth_init_hmacs(ep, gfp);
+ if (err)
+ goto nomem;
+
+ return 0;
+
+nomem:
+ /* Free all allocations */
+ kfree(ep->auth_hmacs_list);
+ kfree(ep->auth_chunk_list);
+ ep->auth_hmacs_list = NULL;
+ ep->auth_chunk_list = NULL;
+ return err;
+}
+
+void sctp_auth_free(struct sctp_endpoint *ep)
+{
+ kfree(ep->auth_hmacs_list);
+ kfree(ep->auth_chunk_list);
+ ep->auth_hmacs_list = NULL;
+ ep->auth_chunk_list = NULL;
+ sctp_auth_destroy_hmacs(ep->auth_hmacs);
+ ep->auth_hmacs = NULL;
+}
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 69cebb2c998b..ea53049d1db6 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -43,62 +43,21 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
gfp_t gfp)
{
struct net *net = sock_net(sk);
- struct sctp_hmac_algo_param *auth_hmacs = NULL;
- struct sctp_chunks_param *auth_chunks = NULL;
struct sctp_shared_key *null_key;
- int err;
ep->digest = kzalloc(SCTP_SIGNATURE_SIZE, gfp);
if (!ep->digest)
return NULL;
+ ep->asconf_enable = net->sctp.addip_enable;
ep->auth_enable = net->sctp.auth_enable;
if (ep->auth_enable) {
- /* Allocate space for HMACS and CHUNKS authentication
- * variables. There are arrays that we encode directly
- * into parameters to make the rest of the operations easier.
- */
- auth_hmacs = kzalloc(struct_size(auth_hmacs, hmac_ids,
- SCTP_AUTH_NUM_HMACS), gfp);
- if (!auth_hmacs)
- goto nomem;
-
- auth_chunks = kzalloc(sizeof(*auth_chunks) +
- SCTP_NUM_CHUNK_TYPES, gfp);
- if (!auth_chunks)
+ if (sctp_auth_init(ep, gfp))
goto nomem;
-
- /* Initialize the HMACS parameter.
- * SCTP-AUTH: Section 3.3
- * Every endpoint supporting SCTP chunk authentication MUST
- * support the HMAC based on the SHA-1 algorithm.
- */
- auth_hmacs->param_hdr.type = SCTP_PARAM_HMAC_ALGO;
- auth_hmacs->param_hdr.length =
- htons(sizeof(struct sctp_paramhdr) + 2);
- auth_hmacs->hmac_ids[0] = htons(SCTP_AUTH_HMAC_ID_SHA1);
-
- /* Initialize the CHUNKS parameter */
- auth_chunks->param_hdr.type = SCTP_PARAM_CHUNKS;
- auth_chunks->param_hdr.length =
- htons(sizeof(struct sctp_paramhdr));
-
- /* If the Add-IP functionality is enabled, we must
- * authenticate, ASCONF and ASCONF-ACK chunks
- */
- if (net->sctp.addip_enable) {
- auth_chunks->chunks[0] = SCTP_CID_ASCONF;
- auth_chunks->chunks[1] = SCTP_CID_ASCONF_ACK;
- auth_chunks->param_hdr.length =
- htons(sizeof(struct sctp_paramhdr) + 2);
+ if (ep->asconf_enable) {
+ sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF);
+ sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF_ACK);
}
-
- /* Allocate and initialize transorms arrays for supported
- * HMACs.
- */
- err = sctp_auth_init_hmacs(ep, gfp);
- if (err)
- goto nomem;
}
/* Initialize the base structure. */
@@ -145,10 +104,9 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
/* Add the null key to the endpoint shared keys list and
* set the hmcas and chunks pointers.
*/
- ep->auth_hmacs_list = auth_hmacs;
- ep->auth_chunk_list = auth_chunks;
ep->prsctp_enable = net->sctp.prsctp_enable;
ep->reconf_enable = net->sctp.reconf_enable;
+ ep->ecn_enable = net->sctp.ecn_enable;
/* Remember who we are attached to. */
ep->base.sk = sk;
@@ -157,11 +115,8 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
return ep;
nomem_shkey:
- sctp_auth_destroy_hmacs(ep->auth_hmacs);
+ sctp_auth_free(ep);
nomem:
- /* Free all allocations */
- kfree(auth_hmacs);
- kfree(auth_chunks);
kfree(ep->digest);
return NULL;
@@ -244,11 +199,7 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
* chunks and hmacs arrays that were allocated
*/
sctp_auth_destroy_keys(&ep->endpoint_shared_keys);
- kfree(ep->auth_hmacs_list);
- kfree(ep->auth_chunk_list);
-
- /* AUTH - Free any allocated HMAC transform containers */
- sctp_auth_destroy_hmacs(ep->auth_hmacs);
+ sctp_auth_free(ep);
/* Cleanup. */
sctp_inq_free(&ep->base.inqueue);
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 53746ffeeca3..08d14d86ecfb 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1254,6 +1254,9 @@ static int __net_init sctp_defaults_init(struct net *net)
/* Disable AUTH by default. */
net->sctp.auth_enable = 0;
+ /* Enable ECN by default. */
+ net->sctp.ecn_enable = 1;
+
/* Set SCOPE policy to enabled */
net->sctp.scope_policy = SCTP_SCOPE_POLICY_ENABLE;
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 36bd8a6e82df..e41ed2e0ae7d 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -207,7 +207,6 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
const struct sctp_bind_addr *bp,
gfp_t gfp, int vparam_len)
{
- struct net *net = sock_net(asoc->base.sk);
struct sctp_supported_ext_param ext_param;
struct sctp_adaptation_ind_param aiparam;
struct sctp_paramhdr *auth_chunks = NULL;
@@ -245,7 +244,9 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
chunksize = sizeof(init) + addrs_len;
chunksize += SCTP_PAD4(SCTP_SAT_LEN(num_types));
- chunksize += sizeof(ecap_param);
+
+ if (asoc->ep->ecn_enable)
+ chunksize += sizeof(ecap_param);
if (asoc->ep->prsctp_enable)
chunksize += sizeof(prsctp_param);
@@ -255,7 +256,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
* the ASCONF,the ASCONF-ACK, and the AUTH chunks in its INIT and
* INIT-ACK parameters.
*/
- if (net->sctp.addip_enable) {
+ if (asoc->ep->asconf_enable) {
extensions[num_ext] = SCTP_CID_ASCONF;
extensions[num_ext+1] = SCTP_CID_ASCONF_ACK;
num_ext += 2;
@@ -336,7 +337,8 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
sctp_addto_chunk(retval, sizeof(sat), &sat);
sctp_addto_chunk(retval, num_types * sizeof(__u16), &types);
- sctp_addto_chunk(retval, sizeof(ecap_param), &ecap_param);
+ if (asoc->ep->ecn_enable)
+ sctp_addto_chunk(retval, sizeof(ecap_param), &ecap_param);
/* Add the supported extensions parameter. Be nice and add this
* first before adding the parameters for the extensions themselves
@@ -1964,7 +1966,9 @@ static int sctp_process_hn_param(const struct sctp_association *asoc,
return 0;
}
-static int sctp_verify_ext_param(struct net *net, union sctp_params param)
+static int sctp_verify_ext_param(struct net *net,
+ const struct sctp_endpoint *ep,
+ union sctp_params param)
{
__u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
int have_asconf = 0;
@@ -1991,7 +1995,7 @@ static int sctp_verify_ext_param(struct net *net, union sctp_params param)
if (net->sctp.addip_noauth)
return 1;
- if (net->sctp.addip_enable && !have_auth && have_asconf)
+ if (ep->asconf_enable && !have_auth && have_asconf)
return 0;
return 1;
@@ -2001,7 +2005,6 @@ static void sctp_process_ext_param(struct sctp_association *asoc,
union sctp_params param)
{
__u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
- struct net *net = sock_net(asoc->base.sk);
int i;
for (i = 0; i < num_ext; i++) {
@@ -2023,7 +2026,7 @@ static void sctp_process_ext_param(struct sctp_association *asoc,
break;
case SCTP_CID_ASCONF:
case SCTP_CID_ASCONF_ACK:
- if (net->sctp.addip_enable)
+ if (asoc->ep->asconf_enable)
asoc->peer.asconf_capable = 1;
break;
case SCTP_CID_I_DATA:
@@ -2145,12 +2148,12 @@ static enum sctp_ierror sctp_verify_param(struct net *net,
break;
case SCTP_PARAM_SUPPORTED_EXT:
- if (!sctp_verify_ext_param(net, param))
+ if (!sctp_verify_ext_param(net, ep, param))
return SCTP_IERROR_ABORT;
break;
case SCTP_PARAM_SET_PRIMARY:
- if (net->sctp.addip_enable)
+ if (ep->asconf_enable)
break;
goto fallthrough;
@@ -2597,15 +2600,20 @@ do_addr_param:
break;
case SCTP_PARAM_ECN_CAPABLE:
- asoc->peer.ecn_capable = 1;
- break;
+ if (asoc->ep->ecn_enable) {
+ asoc->peer.ecn_capable = 1;
+ break;
+ }
+ /* Fall Through */
+ goto fall_through;
+
case SCTP_PARAM_ADAPTATION_LAYER_IND:
asoc->peer.adaptation_ind = ntohl(param.aind->adaptation_ind);
break;
case SCTP_PARAM_SET_PRIMARY:
- if (!net->sctp.addip_enable)
+ if (!ep->asconf_enable)
goto fall_through;
addr_param = param.v + sizeof(struct sctp_addip_param);
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 2c244b29a199..0c21c52fc408 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -3721,7 +3721,8 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net,
* is received unauthenticated it MUST be silently discarded as
* described in [I-D.ietf-tsvwg-sctp-auth].
*/
- if (!net->sctp.addip_noauth && !chunk->auth)
+ if (!asoc->peer.asconf_capable ||
+ (!net->sctp.addip_noauth && !chunk->auth))
return sctp_sf_discard_chunk(net, ep, asoc, type, arg,
commands);
@@ -3863,7 +3864,8 @@ enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net,
* is received unauthenticated it MUST be silently discarded as
* described in [I-D.ietf-tsvwg-sctp-auth].
*/
- if (!net->sctp.addip_noauth && !asconf_ack->auth)
+ if (!asoc->peer.asconf_capable ||
+ (!net->sctp.addip_noauth && !asconf_ack->auth))
return sctp_sf_discard_chunk(net, ep, asoc, type, arg,
commands);
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 61ed9c6e3be3..88ea87f4f0e7 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -976,26 +976,22 @@ static const struct sctp_sm_table_entry *sctp_chunk_event_lookup(
if (cid <= SCTP_CID_BASE_MAX)
return &chunk_event_table[cid][state];
- if (net->sctp.prsctp_enable) {
- if (cid == SCTP_CID_FWD_TSN || cid == SCTP_CID_I_FWD_TSN)
- return &prsctp_chunk_event_table[0][state];
- }
+ switch ((u16)cid) {
+ case SCTP_CID_FWD_TSN:
+ case SCTP_CID_I_FWD_TSN:
+ return &prsctp_chunk_event_table[0][state];
- if (net->sctp.addip_enable) {
- if (cid == SCTP_CID_ASCONF)
- return &addip_chunk_event_table[0][state];
+ case SCTP_CID_ASCONF:
+ return &addip_chunk_event_table[0][state];
- if (cid == SCTP_CID_ASCONF_ACK)
- return &addip_chunk_event_table[1][state];
- }
+ case SCTP_CID_ASCONF_ACK:
+ return &addip_chunk_event_table[1][state];
- if (net->sctp.reconf_enable)
- if (cid == SCTP_CID_RECONF)
- return &reconf_chunk_event_table[0][state];
+ case SCTP_CID_RECONF:
+ return &reconf_chunk_event_table[0][state];
- if (net->sctp.auth_enable) {
- if (cid == SCTP_CID_AUTH)
- return &auth_chunk_event_table[0][state];
+ case SCTP_CID_AUTH:
+ return &auth_chunk_event_table[0][state];
}
return &chunk_event_table_unknown[state];
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index b083d4e66230..939b8d2595bc 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -525,7 +525,6 @@ static int sctp_send_asconf_add_ip(struct sock *sk,
struct sockaddr *addrs,
int addrcnt)
{
- struct net *net = sock_net(sk);
struct sctp_sock *sp;
struct sctp_endpoint *ep;
struct sctp_association *asoc;
@@ -540,12 +539,12 @@ static int sctp_send_asconf_add_ip(struct sock *sk,
int i;
int retval = 0;
- if (!net->sctp.addip_enable)
- return retval;
-
sp = sctp_sk(sk);
ep = sp->ep;
+ if (!ep->asconf_enable)
+ return retval;
+
pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n",
__func__, sk, addrs, addrcnt);
@@ -728,7 +727,6 @@ static int sctp_send_asconf_del_ip(struct sock *sk,
struct sockaddr *addrs,
int addrcnt)
{
- struct net *net = sock_net(sk);
struct sctp_sock *sp;
struct sctp_endpoint *ep;
struct sctp_association *asoc;
@@ -744,12 +742,12 @@ static int sctp_send_asconf_del_ip(struct sock *sk,
int stored = 0;
chunk = NULL;
- if (!net->sctp.addip_enable)
- return retval;
-
sp = sctp_sk(sk);
ep = sp->ep;
+ if (!ep->asconf_enable)
+ return retval;
+
pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n",
__func__, sk, addrs, addrcnt);
@@ -1045,158 +1043,161 @@ out:
return err;
}
-/* __sctp_connect(struct sock* sk, struct sockaddr *kaddrs, int addrs_size)
- *
- * Common routine for handling connect() and sctp_connectx().
- * Connect will come in with just a single address.
- */
-static int __sctp_connect(struct sock *sk,
- struct sockaddr *kaddrs,
- int addrs_size, int flags,
- sctp_assoc_t *assoc_id)
+static int sctp_connect_new_asoc(struct sctp_endpoint *ep,
+ const union sctp_addr *daddr,
+ const struct sctp_initmsg *init,
+ struct sctp_transport **tp)
{
+ struct sctp_association *asoc;
+ struct sock *sk = ep->base.sk;
struct net *net = sock_net(sk);
- struct sctp_sock *sp;
- struct sctp_endpoint *ep;
- struct sctp_association *asoc = NULL;
- struct sctp_association *asoc2;
- struct sctp_transport *transport;
- union sctp_addr to;
enum sctp_scope scope;
- long timeo;
- int err = 0;
- int addrcnt = 0;
- int walk_size = 0;
- union sctp_addr *sa_addr = NULL;
- void *addr_buf;
- unsigned short port;
+ int err;
- sp = sctp_sk(sk);
- ep = sp->ep;
+ if (sctp_endpoint_is_peeled_off(ep, daddr))
+ return -EADDRNOTAVAIL;
- /* connect() cannot be done on a socket that is already in ESTABLISHED
- * state - UDP-style peeled off socket or a TCP-style socket that
- * is already connected.
- * It cannot be done even on a TCP-style listening socket.
- */
- if (sctp_sstate(sk, ESTABLISHED) || sctp_sstate(sk, CLOSING) ||
- (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))) {
- err = -EISCONN;
- goto out_free;
+ if (!ep->base.bind_addr.port) {
+ if (sctp_autobind(sk))
+ return -EAGAIN;
+ } else {
+ if (ep->base.bind_addr.port < inet_prot_sock(net) &&
+ !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
+ return -EACCES;
}
- /* Walk through the addrs buffer and count the number of addresses. */
- addr_buf = kaddrs;
- while (walk_size < addrs_size) {
- struct sctp_af *af;
-
- if (walk_size + sizeof(sa_family_t) > addrs_size) {
- err = -EINVAL;
- goto out_free;
- }
+ scope = sctp_scope(daddr);
+ asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
+ if (!asoc)
+ return -ENOMEM;
- sa_addr = addr_buf;
- af = sctp_get_af_specific(sa_addr->sa.sa_family);
+ err = sctp_assoc_set_bind_addr_from_ep(asoc, scope, GFP_KERNEL);
+ if (err < 0)
+ goto free;
- /* If the address family is not supported or if this address
- * causes the address buffer to overflow return EINVAL.
- */
- if (!af || (walk_size + af->sockaddr_len) > addrs_size) {
- err = -EINVAL;
- goto out_free;
- }
+ *tp = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL, SCTP_UNKNOWN);
+ if (!*tp) {
+ err = -ENOMEM;
+ goto free;
+ }
- port = ntohs(sa_addr->v4.sin_port);
+ if (!init)
+ return 0;
- /* Save current address so we can work with it */
- memcpy(&to, sa_addr, af->sockaddr_len);
+ if (init->sinit_num_ostreams) {
+ __u16 outcnt = init->sinit_num_ostreams;
- err = sctp_verify_addr(sk, &to, af->sockaddr_len);
+ asoc->c.sinit_num_ostreams = outcnt;
+ /* outcnt has been changed, need to re-init stream */
+ err = sctp_stream_init(&asoc->stream, outcnt, 0, GFP_KERNEL);
if (err)
- goto out_free;
+ goto free;
+ }
- /* Make sure the destination port is correctly set
- * in all addresses.
- */
- if (asoc && asoc->peer.port && asoc->peer.port != port) {
- err = -EINVAL;
- goto out_free;
- }
+ if (init->sinit_max_instreams)
+ asoc->c.sinit_max_instreams = init->sinit_max_instreams;
- /* Check if there already is a matching association on the
- * endpoint (other than the one created here).
- */
- asoc2 = sctp_endpoint_lookup_assoc(ep, &to, &transport);
- if (asoc2 && asoc2 != asoc) {
- if (asoc2->state >= SCTP_STATE_ESTABLISHED)
- err = -EISCONN;
- else
- err = -EALREADY;
- goto out_free;
- }
+ if (init->sinit_max_attempts)
+ asoc->max_init_attempts = init->sinit_max_attempts;
- /* If we could not find a matching association on the endpoint,
- * make sure that there is no peeled-off association matching
- * the peer address even on another socket.
- */
- if (sctp_endpoint_is_peeled_off(ep, &to)) {
- err = -EADDRNOTAVAIL;
- goto out_free;
- }
+ if (init->sinit_max_init_timeo)
+ asoc->max_init_timeo =
+ msecs_to_jiffies(init->sinit_max_init_timeo);
- if (!asoc) {
- /* If a bind() or sctp_bindx() is not called prior to
- * an sctp_connectx() call, the system picks an
- * ephemeral port and will choose an address set
- * equivalent to binding with a wildcard address.
- */
- if (!ep->base.bind_addr.port) {
- if (sctp_autobind(sk)) {
- err = -EAGAIN;
- goto out_free;
- }
- } else {
- /*
- * If an unprivileged user inherits a 1-many
- * style socket with open associations on a
- * privileged port, it MAY be permitted to
- * accept new associations, but it SHOULD NOT
- * be permitted to open new associations.
- */
- if (ep->base.bind_addr.port <
- inet_prot_sock(net) &&
- !ns_capable(net->user_ns,
- CAP_NET_BIND_SERVICE)) {
- err = -EACCES;
- goto out_free;
- }
- }
+ return 0;
+free:
+ sctp_association_free(asoc);
+ return err;
+}
- scope = sctp_scope(&to);
- asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
- if (!asoc) {
- err = -ENOMEM;
- goto out_free;
- }
+static int sctp_connect_add_peer(struct sctp_association *asoc,
+ union sctp_addr *daddr, int addr_len)
+{
+ struct sctp_endpoint *ep = asoc->ep;
+ struct sctp_association *old;
+ struct sctp_transport *t;
+ int err;
- err = sctp_assoc_set_bind_addr_from_ep(asoc, scope,
- GFP_KERNEL);
- if (err < 0) {
- goto out_free;
- }
+ err = sctp_verify_addr(ep->base.sk, daddr, addr_len);
+ if (err)
+ return err;
- }
+ old = sctp_endpoint_lookup_assoc(ep, daddr, &t);
+ if (old && old != asoc)
+ return old->state >= SCTP_STATE_ESTABLISHED ? -EISCONN
+ : -EALREADY;
+
+ if (sctp_endpoint_is_peeled_off(ep, daddr))
+ return -EADDRNOTAVAIL;
+
+ t = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL, SCTP_UNKNOWN);
+ if (!t)
+ return -ENOMEM;
- /* Prime the peer's transport structures. */
- transport = sctp_assoc_add_peer(asoc, &to, GFP_KERNEL,
- SCTP_UNKNOWN);
- if (!transport) {
- err = -ENOMEM;
+ return 0;
+}
+
+/* __sctp_connect(struct sock *sk, struct sockaddr *kaddrs, int addrs_size)
+ *
+ * Common routine for handling connect() and sctp_connectx().
+ * Connect will come in with just a single address.
+ */
+static int __sctp_connect(struct sock *sk, struct sockaddr *kaddrs,
+ int addrs_size, int flags, sctp_assoc_t *assoc_id)
+{
+ struct sctp_sock *sp = sctp_sk(sk);
+ struct sctp_endpoint *ep = sp->ep;
+ struct sctp_transport *transport;
+ struct sctp_association *asoc;
+ void *addr_buf = kaddrs;
+ union sctp_addr *daddr;
+ struct sctp_af *af;
+ int walk_size, err;
+ long timeo;
+
+ if (sctp_sstate(sk, ESTABLISHED) || sctp_sstate(sk, CLOSING) ||
+ (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)))
+ return -EISCONN;
+
+ daddr = addr_buf;
+ af = sctp_get_af_specific(daddr->sa.sa_family);
+ if (!af || af->sockaddr_len > addrs_size)
+ return -EINVAL;
+
+ err = sctp_verify_addr(sk, daddr, af->sockaddr_len);
+ if (err)
+ return err;
+
+ asoc = sctp_endpoint_lookup_assoc(ep, daddr, &transport);
+ if (asoc)
+ return asoc->state >= SCTP_STATE_ESTABLISHED ? -EISCONN
+ : -EALREADY;
+
+ err = sctp_connect_new_asoc(ep, daddr, NULL, &transport);
+ if (err)
+ return err;
+ asoc = transport->asoc;
+
+ addr_buf += af->sockaddr_len;
+ walk_size = af->sockaddr_len;
+ while (walk_size < addrs_size) {
+ err = -EINVAL;
+ if (walk_size + sizeof(sa_family_t) > addrs_size)
goto out_free;
- }
- addrcnt++;
- addr_buf += af->sockaddr_len;
+ daddr = addr_buf;
+ af = sctp_get_af_specific(daddr->sa.sa_family);
+ if (!af || af->sockaddr_len + walk_size > addrs_size)
+ goto out_free;
+
+ if (asoc->peer.port != ntohs(daddr->v4.sin_port))
+ goto out_free;
+
+ err = sctp_connect_add_peer(asoc, daddr, af->sockaddr_len);
+ if (err)
+ goto out_free;
+
+ addr_buf += af->sockaddr_len;
walk_size += af->sockaddr_len;
}
@@ -1209,40 +1210,25 @@ static int __sctp_connect(struct sock *sk,
goto out_free;
}
- err = sctp_primitive_ASSOCIATE(net, asoc, NULL);
- if (err < 0) {
+ err = sctp_primitive_ASSOCIATE(sock_net(sk), asoc, NULL);
+ if (err < 0)
goto out_free;
- }
/* Initialize sk's dport and daddr for getpeername() */
inet_sk(sk)->inet_dport = htons(asoc->peer.port);
- sp->pf->to_sk_daddr(sa_addr, sk);
+ sp->pf->to_sk_daddr(daddr, sk);
sk->sk_err = 0;
- timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
-
if (assoc_id)
*assoc_id = asoc->assoc_id;
- err = sctp_wait_for_connect(asoc, &timeo);
- /* Note: the asoc may be freed after the return of
- * sctp_wait_for_connect.
- */
-
- /* Don't free association on exit. */
- asoc = NULL;
+ timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
+ return sctp_wait_for_connect(asoc, &timeo);
out_free:
pr_debug("%s: took out_free path with asoc:%p kaddrs:%p err:%d\n",
__func__, asoc, kaddrs, err);
-
- if (asoc) {
- /* sctp_primitive_ASSOCIATE may have added this association
- * To the hash table, try to unhash it, just in case, its a noop
- * if it wasn't hashed so we're safe
- */
- sctp_association_free(asoc);
- }
+ sctp_association_free(asoc);
return err;
}
@@ -1312,7 +1298,8 @@ static int __sctp_setsockopt_connectx(struct sock *sk,
pr_debug("%s: sk:%p addrs:%p addrs_size:%d\n",
__func__, sk, addrs, addrs_size);
- if (unlikely(addrs_size <= 0))
+ /* make sure the 1st addr's sa_family is accessible later */
+ if (unlikely(addrs_size < sizeof(sa_family_t)))
return -EINVAL;
kaddrs = memdup_user(addrs, addrs_size);
@@ -1660,9 +1647,7 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
struct sctp_transport **tp)
{
struct sctp_endpoint *ep = sctp_sk(sk)->ep;
- struct net *net = sock_net(sk);
struct sctp_association *asoc;
- enum sctp_scope scope;
struct cmsghdr *cmsg;
__be32 flowinfo = 0;
struct sctp_af *af;
@@ -1677,20 +1662,6 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
sctp_sstate(sk, CLOSING)))
return -EADDRNOTAVAIL;
- if (sctp_endpoint_is_peeled_off(ep, daddr))
- return -EADDRNOTAVAIL;
-
- if (!ep->base.bind_addr.port) {
- if (sctp_autobind(sk))
- return -EAGAIN;
- } else {
- if (ep->base.bind_addr.port < inet_prot_sock(net) &&
- !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
- return -EACCES;
- }
-
- scope = sctp_scope(daddr);
-
/* Label connection socket for first association 1-to-many
* style for client sequence socket()->sendmsg(). This
* needs to be done before sctp_assoc_add_peer() as that will
@@ -1706,45 +1677,10 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
if (err < 0)
return err;
- asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
- if (!asoc)
- return -ENOMEM;
-
- if (sctp_assoc_set_bind_addr_from_ep(asoc, scope, GFP_KERNEL) < 0) {
- err = -ENOMEM;
- goto free;
- }
-
- if (cmsgs->init) {
- struct sctp_initmsg *init = cmsgs->init;
-
- if (init->sinit_num_ostreams) {
- __u16 outcnt = init->sinit_num_ostreams;
-
- asoc->c.sinit_num_ostreams = outcnt;
- /* outcnt has been changed, need to re-init stream */
- err = sctp_stream_init(&asoc->stream, outcnt, 0,
- GFP_KERNEL);
- if (err)
- goto free;
- }
-
- if (init->sinit_max_instreams)
- asoc->c.sinit_max_instreams = init->sinit_max_instreams;
-
- if (init->sinit_max_attempts)
- asoc->max_init_attempts = init->sinit_max_attempts;
-
- if (init->sinit_max_init_timeo)
- asoc->max_init_timeo =
- msecs_to_jiffies(init->sinit_max_init_timeo);
- }
-
- *tp = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL, SCTP_UNKNOWN);
- if (!*tp) {
- err = -ENOMEM;
- goto free;
- }
+ err = sctp_connect_new_asoc(ep, daddr, cmsgs->init, tp);
+ if (err)
+ return err;
+ asoc = (*tp)->asoc;
if (!cmsgs->addrs_msg)
return 0;
@@ -1754,8 +1690,6 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
/* sendv addr list parse */
for_each_cmsghdr(cmsg, cmsgs->addrs_msg) {
- struct sctp_transport *transport;
- struct sctp_association *old;
union sctp_addr _daddr;
int dlen;
@@ -1789,30 +1723,10 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
daddr->v6.sin6_port = htons(asoc->peer.port);
memcpy(&daddr->v6.sin6_addr, CMSG_DATA(cmsg), dlen);
}
- err = sctp_verify_addr(sk, daddr, sizeof(*daddr));
- if (err)
- goto free;
-
- old = sctp_endpoint_lookup_assoc(ep, daddr, &transport);
- if (old && old != asoc) {
- if (old->state >= SCTP_STATE_ESTABLISHED)
- err = -EISCONN;
- else
- err = -EALREADY;
- goto free;
- }
- if (sctp_endpoint_is_peeled_off(ep, daddr)) {
- err = -EADDRNOTAVAIL;
- goto free;
- }
-
- transport = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL,
- SCTP_UNKNOWN);
- if (!transport) {
- err = -ENOMEM;
+ err = sctp_connect_add_peer(asoc, daddr, sizeof(*daddr));
+ if (err)
goto free;
- }
}
return 0;
@@ -3415,7 +3329,6 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optval,
unsigned int optlen)
{
- struct net *net = sock_net(sk);
struct sctp_sock *sp;
struct sctp_association *asoc = NULL;
struct sctp_setpeerprim prim;
@@ -3425,7 +3338,7 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva
sp = sctp_sk(sk);
- if (!net->sctp.addip_enable)
+ if (!sp->ep->asconf_enable)
return -EPERM;
if (optlen != sizeof(struct sctp_setpeerprim))
@@ -3775,9 +3688,6 @@ static int sctp_setsockopt_auth_key(struct sock *sk,
struct sctp_association *asoc;
int ret = -EINVAL;
- if (!ep->auth_enable)
- return -EACCES;
-
if (optlen <= sizeof(struct sctp_authkey))
return -EINVAL;
/* authkey->sca_keylength is u16, so optlen can't be bigger than
@@ -3844,9 +3754,6 @@ static int sctp_setsockopt_active_key(struct sock *sk,
struct sctp_authkeyid val;
int ret = 0;
- if (!ep->auth_enable)
- return -EACCES;
-
if (optlen != sizeof(struct sctp_authkeyid))
return -EINVAL;
if (copy_from_user(&val, optval, optlen))
@@ -3898,9 +3805,6 @@ static int sctp_setsockopt_del_key(struct sock *sk,
struct sctp_authkeyid val;
int ret = 0;
- if (!ep->auth_enable)
- return -EACCES;
-
if (optlen != sizeof(struct sctp_authkeyid))
return -EINVAL;
if (copy_from_user(&val, optval, optlen))
@@ -3951,9 +3855,6 @@ static int sctp_setsockopt_deactivate_key(struct sock *sk, char __user *optval,
struct sctp_authkeyid val;
int ret = 0;
- if (!ep->auth_enable)
- return -EACCES;
-
if (optlen != sizeof(struct sctp_authkeyid))
return -EINVAL;
if (copy_from_user(&val, optval, optlen))
@@ -4584,6 +4485,110 @@ static int sctp_setsockopt_event(struct sock *sk, char __user *optval,
return retval;
}
+static int sctp_setsockopt_asconf_supported(struct sock *sk,
+ char __user *optval,
+ unsigned int optlen)
+{
+ struct sctp_assoc_value params;
+ struct sctp_association *asoc;
+ struct sctp_endpoint *ep;
+ int retval = -EINVAL;
+
+ if (optlen != sizeof(params))
+ goto out;
+
+ if (copy_from_user(&params, optval, optlen)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+ sctp_style(sk, UDP))
+ goto out;
+
+ ep = sctp_sk(sk)->ep;
+ ep->asconf_enable = !!params.assoc_value;
+
+ if (ep->asconf_enable && ep->auth_enable) {
+ sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF);
+ sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF_ACK);
+ }
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
+static int sctp_setsockopt_auth_supported(struct sock *sk,
+ char __user *optval,
+ unsigned int optlen)
+{
+ struct sctp_assoc_value params;
+ struct sctp_association *asoc;
+ struct sctp_endpoint *ep;
+ int retval = -EINVAL;
+
+ if (optlen != sizeof(params))
+ goto out;
+
+ if (copy_from_user(&params, optval, optlen)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+ sctp_style(sk, UDP))
+ goto out;
+
+ ep = sctp_sk(sk)->ep;
+ if (params.assoc_value) {
+ retval = sctp_auth_init(ep, GFP_KERNEL);
+ if (retval)
+ goto out;
+ if (ep->asconf_enable) {
+ sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF);
+ sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF_ACK);
+ }
+ }
+
+ ep->auth_enable = !!params.assoc_value;
+ retval = 0;
+
+out:
+ return retval;
+}
+
+static int sctp_setsockopt_ecn_supported(struct sock *sk,
+ char __user *optval,
+ unsigned int optlen)
+{
+ struct sctp_assoc_value params;
+ struct sctp_association *asoc;
+ int retval = -EINVAL;
+
+ if (optlen != sizeof(params))
+ goto out;
+
+ if (copy_from_user(&params, optval, optlen)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+ sctp_style(sk, UDP))
+ goto out;
+
+ sctp_sk(sk)->ep->ecn_enable = !!params.assoc_value;
+ retval = 0;
+
+out:
+ return retval;
+}
+
/* API 6.2 setsockopt(), getsockopt()
*
* Applications use setsockopt() and getsockopt() to set or retrieve
@@ -4784,6 +4789,15 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
case SCTP_EVENT:
retval = sctp_setsockopt_event(sk, optval, optlen);
break;
+ case SCTP_ASCONF_SUPPORTED:
+ retval = sctp_setsockopt_asconf_supported(sk, optval, optlen);
+ break;
+ case SCTP_AUTH_SUPPORTED:
+ retval = sctp_setsockopt_auth_supported(sk, optval, optlen);
+ break;
+ case SCTP_ECN_SUPPORTED:
+ retval = sctp_setsockopt_ecn_supported(sk, optval, optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
@@ -6921,9 +6935,6 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len,
struct sctp_authkeyid val;
struct sctp_association *asoc;
- if (!ep->auth_enable)
- return -EACCES;
-
if (len < sizeof(struct sctp_authkeyid))
return -EINVAL;
@@ -6935,10 +6946,15 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len,
if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP))
return -EINVAL;
- if (asoc)
+ if (asoc) {
+ if (!asoc->peer.auth_capable)
+ return -EACCES;
val.scact_keynumber = asoc->active_key_id;
- else
+ } else {
+ if (!ep->auth_enable)
+ return -EACCES;
val.scact_keynumber = ep->active_key_id;
+ }
if (put_user(len, optlen))
return -EFAULT;
@@ -6951,7 +6967,6 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len,
static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
char __user *optval, int __user *optlen)
{
- struct sctp_endpoint *ep = sctp_sk(sk)->ep;
struct sctp_authchunks __user *p = (void __user *)optval;
struct sctp_authchunks val;
struct sctp_association *asoc;
@@ -6959,9 +6974,6 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
u32 num_chunks = 0;
char __user *to;
- if (!ep->auth_enable)
- return -EACCES;
-
if (len < sizeof(struct sctp_authchunks))
return -EINVAL;
@@ -6973,6 +6985,9 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
if (!asoc)
return -EINVAL;
+ if (!asoc->peer.auth_capable)
+ return -EACCES;
+
ch = asoc->peer.peer_chunks;
if (!ch)
goto num;
@@ -7004,9 +7019,6 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
u32 num_chunks = 0;
char __user *to;
- if (!ep->auth_enable)
- return -EACCES;
-
if (len < sizeof(struct sctp_authchunks))
return -EINVAL;
@@ -7019,8 +7031,15 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
sctp_style(sk, UDP))
return -EINVAL;
- ch = asoc ? (struct sctp_chunks_param *)asoc->c.auth_chunks
- : ep->auth_chunk_list;
+ if (asoc) {
+ if (!asoc->peer.auth_capable)
+ return -EACCES;
+ ch = (struct sctp_chunks_param *)asoc->c.auth_chunks;
+ } else {
+ if (!ep->auth_enable)
+ return -EACCES;
+ ch = ep->auth_chunk_list;
+ }
if (!ch)
goto num;
@@ -7764,6 +7783,123 @@ static int sctp_getsockopt_event(struct sock *sk, int len, char __user *optval,
return 0;
}
+static int sctp_getsockopt_asconf_supported(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_assoc_value params;
+ struct sctp_association *asoc;
+ int retval = -EFAULT;
+
+ if (len < sizeof(params)) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ len = sizeof(params);
+ if (copy_from_user(&params, optval, len))
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+ sctp_style(sk, UDP)) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ params.assoc_value = asoc ? asoc->peer.asconf_capable
+ : sctp_sk(sk)->ep->asconf_enable;
+
+ if (put_user(len, optlen))
+ goto out;
+
+ if (copy_to_user(optval, &params, len))
+ goto out;
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
+static int sctp_getsockopt_auth_supported(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_assoc_value params;
+ struct sctp_association *asoc;
+ int retval = -EFAULT;
+
+ if (len < sizeof(params)) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ len = sizeof(params);
+ if (copy_from_user(&params, optval, len))
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+ sctp_style(sk, UDP)) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ params.assoc_value = asoc ? asoc->peer.auth_capable
+ : sctp_sk(sk)->ep->auth_enable;
+
+ if (put_user(len, optlen))
+ goto out;
+
+ if (copy_to_user(optval, &params, len))
+ goto out;
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
+static int sctp_getsockopt_ecn_supported(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_assoc_value params;
+ struct sctp_association *asoc;
+ int retval = -EFAULT;
+
+ if (len < sizeof(params)) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ len = sizeof(params);
+ if (copy_from_user(&params, optval, len))
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+ sctp_style(sk, UDP)) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ params.assoc_value = asoc ? asoc->peer.ecn_capable
+ : sctp_sk(sk)->ep->ecn_enable;
+
+ if (put_user(len, optlen))
+ goto out;
+
+ if (copy_to_user(optval, &params, len))
+ goto out;
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
static int sctp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
@@ -7965,6 +8101,17 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
case SCTP_EVENT:
retval = sctp_getsockopt_event(sk, len, optval, optlen);
break;
+ case SCTP_ASCONF_SUPPORTED:
+ retval = sctp_getsockopt_asconf_supported(sk, len, optval,
+ optlen);
+ break;
+ case SCTP_AUTH_SUPPORTED:
+ retval = sctp_getsockopt_auth_supported(sk, len, optval,
+ optlen);
+ break;
+ case SCTP_ECN_SUPPORTED:
+ retval = sctp_getsockopt_ecn_supported(sk, len, optval, optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
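For completeness, a minimal user space sketch of the new per-endpoint switches added above. It assumes a uapi sctp.h that already exposes SCTP_ECN_SUPPORTED (and the matching ASCONF/AUTH options) plus struct sctp_assoc_value, and it only illustrates the intended usage rather than a tested program.

	#include <stdio.h>
	#include <string.h>
	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <netinet/sctp.h>

	int main(void)
	{
		struct sctp_assoc_value val;
		socklen_t len = sizeof(val);
		int fd;

		fd = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP);
		if (fd < 0)
			return 1;

		/* Turn off ECN negotiation for all future associations on this
		 * endpoint; SCTP_ASCONF_SUPPORTED and SCTP_AUTH_SUPPORTED
		 * follow the same sctp_assoc_value pattern.
		 */
		memset(&val, 0, sizeof(val));
		val.assoc_id = SCTP_FUTURE_ASSOC;
		val.assoc_value = 0;
		if (setsockopt(fd, IPPROTO_SCTP, SCTP_ECN_SUPPORTED,
			       &val, sizeof(val)))
			perror("setsockopt SCTP_ECN_SUPPORTED");

		if (!getsockopt(fd, IPPROTO_SCTP, SCTP_ECN_SUPPORTED,
				&val, &len))
			printf("ecn_supported: %u\n", val.assoc_value);

		return 0;
	}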
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 1250751bca1b..238cf1737576 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -278,6 +278,13 @@ static struct ctl_table sctp_net_table[] = {
.proc_handler = proc_dointvec,
},
{
+ .procname = "ecn_enable",
+ .data = &init_net.sctp.ecn_enable,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
.procname = "addr_scope_policy",
.data = &init_net.sctp.scope_policy,
.maxlen = sizeof(int),
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index e2f8e369cd08..7235a6032671 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -43,8 +43,8 @@ static struct sctp_transport *sctp_transport_init(struct net *net,
gfp_t gfp)
{
/* Copy in the address. */
- peer->ipaddr = *addr;
peer->af_specific = sctp_get_af_specific(addr->sa.sa_family);
+ memcpy(&peer->ipaddr, addr, peer->af_specific->sockaddr_len);
memset(&peer->saddr, 0, sizeof(union sctp_addr));
peer->sack_generation = 0;
diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c
index 550fdf18d3b3..3b7f721c023b 100644
--- a/net/sunrpc/auth_gss/gss_krb5_keys.c
+++ b/net/sunrpc/auth_gss/gss_krb5_keys.c
@@ -228,14 +228,11 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
ret = 0;
err_free_raw:
- memset(rawkey, 0, keybytes);
- kfree(rawkey);
+ kzfree(rawkey);
err_free_out:
- memset(outblockdata, 0, blocksize);
- kfree(outblockdata);
+ kzfree(outblockdata);
err_free_in:
- memset(inblockdata, 0, blocksize);
- kfree(inblockdata);
+ kzfree(inblockdata);
err_free_cipher:
crypto_free_sync_skcipher(cipher);
err_return:
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 1336f3cdad38..6ef1abdd525f 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -185,7 +185,7 @@ static void tipc_bcbase_xmit(struct net *net, struct sk_buff_head *xmitq)
}
/* We have to transmit across all bearers */
- skb_queue_head_init(&_xmitq);
+ __skb_queue_head_init(&_xmitq);
for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) {
if (!bb->dests[bearer_id])
continue;
@@ -256,7 +256,7 @@ static int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts,
struct sk_buff_head xmitq;
int rc = 0;
- skb_queue_head_init(&xmitq);
+ __skb_queue_head_init(&xmitq);
tipc_bcast_lock(net);
if (tipc_link_bc_peers(l))
rc = tipc_link_xmit(l, pkts, &xmitq);
@@ -286,7 +286,7 @@ static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts,
u32 dnode, selector;
selector = msg_link_selector(buf_msg(skb_peek(pkts)));
- skb_queue_head_init(&_pkts);
+ __skb_queue_head_init(&_pkts);
list_for_each_entry_safe(dst, tmp, &dests->list, list) {
dnode = dst->node;
@@ -344,7 +344,7 @@ static int tipc_mcast_send_sync(struct net *net, struct sk_buff *skb,
msg_set_size(_hdr, MCAST_H_SIZE);
msg_set_is_rcast(_hdr, !msg_is_rcast(hdr));
- skb_queue_head_init(&tmpq);
+ __skb_queue_head_init(&tmpq);
__skb_queue_tail(&tmpq, _skb);
if (method->rcast)
tipc_bcast_xmit(net, &tmpq, cong_link_cnt);
@@ -378,7 +378,7 @@ int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts,
int rc = 0;
skb_queue_head_init(&inputq);
- skb_queue_head_init(&localq);
+ __skb_queue_head_init(&localq);
/* Clone packets before they are consumed by next call */
if (dests->local && !tipc_msg_reassemble(pkts, &localq)) {
@@ -406,8 +406,10 @@ int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts,
rc = tipc_bcast_xmit(net, pkts, cong_link_cnt);
}
- if (dests->local)
+ if (dests->local) {
+ tipc_loopback_trace(net, &localq);
tipc_sk_mcast_rcv(net, &localq, &inputq);
+ }
exit:
/* This queue should normally be empty by now */
__skb_queue_purge(pkts);
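The skb_queue_head_init() to __skb_queue_head_init() conversions in this hunk (and in the tipc group/link hunks further down) rely on the fact that the double-underscore initializer skips the queue spinlock, which is only safe for queues manipulated by a single context using the matching lock-free __skb_* helpers. A small sketch of that pattern; the helper name is hypothetical.

	#include <linux/skbuff.h>

	/* Illustrative only: an on-stack queue touched by one context does
	 * not need the spinlock that skb_queue_head_init() would set up.
	 */
	static void example_build_local_queue(struct sk_buff *skb)
	{
		struct sk_buff_head xmitq;

		__skb_queue_head_init(&xmitq);
		__skb_queue_tail(&xmitq, skb);

		/* ... hand xmitq to a consumer ... */

		__skb_queue_purge(&xmitq);	/* lock-free variant as well */
	}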
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index a809c0ec8d15..0214aa1c4427 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -389,6 +389,11 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
dev_put(dev);
return -EINVAL;
}
+ if (dev == net->loopback_dev) {
+ dev_put(dev);
+ pr_info("Enabling <%s> not permitted\n", b->name);
+ return -EINVAL;
+ }
/* Autoconfigure own node identity if needed */
if (!tipc_own_id(net) && hwaddr_len <= NODE_ID_LEN) {
@@ -674,6 +679,65 @@ void tipc_bearer_stop(struct net *net)
}
}
+void tipc_clone_to_loopback(struct net *net, struct sk_buff_head *pkts)
+{
+ struct net_device *dev = net->loopback_dev;
+ struct sk_buff *skb, *_skb;
+ int exp;
+
+ skb_queue_walk(pkts, _skb) {
+ skb = pskb_copy(_skb, GFP_ATOMIC);
+ if (!skb)
+ continue;
+
+ exp = SKB_DATA_ALIGN(dev->hard_header_len - skb_headroom(skb));
+ if (exp > 0 && pskb_expand_head(skb, exp, 0, GFP_ATOMIC)) {
+ kfree_skb(skb);
+ continue;
+ }
+
+ skb_reset_network_header(skb);
+ dev_hard_header(skb, dev, ETH_P_TIPC, dev->dev_addr,
+ dev->dev_addr, skb->len);
+ skb->dev = dev;
+ skb->pkt_type = PACKET_HOST;
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ skb->protocol = eth_type_trans(skb, dev);
+ netif_rx_ni(skb);
+ }
+}
+
+static int tipc_loopback_rcv_pkt(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt, struct net_device *od)
+{
+ consume_skb(skb);
+ return NET_RX_SUCCESS;
+}
+
+int tipc_attach_loopback(struct net *net)
+{
+ struct net_device *dev = net->loopback_dev;
+ struct tipc_net *tn = tipc_net(net);
+
+ if (!dev)
+ return -ENODEV;
+
+ dev_hold(dev);
+ tn->loopback_pt.dev = dev;
+ tn->loopback_pt.type = htons(ETH_P_TIPC);
+ tn->loopback_pt.func = tipc_loopback_rcv_pkt;
+ dev_add_pack(&tn->loopback_pt);
+ return 0;
+}
+
+void tipc_detach_loopback(struct net *net)
+{
+ struct tipc_net *tn = tipc_net(net);
+
+ dev_remove_pack(&tn->loopback_pt);
+ dev_put(net->loopback_dev);
+}
+
/* Caller should hold rtnl_lock to protect the bearer */
static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg,
struct tipc_bearer *bearer, int nlflags)
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 7f4c569594a5..ea0f3c49cbed 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -232,6 +232,16 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id,
struct tipc_media_addr *dst);
void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id,
struct sk_buff_head *xmitq);
+void tipc_clone_to_loopback(struct net *net, struct sk_buff_head *pkts);
+int tipc_attach_loopback(struct net *net);
+void tipc_detach_loopback(struct net *net);
+
+static inline void tipc_loopback_trace(struct net *net,
+ struct sk_buff_head *pkts)
+{
+ if (unlikely(dev_nit_active(net->loopback_dev)))
+ tipc_clone_to_loopback(net, pkts);
+}
/* check if device MTU is too low for tipc headers */
static inline bool tipc_mtu_bad(struct net_device *dev, unsigned int reserve)
diff --git a/net/tipc/core.c b/net/tipc/core.c
index c8370722f0bb..23cb379a93d6 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -82,6 +82,10 @@ static int __net_init tipc_init_net(struct net *net)
if (err)
goto out_bclink;
+ err = tipc_attach_loopback(net);
+ if (err)
+ goto out_bclink;
+
return 0;
out_bclink:
@@ -94,6 +98,7 @@ out_sk_rht:
static void __net_exit tipc_exit_net(struct net *net)
{
+ tipc_detach_loopback(net);
tipc_net_stop(net);
tipc_bcast_stop(net);
tipc_nametbl_stop(net);
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 7a68e1b6a066..60d829581068 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -125,6 +125,9 @@ struct tipc_net {
/* Cluster capabilities */
u16 capabilities;
+
+ /* Tracing of node internal messages */
+ struct packet_type loopback_pt;
};
static inline struct tipc_net *tipc_net(struct net *net)
diff --git a/net/tipc/group.c b/net/tipc/group.c
index 5f98d38bcf08..89257e2a980d 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -199,7 +199,7 @@ void tipc_group_join(struct net *net, struct tipc_group *grp, int *sk_rcvbuf)
struct tipc_member *m, *tmp;
struct sk_buff_head xmitq;
- skb_queue_head_init(&xmitq);
+ __skb_queue_head_init(&xmitq);
rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) {
tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, &xmitq);
tipc_group_update_member(m, 0);
@@ -435,7 +435,7 @@ bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport,
return true;
if (state == MBR_PENDING && adv == ADV_IDLE)
return true;
- skb_queue_head_init(&xmitq);
+ __skb_queue_head_init(&xmitq);
tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, &xmitq);
tipc_node_distr_xmit(grp->net, &xmitq);
return true;
diff --git a/net/tipc/link.c b/net/tipc/link.c
index c2c5c53cad22..6cc75ffd9e2c 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -176,6 +176,7 @@ struct tipc_link {
/* Fragmentation/reassembly */
struct sk_buff *reasm_buf;
+ struct sk_buff *reasm_tnlmsg;
/* Broadcast */
u16 ackers;
@@ -849,18 +850,31 @@ static int link_schedule_user(struct tipc_link *l, struct tipc_msg *hdr)
*/
static void link_prepare_wakeup(struct tipc_link *l)
{
+ struct sk_buff_head *wakeupq = &l->wakeupq;
+ struct sk_buff_head *inputq = l->inputq;
struct sk_buff *skb, *tmp;
- int imp, i = 0;
+ struct sk_buff_head tmpq;
+ int avail[5] = {0,};
+ int imp = 0;
+
+ __skb_queue_head_init(&tmpq);
+
+ for (; imp <= TIPC_SYSTEM_IMPORTANCE; imp++)
+ avail[imp] = l->backlog[imp].limit - l->backlog[imp].len;
- skb_queue_walk_safe(&l->wakeupq, skb, tmp) {
+ skb_queue_walk_safe(wakeupq, skb, tmp) {
imp = TIPC_SKB_CB(skb)->chain_imp;
- if (l->backlog[imp].len < l->backlog[imp].limit) {
- skb_unlink(skb, &l->wakeupq);
- skb_queue_tail(l->inputq, skb);
- } else if (i++ > 10) {
- break;
- }
+ if (avail[imp] <= 0)
+ continue;
+ avail[imp]--;
+ __skb_unlink(skb, wakeupq);
+ __skb_queue_tail(&tmpq, skb);
}
+
+ spin_lock_bh(&inputq->lock);
+ skb_queue_splice_tail(&tmpq, inputq);
+ spin_unlock_bh(&inputq->lock);
+
}
void tipc_link_reset(struct tipc_link *l)
@@ -893,8 +907,10 @@ void tipc_link_reset(struct tipc_link *l)
l->backlog[TIPC_CRITICAL_IMPORTANCE].len = 0;
l->backlog[TIPC_SYSTEM_IMPORTANCE].len = 0;
kfree_skb(l->reasm_buf);
+ kfree_skb(l->reasm_tnlmsg);
kfree_skb(l->failover_reasm_skb);
l->reasm_buf = NULL;
+ l->reasm_tnlmsg = NULL;
l->failover_reasm_skb = NULL;
l->rcv_unacked = 0;
l->snd_nxt = 1;
@@ -936,7 +952,10 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
int rc = 0;
if (unlikely(msg_size(hdr) > mtu)) {
- skb_queue_purge(list);
+ pr_warn("Too large msg, purging xmit list %d %d %d %d %d!\n",
+ skb_queue_len(list), msg_user(hdr),
+ msg_type(hdr), msg_size(hdr), mtu);
+ __skb_queue_purge(list);
return -EMSGSIZE;
}
@@ -965,7 +984,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
if (likely(skb_queue_len(transmq) < maxwin)) {
_skb = skb_clone(skb, GFP_ATOMIC);
if (!_skb) {
- skb_queue_purge(list);
+ __skb_queue_purge(list);
return -ENOBUFS;
}
__skb_dequeue(list);
@@ -1238,6 +1257,7 @@ static int tipc_link_tnl_rcv(struct tipc_link *l, struct sk_buff *skb,
struct sk_buff_head *inputq)
{
struct sk_buff **reasm_skb = &l->failover_reasm_skb;
+ struct sk_buff **reasm_tnlmsg = &l->reasm_tnlmsg;
struct sk_buff_head *fdefq = &l->failover_deferdq;
struct tipc_msg *hdr = buf_msg(skb);
struct sk_buff *iskb;
@@ -1245,40 +1265,56 @@ static int tipc_link_tnl_rcv(struct tipc_link *l, struct sk_buff *skb,
int rc = 0;
u16 seqno;
- /* SYNCH_MSG */
- if (msg_type(hdr) == SYNCH_MSG)
- goto drop;
+ if (msg_type(hdr) == SYNCH_MSG) {
+ kfree_skb(skb);
+ return 0;
+ }
- /* FAILOVER_MSG */
- if (!tipc_msg_extract(skb, &iskb, &ipos)) {
- pr_warn_ratelimited("Cannot extract FAILOVER_MSG, defq: %d\n",
- skb_queue_len(fdefq));
- return rc;
+ /* Not a fragment? */
+ if (likely(!msg_nof_fragms(hdr))) {
+ if (unlikely(!tipc_msg_extract(skb, &iskb, &ipos))) {
+ pr_warn_ratelimited("Unable to extract msg, defq: %d\n",
+ skb_queue_len(fdefq));
+ return 0;
+ }
+ kfree_skb(skb);
+ } else {
+ /* Set fragment type for buf_append */
+ if (msg_fragm_no(hdr) == 1)
+ msg_set_type(hdr, FIRST_FRAGMENT);
+ else if (msg_fragm_no(hdr) < msg_nof_fragms(hdr))
+ msg_set_type(hdr, FRAGMENT);
+ else
+ msg_set_type(hdr, LAST_FRAGMENT);
+
+ if (!tipc_buf_append(reasm_tnlmsg, &skb)) {
+ /* Successful but incomplete reassembly? */
+ if (*reasm_tnlmsg || link_is_bc_rcvlink(l))
+ return 0;
+ pr_warn_ratelimited("Unable to reassemble tunnel msg\n");
+ return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
+ }
+ iskb = skb;
}
do {
seqno = buf_seqno(iskb);
-
if (unlikely(less(seqno, l->drop_point))) {
kfree_skb(iskb);
continue;
}
-
if (unlikely(seqno != l->drop_point)) {
__tipc_skb_queue_sorted(fdefq, seqno, iskb);
continue;
}
l->drop_point++;
-
if (!tipc_data_input(l, iskb, inputq))
rc |= tipc_link_input(l, iskb, inputq, reasm_skb);
if (unlikely(rc))
break;
} while ((iskb = __tipc_skb_dequeue(fdefq, l->drop_point)));
-drop:
- kfree_skb(skb);
return rc;
}
@@ -1644,7 +1680,7 @@ void tipc_link_create_dummy_tnl_msg(struct tipc_link *l,
struct sk_buff *skb;
u32 dnode = l->addr;
- skb_queue_head_init(&tnlq);
+ __skb_queue_head_init(&tnlq);
skb = tipc_msg_create(TUNNEL_PROTOCOL, FAILOVER_MSG,
INT_H_SIZE, BASIC_H_SIZE,
dnode, onode, 0, 0, 0);
@@ -1675,14 +1711,18 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
struct sk_buff *skb, *tnlskb;
struct tipc_msg *hdr, tnlhdr;
struct sk_buff_head *queue = &l->transmq;
- struct sk_buff_head tmpxq, tnlq;
+ struct sk_buff_head tmpxq, tnlq, frags;
u16 pktlen, pktcnt, seqno = l->snd_nxt;
+ bool pktcnt_need_update = false;
+ u16 syncpt;
+ int rc;
if (!tnl)
return;
- skb_queue_head_init(&tnlq);
- skb_queue_head_init(&tmpxq);
+ __skb_queue_head_init(&tnlq);
+ __skb_queue_head_init(&tmpxq);
+ __skb_queue_head_init(&frags);
/* At least one packet required for safe algorithm => add dummy */
skb = tipc_msg_create(TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG,
@@ -1692,10 +1732,35 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
pr_warn("%sunable to create tunnel packet\n", link_co_err);
return;
}
- skb_queue_tail(&tnlq, skb);
+ __skb_queue_tail(&tnlq, skb);
tipc_link_xmit(l, &tnlq, &tmpxq);
__skb_queue_purge(&tmpxq);
+ /* Link Synching:
+ * From now on, send only a single ("dummy") SYNCH message
+ * to peer. The SYNCH message does not contain any data, just
+ * a header conveying the synch point to the peer.
+ */
+ if (mtyp == SYNCH_MSG && (tnl->peer_caps & TIPC_TUNNEL_ENHANCED)) {
+ tnlskb = tipc_msg_create(TUNNEL_PROTOCOL, SYNCH_MSG,
+ INT_H_SIZE, 0, l->addr,
+ tipc_own_addr(l->net),
+ 0, 0, 0);
+ if (!tnlskb) {
+ pr_warn("%sunable to create dummy SYNCH_MSG\n",
+ link_co_err);
+ return;
+ }
+
+ hdr = buf_msg(tnlskb);
+ syncpt = l->snd_nxt + skb_queue_len(&l->backlogq) - 1;
+ msg_set_syncpt(hdr, syncpt);
+ msg_set_bearer_id(hdr, l->peer_bearer_id);
+ __skb_queue_tail(&tnlq, tnlskb);
+ tipc_link_xmit(tnl, &tnlq, xmitq);
+ return;
+ }
+
/* Initialize reusable tunnel packet header */
tipc_msg_init(tipc_own_addr(l->net), &tnlhdr, TUNNEL_PROTOCOL,
mtyp, INT_H_SIZE, l->addr);
@@ -1713,6 +1778,39 @@ tnl:
if (queue == &l->backlogq)
msg_set_seqno(hdr, seqno++);
pktlen = msg_size(hdr);
+
+ /* Tunnel link MTU is not large enough? This could be
+ * due to:
+ * 1) The link MTU has just changed or was set differently;
+ * 2) A FAILOVER on top of a SYNCH message
+ *
+ * The 2nd case should not happen if peer supports
+ * TIPC_TUNNEL_ENHANCED
+ */
+ if (pktlen > tnl->mtu - INT_H_SIZE) {
+ if (mtyp == FAILOVER_MSG &&
+ (tnl->peer_caps & TIPC_TUNNEL_ENHANCED)) {
+ rc = tipc_msg_fragment(skb, &tnlhdr, tnl->mtu,
+ &frags);
+ if (rc) {
+ pr_warn("%sunable to frag msg: rc %d\n",
+ link_co_err, rc);
+ return;
+ }
+ pktcnt += skb_queue_len(&frags) - 1;
+ pktcnt_need_update = true;
+ skb_queue_splice_tail_init(&frags, &tnlq);
+ continue;
+ }
+ /* Unfortunately, the peer does not support TIPC_TUNNEL_ENHANCED
+ * => just warn and return!
+ */
+ pr_warn_ratelimited("%stoo large msg <%d, %d>: %d!\n",
+ link_co_err, msg_user(hdr),
+ msg_type(hdr), msg_size(hdr));
+ return;
+ }
+
msg_set_size(&tnlhdr, pktlen + INT_H_SIZE);
tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE, GFP_ATOMIC);
if (!tnlskb) {
@@ -1728,6 +1826,12 @@ tnl:
goto tnl;
}
+ if (pktcnt_need_update)
+ skb_queue_walk(&tnlq, skb) {
+ hdr = buf_msg(skb);
+ msg_set_msgcnt(hdr, pktcnt);
+ }
+
tipc_link_xmit(tnl, &tnlq, xmitq);
if (mtyp == FAILOVER_MSG) {
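To make the new "single dummy SYNCH" path above easier to follow, here is a minimal user-space sketch of how a receiver could derive the synch point depending on the TIPC_TUNNEL_ENHANCED capability, mirroring the node.c change later in this patch. The struct and field names are hypothetical stand-ins for the real msg.h accessors.

    #include <stdint.h>
    #include <stdbool.h>

    struct synch_hdr {
            uint16_t syncpt;       /* word 9, bits 16..31 in the real header */
            uint16_t inner_seqno;  /* seqno of the bundled inner header      */
            uint16_t msgcnt;       /* number of tunnelled packets            */
    };

    /* Enhanced peers read the explicit sync point; legacy peers derive it
     * from the first tunnelled packet's inner seqno and the packet count.
     */
    static uint16_t derive_syncpt(const struct synch_hdr *h, bool peer_enhanced)
    {
            if (peer_enhanced)
                    return h->syncpt;
            return h->inner_seqno + h->msgcnt - 1;
    }

On the sending side the same value is computed as snd_nxt plus the backlog queue length minus one, i.e. the sequence number of the last packet already committed for transmission.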
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index f48e5857210f..e6d49cdc61b4 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -244,6 +244,65 @@ bool tipc_msg_validate(struct sk_buff **_skb)
}
/**
+ * tipc_msg_fragment - build a fragment skb list for TIPC message
+ *
+ * @skb: TIPC message skb
+ * @hdr: internal msg header to be put on the top of the fragments
+ * @pktmax: max size of a fragment incl. the header
+ * @frags: returned fragment skb list
+ *
+ * Returns 0 if the fragmentation is successful, otherwise: -EINVAL
+ * or -ENOMEM
+ */
+int tipc_msg_fragment(struct sk_buff *skb, const struct tipc_msg *hdr,
+ int pktmax, struct sk_buff_head *frags)
+{
+ int pktno, nof_fragms, dsz, dmax, eat;
+ struct tipc_msg *_hdr;
+ struct sk_buff *_skb;
+ u8 *data;
+
+ /* Non-linear buffer? */
+ if (skb_linearize(skb))
+ return -ENOMEM;
+
+ data = (u8 *)skb->data;
+ dsz = msg_size(buf_msg(skb));
+ dmax = pktmax - INT_H_SIZE;
+ if (dsz <= dmax || !dmax)
+ return -EINVAL;
+
+ nof_fragms = dsz / dmax + 1;
+ for (pktno = 1; pktno <= nof_fragms; pktno++) {
+ if (pktno < nof_fragms)
+ eat = dmax;
+ else
+ eat = dsz % dmax;
+ /* Allocate a new fragment */
+ _skb = tipc_buf_acquire(INT_H_SIZE + eat, GFP_ATOMIC);
+ if (!_skb)
+ goto error;
+ skb_orphan(_skb);
+ __skb_queue_tail(frags, _skb);
+ /* Copy header & data to the fragment */
+ skb_copy_to_linear_data(_skb, hdr, INT_H_SIZE);
+ skb_copy_to_linear_data_offset(_skb, INT_H_SIZE, data, eat);
+ data += eat;
+ /* Update the fragment's header */
+ _hdr = buf_msg(_skb);
+ msg_set_fragm_no(_hdr, pktno);
+ msg_set_nof_fragms(_hdr, nof_fragms);
+ msg_set_size(_hdr, INT_H_SIZE + eat);
+ }
+ return 0;
+
+error:
+ __skb_queue_purge(frags);
+ __skb_queue_head_init(frags);
+ return -ENOMEM;
+}
+
+/**
* tipc_msg_build - create buffer chain containing specified header and data
* @mhdr: Message header, to be prepended to data
* @m: User message
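The size arithmetic in tipc_msg_fragment() above can be illustrated with a short stand-alone sketch: dmax is the per-fragment payload budget (pktmax minus the inner header), every fragment but the last carries dmax bytes, and the last carries the remainder. The header size used below is an assumption for illustration only.

    #include <stdio.h>

    int main(void)
    {
            int pktmax = 1500, hdr = 40;   /* assumed tunnel header size */
            int dsz = 3200;                /* size of the message to fragment */
            int dmax = pktmax - hdr;
            int nof_fragms = dsz / dmax + 1;

            for (int pktno = 1; pktno <= nof_fragms; pktno++) {
                    int eat = (pktno < nof_fragms) ? dmax : dsz % dmax;

                    printf("fragment %d/%d carries %d bytes (+%d header)\n",
                           pktno, nof_fragms, eat, hdr);
            }
            return 0;
    }

With the assumed numbers this yields three fragments of 1460, 1460 and 280 payload bytes, matching the FIRST/FRAGMENT/LAST typing done in tipc_link_tnl_rcv().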
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index d7ebc9e955f6..0daa6f04ca81 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -723,12 +723,26 @@ static inline void msg_set_last_bcast(struct tipc_msg *m, u32 n)
msg_set_bits(m, 4, 16, 0xffff, n);
}
+static inline u32 msg_nof_fragms(struct tipc_msg *m)
+{
+ return msg_bits(m, 4, 0, 0xffff);
+}
+
+static inline void msg_set_nof_fragms(struct tipc_msg *m, u32 n)
+{
+ msg_set_bits(m, 4, 0, 0xffff, n);
+}
+
+static inline u32 msg_fragm_no(struct tipc_msg *m)
+{
+ return msg_bits(m, 4, 16, 0xffff);
+}
+
static inline void msg_set_fragm_no(struct tipc_msg *m, u32 n)
{
msg_set_bits(m, 4, 16, 0xffff, n);
}
-
static inline u16 msg_next_sent(struct tipc_msg *m)
{
return msg_bits(m, 4, 0, 0xffff);
@@ -879,6 +893,16 @@ static inline void msg_set_msgcnt(struct tipc_msg *m, u16 n)
msg_set_bits(m, 9, 16, 0xffff, n);
}
+static inline u16 msg_syncpt(struct tipc_msg *m)
+{
+ return msg_bits(m, 9, 16, 0xffff);
+}
+
+static inline void msg_set_syncpt(struct tipc_msg *m, u16 n)
+{
+ msg_set_bits(m, 9, 16, 0xffff, n);
+}
+
static inline u32 msg_conn_ack(struct tipc_msg *m)
{
return msg_bits(m, 9, 16, 0xffff);
@@ -1037,6 +1061,8 @@ bool tipc_msg_bundle(struct sk_buff *skb, struct tipc_msg *msg, u32 mtu);
bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg,
u32 mtu, u32 dnode);
bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos);
+int tipc_msg_fragment(struct sk_buff *skb, const struct tipc_msg *hdr,
+ int pktmax, struct sk_buff_head *frags);
int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
int offset, int dsz, int mtu, struct sk_buff_head *list);
bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err);
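The new msg_nof_fragms()/msg_fragm_no()/msg_syncpt() accessors above all follow the same word/offset/mask pattern as the existing msg_bits() helpers. A minimal sketch of that pattern, assuming header words already in host byte order (the real helpers handle endianness):

    #include <stdint.h>

    static uint32_t hdr_bits(const uint32_t *hdr, int w, int pos, uint32_t mask)
    {
            return (hdr[w] >> pos) & mask;
    }

    static void hdr_set_bits(uint32_t *hdr, int w, int pos, uint32_t mask,
                             uint32_t val)
    {
            hdr[w] &= ~(mask << pos);
            hdr[w] |= (val & mask) << pos;
    }

    /* e.g. the fragment number lives in word 4, bits 16..31:
     *   hdr_set_bits(hdr, 4, 16, 0xffff, pktno);
     *   pktno = hdr_bits(hdr, 4, 16, 0xffff);
     */

Note that msg_syncpt() reuses the same bits as msg_conn_ack() (word 9, bits 16..31); which interpretation applies depends on the message type.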
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 241ed2274473..836e629e8f4a 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -190,7 +190,7 @@ void tipc_named_node_up(struct net *net, u32 dnode)
struct name_table *nt = tipc_name_table(net);
struct sk_buff_head head;
- skb_queue_head_init(&head);
+ __skb_queue_head_init(&head);
read_lock_bh(&nt->cluster_scope_lock);
named_distribute(net, &head, dnode, &nt->cluster_scope);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 3a5be1d7e572..c8f6177dd5a2 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1443,13 +1443,15 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
int rc;
if (in_own_node(net, dnode)) {
+ tipc_loopback_trace(net, list);
+ spin_lock_init(&list->lock);
tipc_sk_rcv(net, list);
return 0;
}
n = tipc_node_find(net, dnode);
if (unlikely(!n)) {
- skb_queue_purge(list);
+ __skb_queue_purge(list);
return -EHOSTUNREACH;
}
@@ -1458,7 +1460,7 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
if (unlikely(bearer_id == INVALID_BEARER_ID)) {
tipc_node_read_unlock(n);
tipc_node_put(n);
- skb_queue_purge(list);
+ __skb_queue_purge(list);
return -EHOSTUNREACH;
}
@@ -1490,7 +1492,7 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode,
{
struct sk_buff_head head;
- skb_queue_head_init(&head);
+ __skb_queue_head_init(&head);
__skb_queue_tail(&head, skb);
tipc_node_xmit(net, &head, dnode, selector);
return 0;
@@ -1649,7 +1651,6 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
int usr = msg_user(hdr);
int mtyp = msg_type(hdr);
u16 oseqno = msg_seqno(hdr);
- u16 iseqno = msg_seqno(msg_inner_hdr(hdr));
u16 exp_pkts = msg_msgcnt(hdr);
u16 rcv_nxt, syncpt, dlv_nxt, inputq_len;
int state = n->state;
@@ -1748,7 +1749,10 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
/* Initiate synch mode if applicable */
if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG) && (oseqno == 1)) {
- syncpt = iseqno + exp_pkts - 1;
+ if (n->capabilities & TIPC_TUNNEL_ENHANCED)
+ syncpt = msg_syncpt(hdr);
+ else
+ syncpt = msg_seqno(msg_inner_hdr(hdr)) + exp_pkts - 1;
if (!tipc_link_is_up(l))
__tipc_node_link_up(n, bearer_id, xmitq);
if (n->state == SELF_UP_PEER_UP) {
diff --git a/net/tipc/node.h b/net/tipc/node.h
index c0bf49ea3de4..291d0ecd4101 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -53,7 +53,8 @@ enum {
TIPC_NODE_ID128 = (1 << 5),
TIPC_LINK_PROTO_SEQNO = (1 << 6),
TIPC_MCAST_RBCTL = (1 << 7),
- TIPC_GAP_ACK_BLOCK = (1 << 8)
+ TIPC_GAP_ACK_BLOCK = (1 << 8),
+ TIPC_TUNNEL_ENHANCED = (1 << 9)
};
#define TIPC_NODE_CAPABILITIES (TIPC_SYN_BIT | \
@@ -64,7 +65,8 @@ enum {
TIPC_NODE_ID128 | \
TIPC_LINK_PROTO_SEQNO | \
TIPC_MCAST_RBCTL | \
- TIPC_GAP_ACK_BLOCK)
+ TIPC_GAP_ACK_BLOCK | \
+ TIPC_TUNNEL_ENHANCED)
#define INVALID_BEARER_ID -1
void tipc_node_stop(struct net *net);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 83ae41d7e554..3b9f8cc328f5 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -809,7 +809,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
msg_set_nameupper(hdr, seq->upper);
/* Build message as chain of buffers */
- skb_queue_head_init(&pkts);
+ __skb_queue_head_init(&pkts);
rc = tipc_msg_build(hdr, msg, 0, dlen, mtu, &pkts);
/* Send message if build was successful */
@@ -853,7 +853,7 @@ static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk,
msg_set_grp_bc_seqno(hdr, bc_snd_nxt);
/* Build message as chain of buffers */
- skb_queue_head_init(&pkts);
+ __skb_queue_head_init(&pkts);
mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
if (unlikely(rc != dlen))
@@ -1058,7 +1058,7 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
msg_set_grp_bc_ack_req(hdr, ack);
/* Build message as chain of buffers */
- skb_queue_head_init(&pkts);
+ __skb_queue_head_init(&pkts);
rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
if (unlikely(rc != dlen))
return rc;
@@ -1387,7 +1387,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
if (unlikely(rc))
return rc;
- skb_queue_head_init(&pkts);
+ __skb_queue_head_init(&pkts);
mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
if (unlikely(rc != dlen))
@@ -1445,7 +1445,7 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
int send, sent = 0;
int rc = 0;
- skb_queue_head_init(&pkts);
+ __skb_queue_head_init(&pkts);
if (unlikely(dlen > INT_MAX))
return -EMSGSIZE;
@@ -1805,7 +1805,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
/* Send group flow control advertisement when applicable */
if (tsk->group && msg_in_group(hdr) && !grp_evt) {
- skb_queue_head_init(&xmitq);
+ __skb_queue_head_init(&xmitq);
tipc_group_update_rcv_win(tsk->group, tsk_blocks(hlen + dlen),
msg_orignode(hdr), msg_origport(hdr),
&xmitq);
@@ -2674,7 +2674,7 @@ static void tipc_sk_timeout(struct timer_list *t)
struct sk_buff_head list;
int rc = 0;
- skb_queue_head_init(&list);
+ __skb_queue_head_init(&list);
bh_lock_sock(sk);
/* Try again later if socket is busy */
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index ca8ac96d22a9..3a12fc18239b 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -40,6 +40,7 @@
#include "socket.h"
#include "addr.h"
#include "msg.h"
+#include "bearer.h"
#include <net/sock.h>
#include <linux/module.h>
@@ -608,6 +609,7 @@ static void tipc_topsrv_kern_evt(struct net *net, struct tipc_event *evt)
memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt));
skb_queue_head_init(&evtq);
__skb_queue_tail(&evtq, skb);
+ tipc_loopback_trace(net, &evtq);
tipc_sk_rcv(net, &evtq);
}
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 43922d86e510..f959487c5cd1 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -61,7 +61,7 @@ static void tls_device_free_ctx(struct tls_context *ctx)
if (ctx->rx_conf == TLS_HW)
kfree(tls_offload_ctx_rx(ctx));
- tls_ctx_free(ctx);
+ tls_ctx_free(NULL, ctx);
}
static void tls_device_gc_task(struct work_struct *work)
@@ -122,13 +122,10 @@ static struct net_device *get_netdev_for_sock(struct sock *sk)
static void destroy_record(struct tls_record_info *record)
{
- int nr_frags = record->num_frags;
- skb_frag_t *frag;
+ int i;
- while (nr_frags-- > 0) {
- frag = &record->frags[nr_frags];
- __skb_frag_unref(frag);
- }
+ for (i = 0; i < record->num_frags; i++)
+ __skb_frag_unref(&record->frags[i]);
kfree(record);
}
@@ -159,12 +156,8 @@ static void tls_icsk_clean_acked(struct sock *sk, u32 acked_seq)
spin_lock_irqsave(&ctx->lock, flags);
info = ctx->retransmit_hint;
- if (info && !before(acked_seq, info->end_seq)) {
+ if (info && !before(acked_seq, info->end_seq))
ctx->retransmit_hint = NULL;
- list_del(&info->list);
- destroy_record(info);
- deleted_records++;
- }
list_for_each_entry_safe(info, temp, &ctx->records_list, list) {
if (before(acked_seq, info->end_seq))
@@ -243,14 +236,14 @@ static void tls_append_frag(struct tls_record_info *record,
skb_frag_t *frag;
frag = &record->frags[record->num_frags - 1];
- if (frag->page.p == pfrag->page &&
- frag->page_offset + frag->size == pfrag->offset) {
- frag->size += size;
+ if (skb_frag_page(frag) == pfrag->page &&
+ skb_frag_off(frag) + skb_frag_size(frag) == pfrag->offset) {
+ skb_frag_size_add(frag, size);
} else {
++frag;
- frag->page.p = pfrag->page;
- frag->page_offset = pfrag->offset;
- frag->size = size;
+ __skb_frag_set_page(frag, pfrag->page);
+ skb_frag_off_set(frag, pfrag->offset);
+ skb_frag_size_set(frag, size);
++record->num_frags;
get_page(pfrag->page);
}
@@ -263,33 +256,15 @@ static int tls_push_record(struct sock *sk,
struct tls_context *ctx,
struct tls_offload_context_tx *offload_ctx,
struct tls_record_info *record,
- struct page_frag *pfrag,
- int flags,
- unsigned char record_type)
+ int flags)
{
struct tls_prot_info *prot = &ctx->prot_info;
struct tcp_sock *tp = tcp_sk(sk);
- struct page_frag dummy_tag_frag;
skb_frag_t *frag;
int i;
- /* fill prepend */
- frag = &record->frags[0];
- tls_fill_prepend(ctx,
- skb_frag_address(frag),
- record->len - prot->prepend_size,
- record_type,
- prot->version);
-
- /* HW doesn't care about the data in the tag, because it fills it. */
- dummy_tag_frag.page = skb_frag_page(frag);
- dummy_tag_frag.offset = 0;
-
- tls_append_frag(record, &dummy_tag_frag, prot->tag_size);
record->end_seq = tp->write_seq + record->len;
- spin_lock_irq(&offload_ctx->lock);
- list_add_tail(&record->list, &offload_ctx->records_list);
- spin_unlock_irq(&offload_ctx->lock);
+ list_add_tail_rcu(&record->list, &offload_ctx->records_list);
offload_ctx->open_record = NULL;
if (test_bit(TLS_TX_SYNC_SCHED, &ctx->flags))
@@ -301,8 +276,8 @@ static int tls_push_record(struct sock *sk,
frag = &record->frags[i];
sg_unmark_end(&offload_ctx->sg_tx_data[i]);
sg_set_page(&offload_ctx->sg_tx_data[i], skb_frag_page(frag),
- frag->size, frag->page_offset);
- sk_mem_charge(sk, frag->size);
+ skb_frag_size(frag), skb_frag_off(frag));
+ sk_mem_charge(sk, skb_frag_size(frag));
get_page(skb_frag_page(frag));
}
sg_mark_end(&offload_ctx->sg_tx_data[record->num_frags - 1]);
@@ -311,6 +286,38 @@ static int tls_push_record(struct sock *sk,
return tls_push_sg(sk, ctx, offload_ctx->sg_tx_data, 0, flags);
}
+static int tls_device_record_close(struct sock *sk,
+ struct tls_context *ctx,
+ struct tls_record_info *record,
+ struct page_frag *pfrag,
+ unsigned char record_type)
+{
+ struct tls_prot_info *prot = &ctx->prot_info;
+ int ret;
+
+ /* Append the tag.
+ * The device will fill in the tag; we only need to append a placeholder.
+ * Use socket memory to improve coalescing (re-using a single buffer
+ * increases the frag count).
+ * If we can't allocate memory now, steal some back from the data.
+ */
+ if (likely(skb_page_frag_refill(prot->tag_size, pfrag,
+ sk->sk_allocation))) {
+ ret = 0;
+ tls_append_frag(record, pfrag, prot->tag_size);
+ } else {
+ ret = prot->tag_size;
+ if (record->len <= prot->overhead_size)
+ return -ENOMEM;
+ }
+
+ /* fill prepend */
+ tls_fill_prepend(ctx, skb_frag_address(&record->frags[0]),
+ record->len - prot->overhead_size,
+ record_type, prot->version);
+ return ret;
+}
+
static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx,
struct page_frag *pfrag,
size_t prepend_size)
@@ -324,7 +331,7 @@ static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx,
frag = &record->frags[0];
__skb_frag_set_page(frag, pfrag->page);
- frag->page_offset = pfrag->offset;
+ skb_frag_off_set(frag, pfrag->offset);
skb_frag_size_set(frag, prepend_size);
get_page(pfrag->page);
@@ -365,6 +372,31 @@ static int tls_do_allocation(struct sock *sk,
return 0;
}
+static int tls_device_copy_data(void *addr, size_t bytes, struct iov_iter *i)
+{
+ size_t pre_copy, nocache;
+
+ pre_copy = ~((unsigned long)addr - 1) & (SMP_CACHE_BYTES - 1);
+ if (pre_copy) {
+ pre_copy = min(pre_copy, bytes);
+ if (copy_from_iter(addr, pre_copy, i) != pre_copy)
+ return -EFAULT;
+ bytes -= pre_copy;
+ addr += pre_copy;
+ }
+
+ nocache = round_down(bytes, SMP_CACHE_BYTES);
+ if (copy_from_iter_nocache(addr, nocache, i) != nocache)
+ return -EFAULT;
+ bytes -= nocache;
+ addr += nocache;
+
+ if (bytes && copy_from_iter(addr, bytes, i) != bytes)
+ return -EFAULT;
+
+ return 0;
+}
+
static int tls_push_data(struct sock *sk,
struct iov_iter *msg_iter,
size_t size, int flags,
@@ -438,12 +470,10 @@ handle_error:
copy = min_t(size_t, size, (pfrag->size - pfrag->offset));
copy = min_t(size_t, copy, (max_open_record_len - record->len));
- if (copy_from_iter_nocache(page_address(pfrag->page) +
- pfrag->offset,
- copy, msg_iter) != copy) {
- rc = -EFAULT;
+ rc = tls_device_copy_data(page_address(pfrag->page) +
+ pfrag->offset, copy, msg_iter);
+ if (rc)
goto handle_error;
- }
tls_append_frag(record, pfrag, copy);
size -= copy;
@@ -461,13 +491,24 @@ last_record:
if (done || record->len >= max_open_record_len ||
(record->num_frags >= MAX_SKB_FRAGS - 1)) {
+ rc = tls_device_record_close(sk, tls_ctx, record,
+ pfrag, record_type);
+ if (rc) {
+ if (rc > 0) {
+ size += rc;
+ } else {
+ size = orig_size;
+ destroy_record(record);
+ ctx->open_record = NULL;
+ break;
+ }
+ }
+
rc = tls_push_record(sk,
tls_ctx,
ctx,
record,
- pfrag,
- tls_push_record_flags,
- record_type);
+ tls_push_record_flags);
if (rc < 0)
break;
}
@@ -542,12 +583,16 @@ struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
/* if retransmit_hint is irrelevant start
* from the beginning of the list
*/
- info = list_first_entry(&context->records_list,
- struct tls_record_info, list);
+ info = list_first_entry_or_null(&context->records_list,
+ struct tls_record_info, list);
+ if (!info)
+ return NULL;
record_sn = context->unacked_record_sn;
}
- list_for_each_entry_from(info, &context->records_list, list) {
+ /* We just need the _rcu for the READ_ONCE() */
+ rcu_read_lock();
+ list_for_each_entry_from_rcu(info, &context->records_list, list) {
if (before(seq, info->end_seq)) {
if (!context->retransmit_hint ||
after(info->end_seq,
@@ -556,12 +601,15 @@ struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
context->retransmit_hint = info;
}
*p_record_sn = record_sn;
- return info;
+ goto exit_rcu_unlock;
}
record_sn++;
}
+ info = NULL;
- return NULL;
+exit_rcu_unlock:
+ rcu_read_unlock();
+ return info;
}
EXPORT_SYMBOL(tls_get_record);
@@ -838,22 +886,18 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
struct net_device *netdev;
char *iv, *rec_seq;
struct sk_buff *skb;
- int rc = -EINVAL;
__be64 rcd_sn;
+ int rc;
if (!ctx)
- goto out;
+ return -EINVAL;
- if (ctx->priv_ctx_tx) {
- rc = -EEXIST;
- goto out;
- }
+ if (ctx->priv_ctx_tx)
+ return -EEXIST;
start_marker_record = kmalloc(sizeof(*start_marker_record), GFP_KERNEL);
- if (!start_marker_record) {
- rc = -ENOMEM;
- goto out;
- }
+ if (!start_marker_record)
+ return -ENOMEM;
offload_ctx = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_TX, GFP_KERNEL);
if (!offload_ctx) {
@@ -939,17 +983,11 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
if (skb)
TCP_SKB_CB(skb)->eor = 1;
- /* We support starting offload on multiple sockets
- * concurrently, so we only need a read lock here.
- * This lock must precede get_netdev_for_sock to prevent races between
- * NETDEV_DOWN and setsockopt.
- */
- down_read(&device_offload_lock);
netdev = get_netdev_for_sock(sk);
if (!netdev) {
pr_err_ratelimited("%s: netdev not found\n", __func__);
rc = -EINVAL;
- goto release_lock;
+ goto disable_cad;
}
if (!(netdev->features & NETIF_F_HW_TLS_TX)) {
@@ -960,10 +998,15 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
/* Avoid offloading if the device is down
* We don't want to offload new flows after
* the NETDEV_DOWN event
+ *
+ * device_offload_lock is taken in tls_device's NETDEV_DOWN
+ * handler, thus protecting against the device going down before
+ * ctx was added to tls_device_list.
*/
+ down_read(&device_offload_lock);
if (!(netdev->flags & IFF_UP)) {
rc = -EINVAL;
- goto release_netdev;
+ goto release_lock;
}
ctx->priv_ctx_tx = offload_ctx;
@@ -971,9 +1014,10 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
&ctx->crypto_send.info,
tcp_sk(sk)->write_seq);
if (rc)
- goto release_netdev;
+ goto release_lock;
tls_device_attach(ctx, sk, netdev);
+ up_read(&device_offload_lock);
/* following this assignment tls_is_sk_tx_device_offloaded
* will return true and the context might be accessed
@@ -981,13 +1025,14 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
*/
smp_store_release(&sk->sk_validate_xmit_skb, tls_validate_xmit_skb);
dev_put(netdev);
- up_read(&device_offload_lock);
- goto out;
-release_netdev:
- dev_put(netdev);
+ return 0;
+
release_lock:
up_read(&device_offload_lock);
+release_netdev:
+ dev_put(netdev);
+disable_cad:
clean_acked_data_disable(inet_csk(sk));
crypto_free_aead(offload_ctx->aead_send);
free_rec_seq:
@@ -999,7 +1044,6 @@ free_offload_ctx:
ctx->priv_ctx_tx = NULL;
free_marker_record:
kfree(start_marker_record);
-out:
return rc;
}
@@ -1012,17 +1056,10 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx)
if (ctx->crypto_recv.info.version != TLS_1_2_VERSION)
return -EOPNOTSUPP;
- /* We support starting offload on multiple sockets
- * concurrently, so we only need a read lock here.
- * This lock must precede get_netdev_for_sock to prevent races between
- * NETDEV_DOWN and setsockopt.
- */
- down_read(&device_offload_lock);
netdev = get_netdev_for_sock(sk);
if (!netdev) {
pr_err_ratelimited("%s: netdev not found\n", __func__);
- rc = -EINVAL;
- goto release_lock;
+ return -EINVAL;
}
if (!(netdev->features & NETIF_F_HW_TLS_RX)) {
@@ -1033,16 +1070,21 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx)
/* Avoid offloading if the device is down
* We don't want to offload new flows after
* the NETDEV_DOWN event
+ *
+ * device_offload_lock is taken in tls_device's NETDEV_DOWN
+ * handler, thus protecting against the device going down before
+ * ctx was added to tls_device_list.
*/
+ down_read(&device_offload_lock);
if (!(netdev->flags & IFF_UP)) {
rc = -EINVAL;
- goto release_netdev;
+ goto release_lock;
}
context = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_RX, GFP_KERNEL);
if (!context) {
rc = -ENOMEM;
- goto release_netdev;
+ goto release_lock;
}
context->resync_nh_reset = 1;
@@ -1058,7 +1100,11 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx)
goto free_sw_resources;
tls_device_attach(ctx, sk, netdev);
- goto release_netdev;
+ up_read(&device_offload_lock);
+
+ dev_put(netdev);
+
+ return 0;
free_sw_resources:
up_read(&device_offload_lock);
@@ -1066,10 +1112,10 @@ free_sw_resources:
down_read(&device_offload_lock);
release_ctx:
ctx->priv_ctx_rx = NULL;
-release_netdev:
- dev_put(netdev);
release_lock:
up_read(&device_offload_lock);
+release_netdev:
+ dev_put(netdev);
return rc;
}
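The pre_copy expression in tls_device_copy_data() above computes the number of bytes needed to reach the next cache-line boundary, i.e. (-addr) mod SMP_CACHE_BYTES, so that the bulk of the copy can go through the non-caching path on an aligned address. A small sketch of that arithmetic, assuming a 64-byte cache line:

    #include <stdio.h>
    #include <stdint.h>

    #define CACHE_BYTES 64UL   /* assumed SMP_CACHE_BYTES */

    int main(void)
    {
            uintptr_t addrs[] = { 0x1000, 0x1001, 0x103f, 0x1040 };

            for (int i = 0; i < 4; i++) {
                    uintptr_t a = addrs[i];
                    /* bytes up to the next cache-line boundary */
                    size_t pre = ~(a - 1) & (CACHE_BYTES - 1);

                    printf("addr %#lx -> %zu byte(s) to the next line\n",
                           (unsigned long)a, pre);
            }
            return 0;
    }

Already-aligned addresses yield 0, an address one byte past a boundary yields 63, and so on, which is exactly what the first copy_from_iter() in the kernel function consumes before switching to copy_from_iter_nocache().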
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index 9070d68a92a4..28895333701e 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -273,7 +273,7 @@ static int fill_sg_in(struct scatterlist *sg_in,
__skb_frag_ref(frag);
sg_set_page(sg_in + i, skb_frag_page(frag),
- skb_frag_size(frag), frag->page_offset);
+ skb_frag_size(frag), skb_frag_off(frag));
remaining -= skb_frag_size(frag);
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 43252a801c3f..ac88877dcade 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -39,6 +39,7 @@
#include <linux/netdevice.h>
#include <linux/sched/signal.h>
#include <linux/inetdevice.h>
+#include <linux/inet_diag.h>
#include <net/tls.h>
@@ -251,14 +252,26 @@ static void tls_write_space(struct sock *sk)
ctx->sk_write_space(sk);
}
-void tls_ctx_free(struct tls_context *ctx)
+/**
+ * tls_ctx_free() - free TLS ULP context
+ * @sk: socket to with @ctx is attached
+ * @ctx: TLS context structure
+ *
+ * Free TLS context. If @sk is %NULL caller guarantees that the socket
+ * to which @ctx was attached has no outstanding references.
+ */
+void tls_ctx_free(struct sock *sk, struct tls_context *ctx)
{
if (!ctx)
return;
memzero_explicit(&ctx->crypto_send, sizeof(ctx->crypto_send));
memzero_explicit(&ctx->crypto_recv, sizeof(ctx->crypto_recv));
- kfree(ctx);
+
+ if (sk)
+ kfree_rcu(ctx, rcu);
+ else
+ kfree(ctx);
}
static void tls_sk_proto_cleanup(struct sock *sk,
@@ -273,19 +286,14 @@ static void tls_sk_proto_cleanup(struct sock *sk,
kfree(ctx->tx.rec_seq);
kfree(ctx->tx.iv);
tls_sw_release_resources_tx(sk);
-#ifdef CONFIG_TLS_DEVICE
} else if (ctx->tx_conf == TLS_HW) {
tls_device_free_resources_tx(sk);
-#endif
}
if (ctx->rx_conf == TLS_SW)
tls_sw_release_resources_rx(sk);
-
-#ifdef CONFIG_TLS_DEVICE
- if (ctx->rx_conf == TLS_HW)
+ else if (ctx->rx_conf == TLS_HW)
tls_device_offload_cleanup_rx(sk);
-#endif
}
static void tls_sk_proto_close(struct sock *sk, long timeout)
@@ -306,7 +314,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
write_lock_bh(&sk->sk_callback_lock);
if (free_ctx)
- icsk->icsk_ulp_data = NULL;
+ rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
sk->sk_prot = ctx->sk_proto;
if (sk->sk_write_space == tls_write_space)
sk->sk_write_space = ctx->sk_write_space;
@@ -318,10 +326,10 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
tls_sw_strparser_done(ctx);
if (ctx->rx_conf == TLS_SW)
tls_sw_free_ctx_rx(ctx);
- ctx->sk_proto_close(sk, timeout);
+ ctx->sk_proto->close(sk, timeout);
if (free_ctx)
- tls_ctx_free(ctx);
+ tls_ctx_free(sk, ctx);
}
static int do_tls_getsockopt_tx(struct sock *sk, char __user *optval,
@@ -438,7 +446,8 @@ static int tls_getsockopt(struct sock *sk, int level, int optname,
struct tls_context *ctx = tls_get_ctx(sk);
if (level != SOL_TLS)
- return ctx->getsockopt(sk, level, optname, optval, optlen);
+ return ctx->sk_proto->getsockopt(sk, level,
+ optname, optval, optlen);
return do_tls_getsockopt(sk, optname, optval, optlen);
}
@@ -523,26 +532,18 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
}
if (tx) {
-#ifdef CONFIG_TLS_DEVICE
rc = tls_set_device_offload(sk, ctx);
conf = TLS_HW;
if (rc) {
-#else
- {
-#endif
rc = tls_set_sw_offload(sk, ctx, 1);
if (rc)
goto err_crypto_info;
conf = TLS_SW;
}
} else {
-#ifdef CONFIG_TLS_DEVICE
rc = tls_set_device_offload_rx(sk, ctx);
conf = TLS_HW;
if (rc) {
-#else
- {
-#endif
rc = tls_set_sw_offload(sk, ctx, 0);
if (rc)
goto err_crypto_info;
@@ -596,7 +597,8 @@ static int tls_setsockopt(struct sock *sk, int level, int optname,
struct tls_context *ctx = tls_get_ctx(sk);
if (level != SOL_TLS)
- return ctx->setsockopt(sk, level, optname, optval, optlen);
+ return ctx->sk_proto->setsockopt(sk, level, optname, optval,
+ optlen);
return do_tls_setsockopt(sk, optname, optval, optlen);
}
@@ -610,11 +612,8 @@ static struct tls_context *create_ctx(struct sock *sk)
if (!ctx)
return NULL;
- icsk->icsk_ulp_data = ctx;
- ctx->setsockopt = sk->sk_prot->setsockopt;
- ctx->getsockopt = sk->sk_prot->getsockopt;
- ctx->sk_proto_close = sk->sk_prot->close;
- ctx->unhash = sk->sk_prot->unhash;
+ rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
+ ctx->sk_proto = sk->sk_prot;
return ctx;
}
@@ -651,8 +650,8 @@ static void tls_hw_sk_destruct(struct sock *sk)
ctx->sk_destruct(sk);
/* Free ctx */
- tls_ctx_free(ctx);
- icsk->icsk_ulp_data = NULL;
+ rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
+ tls_ctx_free(sk, ctx);
}
static int tls_hw_prot(struct sock *sk)
@@ -670,9 +669,6 @@ static int tls_hw_prot(struct sock *sk)
spin_unlock_bh(&device_spinlock);
tls_build_proto(sk);
- ctx->hash = sk->sk_prot->hash;
- ctx->unhash = sk->sk_prot->unhash;
- ctx->sk_proto_close = sk->sk_prot->close;
ctx->sk_destruct = sk->sk_destruct;
sk->sk_destruct = tls_hw_sk_destruct;
ctx->rx_conf = TLS_HW_RECORD;
@@ -704,7 +700,7 @@ static void tls_hw_unhash(struct sock *sk)
}
}
spin_unlock_bh(&device_spinlock);
- ctx->unhash(sk);
+ ctx->sk_proto->unhash(sk);
}
static int tls_hw_hash(struct sock *sk)
@@ -713,7 +709,7 @@ static int tls_hw_hash(struct sock *sk)
struct tls_device *dev;
int err;
- err = ctx->hash(sk);
+ err = ctx->sk_proto->hash(sk);
spin_lock_bh(&device_spinlock);
list_for_each_entry(dev, &device_list, dev_list) {
if (dev->hash) {
@@ -803,7 +799,6 @@ static int tls_init(struct sock *sk)
ctx->tx_conf = TLS_BASE;
ctx->rx_conf = TLS_BASE;
- ctx->sk_proto = sk->sk_prot;
update_sk_prot(sk, ctx);
out:
write_unlock_bh(&sk->sk_callback_lock);
@@ -815,12 +810,71 @@ static void tls_update(struct sock *sk, struct proto *p)
struct tls_context *ctx;
ctx = tls_get_ctx(sk);
- if (likely(ctx)) {
- ctx->sk_proto_close = p->close;
+ if (likely(ctx))
ctx->sk_proto = p;
- } else {
+ else
sk->sk_prot = p;
- }
+}
+
+static int tls_get_info(const struct sock *sk, struct sk_buff *skb)
+{
+ u16 version, cipher_type;
+ struct tls_context *ctx;
+ struct nlattr *start;
+ int err;
+
+ start = nla_nest_start_noflag(skb, INET_ULP_INFO_TLS);
+ if (!start)
+ return -EMSGSIZE;
+
+ rcu_read_lock();
+ ctx = rcu_dereference(inet_csk(sk)->icsk_ulp_data);
+ if (!ctx) {
+ err = 0;
+ goto nla_failure;
+ }
+ version = ctx->prot_info.version;
+ if (version) {
+ err = nla_put_u16(skb, TLS_INFO_VERSION, version);
+ if (err)
+ goto nla_failure;
+ }
+ cipher_type = ctx->prot_info.cipher_type;
+ if (cipher_type) {
+ err = nla_put_u16(skb, TLS_INFO_CIPHER, cipher_type);
+ if (err)
+ goto nla_failure;
+ }
+ err = nla_put_u16(skb, TLS_INFO_TXCONF, tls_user_config(ctx, true));
+ if (err)
+ goto nla_failure;
+
+ err = nla_put_u16(skb, TLS_INFO_RXCONF, tls_user_config(ctx, false));
+ if (err)
+ goto nla_failure;
+
+ rcu_read_unlock();
+ nla_nest_end(skb, start);
+ return 0;
+
+nla_failure:
+ rcu_read_unlock();
+ nla_nest_cancel(skb, start);
+ return err;
+}
+
+static size_t tls_get_info_size(const struct sock *sk)
+{
+ size_t size = 0;
+
+ size += nla_total_size(0) + /* INET_ULP_INFO_TLS */
+ nla_total_size(sizeof(u16)) + /* TLS_INFO_VERSION */
+ nla_total_size(sizeof(u16)) + /* TLS_INFO_CIPHER */
+ nla_total_size(sizeof(u16)) + /* TLS_INFO_RXCONF */
+ nla_total_size(sizeof(u16)) + /* TLS_INFO_TXCONF */
+ 0;
+
+ return size;
}
void tls_register_device(struct tls_device *device)
@@ -844,6 +898,8 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = {
.owner = THIS_MODULE,
.init = tls_init,
.update = tls_update,
+ .get_info = tls_get_info,
+ .get_info_size = tls_get_info_size,
};
static int __init tls_register(void)
@@ -851,9 +907,7 @@ static int __init tls_register(void)
tls_sw_proto_ops = inet_stream_ops;
tls_sw_proto_ops.splice_read = tls_sw_splice_read;
-#ifdef CONFIG_TLS_DEVICE
tls_device_init();
-#endif
tcp_register_ulp(&tcp_tls_ulp_ops);
return 0;
@@ -862,9 +916,7 @@ static int __init tls_register(void)
static void __exit tls_unregister(void)
{
tcp_unregister_ulp(&tcp_tls_ulp_ops);
-#ifdef CONFIG_TLS_DEVICE
tls_device_cleanup();
-#endif
}
module_init(tls_register);
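The tls_get_info_size() callback above reserves worst-case netlink space: one zero-payload nest attribute plus four u16 attributes. A rough user-space sketch of that accounting, using the standard netlink attribute layout (4-byte header, 4-byte alignment); the constants below are restated here only for illustration:

    #include <stdio.h>

    #define NLA_HDRLEN   4
    #define NLA_ALIGN(x) (((x) + 3) & ~3)

    static int nla_total(int payload)
    {
            return NLA_ALIGN(NLA_HDRLEN + payload);
    }

    int main(void)
    {
            int size = nla_total(0)                           /* INET_ULP_INFO_TLS nest */
                     + 4 * nla_total(sizeof(unsigned short)); /* VERSION, CIPHER, RXCONF, TXCONF */

            printf("worst-case TLS diag payload: %d bytes\n", size);
            return 0;
    }

This is an upper bound; tls_get_info() may emit fewer attributes (e.g. it skips VERSION and CIPHER when they are still zero).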
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 91d21b048a9b..c2b5e0d2ba1a 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1489,13 +1489,12 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
int pad, err = 0;
if (!ctx->decrypted) {
-#ifdef CONFIG_TLS_DEVICE
if (tls_ctx->rx_conf == TLS_HW) {
err = tls_device_decrypted(sk, skb);
if (err < 0)
return err;
}
-#endif
+
/* Still not decrypted after tls_device */
if (!ctx->decrypted) {
err = decrypt_internal(sk, skb, dest, NULL, chunk, zc,
@@ -2014,10 +2013,9 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
ret = -EINVAL;
goto read_failure;
}
-#ifdef CONFIG_TLS_DEVICE
+
tls_device_rx_resync_new_rec(strp->sk, data_len + TLS_HEADER_SIZE,
TCP_SKB_CB(skb)->seq + rxm->offset);
-#endif
return data_len + TLS_HEADER_SIZE;
read_failure:
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 9d864ebeb7b3..261521d286d6 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -77,11 +77,11 @@ struct hvs_send_buf {
VMBUS_PKT_TRAILER_SIZE)
union hvs_service_id {
- uuid_le srv_id;
+ guid_t srv_id;
struct {
unsigned int svm_port;
- unsigned char b[sizeof(uuid_le) - sizeof(unsigned int)];
+ unsigned char b[sizeof(guid_t) - sizeof(unsigned int)];
};
};
@@ -89,8 +89,8 @@ union hvs_service_id {
struct hvsock {
struct vsock_sock *vsk;
- uuid_le vm_srv_id;
- uuid_le host_srv_id;
+ guid_t vm_srv_id;
+ guid_t host_srv_id;
struct vmbus_channel *chan;
struct vmpacket_descriptor *recv_desc;
@@ -159,21 +159,21 @@ struct hvsock {
#define MIN_HOST_EPHEMERAL_PORT (MAX_HOST_LISTEN_PORT + 1)
/* 00000000-facb-11e6-bd58-64006a7986d3 */
-static const uuid_le srv_id_template =
- UUID_LE(0x00000000, 0xfacb, 0x11e6, 0xbd, 0x58,
- 0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3);
+static const guid_t srv_id_template =
+ GUID_INIT(0x00000000, 0xfacb, 0x11e6, 0xbd, 0x58,
+ 0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3);
-static bool is_valid_srv_id(const uuid_le *id)
+static bool is_valid_srv_id(const guid_t *id)
{
- return !memcmp(&id->b[4], &srv_id_template.b[4], sizeof(uuid_le) - 4);
+ return !memcmp(&id->b[4], &srv_id_template.b[4], sizeof(guid_t) - 4);
}
-static unsigned int get_port_by_srv_id(const uuid_le *svr_id)
+static unsigned int get_port_by_srv_id(const guid_t *svr_id)
{
return *((unsigned int *)svr_id);
}
-static void hvs_addr_init(struct sockaddr_vm *addr, const uuid_le *svr_id)
+static void hvs_addr_init(struct sockaddr_vm *addr, const guid_t *svr_id)
{
unsigned int port = get_port_by_srv_id(svr_id);
@@ -321,7 +321,7 @@ static void hvs_close_connection(struct vmbus_channel *chan)
static void hvs_open_connection(struct vmbus_channel *chan)
{
- uuid_le *if_instance, *if_type;
+ guid_t *if_instance, *if_type;
unsigned char conn_from_host;
struct sockaddr_vm addr;
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 0815d1357861..082a30936690 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -307,6 +307,7 @@ static void virtio_vsock_rx_fill(struct virtio_vsock *vsock)
break;
}
+ pkt->buf_len = buf_len;
pkt->len = buf_len;
sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr));
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 6f1a8aff65c5..5bb70c692b1e 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -26,6 +26,9 @@
/* How long to wait for graceful shutdown of a connection */
#define VSOCK_CLOSE_TIMEOUT (8 * HZ)
+/* Threshold for detecting small packets to copy */
+#define GOOD_COPY_LEN 128
+
static const struct virtio_transport *virtio_transport_get_ops(void)
{
const struct vsock_transport *t = vsock_core_get_transport();
@@ -64,6 +67,9 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
pkt->buf = kmalloc(len, GFP_KERNEL);
if (!pkt->buf)
goto out_pkt;
+
+ pkt->buf_len = len;
+
err = memcpy_from_msg(pkt->buf, info->msg, len);
if (err)
goto out;
@@ -91,8 +97,17 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque)
struct virtio_vsock_pkt *pkt = opaque;
struct af_vsockmon_hdr *hdr;
struct sk_buff *skb;
+ size_t payload_len;
+ void *payload_buf;
+
+ /* A packet could have been split to fit the RX buffer, so retrieve
+ * the payload length from the header and compute the payload pointer,
+ * taking the offset into the original packet into account.
+ */
+ payload_len = le32_to_cpu(pkt->hdr.len);
+ payload_buf = pkt->buf + pkt->off;
- skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + pkt->len,
+ skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + payload_len,
GFP_ATOMIC);
if (!skb)
return NULL;
@@ -132,8 +147,8 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque)
skb_put_data(skb, &pkt->hdr, sizeof(pkt->hdr));
- if (pkt->len) {
- skb_put_data(skb, pkt->buf, pkt->len);
+ if (payload_len) {
+ skb_put_data(skb, payload_buf, payload_len);
}
return skb;
@@ -166,8 +181,8 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
vvs = vsk->trans;
/* we can send less than pkt_len bytes */
- if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE)
- pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE;
+ if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
+ pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
/* virtio_transport_get_credit might return less than pkt_len credit */
pkt_len = virtio_transport_get_credit(vvs, pkt_len);
@@ -204,10 +219,11 @@ static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt)
{
- spin_lock_bh(&vvs->tx_lock);
+ spin_lock_bh(&vvs->rx_lock);
+ vvs->last_fwd_cnt = vvs->fwd_cnt;
pkt->hdr.fwd_cnt = cpu_to_le32(vvs->fwd_cnt);
pkt->hdr.buf_alloc = cpu_to_le32(vvs->buf_alloc);
- spin_unlock_bh(&vvs->tx_lock);
+ spin_unlock_bh(&vvs->rx_lock);
}
EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt);
@@ -255,6 +271,7 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
struct virtio_vsock_sock *vvs = vsk->trans;
struct virtio_vsock_pkt *pkt;
size_t bytes, total = 0;
+ u32 free_space;
int err = -EFAULT;
spin_lock_bh(&vvs->rx_lock);
@@ -285,11 +302,24 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
virtio_transport_free_pkt(pkt);
}
}
+
+ free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt);
+
spin_unlock_bh(&vvs->rx_lock);
- /* Send a credit pkt to peer */
- virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM,
- NULL);
+ /* To reduce the number of credit update messages,
+ * don't update credits as long as lots of space is available.
+ * Note: the limit chosen here is arbitrary. Setting the limit
+ * too high causes extra messages. Too low causes transmitter
+ * stalls. As stalls are in theory more expensive than extra
+ * messages, we set the limit to a high value. TODO: experiment
+ * with different values.
+ */
+ if (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
+ virtio_transport_send_credit_update(vsk,
+ VIRTIO_VSOCK_TYPE_STREAM,
+ NULL);
+ }
return total;
@@ -841,24 +871,60 @@ destroy:
return err;
}
+static void
+virtio_transport_recv_enqueue(struct vsock_sock *vsk,
+ struct virtio_vsock_pkt *pkt)
+{
+ struct virtio_vsock_sock *vvs = vsk->trans;
+ bool free_pkt = false;
+
+ pkt->len = le32_to_cpu(pkt->hdr.len);
+ pkt->off = 0;
+
+ spin_lock_bh(&vvs->rx_lock);
+
+ virtio_transport_inc_rx_pkt(vvs, pkt);
+
+ /* Try to copy small packets into the buffer of last packet queued,
+ * to avoid wasting memory queueing the entire buffer with a small
+ * payload.
+ */
+ if (pkt->len <= GOOD_COPY_LEN && !list_empty(&vvs->rx_queue)) {
+ struct virtio_vsock_pkt *last_pkt;
+
+ last_pkt = list_last_entry(&vvs->rx_queue,
+ struct virtio_vsock_pkt, list);
+
+ /* If there is space in the last packet queued, we copy the
+ * new packet in its buffer.
+ */
+ if (pkt->len <= last_pkt->buf_len - last_pkt->len) {
+ memcpy(last_pkt->buf + last_pkt->len, pkt->buf,
+ pkt->len);
+ last_pkt->len += pkt->len;
+ free_pkt = true;
+ goto out;
+ }
+ }
+
+ list_add_tail(&pkt->list, &vvs->rx_queue);
+
+out:
+ spin_unlock_bh(&vvs->rx_lock);
+ if (free_pkt)
+ virtio_transport_free_pkt(pkt);
+}
+
static int
virtio_transport_recv_connected(struct sock *sk,
struct virtio_vsock_pkt *pkt)
{
struct vsock_sock *vsk = vsock_sk(sk);
- struct virtio_vsock_sock *vvs = vsk->trans;
int err = 0;
switch (le16_to_cpu(pkt->hdr.op)) {
case VIRTIO_VSOCK_OP_RW:
- pkt->len = le32_to_cpu(pkt->hdr.len);
- pkt->off = 0;
-
- spin_lock_bh(&vvs->rx_lock);
- virtio_transport_inc_rx_pkt(vvs, pkt);
- list_add_tail(&pkt->list, &vvs->rx_queue);
- spin_unlock_bh(&vvs->rx_lock);
-
+ virtio_transport_recv_enqueue(vsk, pkt);
sk->sk_data_ready(sk);
return err;
case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
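The dequeue path above now suppresses credit updates while the peer still believes plenty of receive space is free. A minimal model of that heuristic, with the threshold passed in as a parameter (the real code uses VIRTIO_VSOCK_MAX_PKT_BUF_SIZE):

    #include <stdbool.h>
    #include <stdint.h>

    struct rx_credit {
            uint32_t buf_alloc;    /* receive buffer size advertised to the peer */
            uint32_t fwd_cnt;      /* bytes consumed locally so far              */
            uint32_t last_fwd_cnt; /* fwd_cnt value last advertised to the peer  */
    };

    /* Send a CREDIT_UPDATE only once the space the peer still thinks is
     * free drops below one maximum-sized packet.
     */
    static bool need_credit_update(const struct rx_credit *c, uint32_t max_pkt)
    {
            uint32_t free_space = c->buf_alloc - (c->fwd_cnt - c->last_fwd_cnt);

            return free_space < max_pkt;
    }

Since last_fwd_cnt is refreshed in virtio_transport_inc_tx_pkt() whenever a packet (and therefore an up-to-date fwd_cnt) goes out, the difference fwd_cnt - last_fwd_cnt is the amount of freed space the peer has not yet been told about.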
diff --git a/net/wimax/debugfs.c b/net/wimax/debugfs.c
index 1af56df30276..3c54bb6b925a 100644
--- a/net/wimax/debugfs.c
+++ b/net/wimax/debugfs.c
@@ -13,49 +13,23 @@
#define D_SUBMODULE debugfs
#include "debug-levels.h"
-
-#define __debugfs_register(prefix, name, parent) \
-do { \
- result = d_level_register_debugfs(prefix, name, parent); \
- if (result < 0) \
- goto error; \
-} while (0)
-
-
-int wimax_debugfs_add(struct wimax_dev *wimax_dev)
+void wimax_debugfs_add(struct wimax_dev *wimax_dev)
{
- int result;
struct net_device *net_dev = wimax_dev->net_dev;
- struct device *dev = net_dev->dev.parent;
struct dentry *dentry;
char buf[128];
snprintf(buf, sizeof(buf), "wimax:%s", net_dev->name);
dentry = debugfs_create_dir(buf, NULL);
- result = PTR_ERR(dentry);
- if (IS_ERR(dentry)) {
- if (result == -ENODEV)
- result = 0; /* No debugfs support */
- else
- dev_err(dev, "Can't create debugfs dentry: %d\n",
- result);
- goto out;
- }
wimax_dev->debugfs_dentry = dentry;
- __debugfs_register("wimax_dl_", debugfs, dentry);
- __debugfs_register("wimax_dl_", id_table, dentry);
- __debugfs_register("wimax_dl_", op_msg, dentry);
- __debugfs_register("wimax_dl_", op_reset, dentry);
- __debugfs_register("wimax_dl_", op_rfkill, dentry);
- __debugfs_register("wimax_dl_", op_state_get, dentry);
- __debugfs_register("wimax_dl_", stack, dentry);
- result = 0;
-out:
- return result;
-error:
- debugfs_remove_recursive(wimax_dev->debugfs_dentry);
- return result;
+ d_level_register_debugfs("wimax_dl_", debugfs, dentry);
+ d_level_register_debugfs("wimax_dl_", id_table, dentry);
+ d_level_register_debugfs("wimax_dl_", op_msg, dentry);
+ d_level_register_debugfs("wimax_dl_", op_reset, dentry);
+ d_level_register_debugfs("wimax_dl_", op_rfkill, dentry);
+ d_level_register_debugfs("wimax_dl_", op_state_get, dentry);
+ d_level_register_debugfs("wimax_dl_", stack, dentry);
}
void wimax_debugfs_rm(struct wimax_dev *wimax_dev)
diff --git a/net/wimax/stack.c b/net/wimax/stack.c
index 1ba99d65feca..4b9b1c5e8f3a 100644
--- a/net/wimax/stack.c
+++ b/net/wimax/stack.c
@@ -481,12 +481,7 @@ int wimax_dev_add(struct wimax_dev *wimax_dev, struct net_device *net_dev)
/* Set up user-space interaction */
mutex_lock(&wimax_dev->mutex);
wimax_id_table_add(wimax_dev);
- result = wimax_debugfs_add(wimax_dev);
- if (result < 0) {
- dev_err(dev, "cannot initialize debugfs: %d\n",
- result);
- goto error_debugfs_add;
- }
+ wimax_debugfs_add(wimax_dev);
__wimax_state_set(wimax_dev, WIMAX_ST_DOWN);
mutex_unlock(&wimax_dev->mutex);
@@ -498,10 +493,6 @@ int wimax_dev_add(struct wimax_dev *wimax_dev, struct net_device *net_dev)
d_fnend(3, dev, "(wimax_dev %p net_dev %p) = 0\n", wimax_dev, net_dev);
return 0;
-error_debugfs_add:
- wimax_id_table_rm(wimax_dev);
- mutex_unlock(&wimax_dev->mutex);
- wimax_rfkill_rm(wimax_dev);
error_rfkill_add:
d_fnend(3, dev, "(wimax_dev %p net_dev %p) = %d\n",
wimax_dev, net_dev, result);
diff --git a/net/wimax/wimax-internal.h b/net/wimax/wimax-internal.h
index e819a09337ee..40751207296c 100644
--- a/net/wimax/wimax-internal.h
+++ b/net/wimax/wimax-internal.h
@@ -57,13 +57,10 @@ void __wimax_state_set(struct wimax_dev *wimax_dev, enum wimax_st state)
void __wimax_state_change(struct wimax_dev *, enum wimax_st);
#ifdef CONFIG_DEBUG_FS
-int wimax_debugfs_add(struct wimax_dev *);
+void wimax_debugfs_add(struct wimax_dev *);
void wimax_debugfs_rm(struct wimax_dev *);
#else
-static inline int wimax_debugfs_add(struct wimax_dev *wimax_dev)
-{
- return 0;
-}
+static inline void wimax_debugfs_add(struct wimax_dev *wimax_dev) {}
static inline void wimax_debugfs_rm(struct wimax_dev *wimax_dev) {}
#endif
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 67f8360dfcee..63cf7131f601 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -217,6 +217,8 @@ config LIB80211_CRYPT_WEP
config LIB80211_CRYPT_CCMP
tristate
+ select CRYPTO_AES
+ select CRYPTO_CCM
config LIB80211_CRYPT_TKIP
tristate
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 7dc1bbd0888f..e851cafd8e2f 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -14,6 +14,11 @@
#include "core.h"
#include "rdev-ops.h"
+static bool cfg80211_valid_60g_freq(u32 freq)
+{
+ return freq >= 58320 && freq <= 70200;
+}
+
void cfg80211_chandef_create(struct cfg80211_chan_def *chandef,
struct ieee80211_channel *chan,
enum nl80211_channel_type chan_type)
@@ -23,6 +28,8 @@ void cfg80211_chandef_create(struct cfg80211_chan_def *chandef,
chandef->chan = chan;
chandef->center_freq2 = 0;
+ chandef->edmg.bw_config = 0;
+ chandef->edmg.channels = 0;
switch (chan_type) {
case NL80211_CHAN_NO_HT:
@@ -47,6 +54,91 @@ void cfg80211_chandef_create(struct cfg80211_chan_def *chandef,
}
EXPORT_SYMBOL(cfg80211_chandef_create);
+static bool cfg80211_edmg_chandef_valid(const struct cfg80211_chan_def *chandef)
+{
+ int max_contiguous = 0;
+ int num_of_enabled = 0;
+ int contiguous = 0;
+ int i;
+
+ if (!chandef->edmg.channels || !chandef->edmg.bw_config)
+ return false;
+
+ if (!cfg80211_valid_60g_freq(chandef->chan->center_freq))
+ return false;
+
+ for (i = 0; i < 6; i++) {
+ if (chandef->edmg.channels & BIT(i)) {
+ contiguous++;
+ num_of_enabled++;
+ } else {
+ contiguous = 0;
+ }
+
+ max_contiguous = max(contiguous, max_contiguous);
+ }
+ /* basic verification of edmg configuration according to
+ * IEEE P802.11ay/D4.0 section 9.4.2.251
+ */
+ /* check bw_config against contiguous edmg channels */
+ switch (chandef->edmg.bw_config) {
+ case IEEE80211_EDMG_BW_CONFIG_4:
+ case IEEE80211_EDMG_BW_CONFIG_8:
+ case IEEE80211_EDMG_BW_CONFIG_12:
+ if (max_contiguous < 1)
+ return false;
+ break;
+ case IEEE80211_EDMG_BW_CONFIG_5:
+ case IEEE80211_EDMG_BW_CONFIG_9:
+ case IEEE80211_EDMG_BW_CONFIG_13:
+ if (max_contiguous < 2)
+ return false;
+ break;
+ case IEEE80211_EDMG_BW_CONFIG_6:
+ case IEEE80211_EDMG_BW_CONFIG_10:
+ case IEEE80211_EDMG_BW_CONFIG_14:
+ if (max_contiguous < 3)
+ return false;
+ break;
+ case IEEE80211_EDMG_BW_CONFIG_7:
+ case IEEE80211_EDMG_BW_CONFIG_11:
+ case IEEE80211_EDMG_BW_CONFIG_15:
+ if (max_contiguous < 4)
+ return false;
+ break;
+
+ default:
+ return false;
+ }
+
+ /* check bw_config against aggregated (non contiguous) edmg channels */
+ switch (chandef->edmg.bw_config) {
+ case IEEE80211_EDMG_BW_CONFIG_4:
+ case IEEE80211_EDMG_BW_CONFIG_5:
+ case IEEE80211_EDMG_BW_CONFIG_6:
+ case IEEE80211_EDMG_BW_CONFIG_7:
+ break;
+ case IEEE80211_EDMG_BW_CONFIG_8:
+ case IEEE80211_EDMG_BW_CONFIG_9:
+ case IEEE80211_EDMG_BW_CONFIG_10:
+ case IEEE80211_EDMG_BW_CONFIG_11:
+ if (num_of_enabled < 2)
+ return false;
+ break;
+ case IEEE80211_EDMG_BW_CONFIG_12:
+ case IEEE80211_EDMG_BW_CONFIG_13:
+ case IEEE80211_EDMG_BW_CONFIG_14:
+ case IEEE80211_EDMG_BW_CONFIG_15:
+ if (num_of_enabled < 4 || max_contiguous < 2)
+ return false;
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef)
{
u32 control_freq;
@@ -112,6 +204,10 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef)
return false;
}
+ if (cfg80211_chandef_is_edmg(chandef) &&
+ !cfg80211_edmg_chandef_valid(chandef))
+ return false;
+
return true;
}
EXPORT_SYMBOL(cfg80211_chandef_valid);
@@ -721,12 +817,66 @@ static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy,
return true;
}
+/* check if the operating channels are valid and supported */
+static bool cfg80211_edmg_usable(struct wiphy *wiphy, u8 edmg_channels,
+ enum ieee80211_edmg_bw_config edmg_bw_config,
+ int primary_channel,
+ struct ieee80211_edmg *edmg_cap)
+{
+ struct ieee80211_channel *chan;
+ int i, freq;
+ int channels_counter = 0;
+
+ if (!edmg_channels && !edmg_bw_config)
+ return true;
+
+ if ((!edmg_channels && edmg_bw_config) ||
+ (edmg_channels && !edmg_bw_config))
+ return false;
+
+ if (!(edmg_channels & BIT(primary_channel - 1)))
+ return false;
+
+ /* 60GHz channels 1..6 */
+ for (i = 0; i < 6; i++) {
+ if (!(edmg_channels & BIT(i)))
+ continue;
+
+ if (!(edmg_cap->channels & BIT(i)))
+ return false;
+
+ channels_counter++;
+
+ freq = ieee80211_channel_to_frequency(i + 1,
+ NL80211_BAND_60GHZ);
+ chan = ieee80211_get_channel(wiphy, freq);
+ if (!chan || chan->flags & IEEE80211_CHAN_DISABLED)
+ return false;
+ }
+
+ /* IEEE802.11 allows max 4 channels */
+ if (channels_counter > 4)
+ return false;
+
+ /* check bw_config is a subset of what driver supports
+ * (see IEEE P802.11ay/D4.0 section 9.4.2.251, Table 13)
+ */
+ if ((edmg_bw_config % 4) > (edmg_cap->bw_config % 4))
+ return false;
+
+ if (edmg_bw_config > edmg_cap->bw_config)
+ return false;
+
+ return true;
+}
+
bool cfg80211_chandef_usable(struct wiphy *wiphy,
const struct cfg80211_chan_def *chandef,
u32 prohibited_flags)
{
struct ieee80211_sta_ht_cap *ht_cap;
struct ieee80211_sta_vht_cap *vht_cap;
+ struct ieee80211_edmg *edmg_cap;
u32 width, control_freq, cap;
if (WARN_ON(!cfg80211_chandef_valid(chandef)))
@@ -734,6 +884,15 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
ht_cap = &wiphy->bands[chandef->chan->band]->ht_cap;
vht_cap = &wiphy->bands[chandef->chan->band]->vht_cap;
+ edmg_cap = &wiphy->bands[chandef->chan->band]->edmg_cap;
+
+ if (edmg_cap->channels &&
+ !cfg80211_edmg_usable(wiphy,
+ chandef->edmg.channels,
+ chandef->edmg.bw_config,
+ chandef->chan->hw_value,
+ edmg_cap))
+ return false;
control_freq = chandef->chan->center_freq;
@@ -894,7 +1053,8 @@ static bool cfg80211_ir_permissive_chan(struct wiphy *wiphy,
if (chan == other_chan)
return true;
- if (chan->band != NL80211_BAND_5GHZ)
+ if (chan->band != NL80211_BAND_5GHZ &&
+ chan->band != NL80211_BAND_6GHZ)
continue;
r1 = cfg80211_get_unii(chan->center_freq);
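The EDMG validation added above is driven by two counts over the 6-bit channel bitmap: how many 60 GHz channels are enabled and the longest contiguous run, which the bw_config cross-checks are based on. A small stand-alone sketch of that counting:

    #include <stdio.h>

    static void edmg_stats(unsigned char channels, int *enabled, int *max_run)
    {
            int run = 0;

            *enabled = 0;
            *max_run = 0;
            for (int i = 0; i < 6; i++) {
                    if (channels & (1 << i)) {
                            run++;
                            (*enabled)++;
                    } else {
                            run = 0;
                    }
                    if (run > *max_run)
                            *max_run = run;
            }
    }

    int main(void)
    {
            int enabled, max_run;

            edmg_stats(0x0b /* channels 1, 2 and 4 */, &enabled, &max_run);
            printf("enabled=%d, longest contiguous run=%d\n", enabled, max_run);
            return 0;
    }

For the example bitmap this reports three enabled channels with a longest run of two, which would satisfy e.g. BW_CONFIG_5 (needs a run of at least 2) but not BW_CONFIG_6 (needs 3).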
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 32b3c719fdfc..350513744575 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -5,7 +5,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018-2019 Intel Corporation
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -142,12 +142,10 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev,
if (result)
return result;
- if (rdev->wiphy.debugfsdir &&
- !debugfs_rename(rdev->wiphy.debugfsdir->d_parent,
- rdev->wiphy.debugfsdir,
- rdev->wiphy.debugfsdir->d_parent,
- newname))
- pr_err("failed to rename debugfs dir to %s!\n", newname);
+ if (rdev->wiphy.debugfsdir)
+ debugfs_rename(rdev->wiphy.debugfsdir->d_parent,
+ rdev->wiphy.debugfsdir,
+ rdev->wiphy.debugfsdir->d_parent, newname);
nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY);
@@ -302,12 +300,13 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked)
return 0;
}
-static void cfg80211_rfkill_sync_work(struct work_struct *work)
+static void cfg80211_rfkill_block_work(struct work_struct *work)
{
struct cfg80211_registered_device *rdev;
- rdev = container_of(work, struct cfg80211_registered_device, rfkill_sync);
- cfg80211_rfkill_set_block(rdev, rfkill_blocked(rdev->rfkill));
+ rdev = container_of(work, struct cfg80211_registered_device,
+ rfkill_block);
+ cfg80211_rfkill_set_block(rdev, true);
}
static void cfg80211_event_work(struct work_struct *work)
@@ -518,7 +517,7 @@ use_default_name:
return NULL;
}
- INIT_WORK(&rdev->rfkill_sync, cfg80211_rfkill_sync_work);
+ INIT_WORK(&rdev->rfkill_block, cfg80211_rfkill_block_work);
INIT_WORK(&rdev->conn_work, cfg80211_conn_work);
INIT_WORK(&rdev->event_work, cfg80211_event_work);
@@ -899,11 +898,8 @@ int wiphy_register(struct wiphy *wiphy)
cfg80211_rdev_list_generation++;
/* add to debugfs */
- rdev->wiphy.debugfsdir =
- debugfs_create_dir(wiphy_name(&rdev->wiphy),
- ieee80211_debugfs_dir);
- if (IS_ERR(rdev->wiphy.debugfsdir))
- rdev->wiphy.debugfsdir = NULL;
+ rdev->wiphy.debugfsdir = debugfs_create_dir(wiphy_name(&rdev->wiphy),
+ ieee80211_debugfs_dir);
cfg80211_debugfs_rdev_add(rdev);
nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY);
@@ -1066,7 +1062,7 @@ void wiphy_rfkill_set_hw_state(struct wiphy *wiphy, bool blocked)
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
if (rfkill_set_hw_state(rdev->rfkill, blocked))
- schedule_work(&rdev->rfkill_sync);
+ schedule_work(&rdev->rfkill_block);
}
EXPORT_SYMBOL(wiphy_rfkill_set_hw_state);
diff --git a/net/wireless/core.h b/net/wireless/core.h
index ee8388fe4a92..ed487e324571 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -28,7 +28,7 @@ struct cfg80211_registered_device {
/* rfkill support */
struct rfkill_ops rfkill_ops;
struct rfkill *rfkill;
- struct work_struct rfkill_sync;
+ struct work_struct rfkill_block;
/* ISO / IEC 3166 alpha2 for which this device is receiving
* country IEs on, this can help disregard country IEs from APs
@@ -306,6 +306,8 @@ void ieee80211_set_bitrate_flags(struct wiphy *wiphy);
void cfg80211_bss_expire(struct cfg80211_registered_device *rdev);
void cfg80211_bss_age(struct cfg80211_registered_device *rdev,
unsigned long age_secs);
+void cfg80211_update_assoc_bss_entry(struct wireless_dev *wdev,
+ struct ieee80211_channel *channel);
/* IBSS */
int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index d1743e6abc34..ae8fe66a9bb8 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -104,13 +104,19 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
* use the mandatory rate set for 11b or
* 11a for maximum compatibility.
*/
- struct ieee80211_supported_band *sband =
- rdev->wiphy.bands[params->chandef.chan->band];
+ struct ieee80211_supported_band *sband;
+ enum nl80211_band band;
+ u32 flag;
int j;
- u32 flag = params->chandef.chan->band == NL80211_BAND_5GHZ ?
- IEEE80211_RATE_MANDATORY_A :
- IEEE80211_RATE_MANDATORY_B;
+ band = params->chandef.chan->band;
+ if (band == NL80211_BAND_5GHZ ||
+ band == NL80211_BAND_6GHZ)
+ flag = IEEE80211_RATE_MANDATORY_A;
+ else
+ flag = IEEE80211_RATE_MANDATORY_B;
+
+ sband = rdev->wiphy.bands[band];
for (j = 0; j < sband->n_bitrates; j++) {
if (sband->bitrates[j].flags & flag)
params->basic_rates |= BIT(j);
diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c
index 7e8ff9d7dcfa..6a5f08f7491e 100644
--- a/net/wireless/lib80211_crypt_ccmp.c
+++ b/net/wireless/lib80211_crypt_ccmp.c
@@ -22,6 +22,7 @@
#include <linux/ieee80211.h>
#include <linux/crypto.h>
+#include <crypto/aead.h>
#include <net/lib80211.h>
@@ -48,20 +49,13 @@ struct lib80211_ccmp_data {
int key_idx;
- struct crypto_cipher *tfm;
+ struct crypto_aead *tfm;
/* scratch buffers for virt_to_page() (crypto API) */
- u8 tx_b0[AES_BLOCK_LEN], tx_b[AES_BLOCK_LEN],
- tx_e[AES_BLOCK_LEN], tx_s0[AES_BLOCK_LEN];
- u8 rx_b0[AES_BLOCK_LEN], rx_b[AES_BLOCK_LEN], rx_a[AES_BLOCK_LEN];
+ u8 tx_aad[2 * AES_BLOCK_LEN];
+ u8 rx_aad[2 * AES_BLOCK_LEN];
};
-static inline void lib80211_ccmp_aes_encrypt(struct crypto_cipher *tfm,
- const u8 pt[16], u8 ct[16])
-{
- crypto_cipher_encrypt_one(tfm, ct, pt);
-}
-
static void *lib80211_ccmp_init(int key_idx)
{
struct lib80211_ccmp_data *priv;
@@ -71,7 +65,7 @@ static void *lib80211_ccmp_init(int key_idx)
goto fail;
priv->key_idx = key_idx;
- priv->tfm = crypto_alloc_cipher("aes", 0, 0);
+ priv->tfm = crypto_alloc_aead("ccm(aes)", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(priv->tfm)) {
priv->tfm = NULL;
goto fail;
@@ -82,7 +76,7 @@ static void *lib80211_ccmp_init(int key_idx)
fail:
if (priv) {
if (priv->tfm)
- crypto_free_cipher(priv->tfm);
+ crypto_free_aead(priv->tfm);
kfree(priv);
}
@@ -93,25 +87,16 @@ static void lib80211_ccmp_deinit(void *priv)
{
struct lib80211_ccmp_data *_priv = priv;
if (_priv && _priv->tfm)
- crypto_free_cipher(_priv->tfm);
+ crypto_free_aead(_priv->tfm);
kfree(priv);
}
-static inline void xor_block(u8 * b, u8 * a, size_t len)
-{
- int i;
- for (i = 0; i < len; i++)
- b[i] ^= a[i];
-}
-
-static void ccmp_init_blocks(struct crypto_cipher *tfm,
- struct ieee80211_hdr *hdr,
- u8 * pn, size_t dlen, u8 * b0, u8 * auth, u8 * s0)
+static int ccmp_init_iv_and_aad(const struct ieee80211_hdr *hdr,
+ const u8 *pn, u8 *iv, u8 *aad)
{
u8 *pos, qc = 0;
size_t aad_len;
int a4_included, qc_included;
- u8 aad[2 * AES_BLOCK_LEN];
a4_included = ieee80211_has_a4(hdr->frame_control);
qc_included = ieee80211_is_data_qos(hdr->frame_control);
@@ -127,17 +112,19 @@ static void ccmp_init_blocks(struct crypto_cipher *tfm,
aad_len += 2;
}
- /* CCM Initial Block:
- * Flag (Include authentication header, M=3 (8-octet MIC),
- * L=1 (2-octet Dlen))
- * Nonce: 0x00 | A2 | PN
- * Dlen */
- b0[0] = 0x59;
- b0[1] = qc;
- memcpy(b0 + 2, hdr->addr2, ETH_ALEN);
- memcpy(b0 + 8, pn, CCMP_PN_LEN);
- b0[14] = (dlen >> 8) & 0xff;
- b0[15] = dlen & 0xff;
+ /* In CCM, the initial vectors (IV) used for CTR mode encryption and CBC
+ * mode authentication are not allowed to collide, yet both are derived
+ * from the same vector. We only set L := 1 here to indicate that the
+ * data size can be represented in (L+1) bytes. The CCM layer will take
+ * care of storing the data length in the top (L+1) bytes and setting
+ * and clearing the other bits as is required to derive the two IVs.
+ */
+ iv[0] = 0x1;
+
+ /* Nonce: QC | A2 | PN */
+ iv[1] = qc;
+ memcpy(iv + 2, hdr->addr2, ETH_ALEN);
+ memcpy(iv + 8, pn, CCMP_PN_LEN);
/* AAD:
* FC with bits 4..6 and 11..13 masked to zero; 14 is always one
@@ -147,31 +134,20 @@ static void ccmp_init_blocks(struct crypto_cipher *tfm,
* QC (if present)
*/
pos = (u8 *) hdr;
- aad[0] = 0; /* aad_len >> 8 */
- aad[1] = aad_len & 0xff;
- aad[2] = pos[0] & 0x8f;
- aad[3] = pos[1] & 0xc7;
- memcpy(aad + 4, hdr->addr1, 3 * ETH_ALEN);
+ aad[0] = pos[0] & 0x8f;
+ aad[1] = pos[1] & 0xc7;
+ memcpy(aad + 2, hdr->addr1, 3 * ETH_ALEN);
pos = (u8 *) & hdr->seq_ctrl;
- aad[22] = pos[0] & 0x0f;
- aad[23] = 0; /* all bits masked */
- memset(aad + 24, 0, 8);
+ aad[20] = pos[0] & 0x0f;
+ aad[21] = 0; /* all bits masked */
+ memset(aad + 22, 0, 8);
if (a4_included)
- memcpy(aad + 24, hdr->addr4, ETH_ALEN);
+ memcpy(aad + 22, hdr->addr4, ETH_ALEN);
if (qc_included) {
- aad[a4_included ? 30 : 24] = qc;
+ aad[a4_included ? 28 : 22] = qc;
/* rest of QC masked */
}
-
- /* Start with the first block and AAD */
- lib80211_ccmp_aes_encrypt(tfm, b0, auth);
- xor_block(auth, aad, AES_BLOCK_LEN);
- lib80211_ccmp_aes_encrypt(tfm, auth, auth);
- xor_block(auth, &aad[AES_BLOCK_LEN], AES_BLOCK_LEN);
- lib80211_ccmp_aes_encrypt(tfm, auth, auth);
- b0[0] &= 0x07;
- b0[14] = b0[15] = 0;
- lib80211_ccmp_aes_encrypt(tfm, b0, s0);
+ return aad_len;
}
static int lib80211_ccmp_hdr(struct sk_buff *skb, int hdr_len,
@@ -214,13 +190,13 @@ static int lib80211_ccmp_hdr(struct sk_buff *skb, int hdr_len,
static int lib80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
{
struct lib80211_ccmp_data *key = priv;
- int data_len, i, blocks, last, len;
- u8 *pos, *mic;
struct ieee80211_hdr *hdr;
- u8 *b0 = key->tx_b0;
- u8 *b = key->tx_b;
- u8 *e = key->tx_e;
- u8 *s0 = key->tx_s0;
+ struct aead_request *req;
+ struct scatterlist sg[2];
+ u8 *aad = key->tx_aad;
+ u8 iv[AES_BLOCK_LEN];
+ int len, data_len, aad_len;
+ int ret;
if (skb_tailroom(skb) < CCMP_MIC_LEN || skb->len < hdr_len)
return -1;
@@ -230,31 +206,28 @@ static int lib80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
if (len < 0)
return -1;
- pos = skb->data + hdr_len + CCMP_HDR_LEN;
+ req = aead_request_alloc(key->tfm, GFP_ATOMIC);
+ if (!req)
+ return -ENOMEM;
+
hdr = (struct ieee80211_hdr *)skb->data;
- ccmp_init_blocks(key->tfm, hdr, key->tx_pn, data_len, b0, b, s0);
-
- blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN);
- last = data_len % AES_BLOCK_LEN;
-
- for (i = 1; i <= blocks; i++) {
- len = (i == blocks && last) ? last : AES_BLOCK_LEN;
- /* Authentication */
- xor_block(b, pos, len);
- lib80211_ccmp_aes_encrypt(key->tfm, b, b);
- /* Encryption, with counter */
- b0[14] = (i >> 8) & 0xff;
- b0[15] = i & 0xff;
- lib80211_ccmp_aes_encrypt(key->tfm, b0, e);
- xor_block(pos, e, len);
- pos += len;
- }
+ aad_len = ccmp_init_iv_and_aad(hdr, key->tx_pn, iv, aad);
- mic = skb_put(skb, CCMP_MIC_LEN);
- for (i = 0; i < CCMP_MIC_LEN; i++)
- mic[i] = b[i] ^ s0[i];
+ skb_put(skb, CCMP_MIC_LEN);
- return 0;
+ sg_init_table(sg, 2);
+ sg_set_buf(&sg[0], aad, aad_len);
+ sg_set_buf(&sg[1], skb->data + hdr_len + CCMP_HDR_LEN,
+ data_len + CCMP_MIC_LEN);
+
+ aead_request_set_callback(req, 0, NULL, NULL);
+ aead_request_set_ad(req, aad_len);
+ aead_request_set_crypt(req, sg, sg, data_len, iv);
+
+ ret = crypto_aead_encrypt(req);
+ aead_request_free(req);
+
+ return ret;
}
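The conversion above replaces the hand-rolled CBC-MAC/CTR loops with the kernel AEAD API. For reference, the bare "ccm(aes)" request pattern it relies on looks roughly like the following; this is a hedged sketch with made-up local buffers (key, iv, aad, data, aad_len, data_len), assumed to come from kmalloc() since stack memory cannot be placed in a scatterlist, and with error handling trimmed:

        /* sketch only: one-shot CCM encryption through the crypto AEAD API */
        struct crypto_aead *tfm = crypto_alloc_aead("ccm(aes)", 0, CRYPTO_ALG_ASYNC);
        struct aead_request *req;
        struct scatterlist sg[2];
        int err;

        crypto_aead_setkey(tfm, key, 16);        /* CCMP temporal key */
        crypto_aead_setauthsize(tfm, 8);         /* 8-octet MIC */

        req = aead_request_alloc(tfm, GFP_KERNEL);
        sg_init_table(sg, 2);
        sg_set_buf(&sg[0], aad, aad_len);        /* associated data first */
        sg_set_buf(&sg[1], data, data_len + 8);  /* payload plus room for the MIC */

        aead_request_set_callback(req, 0, NULL, NULL);     /* synchronous use */
        aead_request_set_ad(req, aad_len);
        aead_request_set_crypt(req, sg, sg, data_len, iv); /* iv[0] encodes L, as above */

        err = crypto_aead_encrypt(req);
        aead_request_free(req);
        crypto_free_aead(tfm);

Decryption follows the same shape with crypto_aead_decrypt() and data_len covering payload plus MIC, which is exactly what the new lib80211_ccmp_decrypt() below does.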
/*
@@ -283,13 +256,13 @@ static int lib80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
struct lib80211_ccmp_data *key = priv;
u8 keyidx, *pos;
struct ieee80211_hdr *hdr;
- u8 *b0 = key->rx_b0;
- u8 *b = key->rx_b;
- u8 *a = key->rx_a;
+ struct aead_request *req;
+ struct scatterlist sg[2];
+ u8 *aad = key->rx_aad;
+ u8 iv[AES_BLOCK_LEN];
u8 pn[6];
- int i, blocks, last, len;
- size_t data_len = skb->len - hdr_len - CCMP_HDR_LEN - CCMP_MIC_LEN;
- u8 *mic = skb->data + skb->len - CCMP_MIC_LEN;
+ int aad_len, ret;
+ size_t data_len = skb->len - hdr_len - CCMP_HDR_LEN;
if (skb->len < hdr_len + CCMP_HDR_LEN + CCMP_MIC_LEN) {
key->dot11RSNAStatsCCMPFormatErrors++;
@@ -337,28 +310,26 @@ static int lib80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
return -4;
}
- ccmp_init_blocks(key->tfm, hdr, pn, data_len, b0, a, b);
- xor_block(mic, b, CCMP_MIC_LEN);
-
- blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN);
- last = data_len % AES_BLOCK_LEN;
-
- for (i = 1; i <= blocks; i++) {
- len = (i == blocks && last) ? last : AES_BLOCK_LEN;
- /* Decrypt, with counter */
- b0[14] = (i >> 8) & 0xff;
- b0[15] = i & 0xff;
- lib80211_ccmp_aes_encrypt(key->tfm, b0, b);
- xor_block(pos, b, len);
- /* Authentication */
- xor_block(a, pos, len);
- lib80211_ccmp_aes_encrypt(key->tfm, a, a);
- pos += len;
- }
+ req = aead_request_alloc(key->tfm, GFP_ATOMIC);
+ if (!req)
+ return -ENOMEM;
- if (memcmp(mic, a, CCMP_MIC_LEN) != 0) {
- net_dbg_ratelimited("CCMP: decrypt failed: STA=%pM\n",
- hdr->addr2);
+ aad_len = ccmp_init_iv_and_aad(hdr, pn, iv, aad);
+
+ sg_init_table(sg, 2);
+ sg_set_buf(&sg[0], aad, aad_len);
+ sg_set_buf(&sg[1], pos, data_len);
+
+ aead_request_set_callback(req, 0, NULL, NULL);
+ aead_request_set_ad(req, aad_len);
+ aead_request_set_crypt(req, sg, sg, data_len, iv);
+
+ ret = crypto_aead_decrypt(req);
+ aead_request_free(req);
+
+ if (ret) {
+ net_dbg_ratelimited("CCMP: decrypt failed: STA=%pM (%d)\n",
+ hdr->addr2, ret);
key->dot11RSNAStatsCCMPDecryptErrors++;
return -5;
}
@@ -377,7 +348,7 @@ static int lib80211_ccmp_set_key(void *key, int len, u8 * seq, void *priv)
{
struct lib80211_ccmp_data *data = priv;
int keyidx;
- struct crypto_cipher *tfm = data->tfm;
+ struct crypto_aead *tfm = data->tfm;
keyidx = data->key_idx;
memset(data, 0, sizeof(*data));
@@ -394,7 +365,9 @@ static int lib80211_ccmp_set_key(void *key, int len, u8 * seq, void *priv)
data->rx_pn[4] = seq[1];
data->rx_pn[5] = seq[0];
}
- crypto_cipher_setkey(data->tfm, data->key, CCMP_TK_LEN);
+ if (crypto_aead_setauthsize(data->tfm, CCMP_MIC_LEN) ||
+ crypto_aead_setkey(data->tfm, data->key, CCMP_TK_LEN))
+ return -1;
} else if (len == 0)
data->key_set = 0;
else
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index fd05ae1437a9..d21b1581a665 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -281,7 +281,16 @@ nl80211_pmsr_attr_policy[NL80211_PMSR_ATTR_MAX + 1] = {
NLA_POLICY_NESTED_ARRAY(nl80211_psmr_peer_attr_policy),
};
+static const struct nla_policy
+he_obss_pd_policy[NL80211_HE_OBSS_PD_ATTR_MAX + 1] = {
+ [NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET] =
+ NLA_POLICY_RANGE(NLA_U8, 1, 20),
+ [NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET] =
+ NLA_POLICY_RANGE(NLA_U8, 1, 20),
+};
+
const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
+ [0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD },
[NL80211_ATTR_WIPHY] = { .type = NLA_U32 },
[NL80211_ATTR_WIPHY_NAME] = { .type = NLA_NUL_STRING,
.len = 20-1 },
@@ -289,6 +298,13 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_WIPHY_FREQ] = { .type = NLA_U32 },
[NL80211_ATTR_WIPHY_CHANNEL_TYPE] = { .type = NLA_U32 },
+ [NL80211_ATTR_WIPHY_EDMG_CHANNELS] = NLA_POLICY_RANGE(NLA_U8,
+ NL80211_EDMG_CHANNELS_MIN,
+ NL80211_EDMG_CHANNELS_MAX),
+ [NL80211_ATTR_WIPHY_EDMG_BW_CONFIG] = NLA_POLICY_RANGE(NLA_U8,
+ NL80211_EDMG_BW_CONFIG_MIN,
+ NL80211_EDMG_BW_CONFIG_MAX),
+
[NL80211_ATTR_CHANNEL_WIDTH] = { .type = NLA_U32 },
[NL80211_ATTR_CENTER_FREQ1] = { .type = NLA_U32 },
[NL80211_ATTR_CENTER_FREQ2] = { .type = NLA_U32 },
@@ -574,6 +590,7 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_SAE_PASSWORD] = { .type = NLA_BINARY,
.len = SAE_PASSWORD_MAX_LEN },
[NL80211_ATTR_TWT_RESPONDER] = { .type = NLA_FLAG },
+ [NL80211_ATTR_HE_OBSS_PD] = NLA_POLICY_NESTED(he_obss_pd_policy),
};
/* policy for the key attributes */
@@ -667,6 +684,7 @@ static const struct nla_policy
nl80211_match_band_rssi_policy[NUM_NL80211_BANDS] = {
[NL80211_BAND_2GHZ] = { .type = NLA_S32 },
[NL80211_BAND_5GHZ] = { .type = NLA_S32 },
+ [NL80211_BAND_6GHZ] = { .type = NLA_S32 },
[NL80211_BAND_60GHZ] = { .type = NLA_S32 },
};
@@ -749,17 +767,25 @@ int nl80211_prepare_wdev_dump(struct netlink_callback *cb,
int err;
if (!cb->args[0]) {
+ struct nlattr **attrbuf;
+
+ attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf),
+ GFP_KERNEL);
+ if (!attrbuf)
+ return -ENOMEM;
+
err = nlmsg_parse_deprecated(cb->nlh,
GENL_HDRLEN + nl80211_fam.hdrsize,
- genl_family_attrbuf(&nl80211_fam),
- nl80211_fam.maxattr,
+ attrbuf, nl80211_fam.maxattr,
nl80211_policy, NULL);
- if (err)
+ if (err) {
+ kfree(attrbuf);
return err;
+ }
- *wdev = __cfg80211_wdev_from_attrs(
- sock_net(cb->skb->sk),
- genl_family_attrbuf(&nl80211_fam));
+ *wdev = __cfg80211_wdev_from_attrs(sock_net(cb->skb->sk),
+ attrbuf);
+ kfree(attrbuf);
if (IS_ERR(*wdev))
return PTR_ERR(*wdev);
*rdev = wiphy_to_rdev((*wdev)->wiphy);
@@ -1555,6 +1581,15 @@ static int nl80211_send_band_rateinfo(struct sk_buff *msg,
nla_nest_end(msg, nl_iftype_data);
}
+ /* add EDMG info */
+ if (sband->edmg_cap.channels &&
+ (nla_put_u8(msg, NL80211_BAND_ATTR_EDMG_CHANNELS,
+ sband->edmg_cap.channels) ||
+ nla_put_u8(msg, NL80211_BAND_ATTR_EDMG_BW_CONFIG,
+ sband->edmg_cap.bw_config)))
+ return -ENOBUFS;

+
/* add bitrates */
nl_rates = nla_nest_start_noflag(msg, NL80211_BAND_ATTR_RATES);
if (!nl_rates)
@@ -2065,6 +2100,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
CMD(add_tx_ts, ADD_TX_TS);
CMD(set_multicast_to_unicast, SET_MULTICAST_TO_UNICAST);
CMD(update_connect_params, UPDATE_CONNECT_PARAMS);
+ CMD(update_ft_ies, UPDATE_FT_IES);
}
#undef CMD
@@ -2172,6 +2208,30 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
rdev->wiphy.vht_capa_mod_mask))
goto nla_put_failure;
+ if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN,
+ rdev->wiphy.perm_addr))
+ goto nla_put_failure;
+
+ if (!is_zero_ether_addr(rdev->wiphy.addr_mask) &&
+ nla_put(msg, NL80211_ATTR_MAC_MASK, ETH_ALEN,
+ rdev->wiphy.addr_mask))
+ goto nla_put_failure;
+
+ if (rdev->wiphy.n_addresses > 1) {
+ void *attr;
+
+ attr = nla_nest_start(msg, NL80211_ATTR_MAC_ADDRS);
+ if (!attr)
+ goto nla_put_failure;
+
+ for (i = 0; i < rdev->wiphy.n_addresses; i++)
+ if (nla_put(msg, i + 1, ETH_ALEN,
+ rdev->wiphy.addresses[i].addr))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, attr);
+ }
+
state->split_start++;
break;
case 10:
@@ -2366,14 +2426,21 @@ static int nl80211_dump_wiphy_parse(struct sk_buff *skb,
struct netlink_callback *cb,
struct nl80211_dump_wiphy_state *state)
{
- struct nlattr **tb = genl_family_attrbuf(&nl80211_fam);
- int ret = nlmsg_parse_deprecated(cb->nlh,
- GENL_HDRLEN + nl80211_fam.hdrsize,
- tb, nl80211_fam.maxattr,
- nl80211_policy, NULL);
+ struct nlattr **tb = kcalloc(NUM_NL80211_ATTR, sizeof(*tb), GFP_KERNEL);
+ int ret;
+
+ if (!tb)
+ return -ENOMEM;
+
+ ret = nlmsg_parse_deprecated(cb->nlh,
+ GENL_HDRLEN + nl80211_fam.hdrsize,
+ tb, nl80211_fam.maxattr,
+ nl80211_policy, NULL);
/* ignore parse errors for backward compatibility */
- if (ret)
- return 0;
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
state->split = tb[NL80211_ATTR_SPLIT_WIPHY_DUMP];
if (tb[NL80211_ATTR_WIPHY])
@@ -2386,8 +2453,10 @@ static int nl80211_dump_wiphy_parse(struct sk_buff *skb,
int ifidx = nla_get_u32(tb[NL80211_ATTR_IFINDEX]);
netdev = __dev_get_by_index(sock_net(skb->sk), ifidx);
- if (!netdev)
- return -ENODEV;
+ if (!netdev) {
+ ret = -ENODEV;
+ goto out;
+ }
if (netdev->ieee80211_ptr) {
rdev = wiphy_to_rdev(
netdev->ieee80211_ptr->wiphy);
@@ -2395,7 +2464,10 @@ static int nl80211_dump_wiphy_parse(struct sk_buff *skb,
}
}
- return 0;
+ ret = 0;
+out:
+ kfree(tb);
+ return ret;
}
static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
@@ -2622,6 +2694,18 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ2]);
}
+ if (info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]) {
+ chandef->edmg.channels =
+ nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]);
+
+ if (info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG])
+ chandef->edmg.bw_config =
+ nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG]);
+ } else {
+ chandef->edmg.bw_config = 0;
+ chandef->edmg.channels = 0;
+ }
+
if (!cfg80211_chandef_valid(chandef)) {
NL_SET_ERR_MSG(extack, "invalid channel definition");
return -EINVAL;
@@ -4359,6 +4443,34 @@ static int nl80211_parse_beacon(struct cfg80211_registered_device *rdev,
return 0;
}
+static int nl80211_parse_he_obss_pd(struct nlattr *attrs,
+ struct ieee80211_he_obss_pd *he_obss_pd)
+{
+ struct nlattr *tb[NL80211_HE_OBSS_PD_ATTR_MAX + 1];
+ int err;
+
+ err = nla_parse_nested(tb, NL80211_HE_OBSS_PD_ATTR_MAX, attrs,
+ he_obss_pd_policy, NULL);
+ if (err)
+ return err;
+
+ if (!tb[NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET] ||
+ !tb[NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET])
+ return -EINVAL;
+
+ he_obss_pd->min_offset =
+ nla_get_u8(tb[NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET]);
+ he_obss_pd->max_offset =
+ nla_get_u8(tb[NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET]);
+
+ if (he_obss_pd->min_offset >= he_obss_pd->max_offset)
+ return -EINVAL;
+
+ he_obss_pd->enable = true;
+
+ return 0;
+}
+
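The parser above expects both offsets inside a nested NL80211_ATTR_HE_OBSS_PD attribute. A hedged sketch of how the corresponding nest is built on the sending side (the values are arbitrary examples within the 1-20 policy range):

        /* sketch: nested OBSS-PD attribute in an NL80211_CMD_START_AP message */
        struct nlattr *obss_pd = nla_nest_start(msg, NL80211_ATTR_HE_OBSS_PD);

        nla_put_u8(msg, NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET, 3);
        nla_put_u8(msg, NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET, 10);
        nla_nest_end(msg, obss_pd);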
static void nl80211_check_ap_rate_selectors(struct cfg80211_ap_settings *params,
const u8 *rates)
{
@@ -4643,6 +4755,14 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
params.twt_responder =
nla_get_flag(info->attrs[NL80211_ATTR_TWT_RESPONDER]);
+ if (info->attrs[NL80211_ATTR_HE_OBSS_PD]) {
+ err = nl80211_parse_he_obss_pd(
+ info->attrs[NL80211_ATTR_HE_OBSS_PD],
+ &params.he_obss_pd);
+ if (err)
+ return err;
+ }
+
nl80211_calculate_ap_params(&params);
if (info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT])
@@ -4941,6 +5061,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
PUT_SINFO(CONNECTED_TIME, connected_time, u32);
PUT_SINFO(INACTIVE_TIME, inactive_time, u32);
+ PUT_SINFO_U64(ASSOC_AT_BOOTTIME, assoc_at);
if (sinfo->filled & (BIT_ULL(NL80211_STA_INFO_RX_BYTES) |
BIT_ULL(NL80211_STA_INFO_RX_BYTES64)) &&
@@ -8685,6 +8806,10 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq,
nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME_SCAN,
survey->time_scan, NL80211_SURVEY_INFO_PAD))
goto nla_put_failure;
+ if ((survey->filled & SURVEY_INFO_TIME_BSS_RX) &&
+ nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME_BSS_RX,
+ survey->time_bss_rx, NL80211_SURVEY_INFO_PAD))
+ goto nla_put_failure;
nla_nest_end(msg, infoattr);
@@ -8698,7 +8823,7 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq,
static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam);
+ struct nlattr **attrbuf;
struct survey_info survey;
struct cfg80211_registered_device *rdev;
struct wireless_dev *wdev;
@@ -8706,6 +8831,10 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
int res;
bool radio_stats;
+ attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf), GFP_KERNEL);
+ if (!attrbuf)
+ return -ENOMEM;
+
rtnl_lock();
res = nl80211_prepare_wdev_dump(cb, &rdev, &wdev);
if (res)
@@ -8750,6 +8879,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
cb->args[2] = survey_idx;
res = skb->len;
out_err:
+ kfree(attrbuf);
rtnl_unlock();
return res;
}
@@ -9609,6 +9739,7 @@ static int nl80211_testmode_dump(struct sk_buff *skb,
struct netlink_callback *cb)
{
struct cfg80211_registered_device *rdev;
+ struct nlattr **attrbuf = NULL;
int err;
long phy_idx;
void *data = NULL;
@@ -9629,7 +9760,12 @@ static int nl80211_testmode_dump(struct sk_buff *skb,
goto out_err;
}
} else {
- struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam);
+ attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf),
+ GFP_KERNEL);
+ if (!attrbuf) {
+ err = -ENOMEM;
+ goto out_err;
+ }
err = nlmsg_parse_deprecated(cb->nlh,
GENL_HDRLEN + nl80211_fam.hdrsize,
@@ -9696,6 +9832,7 @@ static int nl80211_testmode_dump(struct sk_buff *skb,
/* see above */
cb->args[0] = phy_idx + 1;
out_err:
+ kfree(attrbuf);
rtnl_unlock();
return err;
}
@@ -9790,6 +9927,15 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
}
+ if (info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]) {
+ connect.edmg.channels =
+ nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]);
+
+ if (info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG])
+ connect.edmg.bw_config =
+ nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG]);
+ }
+
if (connect.privacy && info->attrs[NL80211_ATTR_KEYS]) {
connkeys = nl80211_parse_connkeys(rdev, info, NULL);
if (IS_ERR(connkeys))
@@ -10659,9 +10805,11 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev,
hyst = wdev->cqm_config->rssi_hyst;
n = wdev->cqm_config->n_rssi_thresholds;
- for (i = 0; i < n; i++)
+ for (i = 0; i < n; i++) {
+ i = array_index_nospec(i, n);
if (last < wdev->cqm_config->rssi_thresholds[i])
break;
+ }
low_index = i - 1;
if (low_index >= 0) {
@@ -12789,7 +12937,7 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
struct cfg80211_registered_device **rdev,
struct wireless_dev **wdev)
{
- struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam);
+ struct nlattr **attrbuf;
u32 vid, subcmd;
unsigned int i;
int vcmd_idx = -1;
@@ -12820,24 +12968,32 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
return 0;
}
+ attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf), GFP_KERNEL);
+ if (!attrbuf)
+ return -ENOMEM;
+
err = nlmsg_parse_deprecated(cb->nlh,
GENL_HDRLEN + nl80211_fam.hdrsize,
attrbuf, nl80211_fam.maxattr,
nl80211_policy, NULL);
if (err)
- return err;
+ goto out;
if (!attrbuf[NL80211_ATTR_VENDOR_ID] ||
- !attrbuf[NL80211_ATTR_VENDOR_SUBCMD])
- return -EINVAL;
+ !attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) {
+ err = -EINVAL;
+ goto out;
+ }
*wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), attrbuf);
if (IS_ERR(*wdev))
*wdev = NULL;
*rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), attrbuf);
- if (IS_ERR(*rdev))
- return PTR_ERR(*rdev);
+ if (IS_ERR(*rdev)) {
+ err = PTR_ERR(*rdev);
+ goto out;
+ }
vid = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_ID]);
subcmd = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_SUBCMD]);
@@ -12850,15 +13006,19 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
if (vcmd->info.vendor_id != vid || vcmd->info.subcmd != subcmd)
continue;
- if (!vcmd->dumpit)
- return -EOPNOTSUPP;
+ if (!vcmd->dumpit) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
vcmd_idx = i;
break;
}
- if (vcmd_idx < 0)
- return -EOPNOTSUPP;
+ if (vcmd_idx < 0) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
if (attrbuf[NL80211_ATTR_VENDOR_DATA]) {
data = nla_data(attrbuf[NL80211_ATTR_VENDOR_DATA]);
@@ -12869,7 +13029,7 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
attrbuf[NL80211_ATTR_VENDOR_DATA],
cb->extack);
if (err)
- return err;
+ goto out;
}
/* 0 is the first index - add 1 to parse only once */
@@ -12881,7 +13041,10 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
cb->args[4] = data_len;
/* keep rtnl locked in successful case */
- return 0;
+ err = 0;
+out:
+ kfree(attrbuf);
+ return err;
}
static int nl80211_vendor_cmd_dump(struct sk_buff *skb,
@@ -14559,6 +14722,7 @@ static struct genl_family nl80211_fam __ro_after_init = {
.n_ops = ARRAY_SIZE(nl80211_ops),
.mcgrps = nl80211_mcgrps,
.n_mcgrps = ARRAY_SIZE(nl80211_mcgrps),
+ .parallel_ops = true,
};
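Enabling parallel_ops means dump callbacks can no longer rely on the single per-family buffer returned by genl_family_attrbuf(), which is why every dump path touched earlier in this file now allocates its own attribute array. The shared pattern, condensed:

        struct nlattr **attrbuf;
        int err;

        attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf), GFP_KERNEL);
        if (!attrbuf)
                return -ENOMEM;

        err = nlmsg_parse_deprecated(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
                                     attrbuf, nl80211_fam.maxattr,
                                     nl80211_policy, NULL);
        /* ... use attrbuf ... */
        kfree(attrbuf);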
/* notification functions */
@@ -14835,12 +14999,10 @@ void nl80211_common_reg_change_event(enum nl80211_commands cmd_id,
return;
hdr = nl80211hdr_put(msg, 0, 0, 0, cmd_id);
- if (!hdr) {
- nlmsg_free(msg);
- return;
- }
+ if (!hdr)
+ goto nla_put_failure;
- if (nl80211_reg_change_event_fill(msg, request) == false)
+ if (!nl80211_reg_change_event_fill(msg, request))
goto nla_put_failure;
genlmsg_end(msg, hdr);
@@ -16090,7 +16252,9 @@ void cfg80211_ch_switch_notify(struct net_device *dev,
if (wdev->iftype == NL80211_IFTYPE_STATION &&
!WARN_ON(!wdev->current_bss))
- wdev->current_bss->pub.channel = chandef->chan;
+ cfg80211_update_assoc_bss_entry(wdev, chandef->chan);
+
+ cfg80211_sched_dfs_chan_update(rdev);
nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL,
NL80211_CMD_CH_SWITCH_NOTIFY, 0);
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 327479ce69f5..5311d0ae2454 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -3806,8 +3806,9 @@ void wiphy_regulatory_deregister(struct wiphy *wiphy)
}
/*
- * See http://www.fcc.gov/document/5-ghz-unlicensed-spectrum-unii, for
- * UNII band definitions
+ * See FCC notices for UNII band definitions
+ * 5GHz: https://www.fcc.gov/document/5-ghz-unlicensed-spectrum-unii
+ * 6GHz: https://www.fcc.gov/document/fcc-proposes-more-spectrum-unlicensed-use-0
*/
int cfg80211_get_unii(int freq)
{
@@ -3831,6 +3832,22 @@ int cfg80211_get_unii(int freq)
if (freq > 5725 && freq <= 5825)
return 4;
+ /* UNII-5 */
+ if (freq > 5925 && freq <= 6425)
+ return 5;
+
+ /* UNII-6 */
+ if (freq > 6425 && freq <= 6525)
+ return 6;
+
+ /* UNII-7 */
+ if (freq > 6525 && freq <= 6875)
+ return 7;
+
+ /* UNII-8 */
+ if (freq > 6875 && freq <= 7125)
+ return 8;
+
return -EINVAL;
}
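A few spot checks of the extended mapping (a quick sketch, not part of the patch):

        /* 6 GHz examples against the ranges above */
        cfg80211_get_unii(5955);   /* -> 5 (UNII-5: 5925-6425 MHz) */
        cfg80211_get_unii(6435);   /* -> 6 (UNII-6: 6425-6525 MHz) */
        cfg80211_get_unii(6875);   /* -> 7 (UNII-7: 6525-6875 MHz) */
        cfg80211_get_unii(7125);   /* -> 8 (UNII-8: 6875-7125 MHz) */
        cfg80211_get_unii(7200);   /* -> -EINVAL */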
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index d66e6d4b7555..d313c9befa23 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -1091,6 +1091,93 @@ struct cfg80211_non_tx_bss {
u8 bssid_index;
};
+static bool
+cfg80211_update_known_bss(struct cfg80211_registered_device *rdev,
+ struct cfg80211_internal_bss *known,
+ struct cfg80211_internal_bss *new,
+ bool signal_valid)
+{
+ lockdep_assert_held(&rdev->bss_lock);
+
+ /* Update IEs */
+ if (rcu_access_pointer(new->pub.proberesp_ies)) {
+ const struct cfg80211_bss_ies *old;
+
+ old = rcu_access_pointer(known->pub.proberesp_ies);
+
+ rcu_assign_pointer(known->pub.proberesp_ies,
+ new->pub.proberesp_ies);
+ /* Override possible earlier Beacon frame IEs */
+ rcu_assign_pointer(known->pub.ies,
+ new->pub.proberesp_ies);
+ if (old)
+ kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head);
+ } else if (rcu_access_pointer(new->pub.beacon_ies)) {
+ const struct cfg80211_bss_ies *old;
+ struct cfg80211_internal_bss *bss;
+
+ if (known->pub.hidden_beacon_bss &&
+ !list_empty(&known->hidden_list)) {
+ const struct cfg80211_bss_ies *f;
+
+ /* The known BSS struct is one of the probe
+ * response members of a group, but we're
+ * receiving a beacon (beacon_ies in the new
+ * bss is used). This can only mean that the
+ * AP changed its beacon from not having an
+ * SSID to showing it, which is confusing so
+ * drop this information.
+ */
+
+ f = rcu_access_pointer(new->pub.beacon_ies);
+ kfree_rcu((struct cfg80211_bss_ies *)f, rcu_head);
+ return false;
+ }
+
+ old = rcu_access_pointer(known->pub.beacon_ies);
+
+ rcu_assign_pointer(known->pub.beacon_ies, new->pub.beacon_ies);
+
+ /* Override IEs if they were from a beacon before */
+ if (old == rcu_access_pointer(known->pub.ies))
+ rcu_assign_pointer(known->pub.ies, new->pub.beacon_ies);
+
+ /* Assign beacon IEs to all sub entries */
+ list_for_each_entry(bss, &known->hidden_list, hidden_list) {
+ const struct cfg80211_bss_ies *ies;
+
+ ies = rcu_access_pointer(bss->pub.beacon_ies);
+ WARN_ON(ies != old);
+
+ rcu_assign_pointer(bss->pub.beacon_ies,
+ new->pub.beacon_ies);
+ }
+
+ if (old)
+ kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head);
+ }
+
+ known->pub.beacon_interval = new->pub.beacon_interval;
+
+ /* don't update the signal if beacon was heard on
+ * adjacent channel.
+ */
+ if (signal_valid)
+ known->pub.signal = new->pub.signal;
+ known->pub.capability = new->pub.capability;
+ known->ts = new->ts;
+ known->ts_boottime = new->ts_boottime;
+ known->parent_tsf = new->parent_tsf;
+ known->pub.chains = new->pub.chains;
+ memcpy(known->pub.chain_signal, new->pub.chain_signal,
+ IEEE80211_MAX_CHAINS);
+ ether_addr_copy(known->parent_bssid, new->parent_bssid);
+ known->pub.max_bssid_indicator = new->pub.max_bssid_indicator;
+ known->pub.bssid_index = new->pub.bssid_index;
+
+ return true;
+}
+
/* Returned bss is reference counted and must be cleaned up appropriately. */
struct cfg80211_internal_bss *
cfg80211_bss_update(struct cfg80211_registered_device *rdev,
@@ -1114,88 +1201,8 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
found = rb_find_bss(rdev, tmp, BSS_CMP_REGULAR);
if (found) {
- /* Update IEs */
- if (rcu_access_pointer(tmp->pub.proberesp_ies)) {
- const struct cfg80211_bss_ies *old;
-
- old = rcu_access_pointer(found->pub.proberesp_ies);
-
- rcu_assign_pointer(found->pub.proberesp_ies,
- tmp->pub.proberesp_ies);
- /* Override possible earlier Beacon frame IEs */
- rcu_assign_pointer(found->pub.ies,
- tmp->pub.proberesp_ies);
- if (old)
- kfree_rcu((struct cfg80211_bss_ies *)old,
- rcu_head);
- } else if (rcu_access_pointer(tmp->pub.beacon_ies)) {
- const struct cfg80211_bss_ies *old;
- struct cfg80211_internal_bss *bss;
-
- if (found->pub.hidden_beacon_bss &&
- !list_empty(&found->hidden_list)) {
- const struct cfg80211_bss_ies *f;
-
- /*
- * The found BSS struct is one of the probe
- * response members of a group, but we're
- * receiving a beacon (beacon_ies in the tmp
- * bss is used). This can only mean that the
- * AP changed its beacon from not having an
- * SSID to showing it, which is confusing so
- * drop this information.
- */
-
- f = rcu_access_pointer(tmp->pub.beacon_ies);
- kfree_rcu((struct cfg80211_bss_ies *)f,
- rcu_head);
- goto drop;
- }
-
- old = rcu_access_pointer(found->pub.beacon_ies);
-
- rcu_assign_pointer(found->pub.beacon_ies,
- tmp->pub.beacon_ies);
-
- /* Override IEs if they were from a beacon before */
- if (old == rcu_access_pointer(found->pub.ies))
- rcu_assign_pointer(found->pub.ies,
- tmp->pub.beacon_ies);
-
- /* Assign beacon IEs to all sub entries */
- list_for_each_entry(bss, &found->hidden_list,
- hidden_list) {
- const struct cfg80211_bss_ies *ies;
-
- ies = rcu_access_pointer(bss->pub.beacon_ies);
- WARN_ON(ies != old);
-
- rcu_assign_pointer(bss->pub.beacon_ies,
- tmp->pub.beacon_ies);
- }
-
- if (old)
- kfree_rcu((struct cfg80211_bss_ies *)old,
- rcu_head);
- }
-
- found->pub.beacon_interval = tmp->pub.beacon_interval;
- /*
- * don't update the signal if beacon was heard on
- * adjacent channel.
- */
- if (signal_valid)
- found->pub.signal = tmp->pub.signal;
- found->pub.capability = tmp->pub.capability;
- found->ts = tmp->ts;
- found->ts_boottime = tmp->ts_boottime;
- found->parent_tsf = tmp->parent_tsf;
- found->pub.chains = tmp->pub.chains;
- memcpy(found->pub.chain_signal, tmp->pub.chain_signal,
- IEEE80211_MAX_CHAINS);
- ether_addr_copy(found->parent_bssid, tmp->parent_bssid);
- found->pub.max_bssid_indicator = tmp->pub.max_bssid_indicator;
- found->pub.bssid_index = tmp->pub.bssid_index;
+ if (!cfg80211_update_known_bss(rdev, found, tmp, signal_valid))
+ goto drop;
} else {
struct cfg80211_internal_bss *new;
struct cfg80211_internal_bss *hidden;
@@ -1368,6 +1375,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
struct cfg80211_internal_bss tmp = {}, *res;
int bss_type;
bool signal_valid;
+ unsigned long ts;
if (WARN_ON(!wiphy))
return NULL;
@@ -1390,8 +1398,11 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
tmp.ts_boottime = data->boottime_ns;
if (non_tx_data) {
tmp.pub.transmitted_bss = non_tx_data->tx_bss;
+ ts = bss_from_pub(non_tx_data->tx_bss)->ts;
tmp.pub.bssid_index = non_tx_data->bssid_index;
tmp.pub.max_bssid_indicator = non_tx_data->max_bssid_indicator;
+ } else {
+ ts = jiffies;
}
/*
@@ -1425,8 +1436,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
signal_valid = abs(data->chan->center_freq - channel->center_freq) <=
wiphy->max_adj_channel_rssi_comp;
- res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid,
- jiffies);
+ res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid, ts);
if (!res)
return NULL;
@@ -1440,7 +1450,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
regulatory_hint_found_beacon(wiphy, channel, gfp);
}
- if (non_tx_data && non_tx_data->tx_bss) {
+ if (non_tx_data) {
/* this is a nontransmitting bss, we need to add it to
* transmitting bss' list if it is not there
*/
@@ -1659,6 +1669,8 @@ cfg80211_inform_bss_data(struct wiphy *wiphy,
res = cfg80211_inform_single_bss_data(wiphy, data, ftype, bssid, tsf,
capability, beacon_interval, ie,
ielen, NULL, gfp);
+ if (!res)
+ return NULL;
non_tx_data.tx_bss = res;
cfg80211_parse_mbssid_data(wiphy, data, ftype, bssid, tsf,
beacon_interval, ie, ielen, &non_tx_data,
@@ -1776,7 +1788,6 @@ static struct cfg80211_bss *
cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy,
struct cfg80211_inform_bss *data,
struct ieee80211_mgmt *mgmt, size_t len,
- struct cfg80211_non_tx_bss *non_tx_data,
gfp_t gfp)
{
struct cfg80211_internal_bss tmp = {}, *res;
@@ -1835,11 +1846,6 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy,
tmp.pub.chains = data->chains;
memcpy(tmp.pub.chain_signal, data->chain_signal, IEEE80211_MAX_CHAINS);
ether_addr_copy(tmp.parent_bssid, data->parent_bssid);
- if (non_tx_data) {
- tmp.pub.transmitted_bss = non_tx_data->tx_bss;
- tmp.pub.bssid_index = non_tx_data->bssid_index;
- tmp.pub.max_bssid_indicator = non_tx_data->max_bssid_indicator;
- }
signal_valid = abs(data->chan->center_freq - channel->center_freq) <=
wiphy->max_adj_channel_rssi_comp;
@@ -1877,7 +1883,7 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
struct cfg80211_non_tx_bss non_tx_data;
res = cfg80211_inform_single_bss_frame_data(wiphy, data, mgmt,
- len, NULL, gfp);
+ len, gfp);
if (!res || !wiphy->support_mbssid ||
!cfg80211_find_ie(WLAN_EID_MULTIPLE_BSSID, ie, ielen))
return res;
@@ -1995,6 +2001,85 @@ void cfg80211_bss_iter(struct wiphy *wiphy,
}
EXPORT_SYMBOL(cfg80211_bss_iter);
+void cfg80211_update_assoc_bss_entry(struct wireless_dev *wdev,
+ struct ieee80211_channel *chan)
+{
+ struct wiphy *wiphy = wdev->wiphy;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+ struct cfg80211_internal_bss *cbss = wdev->current_bss;
+ struct cfg80211_internal_bss *new = NULL;
+ struct cfg80211_internal_bss *bss;
+ struct cfg80211_bss *nontrans_bss;
+ struct cfg80211_bss *tmp;
+
+ spin_lock_bh(&rdev->bss_lock);
+
+ if (WARN_ON(cbss->pub.channel == chan))
+ goto done;
+
+ /* use transmitting bss */
+ if (cbss->pub.transmitted_bss)
+ cbss = container_of(cbss->pub.transmitted_bss,
+ struct cfg80211_internal_bss,
+ pub);
+
+ cbss->pub.channel = chan;
+
+ list_for_each_entry(bss, &rdev->bss_list, list) {
+ if (!cfg80211_bss_type_match(bss->pub.capability,
+ bss->pub.channel->band,
+ wdev->conn_bss_type))
+ continue;
+
+ if (bss == cbss)
+ continue;
+
+ if (!cmp_bss(&bss->pub, &cbss->pub, BSS_CMP_REGULAR)) {
+ new = bss;
+ break;
+ }
+ }
+
+ if (new) {
+ /* to save time, update IEs for transmitting bss only */
+ if (cfg80211_update_known_bss(rdev, cbss, new, false)) {
+ new->pub.proberesp_ies = NULL;
+ new->pub.beacon_ies = NULL;
+ }
+
+ list_for_each_entry_safe(nontrans_bss, tmp,
+ &new->pub.nontrans_list,
+ nontrans_list) {
+ bss = container_of(nontrans_bss,
+ struct cfg80211_internal_bss, pub);
+ if (__cfg80211_unlink_bss(rdev, bss))
+ rdev->bss_generation++;
+ }
+
+ WARN_ON(atomic_read(&new->hold));
+ if (!WARN_ON(!__cfg80211_unlink_bss(rdev, new)))
+ rdev->bss_generation++;
+ }
+
+ rb_erase(&cbss->rbn, &rdev->bss_tree);
+ rb_insert_bss(rdev, cbss);
+ rdev->bss_generation++;
+
+ list_for_each_entry_safe(nontrans_bss, tmp,
+ &cbss->pub.nontrans_list,
+ nontrans_list) {
+ bss = container_of(nontrans_bss,
+ struct cfg80211_internal_bss, pub);
+ bss->pub.channel = chan;
+ rb_erase(&bss->rbn, &rdev->bss_tree);
+ rb_insert_bss(rdev, bss);
+ rdev->bss_generation++;
+ }
+
+done:
+ spin_unlock_bh(&rdev->bss_lock);
+}
+
#ifdef CONFIG_CFG80211_WEXT
static struct cfg80211_registered_device *
cfg80211_get_dev_from_ifindex(struct net *net, int ifindex)
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 4fbb91a511ae..d98ad2b3143b 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -2446,10 +2446,11 @@ TRACE_EVENT(rdev_set_mcast_rate,
sizeof(int) * NUM_NL80211_BANDS);
),
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", "
- "mcast_rates [2.4GHz=0x%x, 5.2GHz=0x%x, 60GHz=0x%x]",
+ "mcast_rates [2.4GHz=0x%x, 5.2GHz=0x%x, 6GHz=0x%x, 60GHz=0x%x]",
WIPHY_PR_ARG, NETDEV_PR_ARG,
__entry->mcast_rate[NL80211_BAND_2GHZ],
__entry->mcast_rate[NL80211_BAND_5GHZ],
+ __entry->mcast_rate[NL80211_BAND_6GHZ],
__entry->mcast_rate[NL80211_BAND_60GHZ])
);
diff --git a/net/wireless/util.c b/net/wireless/util.c
index e74837824cea..419eb12c1e93 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -91,6 +91,11 @@ int ieee80211_channel_to_frequency(int chan, enum nl80211_band band)
else
return 5000 + chan * 5;
break;
+ case NL80211_BAND_6GHZ:
+ /* see 802.11ax D4.1 27.3.22.2 */
+ if (chan <= 253)
+ return 5940 + chan * 5;
+ break;
case NL80211_BAND_60GHZ:
if (chan < 7)
return 56160 + chan * 2160;
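With the 6 GHz arm above, the mapping this patch uses is freq = 5940 + 5 * chan, and its inverse appears in the next hunk. A couple of worked values as a sanity check:

        ieee80211_channel_to_frequency(1, NL80211_BAND_6GHZ);    /* 5945 MHz */
        ieee80211_channel_to_frequency(93, NL80211_BAND_6GHZ);   /* 6405 MHz */
        ieee80211_frequency_to_channel(5945);                     /* 1  */
        ieee80211_frequency_to_channel(6425);                     /* 97 */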
@@ -111,8 +116,11 @@ int ieee80211_frequency_to_channel(int freq)
return (freq - 2407) / 5;
else if (freq >= 4910 && freq <= 4980)
return (freq - 4000) / 5;
- else if (freq <= 45000) /* DMG band lower limit */
+ else if (freq < 5945)
return (freq - 5000) / 5;
+ else if (freq <= 45000) /* DMG band lower limit */
+ /* see 802.11ax D4.1 27.3.22.2 */
+ return (freq - 5940) / 5;
else if (freq >= 58320 && freq <= 70200)
return (freq - 56160) / 2160;
else
@@ -148,6 +156,7 @@ static void set_mandatory_flags_band(struct ieee80211_supported_band *sband)
switch (sband->band) {
case NL80211_BAND_5GHZ:
+ case NL80211_BAND_6GHZ:
want = 3;
for (i = 0; i < sband->n_bitrates; i++) {
if (sband->bitrates[i].bitrate == 60 ||
@@ -960,6 +969,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
}
cfg80211_process_rdev_events(rdev);
+ cfg80211_mlme_purge_registrations(dev->ieee80211_ptr);
}
err = rdev_change_virtual_intf(rdev, dev, ntype, params);
@@ -1039,7 +1049,7 @@ static u32 cfg80211_calculate_bitrate_ht(struct rate_info *rate)
return (bitrate + 50000) / 100000;
}
-static u32 cfg80211_calculate_bitrate_60g(struct rate_info *rate)
+static u32 cfg80211_calculate_bitrate_dmg(struct rate_info *rate)
{
static const u32 __mcs2bitrate[] = {
/* control PHY */
@@ -1086,6 +1096,40 @@ static u32 cfg80211_calculate_bitrate_60g(struct rate_info *rate)
return __mcs2bitrate[rate->mcs];
}
+static u32 cfg80211_calculate_bitrate_edmg(struct rate_info *rate)
+{
+ static const u32 __mcs2bitrate[] = {
+ /* control PHY */
+ [0] = 275,
+ /* SC PHY */
+ [1] = 3850,
+ [2] = 7700,
+ [3] = 9625,
+ [4] = 11550,
+ [5] = 12512, /* 1251.25 mbps */
+ [6] = 13475,
+ [7] = 15400,
+ [8] = 19250,
+ [9] = 23100,
+ [10] = 25025,
+ [11] = 26950,
+ [12] = 30800,
+ [13] = 38500,
+ [14] = 46200,
+ [15] = 50050,
+ [16] = 53900,
+ [17] = 57750,
+ [18] = 69300,
+ [19] = 75075,
+ [20] = 80850,
+ };
+
+ if (WARN_ON_ONCE(rate->mcs >= ARRAY_SIZE(__mcs2bitrate)))
+ return 0;
+
+ return __mcs2bitrate[rate->mcs] * rate->n_bonded_ch;
+}
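The table is in units of 100 kbit/s (note the 1251.25 Mbps comment), and the result scales linearly with the number of bonded 2.16 GHz channels. A quick sketch of what a caller would see:

        struct rate_info rate = {
                .flags = RATE_INFO_FLAGS_EDMG,
                .mcs = 9,
                .n_bonded_ch = 2,
        };

        /* 23100 * 2 = 46200, i.e. 4620.0 Mbit/s */
        u32 bitrate = cfg80211_calculate_bitrate(&rate);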
+
static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate)
{
static const u32 base[4][10] = {
@@ -1258,8 +1302,10 @@ u32 cfg80211_calculate_bitrate(struct rate_info *rate)
{
if (rate->flags & RATE_INFO_FLAGS_MCS)
return cfg80211_calculate_bitrate_ht(rate);
- if (rate->flags & RATE_INFO_FLAGS_60G)
- return cfg80211_calculate_bitrate_60g(rate);
+ if (rate->flags & RATE_INFO_FLAGS_DMG)
+ return cfg80211_calculate_bitrate_dmg(rate);
+ if (rate->flags & RATE_INFO_FLAGS_EDMG)
+ return cfg80211_calculate_bitrate_edmg(rate);
if (rate->flags & RATE_INFO_FLAGS_VHT_MCS)
return cfg80211_calculate_bitrate_vht(rate);
if (rate->flags & RATE_INFO_FLAGS_HE_MCS)
@@ -1471,6 +1517,9 @@ bool ieee80211_operating_class_to_band(u8 operating_class,
case 128 ... 130:
*band = NL80211_BAND_5GHZ;
return true;
+ case 131 ... 135:
+ *band = NL80211_BAND_6GHZ;
+ return true;
case 81:
case 82:
case 83:
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 46e4d69db845..7b6529d81c61 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -7,6 +7,7 @@
* we directly assign the wireless handlers of wireless interfaces.
*
* Copyright 2008-2009 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright (C) 2019 Intel Corporation
*/
#include <linux/export.h>
@@ -864,8 +865,8 @@ static int cfg80211_wext_siwtxpower(struct net_device *dev,
}
}
} else {
- rfkill_set_sw_state(rdev->rfkill, true);
- schedule_work(&rdev->rfkill_sync);
+ if (rfkill_set_sw_state(rdev->rfkill, true))
+ schedule_work(&rdev->rfkill_block);
return 0;
}
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 688aac7a6943..947b8ff0227e 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -14,6 +14,7 @@
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/idr.h>
+#include <linux/vmalloc.h>
#include "xdp_umem.h"
#include "xsk_queue.h"
@@ -105,14 +106,22 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
umem->dev = dev;
umem->queue_id = queue_id;
+ if (flags & XDP_USE_NEED_WAKEUP) {
+ umem->flags |= XDP_UMEM_USES_NEED_WAKEUP;
+ /* Tx needs to be explicitly woken up the first time.
+ * This also covers drivers that do not implement this
+ * feature; they will always have to call sendto().
+ */
+ xsk_set_tx_need_wakeup(umem);
+ }
+
dev_hold(dev);
if (force_copy)
/* For copy-mode, we are done. */
return 0;
- if (!dev->netdev_ops->ndo_bpf ||
- !dev->netdev_ops->ndo_xsk_async_xmit) {
+ if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_wakeup) {
err = -EOPNOTSUPP;
goto err_unreg_umem;
}
@@ -164,6 +173,37 @@ void xdp_umem_clear_dev(struct xdp_umem *umem)
umem->zc = false;
}
+static void xdp_umem_unmap_pages(struct xdp_umem *umem)
+{
+ unsigned int i;
+
+ for (i = 0; i < umem->npgs; i++)
+ if (PageHighMem(umem->pgs[i]))
+ vunmap(umem->pages[i].addr);
+}
+
+static int xdp_umem_map_pages(struct xdp_umem *umem)
+{
+ unsigned int i;
+ void *addr;
+
+ for (i = 0; i < umem->npgs; i++) {
+ if (PageHighMem(umem->pgs[i]))
+ addr = vmap(&umem->pgs[i], 1, VM_MAP, PAGE_KERNEL);
+ else
+ addr = page_address(umem->pgs[i]);
+
+ if (!addr) {
+ xdp_umem_unmap_pages(umem);
+ return -ENOMEM;
+ }
+
+ umem->pages[i].addr = addr;
+ }
+
+ return 0;
+}
+
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
unsigned int i;
@@ -207,6 +247,7 @@ static void xdp_umem_release(struct xdp_umem *umem)
xsk_reuseq_destroy(umem);
+ xdp_umem_unmap_pages(umem);
xdp_umem_unpin_pages(umem);
kfree(umem->pages);
@@ -299,10 +340,11 @@ static int xdp_umem_account_pages(struct xdp_umem *umem)
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
+ bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
unsigned int chunks, chunks_per_page;
u64 addr = mr->addr, size = mr->len;
- int size_chk, err, i;
+ int size_chk, err;
if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
/* Strictly speaking we could support this, if:
@@ -314,7 +356,11 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
return -EINVAL;
}
- if (!is_power_of_2(chunk_size))
+ if (mr->flags & ~(XDP_UMEM_UNALIGNED_CHUNK_FLAG |
+ XDP_UMEM_USES_NEED_WAKEUP))
+ return -EINVAL;
+
+ if (!unaligned_chunks && !is_power_of_2(chunk_size))
return -EINVAL;
if (!PAGE_ALIGNED(addr)) {
@@ -331,9 +377,11 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
if (chunks == 0)
return -EINVAL;
- chunks_per_page = PAGE_SIZE / chunk_size;
- if (chunks < chunks_per_page || chunks % chunks_per_page)
- return -EINVAL;
+ if (!unaligned_chunks) {
+ chunks_per_page = PAGE_SIZE / chunk_size;
+ if (chunks < chunks_per_page || chunks % chunks_per_page)
+ return -EINVAL;
+ }
headroom = ALIGN(headroom, 64);
@@ -342,13 +390,15 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
return -EINVAL;
umem->address = (unsigned long)addr;
- umem->chunk_mask = ~((u64)chunk_size - 1);
+ umem->chunk_mask = unaligned_chunks ? XSK_UNALIGNED_BUF_ADDR_MASK
+ : ~((u64)chunk_size - 1);
umem->size = size;
umem->headroom = headroom;
umem->chunk_size_nohr = chunk_size - headroom;
umem->npgs = size / PAGE_SIZE;
umem->pgs = NULL;
umem->user = NULL;
+ umem->flags = mr->flags;
INIT_LIST_HEAD(&umem->xsk_list);
spin_lock_init(&umem->xsk_list_lock);
@@ -368,10 +418,11 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
goto out_pin;
}
- for (i = 0; i < umem->npgs; i++)
- umem->pages[i].addr = page_address(umem->pgs[i]);
+ err = xdp_umem_map_pages(umem);
+ if (!err)
+ return 0;
- return 0;
+ kfree(umem->pages);
out_pin:
xdp_umem_unpin_pages(umem);
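From userspace, the two UMEM features added in this file are opted into at registration and bind time. A hedged sketch, using the field and flag names from this patch and omitting error handling:

        #include <sys/socket.h>
        #include <linux/if_xdp.h>

        struct xdp_umem_reg mr = {
                .addr = (unsigned long long)umem_area,   /* page-aligned buffer */
                .len = umem_size,
                .chunk_size = 2048,
                .headroom = 0,
                .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG,  /* new: non-power-of-2 chunks allowed */
        };

        setsockopt(xsk_fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
        /* ... ring setup ..., then bind with the new wakeup flag: */
        /* sxdp.sxdp_flags |= XDP_USE_NEED_WAKEUP; */

Old binaries that pass the shorter, pre-flags struct keep working; the setsockopt handler in xsk.c below accepts any optlen at least the size of the v1 layout.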
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 59b57d708697..c2f1af3b6a7c 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -45,7 +45,7 @@ EXPORT_SYMBOL(xsk_umem_has_addrs);
u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
{
- return xskq_peek_addr(umem->fq, addr);
+ return xskq_peek_addr(umem->fq, addr, umem);
}
EXPORT_SYMBOL(xsk_umem_peek_addr);
@@ -55,21 +55,103 @@ void xsk_umem_discard_addr(struct xdp_umem *umem)
}
EXPORT_SYMBOL(xsk_umem_discard_addr);
+void xsk_set_rx_need_wakeup(struct xdp_umem *umem)
+{
+ if (umem->need_wakeup & XDP_WAKEUP_RX)
+ return;
+
+ umem->fq->ring->flags |= XDP_RING_NEED_WAKEUP;
+ umem->need_wakeup |= XDP_WAKEUP_RX;
+}
+EXPORT_SYMBOL(xsk_set_rx_need_wakeup);
+
+void xsk_set_tx_need_wakeup(struct xdp_umem *umem)
+{
+ struct xdp_sock *xs;
+
+ if (umem->need_wakeup & XDP_WAKEUP_TX)
+ return;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+ xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP;
+ }
+ rcu_read_unlock();
+
+ umem->need_wakeup |= XDP_WAKEUP_TX;
+}
+EXPORT_SYMBOL(xsk_set_tx_need_wakeup);
+
+void xsk_clear_rx_need_wakeup(struct xdp_umem *umem)
+{
+ if (!(umem->need_wakeup & XDP_WAKEUP_RX))
+ return;
+
+ umem->fq->ring->flags &= ~XDP_RING_NEED_WAKEUP;
+ umem->need_wakeup &= ~XDP_WAKEUP_RX;
+}
+EXPORT_SYMBOL(xsk_clear_rx_need_wakeup);
+
+void xsk_clear_tx_need_wakeup(struct xdp_umem *umem)
+{
+ struct xdp_sock *xs;
+
+ if (!(umem->need_wakeup & XDP_WAKEUP_TX))
+ return;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+ xs->tx->ring->flags &= ~XDP_RING_NEED_WAKEUP;
+ }
+ rcu_read_unlock();
+
+ umem->need_wakeup &= ~XDP_WAKEUP_TX;
+}
+EXPORT_SYMBOL(xsk_clear_tx_need_wakeup);
+
+bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem)
+{
+ return umem->flags & XDP_UMEM_USES_NEED_WAKEUP;
+}
+EXPORT_SYMBOL(xsk_umem_uses_need_wakeup);
+
+/* If a buffer crosses a page boundary, we need to do 2 memcpy's, one for
+ * each page. This is only required in copy mode.
+ */
+static void __xsk_rcv_memcpy(struct xdp_umem *umem, u64 addr, void *from_buf,
+ u32 len, u32 metalen)
+{
+ void *to_buf = xdp_umem_get_data(umem, addr);
+
+ addr = xsk_umem_add_offset_to_addr(addr);
+ if (xskq_crosses_non_contig_pg(umem, addr, len + metalen)) {
+ void *next_pg_addr = umem->pages[(addr >> PAGE_SHIFT) + 1].addr;
+ u64 page_start = addr & ~(PAGE_SIZE - 1);
+ u64 first_len = PAGE_SIZE - (addr - page_start);
+
+ memcpy(to_buf, from_buf, first_len + metalen);
+ memcpy(next_pg_addr, from_buf + first_len, len - first_len);
+
+ return;
+ }
+
+ memcpy(to_buf, from_buf, len + metalen);
+}
+
static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
- void *to_buf, *from_buf;
+ u64 offset = xs->umem->headroom;
+ u64 addr, memcpy_addr;
+ void *from_buf;
u32 metalen;
- u64 addr;
int err;
- if (!xskq_peek_addr(xs->umem->fq, &addr) ||
+ if (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) ||
len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
xs->rx_dropped++;
return -ENOSPC;
}
- addr += xs->umem->headroom;
-
if (unlikely(xdp_data_meta_unsupported(xdp))) {
from_buf = xdp->data;
metalen = 0;
@@ -78,9 +160,11 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
metalen = xdp->data - xdp->data_meta;
}
- to_buf = xdp_umem_get_data(xs->umem, addr);
- memcpy(to_buf, from_buf, len + metalen);
- addr += metalen;
+ memcpy_addr = xsk_umem_adjust_offset(xs->umem, addr, offset);
+ __xsk_rcv_memcpy(xs->umem, memcpy_addr, from_buf, len, metalen);
+
+ offset += metalen;
+ addr = xsk_umem_adjust_offset(xs->umem, addr, offset);
err = xskq_produce_batch_desc(xs->rx, addr, len);
if (!err) {
xskq_discard_addr(xs->umem->fq);
@@ -102,10 +186,23 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
return err;
}
+static bool xsk_is_bound(struct xdp_sock *xs)
+{
+ if (READ_ONCE(xs->state) == XSK_BOUND) {
+ /* Matches smp_wmb() in bind(). */
+ smp_rmb();
+ return true;
+ }
+ return false;
+}
+
int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
u32 len;
+ if (!xsk_is_bound(xs))
+ return -EINVAL;
+
if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
return -EINVAL;
@@ -125,6 +222,7 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
u32 metalen = xdp->data - xdp->data_meta;
u32 len = xdp->data_end - xdp->data;
+ u64 offset = xs->umem->headroom;
void *buffer;
u64 addr;
int err;
@@ -136,17 +234,17 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
goto out_unlock;
}
- if (!xskq_peek_addr(xs->umem->fq, &addr) ||
+ if (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) ||
len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
err = -ENOSPC;
goto out_drop;
}
- addr += xs->umem->headroom;
-
+ addr = xsk_umem_adjust_offset(xs->umem, addr, offset);
buffer = xdp_umem_get_data(xs->umem, addr);
memcpy(buffer, xdp->data_meta, len + metalen);
- addr += metalen;
+
+ addr = xsk_umem_adjust_offset(xs->umem, addr, metalen);
err = xskq_produce_batch_desc(xs->rx, addr, len);
if (err)
goto out_drop;
@@ -190,7 +288,7 @@ bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc)
rcu_read_lock();
list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
- if (!xskq_peek_desc(xs->tx, desc))
+ if (!xskq_peek_desc(xs->tx, desc, umem))
continue;
if (xskq_produce_addr_lazy(umem->cq, desc->addr))
@@ -212,7 +310,8 @@ static int xsk_zc_xmit(struct sock *sk)
struct xdp_sock *xs = xdp_sk(sk);
struct net_device *dev = xs->dev;
- return dev->netdev_ops->ndo_xsk_async_xmit(dev, xs->queue_id);
+ return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id,
+ XDP_WAKEUP_TX);
}
static void xsk_destruct_skb(struct sk_buff *skb)
@@ -243,7 +342,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
if (xs->queue_id >= xs->dev->real_num_tx_queues)
goto out;
- while (xskq_peek_desc(xs->tx, &desc)) {
+ while (xskq_peek_desc(xs->tx, &desc, xs->umem)) {
char *buffer;
u64 addr;
u32 len;
@@ -272,7 +371,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
skb->dev = xs->dev;
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
- skb_shinfo(skb)->destructor_arg = (void *)(long)addr;
+ skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr;
skb->destructor = xsk_destruct_skb;
err = dev_direct_xmit(skb, xs->queue_id);
@@ -301,7 +400,7 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
struct sock *sk = sock->sk;
struct xdp_sock *xs = xdp_sk(sk);
- if (unlikely(!xs->dev))
+ if (unlikely(!xsk_is_bound(xs)))
return -ENXIO;
if (unlikely(!(xs->dev->flags & IFF_UP)))
return -ENETDOWN;
@@ -317,8 +416,19 @@ static unsigned int xsk_poll(struct file *file, struct socket *sock,
struct poll_table_struct *wait)
{
unsigned int mask = datagram_poll(file, sock, wait);
- struct sock *sk = sock->sk;
- struct xdp_sock *xs = xdp_sk(sk);
+ struct xdp_sock *xs = xdp_sk(sock->sk);
+ struct net_device *dev;
+ struct xdp_umem *umem;
+
+ if (unlikely(!xsk_is_bound(xs)))
+ return mask;
+
+ dev = xs->dev;
+ umem = xs->umem;
+
+ if (umem->need_wakeup)
+ dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id,
+ umem->need_wakeup);
if (xs->rx && !xskq_empty_desc(xs->rx))
mask |= POLLIN | POLLRDNORM;
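The intended userspace pattern for the new flag, as a hedged sketch: the application checks the ring's XDP_RING_NEED_WAKEUP bit (exposed through the mmapped ring's flags field, an assumption here since the offsets plumbing is in a later hunk) and only issues a syscall when the kernel asked for one, e.g. poll() for Rx/fill and sendto() for Tx:

        /* rx/fill side: fill_ring_flags is a hypothetical pointer into the mmapped ring */
        if (*fill_ring_flags & XDP_RING_NEED_WAKEUP)
                poll(&pfd, 1, 0);        /* xsk_poll() above performs the wakeup */

        /* tx side */
        if (*tx_ring_flags & XDP_RING_NEED_WAKEUP)
                sendto(xsk_fd, NULL, 0, MSG_DONTWAIT, NULL, 0);

When the flag is clear, the driver is already processing the rings and the syscalls can be skipped entirely.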
@@ -342,7 +452,7 @@ static int xsk_init_queue(u32 entries, struct xsk_queue **queue,
/* Make sure queue is ready before it can be seen by others */
smp_wmb();
- *queue = q;
+ WRITE_ONCE(*queue, q);
return 0;
}
@@ -350,10 +460,9 @@ static void xsk_unbind_dev(struct xdp_sock *xs)
{
struct net_device *dev = xs->dev;
- if (!dev || xs->state != XSK_BOUND)
+ if (xs->state != XSK_BOUND)
return;
-
- xs->state = XSK_UNBOUND;
+ WRITE_ONCE(xs->state, XSK_UNBOUND);
/* Wait for driver to stop using the xdp socket. */
xdp_del_sk_umem(xs->umem, xs);
@@ -362,6 +471,52 @@ static void xsk_unbind_dev(struct xdp_sock *xs)
dev_put(dev);
}
+static struct xsk_map *xsk_get_map_list_entry(struct xdp_sock *xs,
+ struct xdp_sock ***map_entry)
+{
+ struct xsk_map *map = NULL;
+ struct xsk_map_node *node;
+
+ *map_entry = NULL;
+
+ spin_lock_bh(&xs->map_list_lock);
+ node = list_first_entry_or_null(&xs->map_list, struct xsk_map_node,
+ node);
+ if (node) {
+ WARN_ON(xsk_map_inc(node->map));
+ map = node->map;
+ *map_entry = node->map_entry;
+ }
+ spin_unlock_bh(&xs->map_list_lock);
+ return map;
+}
+
+static void xsk_delete_from_maps(struct xdp_sock *xs)
+{
+ /* This function removes the current XDP socket from all the
+ * maps it resides in. We need to take extra care here, due to
+ * the two locks involved. Each map has a lock synchronizing
+ * updates to the entries, and each socket has a lock that
+ * synchronizes access to the list of maps (map_list). For
+ * deadlock avoidance the locks need to be taken in the order
+ * "map lock"->"socket map list lock". We start off by
+ * accessing the socket map list, and take a reference to the
+ * map to guarantee existence between the
+ * xsk_get_map_list_entry() and xsk_map_try_sock_delete()
+ * calls. Then we ask the map to remove the socket, which
+ * tries to remove the socket from the map. Note that there
+ * might be updates to the map between
+ * xsk_get_map_list_entry() and xsk_map_try_sock_delete().
+ */
+ struct xdp_sock **map_entry = NULL;
+ struct xsk_map *map;
+
+ while ((map = xsk_get_map_list_entry(xs, &map_entry))) {
+ xsk_map_try_sock_delete(map, xs, map_entry);
+ xsk_map_put(map);
+ }
+}
+
static int xsk_release(struct socket *sock)
{
struct sock *sk = sock->sk;
@@ -381,7 +536,10 @@ static int xsk_release(struct socket *sock)
sock_prot_inuse_add(net, sk->sk_prot, -1);
local_bh_enable();
+ xsk_delete_from_maps(xs);
+ mutex_lock(&xs->mutex);
xsk_unbind_dev(xs);
+ mutex_unlock(&xs->mutex);
xskq_destroy(xs->rx);
xskq_destroy(xs->tx);
@@ -412,6 +570,24 @@ static struct socket *xsk_lookup_xsk_from_fd(int fd)
return sock;
}
+/* Check if umem pages are contiguous.
+ * If zero-copy mode, use the DMA address to do the page contiguity check.
+ * For all other modes we use addr (kernel virtual address).
+ * Store the result in the low bits of addr.
+ */
+static void xsk_check_page_contiguity(struct xdp_umem *umem, u32 flags)
+{
+ struct xdp_umem_page *pgs = umem->pages;
+ int i, is_contig;
+
+ for (i = 0; i < umem->npgs - 1; i++) {
+ is_contig = (flags & XDP_ZEROCOPY) ?
+ (pgs[i].dma + PAGE_SIZE == pgs[i + 1].dma) :
+ (pgs[i].addr + PAGE_SIZE == pgs[i + 1].addr);
+ pgs[i].addr += is_contig << XSK_NEXT_PG_CONTIG_SHIFT;
+ }
+}
+
static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{
struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
@@ -427,7 +603,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
return -EINVAL;
flags = sxdp->sxdp_flags;
- if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY))
+ if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY |
+ XDP_USE_NEED_WAKEUP))
return -EINVAL;
rtnl_lock();
@@ -454,7 +631,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
struct xdp_sock *umem_xs;
struct socket *sock;
- if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY)) {
+ if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY) ||
+ (flags & XDP_USE_NEED_WAKEUP)) {
/* Cannot specify flags for shared sockets. */
err = -EINVAL;
goto out_unlock;
@@ -473,19 +651,19 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
}
umem_xs = xdp_sk(sock->sk);
- if (!umem_xs->umem) {
- /* No umem to inherit. */
+ if (!xsk_is_bound(umem_xs)) {
err = -EBADF;
sockfd_put(sock);
goto out_unlock;
- } else if (umem_xs->dev != dev || umem_xs->queue_id != qid) {
+ }
+ if (umem_xs->dev != dev || umem_xs->queue_id != qid) {
err = -EINVAL;
sockfd_put(sock);
goto out_unlock;
}
xdp_get_umem(umem_xs->umem);
- xs->umem = umem_xs->umem;
+ WRITE_ONCE(xs->umem, umem_xs->umem);
sockfd_put(sock);
} else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) {
err = -EINVAL;
@@ -500,6 +678,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
err = xdp_umem_assign_dev(xs->umem, dev, qid, flags);
if (err)
goto out_unlock;
+
+ xsk_check_page_contiguity(xs->umem, flags);
}
xs->dev = dev;
@@ -510,16 +690,28 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
xdp_add_sk_umem(xs->umem, xs);
out_unlock:
- if (err)
+ if (err) {
dev_put(dev);
- else
- xs->state = XSK_BOUND;
+ } else {
+ /* Matches the smp_rmb() in bind() for shared umem
+ * sockets and the one in xsk_is_bound().
+ */
+ smp_wmb();
+ WRITE_ONCE(xs->state, XSK_BOUND);
+ }
out_release:
mutex_unlock(&xs->mutex);
rtnl_unlock();
return err;
}
+struct xdp_umem_reg_v1 {
+ __u64 addr; /* Start of packet data area */
+ __u64 len; /* Length of packet data area */
+ __u32 chunk_size;
+ __u32 headroom;
+};
+
static int xsk_setsockopt(struct socket *sock, int level, int optname,
char __user *optval, unsigned int optlen)
{
@@ -549,15 +741,24 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
}
q = (optname == XDP_TX_RING) ? &xs->tx : &xs->rx;
err = xsk_init_queue(entries, q, false);
+ if (!err && optname == XDP_TX_RING)
+ /* Tx needs to be explicitly woken up the first time */
+ xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP;
mutex_unlock(&xs->mutex);
return err;
}
case XDP_UMEM_REG:
{
- struct xdp_umem_reg mr;
+ size_t mr_size = sizeof(struct xdp_umem_reg);
+ struct xdp_umem_reg mr = {};
struct xdp_umem *umem;
- if (copy_from_user(&mr, optval, sizeof(mr)))
+ if (optlen < sizeof(struct xdp_umem_reg_v1))
+ return -EINVAL;
+ else if (optlen < sizeof(mr))
+ mr_size = sizeof(struct xdp_umem_reg_v1);
+
+ if (copy_from_user(&mr, optval, mr_size))
return -EFAULT;
mutex_lock(&xs->mutex);
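
The XDP_UMEM_REG hunk above is what keeps old binaries working after struct xdp_umem_reg grew a flags field: the kernel zero-initialises the full struct and copies only optlen bytes, accepting anything down to the flag-less xdp_umem_reg_v1 size. A hedged user-space sketch of registering a UMEM against the grown struct (chunk size and headroom are arbitrary illustrative values):

#include <sys/socket.h>
#include <linux/if_xdp.h>

/* Sketch only: register a page-aligned buffer as UMEM. mr.flags needs the
 * uapi header from this series; a binary built against the older header
 * keeps working because the kernel copies only optlen bytes into a zeroed
 * struct, leaving flags at 0.
 */
static int xsk_reg_umem(int fd, void *umem_area, __u64 size)
{
	struct xdp_umem_reg mr = {};

	mr.addr = (__u64)(unsigned long)umem_area;
	mr.len = size;
	mr.chunk_size = 2048;	/* illustrative value */
	mr.headroom = 0;
	mr.flags = 0;		/* or XDP_UMEM_UNALIGNED_CHUNK_FLAG */

	return setsockopt(fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
}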
@@ -574,7 +775,7 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
/* Make sure umem is ready before it can be seen by others */
smp_wmb();
- xs->umem = umem;
+ WRITE_ONCE(xs->umem, umem);
mutex_unlock(&xs->mutex);
return 0;
}
@@ -610,6 +811,20 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
return -ENOPROTOOPT;
}
+static void xsk_enter_rxtx_offsets(struct xdp_ring_offset_v1 *ring)
+{
+ ring->producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
+ ring->consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
+ ring->desc = offsetof(struct xdp_rxtx_ring, desc);
+}
+
+static void xsk_enter_umem_offsets(struct xdp_ring_offset_v1 *ring)
+{
+ ring->producer = offsetof(struct xdp_umem_ring, ptrs.producer);
+ ring->consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
+ ring->desc = offsetof(struct xdp_umem_ring, desc);
+}
+
static int xsk_getsockopt(struct socket *sock, int level, int optname,
char __user *optval, int __user *optlen)
{
@@ -649,26 +864,49 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname,
case XDP_MMAP_OFFSETS:
{
struct xdp_mmap_offsets off;
+ struct xdp_mmap_offsets_v1 off_v1;
+ bool flags_supported = true;
+ void *to_copy;
- if (len < sizeof(off))
+ if (len < sizeof(off_v1))
return -EINVAL;
+ else if (len < sizeof(off))
+ flags_supported = false;
+
+ if (flags_supported) {
+ /* xdp_ring_offset is identical to xdp_ring_offset_v1
+ * except for the flags field added to the end.
+ */
+ xsk_enter_rxtx_offsets((struct xdp_ring_offset_v1 *)
+ &off.rx);
+ xsk_enter_rxtx_offsets((struct xdp_ring_offset_v1 *)
+ &off.tx);
+ xsk_enter_umem_offsets((struct xdp_ring_offset_v1 *)
+ &off.fr);
+ xsk_enter_umem_offsets((struct xdp_ring_offset_v1 *)
+ &off.cr);
+ off.rx.flags = offsetof(struct xdp_rxtx_ring,
+ ptrs.flags);
+ off.tx.flags = offsetof(struct xdp_rxtx_ring,
+ ptrs.flags);
+ off.fr.flags = offsetof(struct xdp_umem_ring,
+ ptrs.flags);
+ off.cr.flags = offsetof(struct xdp_umem_ring,
+ ptrs.flags);
+
+ len = sizeof(off);
+ to_copy = &off;
+ } else {
+ xsk_enter_rxtx_offsets(&off_v1.rx);
+ xsk_enter_rxtx_offsets(&off_v1.tx);
+ xsk_enter_umem_offsets(&off_v1.fr);
+ xsk_enter_umem_offsets(&off_v1.cr);
+
+ len = sizeof(off_v1);
+ to_copy = &off_v1;
+ }
- off.rx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
- off.rx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
- off.rx.desc = offsetof(struct xdp_rxtx_ring, desc);
- off.tx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
- off.tx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
- off.tx.desc = offsetof(struct xdp_rxtx_ring, desc);
-
- off.fr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
- off.fr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
- off.fr.desc = offsetof(struct xdp_umem_ring, desc);
- off.cr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
- off.cr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
- off.cr.desc = offsetof(struct xdp_umem_ring, desc);
-
- len = sizeof(off);
- if (copy_to_user(optval, &off, len))
+ if (copy_to_user(optval, to_copy, len))
return -EFAULT;
if (put_user(len, optlen))
return -EFAULT;
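
Conversely, XDP_MMAP_OFFSETS sizes its reply to the caller's optlen, so an application built against the new layout can still run on a kernel that only knows the flag-less structs: it simply gets fewer bytes back. A sketch of a user-space helper that detects this from the returned optlen and translates the packed v1 layout (the v1 structs and the helper name are illustrative, redeclared locally to keep the sketch self-contained):

#include <string.h>
#include <sys/socket.h>
#include <linux/if_xdp.h>

/* Local redeclaration of the flag-less layout used by older kernels. */
struct xdp_ring_offset_v1 {
	__u64 producer;
	__u64 consumer;
	__u64 desc;
};

struct xdp_mmap_offsets_v1 {
	struct xdp_ring_offset_v1 rx;
	struct xdp_ring_offset_v1 tx;
	struct xdp_ring_offset_v1 fr;
	struct xdp_ring_offset_v1 cr;
};

static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)
{
	struct xdp_mmap_offsets_v1 off_v1;
	socklen_t optlen = sizeof(*off);

	if (getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen))
		return -1;

	if (optlen == sizeof(*off))
		return 0;	/* kernel with per-ring flags: nothing to do */

	if (optlen != sizeof(off_v1))
		return -1;	/* unexpected reply size */

	/* Old kernel: the reply uses the packed v1 layout, so every ring
	 * after rx sits at different byte offsets and has to be copied
	 * field by field. The flags offsets simply do not exist.
	 */
	memcpy(&off_v1, off, sizeof(off_v1));
	off->rx.producer = off_v1.rx.producer;
	off->rx.consumer = off_v1.rx.consumer;
	off->rx.desc = off_v1.rx.desc;
	off->tx.producer = off_v1.tx.producer;
	off->tx.consumer = off_v1.tx.consumer;
	off->tx.desc = off_v1.tx.desc;
	off->fr.producer = off_v1.fr.producer;
	off->fr.consumer = off_v1.fr.consumer;
	off->fr.desc = off_v1.fr.desc;
	off->cr.producer = off_v1.cr.producer;
	off->cr.consumer = off_v1.cr.consumer;
	off->cr.desc = off_v1.cr.desc;
	off->rx.flags = off->tx.flags = off->fr.flags = off->cr.flags = 0;

	return 0;
}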
@@ -713,7 +951,7 @@ static int xsk_mmap(struct file *file, struct socket *sock,
unsigned long pfn;
struct page *qpg;
- if (xs->state != XSK_READY)
+ if (READ_ONCE(xs->state) != XSK_READY)
return -EBUSY;
if (offset == XDP_PGOFF_RX_RING) {
@@ -855,6 +1093,9 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
spin_lock_init(&xs->rx_lock);
spin_lock_init(&xs->tx_completion_lock);
+ INIT_LIST_HEAD(&xs->map_list);
+ spin_lock_init(&xs->map_list_lock);
+
mutex_lock(&net->xdp.lock);
sk_add_node_rcu(sk, &net->xdp.list);
mutex_unlock(&net->xdp.lock);
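
Taken together, the xsk.c changes implement the need_wakeup protocol: the kernel raises XDP_RING_NEED_WAKEUP in a ring's flags word (the TX ring even starts out with it set, see the XDP_TX_RING hunk above), and the application only pays for a syscall when that bit is set. A minimal sketch of the application-side TX kick, assuming tx_ring_flags points at the mmap'ed flags word located via the offsets reported by XDP_MMAP_OFFSETS:

#include <stddef.h>
#include <sys/socket.h>
#include <linux/if_xdp.h>

/* Sketch only: TX fast path on the application side. Only issue the
 * sendto() kick when the kernel has asked to be woken up.
 */
static void xsk_kick_tx_if_needed(int fd, const volatile __u32 *tx_ring_flags)
{
	if (*tx_ring_flags & XDP_RING_NEED_WAKEUP)
		sendto(fd, NULL, 0, MSG_DONTWAIT, NULL, 0);
}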
diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h
index ba8120610426..4cfd106bdb53 100644
--- a/net/xdp/xsk.h
+++ b/net/xdp/xsk.h
@@ -4,6 +4,19 @@
#ifndef XSK_H_
#define XSK_H_
+struct xdp_ring_offset_v1 {
+ __u64 producer;
+ __u64 consumer;
+ __u64 desc;
+};
+
+struct xdp_mmap_offsets_v1 {
+ struct xdp_ring_offset_v1 rx;
+ struct xdp_ring_offset_v1 tx;
+ struct xdp_ring_offset_v1 fr;
+ struct xdp_ring_offset_v1 cr;
+};
+
static inline struct xdp_sock *xdp_sk(struct sock *sk)
{
return (struct xdp_sock *)sk;
diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
index d5e06c8e0cbf..f59791ba43a0 100644
--- a/net/xdp/xsk_diag.c
+++ b/net/xdp/xsk_diag.c
@@ -56,7 +56,7 @@ static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb)
du.id = umem->id;
du.size = umem->size;
du.num_pages = umem->npgs;
- du.chunk_size = (__u32)(~umem->chunk_mask + 1);
+ du.chunk_size = umem->chunk_size_nohr + umem->headroom;
du.headroom = umem->headroom;
du.ifindex = umem->dev ? umem->dev->ifindex : 0;
du.queue_id = umem->queue_id;
@@ -97,6 +97,7 @@ static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb,
msg->xdiag_ino = sk_ino;
sock_diag_save_cookie(sk, msg->xdiag_cookie);
+ mutex_lock(&xs->mutex);
if ((req->xdiag_show & XDP_SHOW_INFO) && xsk_diag_put_info(xs, nlskb))
goto out_nlmsg_trim;
@@ -117,10 +118,12 @@ static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb,
sock_diag_put_meminfo(sk, nlskb, XDP_DIAG_MEMINFO))
goto out_nlmsg_trim;
+ mutex_unlock(&xs->mutex);
nlmsg_end(nlskb, nlh);
return 0;
out_nlmsg_trim:
+ mutex_unlock(&xs->mutex);
nlmsg_cancel(nlskb, nlh);
return -EMSGSIZE;
}
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 909c5168ed0f..eddae4688862 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -16,6 +16,7 @@
struct xdp_ring {
u32 producer ____cacheline_aligned_in_smp;
u32 consumer ____cacheline_aligned_in_smp;
+ u32 flags;
};
/* Used for the RX and TX queues for packets */
@@ -133,6 +134,17 @@ static inline bool xskq_has_addrs(struct xsk_queue *q, u32 cnt)
/* UMEM queue */
+static inline bool xskq_crosses_non_contig_pg(struct xdp_umem *umem, u64 addr,
+ u64 length)
+{
+ bool cross_pg = (addr & (PAGE_SIZE - 1)) + length > PAGE_SIZE;
+ bool next_pg_contig =
+ (unsigned long)umem->pages[(addr >> PAGE_SHIFT)].addr &
+ XSK_NEXT_PG_CONTIG_MASK;
+
+ return cross_pg && !next_pg_contig;
+}
+
static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)
{
if (addr >= q->size) {
@@ -143,23 +155,51 @@ static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)
return true;
}
-static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr)
+static inline bool xskq_is_valid_addr_unaligned(struct xsk_queue *q, u64 addr,
+ u64 length,
+ struct xdp_umem *umem)
+{
+ u64 base_addr = xsk_umem_extract_addr(addr);
+
+ addr = xsk_umem_add_offset_to_addr(addr);
+ if (base_addr >= q->size || addr >= q->size ||
+ xskq_crosses_non_contig_pg(umem, addr, length)) {
+ q->invalid_descs++;
+ return false;
+ }
+
+ return true;
+}
+
+static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr,
+ struct xdp_umem *umem)
{
while (q->cons_tail != q->cons_head) {
struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
unsigned int idx = q->cons_tail & q->ring_mask;
*addr = READ_ONCE(ring->desc[idx]) & q->chunk_mask;
+
+ if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) {
+ if (xskq_is_valid_addr_unaligned(q, *addr,
+ umem->chunk_size_nohr,
+ umem))
+ return addr;
+ goto out;
+ }
+
if (xskq_is_valid_addr(q, *addr))
return addr;
+out:
q->cons_tail++;
}
return NULL;
}
-static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr)
+static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr,
+ struct xdp_umem *umem)
{
if (q->cons_tail == q->cons_head) {
smp_mb(); /* D, matches A */
@@ -170,7 +210,7 @@ static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr)
smp_rmb();
}
- return xskq_validate_addr(q, addr);
+ return xskq_validate_addr(q, addr, umem);
}
static inline void xskq_discard_addr(struct xsk_queue *q)
@@ -229,8 +269,21 @@ static inline int xskq_reserve_addr(struct xsk_queue *q)
/* Rx/Tx queue */
-static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d)
+static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d,
+ struct xdp_umem *umem)
{
+ if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) {
+ if (!xskq_is_valid_addr_unaligned(q, d->addr, d->len, umem))
+ return false;
+
+ if (d->len > umem->chunk_size_nohr || d->options) {
+ q->invalid_descs++;
+ return false;
+ }
+
+ return true;
+ }
+
if (!xskq_is_valid_addr(q, d->addr))
return false;
@@ -244,14 +297,15 @@ static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d)
}
static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q,
- struct xdp_desc *desc)
+ struct xdp_desc *desc,
+ struct xdp_umem *umem)
{
while (q->cons_tail != q->cons_head) {
struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
unsigned int idx = q->cons_tail & q->ring_mask;
*desc = READ_ONCE(ring->desc[idx]);
- if (xskq_is_valid_desc(q, desc))
+ if (xskq_is_valid_desc(q, desc, umem))
return desc;
q->cons_tail++;
@@ -261,7 +315,8 @@ static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q,
}
static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q,
- struct xdp_desc *desc)
+ struct xdp_desc *desc,
+ struct xdp_umem *umem)
{
if (q->cons_tail == q->cons_head) {
smp_mb(); /* D, matches A */
@@ -272,7 +327,7 @@ static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q,
smp_rmb(); /* C, matches B */
}
- return xskq_validate_desc(q, desc);
+ return xskq_validate_desc(q, desc, umem);
}
static inline void xskq_discard_desc(struct xsk_queue *q)
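
The unaligned-chunk validation above relies on address helpers introduced elsewhere in this series: a descriptor address carries the chunk base in its low bits and an offset into the chunk in its upper bits, which is why xskq_is_valid_addr_unaligned() checks both the extracted base and base-plus-offset against the queue size. A rough sketch of those assumed helpers (the 48-bit split mirrors XSK_UNALIGNED_BUF_OFFSET_SHIFT; treat the exact constants as an assumption here):

/* Rough sketch of the helpers assumed by the validation above (compare
 * include/net/xdp_sock.h in this series): the low 48 bits of an
 * unaligned-mode address are the chunk base, the upper 16 bits are the
 * offset within the chunk.
 */
#define XSK_UNALIGNED_BUF_OFFSET_SHIFT	48
#define XSK_UNALIGNED_BUF_ADDR_MASK \
	((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1)

static inline u64 xsk_umem_extract_addr(u64 addr)
{
	return addr & XSK_UNALIGNED_BUF_ADDR_MASK;	/* chunk base */
}

static inline u64 xsk_umem_extract_offset(u64 addr)
{
	return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;	/* offset in chunk */
}

static inline u64 xsk_umem_add_offset_to_addr(u64 addr)
{
	return xsk_umem_extract_addr(addr) + xsk_umem_extract_offset(addr);
}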
diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index 32c364d3bfb3..4d422447aadc 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -85,7 +85,7 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
if (dlen < len)
len = dlen;
- frag->page_offset = 0;
+ skb_frag_off_set(frag, 0);
skb_frag_size_set(frag, len);
memcpy(skb_frag_address(frag), scratch, len);