summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-11-14 10:05:33 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2024-11-14 10:05:33 -0800
commitcfaaa7d010d1fc58f9717fcc8591201e741d2d49 (patch)
treed2dda42587f7d6c5f714aafe1f262321a135291c
parent4abcd80f23357808b0444d261ed08e5a77dbaa9a (diff)
parentca34aceb322bfcd6ab498884f1805ee12f983259 (diff)
Merge tag 'net-6.12-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Paolo Abeni: "Including fixes from bluetooth. Quite calm week. No new regression under investigation. Current release - regressions: - eth: revert "igb: Disable threaded IRQ for igb_msix_other" Current release - new code bugs: - bluetooth: btintel: direct exception event to bluetooth stack Previous releases - regressions: - core: fix data-races around sk->sk_forward_alloc - netlink: terminate outstanding dump on socket close - mptcp: error out earlier on disconnect - vsock: fix accept_queue memory leak - phylink: ensure PHY momentary link-fails are handled - eth: mlx5: - fix null-ptr-deref in add rule err flow - lock FTE when checking if active - eth: dwmac-mediatek: fix inverted handling of mediatek,mac-wol Previous releases - always broken: - sched: fix u32's systematic failure to free IDR entries for hnodes. - sctp: fix possible UAF in sctp_v6_available() - eth: bonding: add ns target multicast address to slave device - eth: mlx5: fix msix vectors to respect platform limit - eth: icssg-prueth: fix 1 PPS sync" * tag 'net-6.12-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (38 commits) net: sched: u32: Add test case for systematic hnode IDR leaks selftests: bonding: add ns multicast group testing bonding: add ns target multicast address to slave device net: ti: icssg-prueth: Fix 1 PPS sync stmmac: dwmac-intel-plat: fix call balance of tx_clk handling routines net: Make copy_safe_from_sockptr() match documentation net: stmmac: dwmac-mediatek: Fix inverted handling of mediatek,mac-wol ipmr: Fix access to mfc_cache_list without lock held samples: pktgen: correct dev to DEV net: phylink: ensure PHY momentary link-fails are handled mptcp: pm: use _rcu variant under rcu_read_lock mptcp: hold pm lock when deleting entry mptcp: update local address flags when setting it net: sched: cls_u32: Fix u32's systematic failure to free IDR entries for hnodes. MAINTAINERS: Re-add cancelled Renesas driver sections Revert "igb: Disable threaded IRQ for igb_msix_other" Bluetooth: btintel: Direct exception event to bluetooth stack Bluetooth: hci_core: Fix calling mgmt_device_connected virtio/vsock: Improve MSG_ZEROCOPY error handling vsock: Fix sk_error_queue memory leak ...
-rw-r--r--Documentation/networking/devmem.rst9
-rw-r--r--MAINTAINERS30
-rw-r--r--drivers/bluetooth/btintel.c5
-rw-r--r--drivers/net/bonding/bond_main.c16
-rw-r--r--drivers/net/bonding/bond_options.c82
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c32
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c25
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c4
-rw-r--r--drivers/net/ethernet/ti/icssg/icssg_prueth.c13
-rw-r--r--drivers/net/ethernet/ti/icssg/icssg_prueth.h12
-rw-r--r--drivers/net/ethernet/vertexcom/mse102x.c4
-rw-r--r--drivers/net/phy/phylink.c14
-rw-r--r--include/linux/sockptr.h4
-rw-r--r--include/net/bond_options.h2
-rw-r--r--net/bluetooth/hci_core.c2
-rw-r--r--net/core/sock.c42
-rw-r--r--net/dccp/ipv6.c2
-rw-r--r--net/ipv4/ipmr_base.c3
-rw-r--r--net/ipv6/tcp_ipv6.c4
-rw-r--r--net/mptcp/pm_netlink.c3
-rw-r--r--net/mptcp/pm_userspace.c15
-rw-r--r--net/mptcp/protocol.c16
-rw-r--r--net/netlink/af_netlink.c31
-rw-r--r--net/netlink/af_netlink.h2
-rw-r--r--net/sched/cls_u32.c18
-rw-r--r--net/sctp/ipv6.c19
-rw-r--r--net/vmw_vsock/af_vsock.c3
-rw-r--r--net/vmw_vsock/virtio_transport_common.c9
-rwxr-xr-xsamples/pktgen/pktgen_sample01_simple.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_options.sh54
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/Makefile1
-rw-r--r--tools/testing/selftests/net/netlink-dumps.c110
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/u32.json24
41 files changed, 543 insertions, 109 deletions
diff --git a/Documentation/networking/devmem.rst b/Documentation/networking/devmem.rst
index a55bf21f671c..d95363645331 100644
--- a/Documentation/networking/devmem.rst
+++ b/Documentation/networking/devmem.rst
@@ -225,6 +225,15 @@ The user must ensure the tokens are returned to the kernel in a timely manner.
Failure to do so will exhaust the limited dmabuf that is bound to the RX queue
and will lead to packet drops.
+The user must pass no more than 128 tokens, with no more than 1024 total frags
+among the token->token_count across all the tokens. If the user provides more
+than 1024 frags, the kernel will free up to 1024 frags and return early.
+
+The kernel returns the number of actual frags freed. The number of frags freed
+can be less than the tokens provided by the user in case of:
+
+(a) an internal kernel leak bug.
+(b) the user passed more than 1024 frags.
Implementation & Caveats
========================
diff --git a/MAINTAINERS b/MAINTAINERS
index 21fdaa19229a..b878ddc99f94 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -19579,6 +19579,17 @@ S: Supported
F: Documentation/devicetree/bindings/i2c/renesas,iic-emev2.yaml
F: drivers/i2c/busses/i2c-emev2.c
+RENESAS ETHERNET AVB DRIVER
+M: Paul Barker <paul.barker.ct@bp.renesas.com>
+M: Niklas Söderlund <niklas.soderlund@ragnatech.se>
+L: netdev@vger.kernel.org
+L: linux-renesas-soc@vger.kernel.org
+S: Supported
+F: Documentation/devicetree/bindings/net/renesas,etheravb.yaml
+F: drivers/net/ethernet/renesas/Kconfig
+F: drivers/net/ethernet/renesas/Makefile
+F: drivers/net/ethernet/renesas/ravb*
+
RENESAS ETHERNET SWITCH DRIVER
R: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
L: netdev@vger.kernel.org
@@ -19628,6 +19639,14 @@ F: Documentation/devicetree/bindings/i2c/renesas,rmobile-iic.yaml
F: drivers/i2c/busses/i2c-rcar.c
F: drivers/i2c/busses/i2c-sh_mobile.c
+RENESAS R-CAR SATA DRIVER
+M: Geert Uytterhoeven <geert+renesas@glider.be>
+L: linux-ide@vger.kernel.org
+L: linux-renesas-soc@vger.kernel.org
+S: Supported
+F: Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml
+F: drivers/ata/sata_rcar.c
+
RENESAS R-CAR THERMAL DRIVERS
M: Niklas Söderlund <niklas.soderlund@ragnatech.se>
L: linux-renesas-soc@vger.kernel.org
@@ -19703,6 +19722,17 @@ S: Supported
F: Documentation/devicetree/bindings/i2c/renesas,rzv2m.yaml
F: drivers/i2c/busses/i2c-rzv2m.c
+RENESAS SUPERH ETHERNET DRIVER
+M: Niklas Söderlund <niklas.soderlund@ragnatech.se>
+L: netdev@vger.kernel.org
+L: linux-renesas-soc@vger.kernel.org
+S: Supported
+F: Documentation/devicetree/bindings/net/renesas,ether.yaml
+F: drivers/net/ethernet/renesas/Kconfig
+F: drivers/net/ethernet/renesas/Makefile
+F: drivers/net/ethernet/renesas/sh_eth*
+F: include/linux/sh_eth.h
+
RENESAS USB PHY DRIVER
M: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
L: linux-renesas-soc@vger.kernel.org
diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c
index 438b92967bc3..30a32ebbcc68 100644
--- a/drivers/bluetooth/btintel.c
+++ b/drivers/bluetooth/btintel.c
@@ -3288,13 +3288,12 @@ static int btintel_diagnostics(struct hci_dev *hdev, struct sk_buff *skb)
case INTEL_TLV_TEST_EXCEPTION:
/* Generate devcoredump from exception */
if (!hci_devcd_init(hdev, skb->len)) {
- hci_devcd_append(hdev, skb);
+ hci_devcd_append(hdev, skb_clone(skb, GFP_ATOMIC));
hci_devcd_complete(hdev);
} else {
bt_dev_err(hdev, "Failed to generate devcoredump");
- kfree_skb(skb);
}
- return 0;
+ break;
default:
bt_dev_err(hdev, "Invalid exception type %02X", tlv->val[0]);
}
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index b1bffd8e9a95..15e0f14d0d49 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1008,6 +1008,8 @@ static void bond_hw_addr_swap(struct bonding *bond, struct slave *new_active,
if (bond->dev->flags & IFF_UP)
bond_hw_addr_flush(bond->dev, old_active->dev);
+
+ bond_slave_ns_maddrs_add(bond, old_active);
}
if (new_active) {
@@ -1024,6 +1026,8 @@ static void bond_hw_addr_swap(struct bonding *bond, struct slave *new_active,
dev_mc_sync(new_active->dev, bond->dev);
netif_addr_unlock_bh(bond->dev);
}
+
+ bond_slave_ns_maddrs_del(bond, new_active);
}
}
@@ -2341,6 +2345,11 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
bond_compute_features(bond);
bond_set_carrier(bond);
+ /* Needs to be called before bond_select_active_slave(), which will
+ * remove the maddrs if the slave is selected as active slave.
+ */
+ bond_slave_ns_maddrs_add(bond, new_slave);
+
if (bond_uses_primary(bond)) {
block_netpoll_tx();
bond_select_active_slave(bond);
@@ -2350,7 +2359,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
if (bond_mode_can_use_xmit_hash(bond))
bond_update_slave_arr(bond, NULL);
-
if (!slave_dev->netdev_ops->ndo_bpf ||
!slave_dev->netdev_ops->ndo_xdp_xmit) {
if (bond->xdp_prog) {
@@ -2548,6 +2556,12 @@ static int __bond_release_one(struct net_device *bond_dev,
if (oldcurrent == slave)
bond_change_active_slave(bond, NULL);
+ /* Must be called after bond_change_active_slave () as the slave
+ * might change from an active slave to a backup slave. Then it is
+ * necessary to clear the maddrs on the backup slave.
+ */
+ bond_slave_ns_maddrs_del(bond, slave);
+
if (bond_is_lb(bond)) {
/* Must be called only after the slave has been
* detached from the list and the curr_active_slave
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 95d59a18c022..327b6ecdc77e 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -15,6 +15,7 @@
#include <linux/sched/signal.h>
#include <net/bonding.h>
+#include <net/ndisc.h>
static int bond_option_active_slave_set(struct bonding *bond,
const struct bond_opt_value *newval);
@@ -1234,6 +1235,68 @@ static int bond_option_arp_ip_targets_set(struct bonding *bond,
}
#if IS_ENABLED(CONFIG_IPV6)
+static bool slave_can_set_ns_maddr(const struct bonding *bond, struct slave *slave)
+{
+ return BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP &&
+ !bond_is_active_slave(slave) &&
+ slave->dev->flags & IFF_MULTICAST;
+}
+
+static void slave_set_ns_maddrs(struct bonding *bond, struct slave *slave, bool add)
+{
+ struct in6_addr *targets = bond->params.ns_targets;
+ char slot_maddr[MAX_ADDR_LEN];
+ int i;
+
+ if (!slave_can_set_ns_maddr(bond, slave))
+ return;
+
+ for (i = 0; i < BOND_MAX_NS_TARGETS; i++) {
+ if (ipv6_addr_any(&targets[i]))
+ break;
+
+ if (!ndisc_mc_map(&targets[i], slot_maddr, slave->dev, 0)) {
+ if (add)
+ dev_mc_add(slave->dev, slot_maddr);
+ else
+ dev_mc_del(slave->dev, slot_maddr);
+ }
+ }
+}
+
+void bond_slave_ns_maddrs_add(struct bonding *bond, struct slave *slave)
+{
+ if (!bond->params.arp_validate)
+ return;
+ slave_set_ns_maddrs(bond, slave, true);
+}
+
+void bond_slave_ns_maddrs_del(struct bonding *bond, struct slave *slave)
+{
+ if (!bond->params.arp_validate)
+ return;
+ slave_set_ns_maddrs(bond, slave, false);
+}
+
+static void slave_set_ns_maddr(struct bonding *bond, struct slave *slave,
+ struct in6_addr *target, struct in6_addr *slot)
+{
+ char target_maddr[MAX_ADDR_LEN], slot_maddr[MAX_ADDR_LEN];
+
+ if (!bond->params.arp_validate || !slave_can_set_ns_maddr(bond, slave))
+ return;
+
+ /* remove the previous maddr from slave */
+ if (!ipv6_addr_any(slot) &&
+ !ndisc_mc_map(slot, slot_maddr, slave->dev, 0))
+ dev_mc_del(slave->dev, slot_maddr);
+
+ /* add new maddr on slave if target is set */
+ if (!ipv6_addr_any(target) &&
+ !ndisc_mc_map(target, target_maddr, slave->dev, 0))
+ dev_mc_add(slave->dev, target_maddr);
+}
+
static void _bond_options_ns_ip6_target_set(struct bonding *bond, int slot,
struct in6_addr *target,
unsigned long last_rx)
@@ -1243,8 +1306,10 @@ static void _bond_options_ns_ip6_target_set(struct bonding *bond, int slot,
struct slave *slave;
if (slot >= 0 && slot < BOND_MAX_NS_TARGETS) {
- bond_for_each_slave(bond, slave, iter)
+ bond_for_each_slave(bond, slave, iter) {
slave->target_last_arp_rx[slot] = last_rx;
+ slave_set_ns_maddr(bond, slave, target, &targets[slot]);
+ }
targets[slot] = *target;
}
}
@@ -1296,15 +1361,30 @@ static int bond_option_ns_ip6_targets_set(struct bonding *bond,
{
return -EPERM;
}
+
+static void slave_set_ns_maddrs(struct bonding *bond, struct slave *slave, bool add) {}
+
+void bond_slave_ns_maddrs_add(struct bonding *bond, struct slave *slave) {}
+
+void bond_slave_ns_maddrs_del(struct bonding *bond, struct slave *slave) {}
#endif
static int bond_option_arp_validate_set(struct bonding *bond,
const struct bond_opt_value *newval)
{
+ bool changed = !!bond->params.arp_validate != !!newval->value;
+ struct list_head *iter;
+ struct slave *slave;
+
netdev_dbg(bond->dev, "Setting arp_validate to %s (%llu)\n",
newval->string, newval->value);
bond->params.arp_validate = newval->value;
+ if (changed) {
+ bond_for_each_slave(bond, slave, iter)
+ slave_set_ns_maddrs(bond, slave, !!bond->params.arp_validate);
+ }
+
return 0;
}
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index b83df5f94b1f..f1d088168723 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -907,7 +907,7 @@ static int igb_request_msix(struct igb_adapter *adapter)
int i, err = 0, vector = 0, free_vector = 0;
err = request_irq(adapter->msix_entries[vector].vector,
- igb_msix_other, IRQF_NO_THREAD, netdev->name, adapter);
+ igb_msix_other, 0, netdev->name, adapter);
if (err)
goto err_out;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index dcfccaaa8d91..92d5cfec3dc0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -866,7 +866,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
return 0;
err_rule:
- mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
+ mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, attr, zone_rule->mh);
mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
err_mod_hdr:
kfree(attr);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
index d61be26a4df1..3db31cc10719 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -660,7 +660,7 @@ tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx,
while (remaining > 0) {
skb_frag_t *frag = &record->frags[i];
- get_page(skb_frag_page(frag));
+ page_ref_inc(skb_frag_page(frag));
remaining -= skb_frag_size(frag);
info->frags[i++] = *frag;
}
@@ -763,7 +763,7 @@ void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
stats = sq->stats;
mlx5e_tx_dma_unmap(sq->pdev, dma);
- put_page(wi->resync_dump_frag_page);
+ page_ref_dec(wi->resync_dump_frag_page);
stats->tls_dump_packets++;
stats->tls_dump_bytes += wi->num_bytes;
}
@@ -816,12 +816,12 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx,
err_out:
for (; i < info.nr_frags; i++)
- /* The put_page() here undoes the page ref obtained in tx_sync_info_get().
+ /* The page_ref_dec() here undoes the page ref obtained in tx_sync_info_get().
* Page refs obtained for the DUMP WQEs above (by page_ref_add) will be
* released only upon their completions (or in mlx5e_free_txqsq_descs,
* if channel closes).
*/
- put_page(skb_frag_page(&info.frags[i]));
+ page_ref_dec(skb_frag_page(&info.frags[i]));
return MLX5E_KTLS_SYNC_FAIL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index e601324a690a..13a3fa8dc0cb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4267,7 +4267,8 @@ void mlx5e_set_xdp_feature(struct net_device *netdev)
struct mlx5e_params *params = &priv->channels.params;
xdp_features_t val;
- if (params->packet_merge.type != MLX5E_PACKET_MERGE_NONE) {
+ if (!netdev->netdev_ops->ndo_bpf ||
+ params->packet_merge.type != MLX5E_PACKET_MERGE_NONE) {
xdp_clear_features_flag(netdev);
return;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
index 5bf8318cc48b..1d60465cc2ca 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
@@ -36,6 +36,7 @@
#include "en.h"
#include "en/port.h"
#include "eswitch.h"
+#include "lib/mlx5.h"
static int mlx5e_test_health_info(struct mlx5e_priv *priv)
{
@@ -247,6 +248,9 @@ static int mlx5e_cond_loopback(struct mlx5e_priv *priv)
if (is_mdev_switchdev_mode(priv->mdev))
return -EOPNOTSUPP;
+ if (mlx5_get_sd(priv->mdev))
+ return -EOPNOTSUPP;
+
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index f24f91d213f2..8cf61ae8b89d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -2527,8 +2527,11 @@ static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw,
struct mlx5_eswitch_rep *rep, u8 rep_type)
{
if (atomic_cmpxchg(&rep->rep_data[rep_type].state,
- REP_LOADED, REP_REGISTERED) == REP_LOADED)
+ REP_LOADED, REP_REGISTERED) == REP_LOADED) {
+ if (rep_type == REP_ETH)
+ __esw_offloads_unload_rep(esw, rep, REP_IB);
esw->offloads.rep_ops[rep_type]->unload(rep);
+ }
}
static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 8505d5e241e1..6e4f8aaf8d2f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -2105,13 +2105,22 @@ lookup_fte_locked(struct mlx5_flow_group *g,
fte_tmp = NULL;
goto out;
}
+
+ nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD);
+
if (!fte_tmp->node.active) {
+ up_write_ref_node(&fte_tmp->node, false);
+
+ if (take_write)
+ up_write_ref_node(&g->node, false);
+ else
+ up_read_ref_node(&g->node);
+
tree_put_node(&fte_tmp->node, false);
- fte_tmp = NULL;
- goto out;
+
+ return NULL;
}
- nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD);
out:
if (take_write)
up_write_ref_node(&g->node, false);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 81a9232a03e1..7db9cab9bedf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -593,9 +593,11 @@ static void irq_pool_free(struct mlx5_irq_pool *pool)
kvfree(pool);
}
-static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec)
+static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec,
+ bool dynamic_vec)
{
struct mlx5_irq_table *table = dev->priv.irq_table;
+ int sf_vec_available = sf_vec;
int num_sf_ctrl;
int err;
@@ -616,6 +618,13 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec)
num_sf_ctrl = DIV_ROUND_UP(mlx5_sf_max_functions(dev),
MLX5_SFS_PER_CTRL_IRQ);
num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl);
+ if (!dynamic_vec && (num_sf_ctrl + 1) > sf_vec_available) {
+ mlx5_core_dbg(dev,
+ "Not enough IRQs for SFs control and completion pool, required=%d avail=%d\n",
+ num_sf_ctrl + 1, sf_vec_available);
+ return 0;
+ }
+
table->sf_ctrl_pool = irq_pool_alloc(dev, pcif_vec, num_sf_ctrl,
"mlx5_sf_ctrl",
MLX5_EQ_SHARE_IRQ_MIN_CTRL,
@@ -624,9 +633,11 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec)
err = PTR_ERR(table->sf_ctrl_pool);
goto err_pf;
}
- /* init sf_comp_pool */
+ sf_vec_available -= num_sf_ctrl;
+
+ /* init sf_comp_pool, remaining vectors are for the SF completions */
table->sf_comp_pool = irq_pool_alloc(dev, pcif_vec + num_sf_ctrl,
- sf_vec - num_sf_ctrl, "mlx5_sf_comp",
+ sf_vec_available, "mlx5_sf_comp",
MLX5_EQ_SHARE_IRQ_MIN_COMP,
MLX5_EQ_SHARE_IRQ_MAX_COMP);
if (IS_ERR(table->sf_comp_pool)) {
@@ -715,6 +726,7 @@ int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table)
int mlx5_irq_table_create(struct mlx5_core_dev *dev)
{
int num_eqs = mlx5_max_eq_cap_get(dev);
+ bool dynamic_vec;
int total_vec;
int pcif_vec;
int req_vec;
@@ -724,21 +736,31 @@ int mlx5_irq_table_create(struct mlx5_core_dev *dev)
if (mlx5_core_is_sf(dev))
return 0;
+ /* PCI PF vectors usage is limited by online cpus, device EQs and
+ * PCI MSI-X capability.
+ */
pcif_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1;
pcif_vec = min_t(int, pcif_vec, num_eqs);
+ pcif_vec = min_t(int, pcif_vec, pci_msix_vec_count(dev->pdev));
total_vec = pcif_vec;
if (mlx5_sf_max_functions(dev))
total_vec += MLX5_MAX_MSIX_PER_SF * mlx5_sf_max_functions(dev);
total_vec = min_t(int, total_vec, pci_msix_vec_count(dev->pdev));
- pcif_vec = min_t(int, pcif_vec, pci_msix_vec_count(dev->pdev));
req_vec = pci_msix_can_alloc_dyn(dev->pdev) ? 1 : total_vec;
n = pci_alloc_irq_vectors(dev->pdev, 1, req_vec, PCI_IRQ_MSIX);
if (n < 0)
return n;
- err = irq_pools_init(dev, total_vec - pcif_vec, pcif_vec);
+ /* Further limit vectors of the pools based on platform for non dynamic case */
+ dynamic_vec = pci_msix_can_alloc_dyn(dev->pdev);
+ if (!dynamic_vec) {
+ pcif_vec = min_t(int, n, pcif_vec);
+ total_vec = min_t(int, n, total_vec);
+ }
+
+ err = irq_pools_init(dev, total_vec - pcif_vec, pcif_vec, dynamic_vec);
if (err)
pci_free_irq_vectors(dev->pdev);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c
index d68f0c4e7835..9739bc9867c5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c
@@ -108,7 +108,12 @@ static int intel_eth_plat_probe(struct platform_device *pdev)
if (IS_ERR(dwmac->tx_clk))
return PTR_ERR(dwmac->tx_clk);
- clk_prepare_enable(dwmac->tx_clk);
+ ret = clk_prepare_enable(dwmac->tx_clk);
+ if (ret) {
+ dev_err(&pdev->dev,
+ "Failed to enable tx_clk\n");
+ return ret;
+ }
/* Check and configure TX clock rate */
rate = clk_get_rate(dwmac->tx_clk);
@@ -119,7 +124,7 @@ static int intel_eth_plat_probe(struct platform_device *pdev)
if (ret) {
dev_err(&pdev->dev,
"Failed to set tx_clk\n");
- return ret;
+ goto err_tx_clk_disable;
}
}
}
@@ -133,7 +138,7 @@ static int intel_eth_plat_probe(struct platform_device *pdev)
if (ret) {
dev_err(&pdev->dev,
"Failed to set clk_ptp_ref\n");
- return ret;
+ goto err_tx_clk_disable;
}
}
}
@@ -149,12 +154,15 @@ static int intel_eth_plat_probe(struct platform_device *pdev)
}
ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
- if (ret) {
- clk_disable_unprepare(dwmac->tx_clk);
- return ret;
- }
+ if (ret)
+ goto err_tx_clk_disable;
return 0;
+
+err_tx_clk_disable:
+ if (dwmac->data->tx_clk_en)
+ clk_disable_unprepare(dwmac->tx_clk);
+ return ret;
}
static void intel_eth_plat_remove(struct platform_device *pdev)
@@ -162,7 +170,8 @@ static void intel_eth_plat_remove(struct platform_device *pdev)
struct intel_dwmac *dwmac = get_stmmac_bsp_priv(&pdev->dev);
stmmac_pltfr_remove(pdev);
- clk_disable_unprepare(dwmac->tx_clk);
+ if (dwmac->data->tx_clk_en)
+ clk_disable_unprepare(dwmac->tx_clk);
}
static struct platform_driver intel_eth_plat_driver = {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
index 2a9132d6d743..001857c294fb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
@@ -589,9 +589,9 @@ static int mediatek_dwmac_common_data(struct platform_device *pdev,
plat->mac_interface = priv_plat->phy_mode;
if (priv_plat->mac_wol)
- plat->flags |= STMMAC_FLAG_USE_PHY_WOL;
- else
plat->flags &= ~STMMAC_FLAG_USE_PHY_WOL;
+ else
+ plat->flags |= STMMAC_FLAG_USE_PHY_WOL;
plat->riwt_off = 1;
plat->maxmtu = ETH_DATA_LEN;
plat->host_dma_width = priv_plat->variant->dma_bit_mask;
diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
index 5c20ceb164df..fe2fd1bfc904 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
@@ -16,6 +16,7 @@
#include <linux/if_hsr.h>
#include <linux/if_vlan.h>
#include <linux/interrupt.h>
+#include <linux/io-64-nonatomic-hi-lo.h>
#include <linux/kernel.h>
#include <linux/mfd/syscon.h>
#include <linux/module.h>
@@ -411,6 +412,8 @@ static int prueth_perout_enable(void *clockops_data,
struct prueth_emac *emac = clockops_data;
u32 reduction_factor = 0, offset = 0;
struct timespec64 ts;
+ u64 current_cycle;
+ u64 start_offset;
u64 ns_period;
if (!on)
@@ -449,8 +452,14 @@ static int prueth_perout_enable(void *clockops_data,
writel(reduction_factor, emac->prueth->shram.va +
TIMESYNC_FW_WC_SYNCOUT_REDUCTION_FACTOR_OFFSET);
- writel(0, emac->prueth->shram.va +
- TIMESYNC_FW_WC_SYNCOUT_START_TIME_CYCLECOUNT_OFFSET);
+ current_cycle = icssg_read_time(emac->prueth->shram.va +
+ TIMESYNC_FW_WC_CYCLECOUNT_OFFSET);
+
+ /* Rounding of current_cycle count to next second */
+ start_offset = roundup(current_cycle, MSEC_PER_SEC);
+
+ hi_lo_writeq(start_offset, emac->prueth->shram.va +
+ TIMESYNC_FW_WC_SYNCOUT_START_TIME_CYCLECOUNT_OFFSET);
return 0;
}
diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h
index 8722bb4a268a..f5c1d473e9f9 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h
+++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h
@@ -330,6 +330,18 @@ static inline int prueth_emac_slice(struct prueth_emac *emac)
extern const struct ethtool_ops icssg_ethtool_ops;
extern const struct dev_pm_ops prueth_dev_pm_ops;
+static inline u64 icssg_read_time(const void __iomem *addr)
+{
+ u32 low, high;
+
+ do {
+ high = readl(addr + 4);
+ low = readl(addr);
+ } while (high != readl(addr + 4));
+
+ return low + ((u64)high << 32);
+}
+
/* Classifier helpers */
void icssg_class_set_mac_addr(struct regmap *miig_rt, int slice, u8 *mac);
void icssg_class_set_host_mac_addr(struct regmap *miig_rt, const u8 *mac);
diff --git a/drivers/net/ethernet/vertexcom/mse102x.c b/drivers/net/ethernet/vertexcom/mse102x.c
index 2c37957478fb..89dc4c401a8d 100644
--- a/drivers/net/ethernet/vertexcom/mse102x.c
+++ b/drivers/net/ethernet/vertexcom/mse102x.c
@@ -437,13 +437,15 @@ static void mse102x_tx_work(struct work_struct *work)
mse = &mses->mse102x;
while ((txb = skb_dequeue(&mse->txq))) {
+ unsigned int len = max_t(unsigned int, txb->len, ETH_ZLEN);
+
mutex_lock(&mses->lock);
ret = mse102x_tx_pkt_spi(mse, txb, work_timeout);
mutex_unlock(&mses->lock);
if (ret) {
mse->ndev->stats.tx_dropped++;
} else {
- mse->ndev->stats.tx_bytes += txb->len;
+ mse->ndev->stats.tx_bytes += len;
mse->ndev->stats.tx_packets++;
}
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 4309317de3d1..3e9957b6aa14 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -78,7 +78,7 @@ struct phylink {
unsigned int pcs_neg_mode;
unsigned int pcs_state;
- bool mac_link_dropped;
+ bool link_failed;
bool using_mac_select_pcs;
struct sfp_bus *sfp_bus;
@@ -1475,9 +1475,9 @@ static void phylink_resolve(struct work_struct *w)
cur_link_state = pl->old_link_state;
if (pl->phylink_disable_state) {
- pl->mac_link_dropped = false;
+ pl->link_failed = false;
link_state.link = false;
- } else if (pl->mac_link_dropped) {
+ } else if (pl->link_failed) {
link_state.link = false;
retrigger = true;
} else {
@@ -1572,7 +1572,7 @@ static void phylink_resolve(struct work_struct *w)
phylink_link_up(pl, link_state);
}
if (!link_state.link && retrigger) {
- pl->mac_link_dropped = false;
+ pl->link_failed = false;
queue_work(system_power_efficient_wq, &pl->resolve);
}
mutex_unlock(&pl->state_mutex);
@@ -1835,6 +1835,8 @@ static void phylink_phy_change(struct phy_device *phydev, bool up)
pl->phy_state.pause |= MLO_PAUSE_RX;
pl->phy_state.interface = phydev->interface;
pl->phy_state.link = up;
+ if (!up)
+ pl->link_failed = true;
mutex_unlock(&pl->state_mutex);
phylink_run_resolve(pl);
@@ -2158,7 +2160,7 @@ EXPORT_SYMBOL_GPL(phylink_disconnect_phy);
static void phylink_link_changed(struct phylink *pl, bool up, const char *what)
{
if (!up)
- pl->mac_link_dropped = true;
+ pl->link_failed = true;
phylink_run_resolve(pl);
phylink_dbg(pl, "%s link %s\n", what, up ? "up" : "down");
}
@@ -2792,7 +2794,7 @@ int phylink_ethtool_set_pauseparam(struct phylink *pl,
* link will cycle.
*/
if (manual_changed) {
- pl->mac_link_dropped = true;
+ pl->link_failed = true;
phylink_run_resolve(pl);
}
diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h
index fc5a206c4043..195debe2b1db 100644
--- a/include/linux/sockptr.h
+++ b/include/linux/sockptr.h
@@ -77,7 +77,9 @@ static inline int copy_safe_from_sockptr(void *dst, size_t ksize,
{
if (optlen < ksize)
return -EINVAL;
- return copy_from_sockptr(dst, optval, ksize);
+ if (copy_from_sockptr(dst, optval, ksize))
+ return -EFAULT;
+ return 0;
}
static inline int copy_struct_from_sockptr(void *dst, size_t ksize,
diff --git a/include/net/bond_options.h b/include/net/bond_options.h
index 473a0147769e..18687ccf0638 100644
--- a/include/net/bond_options.h
+++ b/include/net/bond_options.h
@@ -161,5 +161,7 @@ void bond_option_arp_ip_targets_clear(struct bonding *bond);
#if IS_ENABLED(CONFIG_IPV6)
void bond_option_ns_ip6_targets_clear(struct bonding *bond);
#endif
+void bond_slave_ns_maddrs_add(struct bonding *bond, struct slave *slave);
+void bond_slave_ns_maddrs_del(struct bonding *bond, struct slave *slave);
#endif /* _NET_BOND_OPTIONS_H */
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 96d097b21d13..0ac354db8177 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -3788,8 +3788,6 @@ static void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb)
hci_dev_lock(hdev);
conn = hci_conn_hash_lookup_handle(hdev, handle);
- if (conn && hci_dev_test_flag(hdev, HCI_MGMT))
- mgmt_device_connected(hdev, conn, NULL, 0);
hci_dev_unlock(hdev);
if (conn) {
diff --git a/net/core/sock.c b/net/core/sock.c
index 039be95c40cf..da50df485090 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1052,32 +1052,34 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
#ifdef CONFIG_PAGE_POOL
-/* This is the number of tokens that the user can SO_DEVMEM_DONTNEED in
- * 1 syscall. The limit exists to limit the amount of memory the kernel
- * allocates to copy these tokens.
+/* This is the number of tokens and frags that the user can SO_DEVMEM_DONTNEED
+ * in 1 syscall. The limit exists to limit the amount of memory the kernel
+ * allocates to copy these tokens, and to prevent looping over the frags for
+ * too long.
*/
#define MAX_DONTNEED_TOKENS 128
+#define MAX_DONTNEED_FRAGS 1024
static noinline_for_stack int
sock_devmem_dontneed(struct sock *sk, sockptr_t optval, unsigned int optlen)
{
unsigned int num_tokens, i, j, k, netmem_num = 0;
struct dmabuf_token *tokens;
+ int ret = 0, num_frags = 0;
netmem_ref netmems[16];
- int ret = 0;
if (!sk_is_tcp(sk))
return -EBADF;
- if (optlen % sizeof(struct dmabuf_token) ||
+ if (optlen % sizeof(*tokens) ||
optlen > sizeof(*tokens) * MAX_DONTNEED_TOKENS)
return -EINVAL;
- tokens = kvmalloc_array(optlen, sizeof(*tokens), GFP_KERNEL);
+ num_tokens = optlen / sizeof(*tokens);
+ tokens = kvmalloc_array(num_tokens, sizeof(*tokens), GFP_KERNEL);
if (!tokens)
return -ENOMEM;
- num_tokens = optlen / sizeof(struct dmabuf_token);
if (copy_from_sockptr(tokens, optval, optlen)) {
kvfree(tokens);
return -EFAULT;
@@ -1086,24 +1088,28 @@ sock_devmem_dontneed(struct sock *sk, sockptr_t optval, unsigned int optlen)
xa_lock_bh(&sk->sk_user_frags);
for (i = 0; i < num_tokens; i++) {
for (j = 0; j < tokens[i].token_count; j++) {
+ if (++num_frags > MAX_DONTNEED_FRAGS)
+ goto frag_limit_reached;
+
netmem_ref netmem = (__force netmem_ref)__xa_erase(
&sk->sk_user_frags, tokens[i].token_start + j);
- if (netmem &&
- !WARN_ON_ONCE(!netmem_is_net_iov(netmem))) {
- netmems[netmem_num++] = netmem;
- if (netmem_num == ARRAY_SIZE(netmems)) {
- xa_unlock_bh(&sk->sk_user_frags);
- for (k = 0; k < netmem_num; k++)
- WARN_ON_ONCE(!napi_pp_put_page(netmems[k]));
- netmem_num = 0;
- xa_lock_bh(&sk->sk_user_frags);
- }
- ret++;
+ if (!netmem || WARN_ON_ONCE(!netmem_is_net_iov(netmem)))
+ continue;
+
+ netmems[netmem_num++] = netmem;
+ if (netmem_num == ARRAY_SIZE(netmems)) {
+ xa_unlock_bh(&sk->sk_user_frags);
+ for (k = 0; k < netmem_num; k++)
+ WARN_ON_ONCE(!napi_pp_put_page(netmems[k]));
+ netmem_num = 0;
+ xa_lock_bh(&sk->sk_user_frags);
}
+ ret++;
}
}
+frag_limit_reached:
xa_unlock_bh(&sk->sk_user_frags);
for (k = 0; k < netmem_num; k++)
WARN_ON_ONCE(!napi_pp_put_page(netmems[k]));
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index da5dba120bc9..d6649246188d 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -618,7 +618,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
by tcp. Feel free to propose better solution.
--ANK (980728)
*/
- if (np->rxopt.all)
+ if (np->rxopt.all && sk->sk_state != DCCP_LISTEN)
opt_skb = skb_clone_and_charge_r(skb, sk);
if (sk->sk_state == DCCP_OPEN) { /* Fast path */
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
index 271dc03fc6db..f0af12a2f70b 100644
--- a/net/ipv4/ipmr_base.c
+++ b/net/ipv4/ipmr_base.c
@@ -310,7 +310,8 @@ int mr_table_dump(struct mr_table *mrt, struct sk_buff *skb,
if (filter->filter_set)
flags |= NLM_F_DUMP_FILTERED;
- list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
+ list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list,
+ lockdep_rtnl_is_held()) {
if (e < s_e)
goto next_entry;
if (filter->dev &&
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d71ab4e1efe1..c9de5ef8f267 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1618,7 +1618,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
by tcp. Feel free to propose better solution.
--ANK (980728)
*/
- if (np->rxopt.all)
+ if (np->rxopt.all && sk->sk_state != TCP_LISTEN)
opt_skb = skb_clone_and_charge_r(skb, sk);
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
@@ -1656,8 +1656,6 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (reason)
goto reset;
}
- if (opt_skb)
- __kfree_skb(opt_skb);
return 0;
}
} else
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index db586a5b3866..45a2b5f05d38 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -524,7 +524,8 @@ __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info)
{
struct mptcp_pm_addr_entry *entry;
- list_for_each_entry(entry, &pernet->local_addr_list, list) {
+ list_for_each_entry_rcu(entry, &pernet->local_addr_list, list,
+ lockdep_is_held(&pernet->lock)) {
if (mptcp_addresses_equal(&entry->addr, info, entry->addr.port))
return entry;
}
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index 56dfea9862b7..e35178f5205f 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -308,14 +308,17 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info)
lock_sock(sk);
+ spin_lock_bh(&msk->pm.lock);
match = mptcp_userspace_pm_lookup_addr_by_id(msk, id_val);
if (!match) {
GENL_SET_ERR_MSG(info, "address with specified id not found");
+ spin_unlock_bh(&msk->pm.lock);
release_sock(sk);
goto out;
}
list_move(&match->list, &free_list);
+ spin_unlock_bh(&msk->pm.lock);
mptcp_pm_remove_addrs(msk, &free_list);
@@ -560,6 +563,7 @@ int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info)
struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
struct net *net = sock_net(skb->sk);
+ struct mptcp_pm_addr_entry *entry;
struct mptcp_sock *msk;
int ret = -EINVAL;
struct sock *sk;
@@ -601,6 +605,17 @@ int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info)
if (loc.flags & MPTCP_PM_ADDR_FLAG_BACKUP)
bkup = 1;
+ spin_lock_bh(&msk->pm.lock);
+ list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) {
+ if (mptcp_addresses_equal(&entry->addr, &loc.addr, false)) {
+ if (bkup)
+ entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
+ else
+ entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
+ }
+ }
+ spin_unlock_bh(&msk->pm.lock);
+
lock_sock(sk);
ret = mptcp_pm_nl_mp_prio_send_ack(msk, &loc.addr, &rem.addr, bkup);
release_sock(sk);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index d263091659e0..48d480982b78 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2082,7 +2082,8 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
slow = lock_sock_fast(ssk);
WRITE_ONCE(ssk->sk_rcvbuf, rcvbuf);
WRITE_ONCE(tcp_sk(ssk)->window_clamp, window_clamp);
- tcp_cleanup_rbuf(ssk, 1);
+ if (tcp_can_send_ack(ssk))
+ tcp_cleanup_rbuf(ssk, 1);
unlock_sock_fast(ssk, slow);
}
}
@@ -2205,7 +2206,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
cmsg_flags = MPTCP_CMSG_INQ;
while (copied < len) {
- int bytes_read;
+ int err, bytes_read;
bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied, flags, &tss, &cmsg_flags);
if (unlikely(bytes_read < 0)) {
@@ -2267,9 +2268,16 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
}
pr_debug("block timeout %ld\n", timeo);
- sk_wait_data(sk, &timeo, NULL);
+ mptcp_rcv_space_adjust(msk, copied);
+ err = sk_wait_data(sk, &timeo, NULL);
+ if (err < 0) {
+ err = copied ? : err;
+ goto out_err;
+ }
}
+ mptcp_rcv_space_adjust(msk, copied);
+
out_err:
if (cmsg_flags && copied >= 0) {
if (cmsg_flags & MPTCP_CMSG_TS)
@@ -2285,8 +2293,6 @@ out_err:
pr_debug("msk=%p rx queue empty=%d:%d copied=%d\n",
msk, skb_queue_empty_lockless(&sk->sk_receive_queue),
skb_queue_empty(&msk->receive_queue), copied);
- if (!(flags & MSG_PEEK))
- mptcp_rcv_space_adjust(msk, copied);
release_sock(sk);
return copied;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 0a9287fadb47..f84aad420d44 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -393,15 +393,6 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
static void netlink_sock_destruct(struct sock *sk)
{
- struct netlink_sock *nlk = nlk_sk(sk);
-
- if (nlk->cb_running) {
- if (nlk->cb.done)
- nlk->cb.done(&nlk->cb);
- module_put(nlk->cb.module);
- kfree_skb(nlk->cb.skb);
- }
-
skb_queue_purge(&sk->sk_receive_queue);
if (!sock_flag(sk, SOCK_DEAD)) {
@@ -414,14 +405,6 @@ static void netlink_sock_destruct(struct sock *sk)
WARN_ON(nlk_sk(sk)->groups);
}
-static void netlink_sock_destruct_work(struct work_struct *work)
-{
- struct netlink_sock *nlk = container_of(work, struct netlink_sock,
- work);
-
- sk_free(&nlk->sk);
-}
-
/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
* SMP. Look, when several writers sleep and reader wakes them up, all but one
* immediately hit write lock and grab all the cpus. Exclusive sleep solves
@@ -731,12 +714,6 @@ static void deferred_put_nlk_sk(struct rcu_head *head)
if (!refcount_dec_and_test(&sk->sk_refcnt))
return;
- if (nlk->cb_running && nlk->cb.done) {
- INIT_WORK(&nlk->work, netlink_sock_destruct_work);
- schedule_work(&nlk->work);
- return;
- }
-
sk_free(sk);
}
@@ -788,6 +765,14 @@ static int netlink_release(struct socket *sock)
NETLINK_URELEASE, &n);
}
+ /* Terminate any outstanding dump */
+ if (nlk->cb_running) {
+ if (nlk->cb.done)
+ nlk->cb.done(&nlk->cb);
+ module_put(nlk->cb.module);
+ kfree_skb(nlk->cb.skb);
+ }
+
module_put(nlk->module);
if (netlink_is_kernel(sk)) {
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 5b0e4e62ab8b..778a3809361f 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -4,7 +4,6 @@
#include <linux/rhashtable.h>
#include <linux/atomic.h>
-#include <linux/workqueue.h>
#include <net/sock.h>
/* flags */
@@ -50,7 +49,6 @@ struct netlink_sock {
struct rhash_head node;
struct rcu_head rcu;
- struct work_struct work;
};
static inline struct netlink_sock *nlk_sk(struct sock *sk)
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 9412d88a99bc..d3a03c57545b 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -92,6 +92,16 @@ struct tc_u_common {
long knodes;
};
+static u32 handle2id(u32 h)
+{
+ return ((h & 0x80000000) ? ((h >> 20) & 0x7FF) : h);
+}
+
+static u32 id2handle(u32 id)
+{
+ return (id | 0x800U) << 20;
+}
+
static inline unsigned int u32_hash_fold(__be32 key,
const struct tc_u32_sel *sel,
u8 fshift)
@@ -310,7 +320,7 @@ static u32 gen_new_htid(struct tc_u_common *tp_c, struct tc_u_hnode *ptr)
int id = idr_alloc_cyclic(&tp_c->handle_idr, ptr, 1, 0x7FF, GFP_KERNEL);
if (id < 0)
return 0;
- return (id | 0x800U) << 20;
+ return id2handle(id);
}
static struct hlist_head *tc_u_common_hash;
@@ -360,7 +370,7 @@ static int u32_init(struct tcf_proto *tp)
return -ENOBUFS;
refcount_set(&root_ht->refcnt, 1);
- root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000;
+ root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : id2handle(0);
root_ht->prio = tp->prio;
root_ht->is_root = true;
idr_init(&root_ht->handle_idr);
@@ -612,7 +622,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
if (phn == ht) {
u32_clear_hw_hnode(tp, ht, extack);
idr_destroy(&ht->handle_idr);
- idr_remove(&tp_c->handle_idr, ht->handle);
+ idr_remove(&tp_c->handle_idr, handle2id(ht->handle));
RCU_INIT_POINTER(*hn, ht->next);
kfree_rcu(ht, rcu);
return 0;
@@ -989,7 +999,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
err = u32_replace_hw_hnode(tp, ht, userflags, extack);
if (err) {
- idr_remove(&tp_c->handle_idr, handle);
+ idr_remove(&tp_c->handle_idr, handle2id(handle));
kfree(ht);
return err;
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index f7b809c0d142..38e2fbdcbeac 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -683,7 +683,7 @@ static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp)
struct sock *sk = &sp->inet.sk;
struct net *net = sock_net(sk);
struct net_device *dev = NULL;
- int type;
+ int type, res, bound_dev_if;
type = ipv6_addr_type(in6);
if (IPV6_ADDR_ANY == type)
@@ -697,14 +697,21 @@ static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp)
if (!(type & IPV6_ADDR_UNICAST))
return 0;
- if (sk->sk_bound_dev_if) {
- dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
+ rcu_read_lock();
+ bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
+ if (bound_dev_if) {
+ res = 0;
+ dev = dev_get_by_index_rcu(net, bound_dev_if);
if (!dev)
- return 0;
+ goto out;
}
- return ipv6_can_nonlocal_bind(net, &sp->inet) ||
- ipv6_chk_addr(net, in6, dev, 0);
+ res = ipv6_can_nonlocal_bind(net, &sp->inet) ||
+ ipv6_chk_addr(net, in6, dev, 0);
+
+out:
+ rcu_read_unlock();
+ return res;
}
/* This function checks if the address is a valid address to be used for
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 35681adedd9a..dfd29160fe11 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -836,6 +836,9 @@ static void vsock_sk_destruct(struct sock *sk)
{
struct vsock_sock *vsk = vsock_sk(sk);
+ /* Flush MSG_ZEROCOPY leftovers. */
+ __skb_queue_purge(&sk->sk_error_queue);
+
vsock_deassign_transport(vsk);
/* When clearing these addresses, there's no need to set the family and
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index fc5666c8298f..9acc13ab3f82 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -400,6 +400,7 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
if (virtio_transport_init_zcopy_skb(vsk, skb,
info->msg,
can_zcopy)) {
+ kfree_skb(skb);
ret = -ENOMEM;
break;
}
@@ -1513,6 +1514,14 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
return -ENOMEM;
}
+ /* __vsock_release() might have already flushed accept_queue.
+ * Subsequent enqueues would lead to a memory leak.
+ */
+ if (sk->sk_shutdown == SHUTDOWN_MASK) {
+ virtio_transport_reset_no_sock(t, skb);
+ return -ESHUTDOWN;
+ }
+
child = vsock_create_connected(sk);
if (!child) {
virtio_transport_reset_no_sock(t, skb);
diff --git a/samples/pktgen/pktgen_sample01_simple.sh b/samples/pktgen/pktgen_sample01_simple.sh
index cdb9f497f87d..66cb707479e6 100755
--- a/samples/pktgen/pktgen_sample01_simple.sh
+++ b/samples/pktgen/pktgen_sample01_simple.sh
@@ -76,7 +76,7 @@ if [ -n "$DST_PORT" ]; then
pg_set $DEV "udp_dst_max $UDP_DST_MAX"
fi
-[ ! -z "$UDP_CSUM" ] && pg_set $dev "flag UDPCSUM"
+[ ! -z "$UDP_CSUM" ] && pg_set $DEV "flag UDPCSUM"
# Setup random UDP port src range
pg_set $DEV "flag UDPSRC_RND"
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
index 41d0859feb7d..edc56e2cc606 100755
--- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
@@ -11,6 +11,8 @@ ALL_TESTS="
lib_dir=$(dirname "$0")
source ${lib_dir}/bond_topo_3d1c.sh
+c_maddr="33:33:00:00:00:10"
+g_maddr="33:33:00:00:02:54"
skip_prio()
{
@@ -240,6 +242,54 @@ arp_validate_test()
done
}
+# Testing correct multicast groups are added to slaves for ns targets
+arp_validate_mcast()
+{
+ RET=0
+ local arp_valid=$(cmd_jq "ip -n ${s_ns} -j -d link show bond0" ".[].linkinfo.info_data.arp_validate")
+ local active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+
+ for i in $(seq 0 2); do
+ maddr_list=$(ip -n ${s_ns} maddr show dev eth${i})
+
+ # arp_valid == 0 or active_slave should not join any maddrs
+ if { [ "$arp_valid" == "null" ] || [ "eth${i}" == ${active_slave} ]; } && \
+ echo "$maddr_list" | grep -qE "${c_maddr}|${g_maddr}"; then
+ RET=1
+ check_err 1 "arp_valid $arp_valid active_slave $active_slave, eth$i has mcast group"
+ # arp_valid != 0 and backup_slave should join both maddrs
+ elif [ "$arp_valid" != "null" ] && [ "eth${i}" != ${active_slave} ] && \
+ ( ! echo "$maddr_list" | grep -q "${c_maddr}" || \
+ ! echo "$maddr_list" | grep -q "${m_maddr}"); then
+ RET=1
+ check_err 1 "arp_valid $arp_valid active_slave $active_slave, eth$i has mcast group"
+ fi
+ done
+
+ # Do failover
+ ip -n ${s_ns} link set ${active_slave} down
+ # wait for active link change
+ slowwait 2 active_slave_changed $active_slave
+ active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+
+ for i in $(seq 0 2); do
+ maddr_list=$(ip -n ${s_ns} maddr show dev eth${i})
+
+ # arp_valid == 0 or active_slave should not join any maddrs
+ if { [ "$arp_valid" == "null" ] || [ "eth${i}" == ${active_slave} ]; } && \
+ echo "$maddr_list" | grep -qE "${c_maddr}|${g_maddr}"; then
+ RET=1
+ check_err 1 "arp_valid $arp_valid active_slave $active_slave, eth$i has mcast group"
+ # arp_valid != 0 and backup_slave should join both maddrs
+ elif [ "$arp_valid" != "null" ] && [ "eth${i}" != ${active_slave} ] && \
+ ( ! echo "$maddr_list" | grep -q "${c_maddr}" || \
+ ! echo "$maddr_list" | grep -q "${m_maddr}"); then
+ RET=1
+ check_err 1 "arp_valid $arp_valid active_slave $active_slave, eth$i has mcast group"
+ fi
+ done
+}
+
arp_validate_arp()
{
local mode=$1
@@ -261,8 +311,10 @@ arp_validate_ns()
fi
for val in $(seq 0 6); do
- arp_validate_test "mode $mode arp_interval 100 ns_ip6_target ${g_ip6} arp_validate $val"
+ arp_validate_test "mode $mode arp_interval 100 ns_ip6_target ${g_ip6},${c_ip6} arp_validate $val"
log_test "arp_validate" "$mode ns_ip6_target arp_validate $val"
+ arp_validate_mcast
+ log_test "arp_validate" "join mcast group"
done
}
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 217d8b7a7365..59fe07ee2df9 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -19,6 +19,7 @@ log.txt
msg_oob
msg_zerocopy
ncdevmem
+netlink-dumps
nettest
psock_fanout
psock_snd
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 649f1fe0dc46..5e86f7a51b43 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -78,6 +78,7 @@ TEST_PROGS += test_vxlan_vnifiltering.sh
TEST_GEN_FILES += io_uring_zerocopy_tx
TEST_PROGS += io_uring_zerocopy_tx.sh
TEST_GEN_FILES += bind_bhash
+TEST_GEN_PROGS += netlink-dumps
TEST_GEN_PROGS += sk_bind_sendto_listen
TEST_GEN_PROGS += sk_connect_zero_addr
TEST_GEN_PROGS += sk_so_peek_off
diff --git a/tools/testing/selftests/net/netlink-dumps.c b/tools/testing/selftests/net/netlink-dumps.c
new file mode 100644
index 000000000000..7ee6dcd334df
--- /dev/null
+++ b/tools/testing/selftests/net/netlink-dumps.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <linux/genetlink.h>
+#include <linux/netlink.h>
+#include <linux/mqueue.h>
+
+#include "../kselftest_harness.h"
+
+static const struct {
+ struct nlmsghdr nlhdr;
+ struct genlmsghdr genlhdr;
+ struct nlattr ahdr;
+ __u16 val;
+ __u16 pad;
+} dump_policies = {
+ .nlhdr = {
+ .nlmsg_len = sizeof(dump_policies),
+ .nlmsg_type = GENL_ID_CTRL,
+ .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP,
+ .nlmsg_seq = 1,
+ },
+ .genlhdr = {
+ .cmd = CTRL_CMD_GETPOLICY,
+ .version = 2,
+ },
+ .ahdr = {
+ .nla_len = 6,
+ .nla_type = CTRL_ATTR_FAMILY_ID,
+ },
+ .val = GENL_ID_CTRL,
+ .pad = 0,
+};
+
+// Sanity check for the test itself, make sure the dump doesn't fit in one msg
+TEST(test_sanity)
+{
+ int netlink_sock;
+ char buf[8192];
+ ssize_t n;
+
+ netlink_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+ ASSERT_GE(netlink_sock, 0);
+
+ n = send(netlink_sock, &dump_policies, sizeof(dump_policies), 0);
+ ASSERT_EQ(n, sizeof(dump_policies));
+
+ n = recv(netlink_sock, buf, sizeof(buf), MSG_DONTWAIT);
+ ASSERT_GE(n, sizeof(struct nlmsghdr));
+
+ n = recv(netlink_sock, buf, sizeof(buf), MSG_DONTWAIT);
+ ASSERT_GE(n, sizeof(struct nlmsghdr));
+
+ close(netlink_sock);
+}
+
+TEST(close_in_progress)
+{
+ int netlink_sock;
+ ssize_t n;
+
+ netlink_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+ ASSERT_GE(netlink_sock, 0);
+
+ n = send(netlink_sock, &dump_policies, sizeof(dump_policies), 0);
+ ASSERT_EQ(n, sizeof(dump_policies));
+
+ close(netlink_sock);
+}
+
+TEST(close_with_ref)
+{
+ char cookie[NOTIFY_COOKIE_LEN] = {};
+ int netlink_sock, mq_fd;
+ struct sigevent sigev;
+ ssize_t n;
+
+ netlink_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+ ASSERT_GE(netlink_sock, 0);
+
+ n = send(netlink_sock, &dump_policies, sizeof(dump_policies), 0);
+ ASSERT_EQ(n, sizeof(dump_policies));
+
+ mq_fd = syscall(__NR_mq_open, "sed", O_CREAT | O_WRONLY, 0600, 0);
+ ASSERT_GE(mq_fd, 0);
+
+ memset(&sigev, 0, sizeof(sigev));
+ sigev.sigev_notify = SIGEV_THREAD;
+ sigev.sigev_value.sival_ptr = cookie;
+ sigev.sigev_signo = netlink_sock;
+
+ syscall(__NR_mq_notify, mq_fd, &sigev);
+
+ close(netlink_sock);
+
+ // give mqueue time to fire
+ usleep(100 * 1000);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json
index 24bd0c2a3014..b2ca9d4e991b 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json
@@ -329,5 +329,29 @@
"teardown": [
"$TC qdisc del dev $DEV1 parent root drr"
]
+ },
+ {
+ "id": "1234",
+ "name": "Exercise IDR leaks by creating/deleting a filter many (2048) times",
+ "category": [
+ "filter",
+ "u32"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 parent root handle 10: drr",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 match ip src 0.0.0.2/32 action drop",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 3 u32 match ip src 0.0.0.3/32 action drop"
+ ],
+ "cmdUnderTest": "bash -c 'for i in {1..2048} ;do echo filter delete dev $DEV1 pref 3;echo filter add dev $DEV1 parent 10:0 protocol ip prio 3 u32 match ip src 0.0.0.3/32 action drop;done | $TC -b -'",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1",
+ "matchPattern": "protocol ip pref 3 u32",
+ "matchCount": "3",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 parent root drr"
+ ]
}
]