summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-06-22 17:59:51 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-06-22 17:59:51 -0700
commit8a28a0b6f1a1dcbf5a834600a9acfbe2ba51e5eb (patch)
treeead02230ac94e3259f2fc2b72bdca1e8be9d56b4 /net
parent412d070b31c3e4018afc6bb1712709df0464da48 (diff)
parent2ba7e7ebb6a71407cbe25cd349c9b05d40520bf0 (diff)
Merge tag 'net-6.4-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Paolo Abeni: "Including fixes from ipsec, bpf, mptcp and netfilter. Current release - regressions: - netfilter: add NFT_TRANS_PREPARE_ERROR to deal with bound set/chain - eth: mlx5e: - fix scheduling of IPsec ASO query while in atomic - free IRQ rmap and notifier on kernel shutdown Current release - new code bugs: - phy: manual remove LEDs to ensure correct ordering Previous releases - regressions: - mptcp: fix possible divide by zero in recvmsg() - dsa: revert "net: phy: dp83867: perform soft reset and retain established link" Previous releases - always broken: - sched: netem: acquire qdisc lock in netem_change() - bpf: - fix verifier id tracking of scalars on spill - fix NULL dereference on exceptions - accept function names that contain dots - netfilter: disallow element updates of bound anonymous sets - mptcp: ensure listener is unhashed before updating the sk status - xfrm: - add missed call to delete offloaded policies - fix inbound ipv4/udp/esp packets to UDPv6 dualstack sockets - selftests: fixes for FIPS mode - dsa: mt7530: fix multiple CPU ports, BPDU and LLDP handling - eth: sfc: use budget for TX completions Misc: - wifi: iwlwifi: add support for SO-F device with PCI id 0x7AF0" * tag 'net-6.4-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (74 commits) revert "net: align SO_RCVMARK required privileges with SO_MARK" net: wwan: iosm: Convert single instance struct member to flexible array sch_netem: acquire qdisc lock in netem_change() selftests: forwarding: Fix race condition in mirror installation wifi: mac80211: report all unusable beacon frames mptcp: ensure listener is unhashed before updating the sk status mptcp: drop legacy code around RX EOF mptcp: consolidate fallback and non fallback state machine mptcp: fix possible list corruption on passive MPJ mptcp: fix possible divide by zero in recvmsg() mptcp: handle correctly disconnect() failures bpf: Force kprobe multi expected_attach_type for kprobe_multi link bpf/btf: Accept function names that contain dots Revert "net: phy: dp83867: perform soft reset and retain established link" net: mdio: fix the wrong parameters netfilter: nf_tables: Fix for deleting base chains with payload netfilter: nfnetlink_osf: fix module autoload netfilter: nf_tables: drop module reference after updating chain netfilter: nf_tables: disallow timeout for anonymous sets netfilter: nf_tables: disallow updates of anonymous sets ...
Diffstat (limited to 'net')
-rw-r--r--net/core/sock.c6
-rw-r--r--net/dsa/dsa.c24
-rw-r--r--net/ieee802154/trace.h2
-rw-r--r--net/ipv4/esp4_offload.c3
-rw-r--r--net/ipv4/xfrm4_input.c1
-rw-r--r--net/ipv6/esp6_offload.c3
-rw-r--r--net/ipv6/xfrm6_input.c3
-rw-r--r--net/mac80211/rx.c2
-rw-r--r--net/mac802154/trace.h2
-rw-r--r--net/mptcp/pm_netlink.c1
-rw-r--r--net/mptcp/protocol.c160
-rw-r--r--net/mptcp/protocol.h5
-rw-r--r--net/mptcp/subflow.c17
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c2
-rw-r--r--net/netfilter/nf_tables_api.c366
-rw-r--r--net/netfilter/nfnetlink_osf.c1
-rw-r--r--net/netfilter/nft_immediate.c90
-rw-r--r--net/netfilter/nft_set_bitmap.c5
-rw-r--r--net/netfilter/nft_set_hash.c23
-rw-r--r--net/netfilter/nft_set_pipapo.c20
-rw-r--r--net/netfilter/nft_set_rbtree.c5
-rw-r--r--net/netfilter/xt_osf.c1
-rw-r--r--net/sched/sch_netem.c8
-rw-r--r--net/xfrm/xfrm_input.c8
-rw-r--r--net/xfrm/xfrm_interface_core.c54
-rw-r--r--net/xfrm/xfrm_policy.c14
26 files changed, 603 insertions, 223 deletions
diff --git a/net/core/sock.c b/net/core/sock.c
index 24f2761bdb1d..6e5662ca00fe 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1362,12 +1362,6 @@ set_sndbuf:
__sock_set_mark(sk, val);
break;
case SO_RCVMARK:
- if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
- !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
- ret = -EPERM;
- break;
- }
-
sock_valbool_flag(sk, SOCK_RCVMARK, valbool);
break;
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index ab1afe67fd18..1afed89e03c0 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -403,6 +403,24 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
return 0;
}
+static struct dsa_port *
+dsa_switch_preferred_default_local_cpu_port(struct dsa_switch *ds)
+{
+ struct dsa_port *cpu_dp;
+
+ if (!ds->ops->preferred_default_local_cpu_port)
+ return NULL;
+
+ cpu_dp = ds->ops->preferred_default_local_cpu_port(ds);
+ if (!cpu_dp)
+ return NULL;
+
+ if (WARN_ON(!dsa_port_is_cpu(cpu_dp) || cpu_dp->ds != ds))
+ return NULL;
+
+ return cpu_dp;
+}
+
/* Perform initial assignment of CPU ports to user ports and DSA links in the
* fabric, giving preference to CPU ports local to each switch. Default to
* using the first CPU port in the switch tree if the port does not have a CPU
@@ -410,12 +428,16 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
*/
static int dsa_tree_setup_cpu_ports(struct dsa_switch_tree *dst)
{
- struct dsa_port *cpu_dp, *dp;
+ struct dsa_port *preferred_cpu_dp, *cpu_dp, *dp;
list_for_each_entry(cpu_dp, &dst->ports, list) {
if (!dsa_port_is_cpu(cpu_dp))
continue;
+ preferred_cpu_dp = dsa_switch_preferred_default_local_cpu_port(cpu_dp->ds);
+ if (preferred_cpu_dp && preferred_cpu_dp != cpu_dp)
+ continue;
+
/* Prefer a local CPU port */
dsa_switch_for_each_port(dp, cpu_dp->ds) {
/* Prefer the first local CPU port found */
diff --git a/net/ieee802154/trace.h b/net/ieee802154/trace.h
index e5d8439b9e45..c16db0b326fa 100644
--- a/net/ieee802154/trace.h
+++ b/net/ieee802154/trace.h
@@ -13,7 +13,7 @@
#define MAXNAME 32
#define WPAN_PHY_ENTRY __array(char, wpan_phy_name, MAXNAME)
-#define WPAN_PHY_ASSIGN strlcpy(__entry->wpan_phy_name, \
+#define WPAN_PHY_ASSIGN strscpy(__entry->wpan_phy_name, \
wpan_phy_name(wpan_phy), \
MAXNAME)
#define WPAN_PHY_PR_FMT "%s"
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 3969fa805679..ee848be59e65 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -340,6 +340,9 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_
secpath_reset(skb);
+ if (skb_needs_linearize(skb, skb->dev->features) &&
+ __skb_linearize(skb))
+ return -ENOMEM;
return 0;
}
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index ad2afeef4f10..eac206a290d0 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -164,6 +164,7 @@ drop:
kfree_skb(skb);
return 0;
}
+EXPORT_SYMBOL(xfrm4_udp_encap_rcv);
int xfrm4_rcv(struct sk_buff *skb)
{
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 75c02992c520..772340268997 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -374,6 +374,9 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features
secpath_reset(skb);
+ if (skb_needs_linearize(skb, skb->dev->features) &&
+ __skb_linearize(skb))
+ return -ENOMEM;
return 0;
}
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 04cbeefd8982..4907ab241d6b 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -86,6 +86,9 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
__be32 *udpdata32;
__u16 encap_type = up->encap_type;
+ if (skb->protocol == htons(ETH_P_IP))
+ return xfrm4_udp_encap_rcv(sk, skb);
+
/* if this is not encapsulated socket, then just return now */
if (!encap_type)
return 1;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index d996aa2579df..fc6e130364da 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2110,7 +2110,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
/* either the frame has been decrypted or will be dropped */
status->flag |= RX_FLAG_DECRYPTED;
- if (unlikely(ieee80211_is_beacon(fc) && result == RX_DROP_UNUSABLE &&
+ if (unlikely(ieee80211_is_beacon(fc) && (result & RX_DROP_UNUSABLE) &&
rx->sdata->dev))
cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev,
skb->data, skb->len);
diff --git a/net/mac802154/trace.h b/net/mac802154/trace.h
index 689396d6c76a..1574ecc48075 100644
--- a/net/mac802154/trace.h
+++ b/net/mac802154/trace.h
@@ -14,7 +14,7 @@
#define MAXNAME 32
#define LOCAL_ENTRY __array(char, wpan_phy_name, MAXNAME)
-#define LOCAL_ASSIGN strlcpy(__entry->wpan_phy_name, \
+#define LOCAL_ASSIGN strscpy(__entry->wpan_phy_name, \
wpan_phy_name(local->hw.phy), MAXNAME)
#define LOCAL_PR_FMT "%s"
#define LOCAL_PR_ARG __entry->wpan_phy_name
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 59f8f3124855..1224dfca5bf3 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -1047,6 +1047,7 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
if (err)
return err;
+ inet_sk_state_store(newsk, TCP_LISTEN);
err = kernel_listen(ssock, backlog);
if (err)
return err;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 67311e7d5b21..a6c7f2d24909 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -44,7 +44,7 @@ enum {
static struct percpu_counter mptcp_sockets_allocated ____cacheline_aligned_in_smp;
static void __mptcp_destroy_sock(struct sock *sk);
-static void __mptcp_check_send_data_fin(struct sock *sk);
+static void mptcp_check_send_data_fin(struct sock *sk);
DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
static struct net_device mptcp_napi_dev;
@@ -424,8 +424,7 @@ static bool mptcp_pending_data_fin_ack(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- return !__mptcp_check_fallback(msk) &&
- ((1 << sk->sk_state) &
+ return ((1 << sk->sk_state) &
(TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK)) &&
msk->write_seq == READ_ONCE(msk->snd_una);
}
@@ -583,9 +582,6 @@ static bool mptcp_check_data_fin(struct sock *sk)
u64 rcv_data_fin_seq;
bool ret = false;
- if (__mptcp_check_fallback(msk))
- return ret;
-
/* Need to ack a DATA_FIN received from a peer while this side
* of the connection is in ESTABLISHED, FIN_WAIT1, or FIN_WAIT2.
* msk->rcv_data_fin was set when parsing the incoming options
@@ -623,7 +619,8 @@ static bool mptcp_check_data_fin(struct sock *sk)
}
ret = true;
- mptcp_send_ack(msk);
+ if (!__mptcp_check_fallback(msk))
+ mptcp_send_ack(msk);
mptcp_close_wake_up(sk);
}
return ret;
@@ -850,12 +847,12 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
return true;
}
-static void __mptcp_flush_join_list(struct sock *sk)
+static void __mptcp_flush_join_list(struct sock *sk, struct list_head *join_list)
{
struct mptcp_subflow_context *tmp, *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
- list_for_each_entry_safe(subflow, tmp, &msk->join_list, node) {
+ list_for_each_entry_safe(subflow, tmp, join_list, node) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
bool slow = lock_sock_fast(ssk);
@@ -897,49 +894,6 @@ bool mptcp_schedule_work(struct sock *sk)
return false;
}
-void mptcp_subflow_eof(struct sock *sk)
-{
- if (!test_and_set_bit(MPTCP_WORK_EOF, &mptcp_sk(sk)->flags))
- mptcp_schedule_work(sk);
-}
-
-static void mptcp_check_for_eof(struct mptcp_sock *msk)
-{
- struct mptcp_subflow_context *subflow;
- struct sock *sk = (struct sock *)msk;
- int receivers = 0;
-
- mptcp_for_each_subflow(msk, subflow)
- receivers += !subflow->rx_eof;
- if (receivers)
- return;
-
- if (!(sk->sk_shutdown & RCV_SHUTDOWN)) {
- /* hopefully temporary hack: propagate shutdown status
- * to msk, when all subflows agree on it
- */
- WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN);
-
- smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
- sk->sk_data_ready(sk);
- }
-
- switch (sk->sk_state) {
- case TCP_ESTABLISHED:
- inet_sk_state_store(sk, TCP_CLOSE_WAIT);
- break;
- case TCP_FIN_WAIT1:
- inet_sk_state_store(sk, TCP_CLOSING);
- break;
- case TCP_FIN_WAIT2:
- inet_sk_state_store(sk, TCP_CLOSE);
- break;
- default:
- return;
- }
- mptcp_close_wake_up(sk);
-}
-
static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow;
@@ -1609,7 +1563,7 @@ out:
if (!mptcp_timer_pending(sk))
mptcp_reset_timer(sk);
if (do_check_data_fin)
- __mptcp_check_send_data_fin(sk);
+ mptcp_check_send_data_fin(sk);
}
static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool first)
@@ -1727,7 +1681,13 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR)
*copied_syn = 0;
} else if (ret && ret != -EINPROGRESS) {
- mptcp_disconnect(sk, 0);
+ /* The disconnect() op called by tcp_sendmsg_fastopen()/
+ * __inet_stream_connect() can fail, due to looking check,
+ * see mptcp_disconnect().
+ * Attempt it again outside the problematic scope.
+ */
+ if (!mptcp_disconnect(sk, 0))
+ sk->sk_socket->state = SS_UNCONNECTED;
}
inet_sk(sk)->defer_connect = 0;
@@ -2158,9 +2118,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
break;
}
- if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
- mptcp_check_for_eof(msk);
-
if (sk->sk_shutdown & RCV_SHUTDOWN) {
/* race breaker: the shutdown could be after the
* previous receive queue check
@@ -2389,7 +2346,10 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk);
if (!dispose_it) {
- tcp_disconnect(ssk, 0);
+ /* The MPTCP code never wait on the subflow sockets, TCP-level
+ * disconnect should never fail
+ */
+ WARN_ON_ONCE(tcp_disconnect(ssk, 0));
msk->subflow->state = SS_UNCONNECTED;
mptcp_subflow_ctx_reset(subflow);
release_sock(ssk);
@@ -2408,13 +2368,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
kfree_rcu(subflow, rcu);
} else {
/* otherwise tcp will dispose of the ssk and subflow ctx */
- if (ssk->sk_state == TCP_LISTEN) {
- tcp_set_state(ssk, TCP_CLOSE);
- mptcp_subflow_queue_clean(sk, ssk);
- inet_csk_listen_stop(ssk);
- mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
- }
-
__tcp_close(ssk, 0);
/* close acquired an extra ref */
@@ -2671,16 +2624,12 @@ static void mptcp_worker(struct work_struct *work)
if (unlikely((1 << state) & (TCPF_CLOSE | TCPF_LISTEN)))
goto unlock;
- mptcp_check_data_fin_ack(sk);
-
mptcp_check_fastclose(msk);
mptcp_pm_nl_work(msk);
- if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
- mptcp_check_for_eof(msk);
-
- __mptcp_check_send_data_fin(sk);
+ mptcp_check_send_data_fin(sk);
+ mptcp_check_data_fin_ack(sk);
mptcp_check_data_fin(sk);
if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
@@ -2812,13 +2761,19 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
break;
fallthrough;
case TCP_SYN_SENT:
- tcp_disconnect(ssk, O_NONBLOCK);
+ WARN_ON_ONCE(tcp_disconnect(ssk, O_NONBLOCK));
break;
default:
if (__mptcp_check_fallback(mptcp_sk(sk))) {
pr_debug("Fallback");
ssk->sk_shutdown |= how;
tcp_shutdown(ssk, how);
+
+ /* simulate the data_fin ack reception to let the state
+ * machine move forward
+ */
+ WRITE_ONCE(mptcp_sk(sk)->snd_una, mptcp_sk(sk)->snd_nxt);
+ mptcp_schedule_work(sk);
} else {
pr_debug("Sending DATA_FIN on subflow %p", ssk);
tcp_send_ack(ssk);
@@ -2858,7 +2813,7 @@ static int mptcp_close_state(struct sock *sk)
return next & TCP_ACTION_FIN;
}
-static void __mptcp_check_send_data_fin(struct sock *sk)
+static void mptcp_check_send_data_fin(struct sock *sk)
{
struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -2876,19 +2831,6 @@ static void __mptcp_check_send_data_fin(struct sock *sk)
WRITE_ONCE(msk->snd_nxt, msk->write_seq);
- /* fallback socket will not get data_fin/ack, can move to the next
- * state now
- */
- if (__mptcp_check_fallback(msk)) {
- WRITE_ONCE(msk->snd_una, msk->write_seq);
- if ((1 << sk->sk_state) & (TCPF_CLOSING | TCPF_LAST_ACK)) {
- inet_sk_state_store(sk, TCP_CLOSE);
- mptcp_close_wake_up(sk);
- } else if (sk->sk_state == TCP_FIN_WAIT1) {
- inet_sk_state_store(sk, TCP_FIN_WAIT2);
- }
- }
-
mptcp_for_each_subflow(msk, subflow) {
struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
@@ -2908,7 +2850,7 @@ static void __mptcp_wr_shutdown(struct sock *sk)
WRITE_ONCE(msk->write_seq, msk->write_seq + 1);
WRITE_ONCE(msk->snd_data_fin_enable, 1);
- __mptcp_check_send_data_fin(sk);
+ mptcp_check_send_data_fin(sk);
}
static void __mptcp_destroy_sock(struct sock *sk)
@@ -2953,10 +2895,24 @@ static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
return EPOLLIN | EPOLLRDNORM;
}
-static void mptcp_listen_inuse_dec(struct sock *sk)
+static void mptcp_check_listen_stop(struct sock *sk)
{
- if (inet_sk_state_load(sk) == TCP_LISTEN)
- sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+ struct sock *ssk;
+
+ if (inet_sk_state_load(sk) != TCP_LISTEN)
+ return;
+
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+ ssk = mptcp_sk(sk)->first;
+ if (WARN_ON_ONCE(!ssk || inet_sk_state_load(ssk) != TCP_LISTEN))
+ return;
+
+ lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
+ mptcp_subflow_queue_clean(sk, ssk);
+ inet_csk_listen_stop(ssk);
+ mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
+ tcp_set_state(ssk, TCP_CLOSE);
+ release_sock(ssk);
}
bool __mptcp_close(struct sock *sk, long timeout)
@@ -2969,7 +2925,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) {
- mptcp_listen_inuse_dec(sk);
+ mptcp_check_listen_stop(sk);
inet_sk_state_store(sk, TCP_CLOSE);
goto cleanup;
}
@@ -3073,15 +3029,20 @@ static int mptcp_disconnect(struct sock *sk, int flags)
{
struct mptcp_sock *msk = mptcp_sk(sk);
+ /* Deny disconnect if other threads are blocked in sk_wait_event()
+ * or inet_wait_for_connect().
+ */
+ if (sk->sk_wait_pending)
+ return -EBUSY;
+
/* We are on the fastopen error path. We can't call straight into the
* subflows cleanup code due to lock nesting (we are already under
- * msk->firstsocket lock). Do nothing and leave the cleanup to the
- * caller.
+ * msk->firstsocket lock).
*/
if (msk->fastopening)
- return 0;
+ return -EBUSY;
- mptcp_listen_inuse_dec(sk);
+ mptcp_check_listen_stop(sk);
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_stop_timer(sk);
@@ -3140,6 +3101,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
inet_sk(nsk)->pinet6 = mptcp_inet6_sk(nsk);
#endif
+ nsk->sk_wait_pending = 0;
__mptcp_init_sock(nsk);
msk = mptcp_sk(nsk);
@@ -3327,9 +3289,14 @@ static void mptcp_release_cb(struct sock *sk)
for (;;) {
unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) |
msk->push_pending;
+ struct list_head join_list;
+
if (!flags)
break;
+ INIT_LIST_HEAD(&join_list);
+ list_splice_init(&msk->join_list, &join_list);
+
/* the following actions acquire the subflow socket lock
*
* 1) can't be invoked in atomic scope
@@ -3340,8 +3307,9 @@ static void mptcp_release_cb(struct sock *sk)
msk->push_pending = 0;
msk->cb_flags &= ~flags;
spin_unlock_bh(&sk->sk_lock.slock);
+
if (flags & BIT(MPTCP_FLUSH_JOIN_LIST))
- __mptcp_flush_join_list(sk);
+ __mptcp_flush_join_list(sk, &join_list);
if (flags & BIT(MPTCP_PUSH_PENDING))
__mptcp_push_pending(sk, 0);
if (flags & BIT(MPTCP_RETRANSMIT))
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 70c957bc56a8..d3783a7056e1 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -113,7 +113,6 @@
/* MPTCP socket atomic flags */
#define MPTCP_NOSPACE 1
#define MPTCP_WORK_RTX 2
-#define MPTCP_WORK_EOF 3
#define MPTCP_FALLBACK_DONE 4
#define MPTCP_WORK_CLOSE_SUBFLOW 5
@@ -476,14 +475,13 @@ struct mptcp_subflow_context {
send_mp_fail : 1,
send_fastclose : 1,
send_infinite_map : 1,
- rx_eof : 1,
remote_key_valid : 1, /* received the peer key from */
disposable : 1, /* ctx can be free at ulp release time */
stale : 1, /* unable to snd/rcv data, do not use for xmit */
local_id_valid : 1, /* local_id is correctly initialized */
valid_csum_seen : 1, /* at least one csum validated */
is_mptfo : 1, /* subflow is doing TFO */
- __unused : 8;
+ __unused : 9;
enum mptcp_data_avail data_avail;
u32 remote_nonce;
u64 thmac;
@@ -720,7 +718,6 @@ static inline u64 mptcp_expand_seq(u64 old_seq, u64 cur_seq, bool use_64bit)
void __mptcp_check_push(struct sock *sk, struct sock *ssk);
void __mptcp_data_acked(struct sock *sk);
void __mptcp_error_report(struct sock *sk);
-void mptcp_subflow_eof(struct sock *sk);
bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit);
static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
{
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 4688daa6b38b..d9c8b21c6076 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1749,14 +1749,16 @@ static void subflow_state_change(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct sock *parent = subflow->conn;
+ struct mptcp_sock *msk;
__subflow_state_change(sk);
+ msk = mptcp_sk(parent);
if (subflow_simultaneous_connect(sk)) {
mptcp_propagate_sndbuf(parent, sk);
mptcp_do_fallback(sk);
- mptcp_rcv_space_init(mptcp_sk(parent), sk);
- pr_fallback(mptcp_sk(parent));
+ mptcp_rcv_space_init(msk, sk);
+ pr_fallback(msk);
subflow->conn_finished = 1;
mptcp_set_connected(parent);
}
@@ -1772,11 +1774,12 @@ static void subflow_state_change(struct sock *sk)
subflow_sched_work_if_closed(mptcp_sk(parent), sk);
- if (__mptcp_check_fallback(mptcp_sk(parent)) &&
- !subflow->rx_eof && subflow_is_done(sk)) {
- subflow->rx_eof = 1;
- mptcp_subflow_eof(parent);
- }
+ /* when the fallback subflow closes the rx side, trigger a 'dummy'
+ * ingress data fin, so that the msk state will follow along
+ */
+ if (__mptcp_check_fallback(msk) && subflow_is_done(sk) && msk->first == sk &&
+ mptcp_update_rcv_data_fin(msk, READ_ONCE(msk->ack_seq), true))
+ mptcp_schedule_work(parent);
}
void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk)
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index feb1d7fcb09f..a80b960223e1 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -1207,6 +1207,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->transport_header = skb->network_header;
skb_set_inner_ipproto(skb, next_protocol);
+ skb_set_inner_mac_header(skb, skb_inner_network_offset(skb));
if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
bool check = false;
@@ -1349,6 +1350,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->transport_header = skb->network_header;
skb_set_inner_ipproto(skb, next_protocol);
+ skb_set_inner_mac_header(skb, skb_inner_network_offset(skb));
if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
bool check = false;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 69bceefaa5c8..4c7937fd803f 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -151,6 +151,7 @@ static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx,
return NULL;
INIT_LIST_HEAD(&trans->list);
+ INIT_LIST_HEAD(&trans->binding_list);
trans->msg_type = msg_type;
trans->ctx = *ctx;
@@ -163,13 +164,20 @@ static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx,
return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL);
}
-static void nft_trans_destroy(struct nft_trans *trans)
+static void nft_trans_list_del(struct nft_trans *trans)
{
list_del(&trans->list);
+ list_del(&trans->binding_list);
+}
+
+static void nft_trans_destroy(struct nft_trans *trans)
+{
+ nft_trans_list_del(trans);
kfree(trans);
}
-static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
+static void __nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set,
+ bool bind)
{
struct nftables_pernet *nft_net;
struct net *net = ctx->net;
@@ -183,16 +191,80 @@ static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
switch (trans->msg_type) {
case NFT_MSG_NEWSET:
if (nft_trans_set(trans) == set)
- nft_trans_set_bound(trans) = true;
+ nft_trans_set_bound(trans) = bind;
break;
case NFT_MSG_NEWSETELEM:
if (nft_trans_elem_set(trans) == set)
- nft_trans_elem_set_bound(trans) = true;
+ nft_trans_elem_set_bound(trans) = bind;
break;
}
}
}
+static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ return __nft_set_trans_bind(ctx, set, true);
+}
+
+static void nft_set_trans_unbind(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ return __nft_set_trans_bind(ctx, set, false);
+}
+
+static void __nft_chain_trans_bind(const struct nft_ctx *ctx,
+ struct nft_chain *chain, bool bind)
+{
+ struct nftables_pernet *nft_net;
+ struct net *net = ctx->net;
+ struct nft_trans *trans;
+
+ if (!nft_chain_binding(chain))
+ return;
+
+ nft_net = nft_pernet(net);
+ list_for_each_entry_reverse(trans, &nft_net->commit_list, list) {
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWCHAIN:
+ if (nft_trans_chain(trans) == chain)
+ nft_trans_chain_bound(trans) = bind;
+ break;
+ case NFT_MSG_NEWRULE:
+ if (trans->ctx.chain == chain)
+ nft_trans_rule_bound(trans) = bind;
+ break;
+ }
+ }
+}
+
+static void nft_chain_trans_bind(const struct nft_ctx *ctx,
+ struct nft_chain *chain)
+{
+ __nft_chain_trans_bind(ctx, chain, true);
+}
+
+int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain)
+{
+ if (!nft_chain_binding(chain))
+ return 0;
+
+ if (nft_chain_binding(ctx->chain))
+ return -EOPNOTSUPP;
+
+ if (chain->bound)
+ return -EBUSY;
+
+ chain->bound = true;
+ chain->use++;
+ nft_chain_trans_bind(ctx, chain);
+
+ return 0;
+}
+
+void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain)
+{
+ __nft_chain_trans_bind(ctx, chain, false);
+}
+
static int nft_netdev_register_hooks(struct net *net,
struct list_head *hook_list)
{
@@ -292,6 +364,19 @@ static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *tr
{
struct nftables_pernet *nft_net = nft_pernet(net);
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWSET:
+ if (!nft_trans_set_update(trans) &&
+ nft_set_is_anonymous(nft_trans_set(trans)))
+ list_add_tail(&trans->binding_list, &nft_net->binding_list);
+ break;
+ case NFT_MSG_NEWCHAIN:
+ if (!nft_trans_chain_update(trans) &&
+ nft_chain_binding(nft_trans_chain(trans)))
+ list_add_tail(&trans->binding_list, &nft_net->binding_list);
+ break;
+ }
+
list_add_tail(&trans->list, &nft_net->commit_list);
}
@@ -338,8 +423,9 @@ static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
ntohl(nla_get_be32(ctx->nla[NFTA_CHAIN_ID]));
}
}
-
+ nft_trans_chain(trans) = ctx->chain;
nft_trans_commit_list_add_tail(ctx->net, trans);
+
return trans;
}
@@ -357,8 +443,7 @@ static int nft_delchain(struct nft_ctx *ctx)
return 0;
}
-static void nft_rule_expr_activate(const struct nft_ctx *ctx,
- struct nft_rule *rule)
+void nft_rule_expr_activate(const struct nft_ctx *ctx, struct nft_rule *rule)
{
struct nft_expr *expr;
@@ -371,9 +456,8 @@ static void nft_rule_expr_activate(const struct nft_ctx *ctx,
}
}
-static void nft_rule_expr_deactivate(const struct nft_ctx *ctx,
- struct nft_rule *rule,
- enum nft_trans_phase phase)
+void nft_rule_expr_deactivate(const struct nft_ctx *ctx, struct nft_rule *rule,
+ enum nft_trans_phase phase)
{
struct nft_expr *expr;
@@ -495,6 +579,58 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
return __nft_trans_set_add(ctx, msg_type, set, NULL);
}
+static void nft_setelem_data_deactivate(const struct net *net,
+ const struct nft_set *set,
+ struct nft_set_elem *elem);
+
+static int nft_mapelem_deactivate(const struct nft_ctx *ctx,
+ struct nft_set *set,
+ const struct nft_set_iter *iter,
+ struct nft_set_elem *elem)
+{
+ nft_setelem_data_deactivate(ctx->net, set, elem);
+
+ return 0;
+}
+
+struct nft_set_elem_catchall {
+ struct list_head list;
+ struct rcu_head rcu;
+ void *elem;
+};
+
+static void nft_map_catchall_deactivate(const struct nft_ctx *ctx,
+ struct nft_set *set)
+{
+ u8 genmask = nft_genmask_next(ctx->net);
+ struct nft_set_elem_catchall *catchall;
+ struct nft_set_elem elem;
+ struct nft_set_ext *ext;
+
+ list_for_each_entry(catchall, &set->catchall_list, list) {
+ ext = nft_set_elem_ext(set, catchall->elem);
+ if (!nft_set_elem_active(ext, genmask))
+ continue;
+
+ elem.priv = catchall->elem;
+ nft_setelem_data_deactivate(ctx->net, set, &elem);
+ break;
+ }
+}
+
+static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ struct nft_set_iter iter = {
+ .genmask = nft_genmask_next(ctx->net),
+ .fn = nft_mapelem_deactivate,
+ };
+
+ set->ops->walk(ctx, set, &iter);
+ WARN_ON_ONCE(iter.err);
+
+ nft_map_catchall_deactivate(ctx, set);
+}
+
static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
{
int err;
@@ -503,6 +639,9 @@ static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
if (err < 0)
return err;
+ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_deactivate(ctx, set);
+
nft_deactivate_next(ctx->net, set);
ctx->table->use--;
@@ -2226,7 +2365,7 @@ static int nft_basechain_init(struct nft_base_chain *basechain, u8 family,
return 0;
}
-static int nft_chain_add(struct nft_table *table, struct nft_chain *chain)
+int nft_chain_add(struct nft_table *table, struct nft_chain *chain)
{
int err;
@@ -2528,6 +2667,8 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
nft_trans_basechain(trans) = basechain;
INIT_LIST_HEAD(&nft_trans_chain_hooks(trans));
list_splice(&hook.list, &nft_trans_chain_hooks(trans));
+ if (nla[NFTA_CHAIN_HOOK])
+ module_put(hook.type->owner);
nft_trans_commit_list_add_tail(ctx->net, trans);
@@ -2670,21 +2811,18 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info,
return nf_tables_addchain(&ctx, family, genmask, policy, flags, extack);
}
-static int nft_delchain_hook(struct nft_ctx *ctx, struct nft_chain *chain,
+static int nft_delchain_hook(struct nft_ctx *ctx,
+ struct nft_base_chain *basechain,
struct netlink_ext_ack *extack)
{
+ const struct nft_chain *chain = &basechain->chain;
const struct nlattr * const *nla = ctx->nla;
struct nft_chain_hook chain_hook = {};
- struct nft_base_chain *basechain;
struct nft_hook *this, *hook;
LIST_HEAD(chain_del_list);
struct nft_trans *trans;
int err;
- if (!nft_is_base_chain(chain))
- return -EOPNOTSUPP;
-
- basechain = nft_base_chain(chain);
err = nft_chain_parse_hook(ctx->net, basechain, nla, &chain_hook,
ctx->family, chain->flags, extack);
if (err < 0)
@@ -2769,7 +2907,12 @@ static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info,
if (chain->flags & NFT_CHAIN_HW_OFFLOAD)
return -EOPNOTSUPP;
- return nft_delchain_hook(&ctx, chain, extack);
+ if (nft_is_base_chain(chain)) {
+ struct nft_base_chain *basechain = nft_base_chain(chain);
+
+ if (nft_base_chain_netdev(table->family, basechain->ops.hooknum))
+ return nft_delchain_hook(&ctx, basechain, extack);
+ }
}
if (info->nlh->nlmsg_flags & NLM_F_NONREC &&
@@ -3490,8 +3633,7 @@ err_fill_rule_info:
return err;
}
-static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
- struct nft_rule *rule)
+void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule)
{
struct nft_expr *expr, *next;
@@ -3508,7 +3650,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
kfree(rule);
}
-void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule)
+static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule)
{
nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE);
nf_tables_rule_destroy(ctx, rule);
@@ -3596,12 +3738,6 @@ int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
return 0;
}
-struct nft_set_elem_catchall {
- struct list_head list;
- struct rcu_head rcu;
- void *elem;
-};
-
int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set)
{
u8 genmask = nft_genmask_next(ctx->net);
@@ -3844,7 +3980,7 @@ err_destroy_flow_rule:
if (flow)
nft_flow_rule_destroy(flow);
err_release_rule:
- nft_rule_expr_deactivate(&ctx, rule, NFT_TRANS_PREPARE);
+ nft_rule_expr_deactivate(&ctx, rule, NFT_TRANS_PREPARE_ERROR);
nf_tables_rule_destroy(&ctx, rule);
err_release_expr:
for (i = 0; i < n; i++) {
@@ -4777,6 +4913,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (!(flags & NFT_SET_TIMEOUT))
return -EINVAL;
+ if (flags & NFT_SET_ANONYMOUS)
+ return -EOPNOTSUPP;
+
err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout);
if (err)
return err;
@@ -4785,6 +4924,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (nla[NFTA_SET_GC_INTERVAL] != NULL) {
if (!(flags & NFT_SET_TIMEOUT))
return -EINVAL;
+
+ if (flags & NFT_SET_ANONYMOUS)
+ return -EOPNOTSUPP;
+
desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
}
@@ -4831,6 +4974,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
+ if (nft_set_is_anonymous(set))
+ return -EOPNOTSUPP;
+
err = nft_set_expr_alloc(&ctx, set, nla, exprs, &num_exprs, flags);
if (err < 0)
return err;
@@ -4934,7 +5080,7 @@ err_set_expr_alloc:
for (i = 0; i < set->num_exprs; i++)
nft_expr_destroy(&ctx, set->exprs[i]);
err_set_destroy:
- ops->destroy(set);
+ ops->destroy(&ctx, set);
err_set_init:
kfree(set->name);
err_set_name:
@@ -4949,7 +5095,7 @@ static void nft_set_catchall_destroy(const struct nft_ctx *ctx,
list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
list_del_rcu(&catchall->list);
- nft_set_elem_destroy(set, catchall->elem, true);
+ nf_tables_set_elem_destroy(ctx, set, catchall->elem);
kfree_rcu(catchall, rcu);
}
}
@@ -4964,7 +5110,7 @@ static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
for (i = 0; i < set->num_exprs; i++)
nft_expr_destroy(ctx, set->exprs[i]);
- set->ops->destroy(set);
+ set->ops->destroy(ctx, set);
nft_set_catchall_destroy(ctx, set);
kfree(set->name);
kvfree(set);
@@ -5129,10 +5275,60 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
}
}
+static void nft_setelem_data_activate(const struct net *net,
+ const struct nft_set *set,
+ struct nft_set_elem *elem);
+
+static int nft_mapelem_activate(const struct nft_ctx *ctx,
+ struct nft_set *set,
+ const struct nft_set_iter *iter,
+ struct nft_set_elem *elem)
+{
+ nft_setelem_data_activate(ctx->net, set, elem);
+
+ return 0;
+}
+
+static void nft_map_catchall_activate(const struct nft_ctx *ctx,
+ struct nft_set *set)
+{
+ u8 genmask = nft_genmask_next(ctx->net);
+ struct nft_set_elem_catchall *catchall;
+ struct nft_set_elem elem;
+ struct nft_set_ext *ext;
+
+ list_for_each_entry(catchall, &set->catchall_list, list) {
+ ext = nft_set_elem_ext(set, catchall->elem);
+ if (!nft_set_elem_active(ext, genmask))
+ continue;
+
+ elem.priv = catchall->elem;
+ nft_setelem_data_activate(ctx->net, set, &elem);
+ break;
+ }
+}
+
+static void nft_map_activate(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ struct nft_set_iter iter = {
+ .genmask = nft_genmask_next(ctx->net),
+ .fn = nft_mapelem_activate,
+ };
+
+ set->ops->walk(ctx, set, &iter);
+ WARN_ON_ONCE(iter.err);
+
+ nft_map_catchall_activate(ctx, set);
+}
+
void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set)
{
- if (nft_set_is_anonymous(set))
+ if (nft_set_is_anonymous(set)) {
+ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_activate(ctx, set);
+
nft_clear(ctx->net, set);
+ }
set->use++;
}
@@ -5143,14 +5339,28 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
enum nft_trans_phase phase)
{
switch (phase) {
- case NFT_TRANS_PREPARE:
+ case NFT_TRANS_PREPARE_ERROR:
+ nft_set_trans_unbind(ctx, set);
if (nft_set_is_anonymous(set))
nft_deactivate_next(ctx->net, set);
set->use--;
+ break;
+ case NFT_TRANS_PREPARE:
+ if (nft_set_is_anonymous(set)) {
+ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_deactivate(ctx, set);
+
+ nft_deactivate_next(ctx->net, set);
+ }
+ set->use--;
return;
case NFT_TRANS_ABORT:
case NFT_TRANS_RELEASE:
+ if (nft_set_is_anonymous(set) &&
+ set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_deactivate(ctx, set);
+
set->use--;
fallthrough;
default:
@@ -5903,6 +6113,7 @@ static void nft_set_elem_expr_destroy(const struct nft_ctx *ctx,
__nft_set_elem_expr_destroy(ctx, expr);
}
+/* Drop references and destroy. Called from gc, dynset and abort path. */
void nft_set_elem_destroy(const struct nft_set *set, void *elem,
bool destroy_expr)
{
@@ -5924,11 +6135,11 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
}
EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
-/* Only called from commit path, nft_setelem_data_deactivate() already deals
- * with the refcounting from the preparation phase.
+/* Destroy element. References have been already dropped in the preparation
+ * path via nft_setelem_data_deactivate().
*/
-static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
- const struct nft_set *set, void *elem)
+void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set, void *elem)
{
struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
@@ -6491,19 +6702,19 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (flags)
*nft_set_ext_flags(ext) = flags;
+ if (obj) {
+ *nft_set_ext_obj(ext) = obj;
+ obj->use++;
+ }
if (ulen > 0) {
if (nft_set_ext_check(&tmpl, NFT_SET_EXT_USERDATA, ulen) < 0) {
err = -EINVAL;
- goto err_elem_userdata;
+ goto err_elem_free;
}
udata = nft_set_ext_userdata(ext);
udata->len = ulen - 1;
nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen);
}
- if (obj) {
- *nft_set_ext_obj(ext) = obj;
- obj->use++;
- }
err = nft_set_elem_expr_setup(ctx, &tmpl, ext, expr_array, num_exprs);
if (err < 0)
goto err_elem_free;
@@ -6558,10 +6769,7 @@ err_set_full:
err_element_clash:
kfree(trans);
err_elem_free:
- if (obj)
- obj->use--;
-err_elem_userdata:
- nf_tables_set_elem_destroy(ctx, set, elem.priv);
+ nft_set_elem_destroy(set, elem.priv, true);
err_parse_data:
if (nla[NFTA_SET_ELEM_DATA] != NULL)
nft_data_release(&elem.data.val, desc.type);
@@ -6605,7 +6813,8 @@ static int nf_tables_newsetelem(struct sk_buff *skb,
if (IS_ERR(set))
return PTR_ERR(set);
- if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
+ if (!list_empty(&set->bindings) &&
+ (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS)))
return -EBUSY;
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
@@ -6638,7 +6847,6 @@ static int nf_tables_newsetelem(struct sk_buff *skb,
void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
{
struct nft_chain *chain;
- struct nft_rule *rule;
if (type == NFT_DATA_VERDICT) {
switch (data->verdict.code) {
@@ -6646,15 +6854,6 @@ void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
case NFT_GOTO:
chain = data->verdict.chain;
chain->use++;
-
- if (!nft_chain_is_bound(chain))
- break;
-
- chain->table->use++;
- list_for_each_entry(rule, &chain->rules, list)
- chain->use++;
-
- nft_chain_add(chain->table, chain);
break;
}
}
@@ -6889,7 +7088,9 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
if (IS_ERR(set))
return PTR_ERR(set);
- if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
+
+ if (!list_empty(&set->bindings) &&
+ (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS)))
return -EBUSY;
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
@@ -7671,6 +7872,7 @@ void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx,
enum nft_trans_phase phase)
{
switch (phase) {
+ case NFT_TRANS_PREPARE_ERROR:
case NFT_TRANS_PREPARE:
case NFT_TRANS_ABORT:
case NFT_TRANS_RELEASE:
@@ -8943,7 +9145,7 @@ static void nf_tables_trans_destroy_work(struct work_struct *w)
synchronize_rcu();
list_for_each_entry_safe(trans, next, &head, list) {
- list_del(&trans->list);
+ nft_trans_list_del(trans);
nft_commit_release(trans);
}
}
@@ -9308,6 +9510,27 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
return 0;
}
+ list_for_each_entry(trans, &nft_net->binding_list, binding_list) {
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWSET:
+ if (!nft_trans_set_update(trans) &&
+ nft_set_is_anonymous(nft_trans_set(trans)) &&
+ !nft_trans_set_bound(trans)) {
+ pr_warn_once("nftables ruleset with unbound set\n");
+ return -EINVAL;
+ }
+ break;
+ case NFT_MSG_NEWCHAIN:
+ if (!nft_trans_chain_update(trans) &&
+ nft_chain_binding(nft_trans_chain(trans)) &&
+ !nft_trans_chain_bound(trans)) {
+ pr_warn_once("nftables ruleset with unbound chain\n");
+ return -EINVAL;
+ }
+ break;
+ }
+ }
+
/* 0. Validate ruleset, otherwise roll back for error reporting. */
if (nf_tables_validate(net) < 0)
return -EAGAIN;
@@ -9677,7 +9900,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
kfree(nft_trans_chain_name(trans));
nft_trans_destroy(trans);
} else {
- if (nft_chain_is_bound(trans->ctx.chain)) {
+ if (nft_trans_chain_bound(trans)) {
nft_trans_destroy(trans);
break;
}
@@ -9700,6 +9923,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWRULE:
+ if (nft_trans_rule_bound(trans)) {
+ nft_trans_destroy(trans);
+ break;
+ }
trans->ctx.chain->use--;
list_del_rcu(&nft_trans_rule(trans)->list);
nft_rule_expr_deactivate(&trans->ctx,
@@ -9734,6 +9961,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
case NFT_MSG_DESTROYSET:
trans->ctx.table->use++;
nft_clear(trans->ctx.net, nft_trans_set(trans));
+ if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_activate(&trans->ctx, nft_trans_set(trans));
+
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSETELEM:
@@ -9814,7 +10044,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
list_for_each_entry_safe_reverse(trans, next,
&nft_net->commit_list, list) {
- list_del(&trans->list);
+ nft_trans_list_del(trans);
nf_tables_abort_release(trans);
}
@@ -10263,22 +10493,12 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
static void nft_verdict_uninit(const struct nft_data *data)
{
struct nft_chain *chain;
- struct nft_rule *rule;
switch (data->verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
chain = data->verdict.chain;
chain->use--;
-
- if (!nft_chain_is_bound(chain))
- break;
-
- chain->table->use--;
- list_for_each_entry(rule, &chain->rules, list)
- chain->use--;
-
- nft_chain_del(chain);
break;
}
}
@@ -10513,6 +10733,9 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
list_for_each_entry_safe(set, ns, &table->sets, list) {
list_del(&set->list);
table->use--;
+ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_deactivate(&ctx, set);
+
nft_set_destroy(&ctx, set);
}
list_for_each_entry_safe(obj, ne, &table->objects, list) {
@@ -10597,6 +10820,7 @@ static int __net_init nf_tables_init_net(struct net *net)
INIT_LIST_HEAD(&nft_net->tables);
INIT_LIST_HEAD(&nft_net->commit_list);
+ INIT_LIST_HEAD(&nft_net->binding_list);
INIT_LIST_HEAD(&nft_net->module_list);
INIT_LIST_HEAD(&nft_net->notify_list);
mutex_init(&nft_net->commit_mutex);
diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c
index ee6840bd5933..8f1bfa6ccc2d 100644
--- a/net/netfilter/nfnetlink_osf.c
+++ b/net/netfilter/nfnetlink_osf.c
@@ -439,3 +439,4 @@ module_init(nfnl_osf_init);
module_exit(nfnl_osf_fini);
MODULE_LICENSE("GPL");
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF);
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index c9d2f7c29f53..3d76ebfe8939 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -76,11 +76,9 @@ static int nft_immediate_init(const struct nft_ctx *ctx,
switch (priv->data.verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
- if (nft_chain_is_bound(chain)) {
- err = -EBUSY;
- goto err1;
- }
- chain->bound = true;
+ err = nf_tables_bind_chain(ctx, chain);
+ if (err < 0)
+ return err;
break;
default:
break;
@@ -98,6 +96,31 @@ static void nft_immediate_activate(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+ const struct nft_data *data = &priv->data;
+ struct nft_ctx chain_ctx;
+ struct nft_chain *chain;
+ struct nft_rule *rule;
+
+ if (priv->dreg == NFT_REG_VERDICT) {
+ switch (data->verdict.code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ chain = data->verdict.chain;
+ if (!nft_chain_binding(chain))
+ break;
+
+ chain_ctx = *ctx;
+ chain_ctx.chain = chain;
+
+ list_for_each_entry(rule, &chain->rules, list)
+ nft_rule_expr_activate(&chain_ctx, rule);
+
+ nft_clear(ctx->net, chain);
+ break;
+ default:
+ break;
+ }
+ }
return nft_data_hold(&priv->data, nft_dreg_to_type(priv->dreg));
}
@@ -107,6 +130,43 @@ static void nft_immediate_deactivate(const struct nft_ctx *ctx,
enum nft_trans_phase phase)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+ const struct nft_data *data = &priv->data;
+ struct nft_ctx chain_ctx;
+ struct nft_chain *chain;
+ struct nft_rule *rule;
+
+ if (priv->dreg == NFT_REG_VERDICT) {
+ switch (data->verdict.code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ chain = data->verdict.chain;
+ if (!nft_chain_binding(chain))
+ break;
+
+ chain_ctx = *ctx;
+ chain_ctx.chain = chain;
+
+ list_for_each_entry(rule, &chain->rules, list)
+ nft_rule_expr_deactivate(&chain_ctx, rule, phase);
+
+ switch (phase) {
+ case NFT_TRANS_PREPARE_ERROR:
+ nf_tables_unbind_chain(ctx, chain);
+ fallthrough;
+ case NFT_TRANS_PREPARE:
+ nft_deactivate_next(ctx->net, chain);
+ break;
+ default:
+ nft_chain_del(chain);
+ chain->bound = false;
+ chain->table->use--;
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ }
if (phase == NFT_TRANS_COMMIT)
return;
@@ -131,15 +191,27 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx,
case NFT_GOTO:
chain = data->verdict.chain;
- if (!nft_chain_is_bound(chain))
+ if (!nft_chain_binding(chain))
+ break;
+
+ /* Rule construction failed, but chain is already bound:
+ * let the transaction records release this chain and its rules.
+ */
+ if (chain->bound) {
+ chain->use--;
break;
+ }
+ /* Rule has been deleted, release chain and its rules. */
chain_ctx = *ctx;
chain_ctx.chain = chain;
- list_for_each_entry_safe(rule, n, &chain->rules, list)
- nf_tables_rule_release(&chain_ctx, rule);
-
+ chain->use--;
+ list_for_each_entry_safe(rule, n, &chain->rules, list) {
+ chain->use--;
+ list_del(&rule->list);
+ nf_tables_rule_destroy(&chain_ctx, rule);
+ }
nf_tables_chain_destroy(&chain_ctx);
break;
default:
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 96081ac8d2b4..1e5e7a181e0b 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -271,13 +271,14 @@ static int nft_bitmap_init(const struct nft_set *set,
return 0;
}
-static void nft_bitmap_destroy(const struct nft_set *set)
+static void nft_bitmap_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
{
struct nft_bitmap *priv = nft_set_priv(set);
struct nft_bitmap_elem *be, *n;
list_for_each_entry_safe(be, n, &priv->list, head)
- nft_set_elem_destroy(set, be, true);
+ nf_tables_set_elem_destroy(ctx, set, be);
}
static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 76de6c8d9865..0b73cb0e752f 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -400,19 +400,31 @@ static int nft_rhash_init(const struct nft_set *set,
return 0;
}
+struct nft_rhash_ctx {
+ const struct nft_ctx ctx;
+ const struct nft_set *set;
+};
+
static void nft_rhash_elem_destroy(void *ptr, void *arg)
{
- nft_set_elem_destroy(arg, ptr, true);
+ struct nft_rhash_ctx *rhash_ctx = arg;
+
+ nf_tables_set_elem_destroy(&rhash_ctx->ctx, rhash_ctx->set, ptr);
}
-static void nft_rhash_destroy(const struct nft_set *set)
+static void nft_rhash_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
{
struct nft_rhash *priv = nft_set_priv(set);
+ struct nft_rhash_ctx rhash_ctx = {
+ .ctx = *ctx,
+ .set = set,
+ };
cancel_delayed_work_sync(&priv->gc_work);
rcu_barrier();
rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy,
- (void *)set);
+ (void *)&rhash_ctx);
}
/* Number of buckets is stored in u32, so cap our result to 1U<<31 */
@@ -643,7 +655,8 @@ static int nft_hash_init(const struct nft_set *set,
return 0;
}
-static void nft_hash_destroy(const struct nft_set *set)
+static void nft_hash_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
{
struct nft_hash *priv = nft_set_priv(set);
struct nft_hash_elem *he;
@@ -653,7 +666,7 @@ static void nft_hash_destroy(const struct nft_set *set)
for (i = 0; i < priv->buckets; i++) {
hlist_for_each_entry_safe(he, next, &priv->table[i], node) {
hlist_del_rcu(&he->node);
- nft_set_elem_destroy(set, he, true);
+ nf_tables_set_elem_destroy(ctx, set, he);
}
}
}
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 15e451dc3fc4..0452ee586c1c 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1974,12 +1974,16 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_iter *iter)
{
struct nft_pipapo *priv = nft_set_priv(set);
+ struct net *net = read_pnet(&set->net);
struct nft_pipapo_match *m;
struct nft_pipapo_field *f;
int i, r;
rcu_read_lock();
- m = rcu_dereference(priv->match);
+ if (iter->genmask == nft_genmask_cur(net))
+ m = rcu_dereference(priv->match);
+ else
+ m = priv->clone;
if (unlikely(!m))
goto out;
@@ -2148,10 +2152,12 @@ out_scratch:
/**
* nft_set_pipapo_match_destroy() - Destroy elements from key mapping array
+ * @ctx: context
* @set: nftables API set representation
* @m: matching data pointing to key mapping array
*/
-static void nft_set_pipapo_match_destroy(const struct nft_set *set,
+static void nft_set_pipapo_match_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set,
struct nft_pipapo_match *m)
{
struct nft_pipapo_field *f;
@@ -2168,15 +2174,17 @@ static void nft_set_pipapo_match_destroy(const struct nft_set *set,
e = f->mt[r].e;
- nft_set_elem_destroy(set, e, true);
+ nf_tables_set_elem_destroy(ctx, set, e);
}
}
/**
* nft_pipapo_destroy() - Free private data for set and all committed elements
+ * @ctx: context
* @set: nftables API set representation
*/
-static void nft_pipapo_destroy(const struct nft_set *set)
+static void nft_pipapo_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *m;
@@ -2186,7 +2194,7 @@ static void nft_pipapo_destroy(const struct nft_set *set)
if (m) {
rcu_barrier();
- nft_set_pipapo_match_destroy(set, m);
+ nft_set_pipapo_match_destroy(ctx, set, m);
#ifdef NFT_PIPAPO_ALIGN
free_percpu(m->scratch_aligned);
@@ -2203,7 +2211,7 @@ static void nft_pipapo_destroy(const struct nft_set *set)
m = priv->clone;
if (priv->dirty)
- nft_set_pipapo_match_destroy(set, m);
+ nft_set_pipapo_match_destroy(ctx, set, m);
#ifdef NFT_PIPAPO_ALIGN
free_percpu(priv->clone->scratch_aligned);
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 2f114aa10f1a..5c05c9b990fb 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -664,7 +664,8 @@ static int nft_rbtree_init(const struct nft_set *set,
return 0;
}
-static void nft_rbtree_destroy(const struct nft_set *set)
+static void nft_rbtree_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
{
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe;
@@ -675,7 +676,7 @@ static void nft_rbtree_destroy(const struct nft_set *set)
while ((node = priv->root.rb_node) != NULL) {
rb_erase(node, &priv->root);
rbe = rb_entry(node, struct nft_rbtree_elem, node);
- nft_set_elem_destroy(set, rbe, true);
+ nf_tables_set_elem_destroy(ctx, set, rbe);
}
}
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index e1990baf3a3b..dc9485854002 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -71,4 +71,3 @@ MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
MODULE_DESCRIPTION("Passive OS fingerprint matching.");
MODULE_ALIAS("ipt_osf");
MODULE_ALIAS("ip6t_osf");
-MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF);
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 6ef3021e1169..e79be1b3e74d 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -966,6 +966,7 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
if (ret < 0)
return ret;
+ sch_tree_lock(sch);
/* backup q->clg and q->loss_model */
old_clg = q->clg;
old_loss_model = q->loss_model;
@@ -974,7 +975,7 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]);
if (ret) {
q->loss_model = old_loss_model;
- return ret;
+ goto unlock;
}
} else {
q->loss_model = CLG_RANDOM;
@@ -1041,6 +1042,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
/* capping jitter to the range acceptable by tabledist() */
q->jitter = min_t(s64, abs(q->jitter), INT_MAX);
+unlock:
+ sch_tree_unlock(sch);
return ret;
get_table_failure:
@@ -1050,7 +1053,8 @@ get_table_failure:
*/
q->clg = old_clg;
q->loss_model = old_loss_model;
- return ret;
+
+ goto unlock;
}
static int netem_init(struct Qdisc *sch, struct nlattr *opt,
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 39fb91ff23d9..815b38080401 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -131,6 +131,7 @@ struct sec_path *secpath_set(struct sk_buff *skb)
memset(sp->ovec, 0, sizeof(sp->ovec));
sp->olen = 0;
sp->len = 0;
+ sp->verified_cnt = 0;
return sp;
}
@@ -330,11 +331,10 @@ xfrm_inner_mode_encap_remove(struct xfrm_state *x,
{
switch (x->props.mode) {
case XFRM_MODE_BEET:
- switch (XFRM_MODE_SKB_CB(skb)->protocol) {
- case IPPROTO_IPIP:
- case IPPROTO_BEETPH:
+ switch (x->sel.family) {
+ case AF_INET:
return xfrm4_remove_beet_encap(x, skb);
- case IPPROTO_IPV6:
+ case AF_INET6:
return xfrm6_remove_beet_encap(x, skb);
}
break;
diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
index 1f99dc469027..35279c220bd7 100644
--- a/net/xfrm/xfrm_interface_core.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -310,6 +310,52 @@ static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet)
skb->mark = 0;
}
+static int xfrmi_input(struct sk_buff *skb, int nexthdr, __be32 spi,
+ int encap_type, unsigned short family)
+{
+ struct sec_path *sp;
+
+ sp = skb_sec_path(skb);
+ if (sp && (sp->len || sp->olen) &&
+ !xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family))
+ goto discard;
+
+ XFRM_SPI_SKB_CB(skb)->family = family;
+ if (family == AF_INET) {
+ XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
+ } else {
+ XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+ }
+
+ return xfrm_input(skb, nexthdr, spi, encap_type);
+discard:
+ kfree_skb(skb);
+ return 0;
+}
+
+static int xfrmi4_rcv(struct sk_buff *skb)
+{
+ return xfrmi_input(skb, ip_hdr(skb)->protocol, 0, 0, AF_INET);
+}
+
+static int xfrmi6_rcv(struct sk_buff *skb)
+{
+ return xfrmi_input(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
+ 0, 0, AF_INET6);
+}
+
+static int xfrmi4_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
+{
+ return xfrmi_input(skb, nexthdr, spi, encap_type, AF_INET);
+}
+
+static int xfrmi6_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
+{
+ return xfrmi_input(skb, nexthdr, spi, encap_type, AF_INET6);
+}
+
static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
{
const struct xfrm_mode *inner_mode;
@@ -945,8 +991,8 @@ static struct pernet_operations xfrmi_net_ops = {
};
static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = {
- .handler = xfrm6_rcv,
- .input_handler = xfrm_input,
+ .handler = xfrmi6_rcv,
+ .input_handler = xfrmi6_input,
.cb_handler = xfrmi_rcv_cb,
.err_handler = xfrmi6_err,
.priority = 10,
@@ -996,8 +1042,8 @@ static struct xfrm6_tunnel xfrmi_ip6ip_handler __read_mostly = {
#endif
static struct xfrm4_protocol xfrmi_esp4_protocol __read_mostly = {
- .handler = xfrm4_rcv,
- .input_handler = xfrm_input,
+ .handler = xfrmi4_rcv,
+ .input_handler = xfrmi4_input,
.cb_handler = xfrmi_rcv_cb,
.err_handler = xfrmi4_err,
.priority = 10,
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 6d15788b5123..e7617c9959c3 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1831,6 +1831,7 @@ again:
__xfrm_policy_unlink(pol, dir);
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ xfrm_dev_policy_delete(pol);
cnt++;
xfrm_audit_policy_delete(pol, 1, task_valid);
xfrm_policy_kill(pol);
@@ -1869,6 +1870,7 @@ again:
__xfrm_policy_unlink(pol, dir);
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ xfrm_dev_policy_delete(pol);
cnt++;
xfrm_audit_policy_delete(pol, 1, task_valid);
xfrm_policy_kill(pol);
@@ -3349,6 +3351,13 @@ xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int star
if (xfrm_state_ok(tmpl, sp->xvec[idx], family, if_id))
return ++idx;
if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
+ if (idx < sp->verified_cnt) {
+ /* Secpath entry previously verified, consider optional and
+ * continue searching
+ */
+ continue;
+ }
+
if (start == -1)
start = -2-idx;
break;
@@ -3723,6 +3732,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
* Order is _important_. Later we will implement
* some barriers, but at the moment barriers
* are implied between each two transformations.
+ * Upon success, marks secpath entries as having been
+ * verified to allow them to be skipped in future policy
+ * checks (e.g. nested tunnels).
*/
for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
k = xfrm_policy_ok(tpp[i], sp, k, family, if_id);
@@ -3741,6 +3753,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
}
xfrm_pols_put(pols, npols);
+ sp->verified_cnt = k;
+
return 1;
}
XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);