diff options
author | David S. Miller <davem@davemloft.net> | 2018-06-03 08:24:27 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-06-03 08:24:27 -0400 |
commit | 4cb160d0a63bb16718fc25c52fe51fe416a1f09e (patch) | |
tree | 5298d1333c969020c364831d20035ce9cc79070d | |
parent | 1ffdd8e1643f6ce28792edd3314be84167faabf1 (diff) | |
parent | 1b2470e59fb1e983a3655feba30cdfc03e609d51 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says:
====================
Netfilter updates for net-next
The following patchset contains Netfilter updates for your net-next tree:
1) Get rid of nf_sk_is_transparent(), use inet_sk_transparent() instead.
From Máté Eckl.
2) Move shared tproxy infrastructure to nf_tproxy_ipv4 and nf_tproxy_ipv6.
Also from Máté.
3) Add hashtable to speed up chain lookups by name, from Florian Westphal.
4) Patch series to add connlimit support reusing part of the
nf_conncount infrastructure. This includes preparation changes such as
passing context to the object and expression destroy interface;
garbage collection for expressions embedded into set elements, and
the introduction of the destroy_clone interface for expressions.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/netfilter/nf_conntrack_count.h | 11 | ||||
-rw-r--r-- | include/net/netfilter/nf_socket.h | 13 | ||||
-rw-r--r-- | include/net/netfilter/nf_tables.h | 20 | ||||
-rw-r--r-- | include/net/netfilter/nf_tproxy.h | 113 | ||||
-rw-r--r-- | include/uapi/linux/netfilter/nf_tables.h | 21 | ||||
-rw-r--r-- | net/ipv4/netfilter/Kconfig | 5 | ||||
-rw-r--r-- | net/ipv4/netfilter/Makefile | 1 | ||||
-rw-r--r-- | net/ipv4/netfilter/nf_tproxy_ipv4.c | 147 | ||||
-rw-r--r-- | net/ipv6/netfilter/Kconfig | 5 | ||||
-rw-r--r-- | net/ipv6/netfilter/Makefile | 1 | ||||
-rw-r--r-- | net/ipv6/netfilter/nf_tproxy_ipv6.c | 146 | ||||
-rw-r--r-- | net/netfilter/Kconfig | 11 | ||||
-rw-r--r-- | net/netfilter/Makefile | 1 | ||||
-rw-r--r-- | net/netfilter/nf_conncount.c | 36 | ||||
-rw-r--r-- | net/netfilter/nf_tables_api.c | 151 | ||||
-rw-r--r-- | net/netfilter/nft_connlimit.c | 297 | ||||
-rw-r--r-- | net/netfilter/nft_counter.c | 4 | ||||
-rw-r--r-- | net/netfilter/nft_ct.c | 3 | ||||
-rw-r--r-- | net/netfilter/nft_dynset.c | 9 | ||||
-rw-r--r-- | net/netfilter/nft_set_hash.c | 21 | ||||
-rw-r--r-- | net/netfilter/nft_socket.c | 3 | ||||
-rw-r--r-- | net/netfilter/xt_TPROXY.c | 366 | ||||
-rw-r--r-- | net/netfilter/xt_socket.c | 4 |
23 files changed, 977 insertions, 412 deletions
diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h index e61184fbfb71..1910b6572430 100644 --- a/include/net/netfilter/nf_conntrack_count.h +++ b/include/net/netfilter/nf_conntrack_count.h @@ -13,4 +13,15 @@ unsigned int nf_conncount_count(struct net *net, const u32 *key, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_zone *zone); + +unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone, + bool *addit); + +bool nf_conncount_add(struct hlist_head *head, + const struct nf_conntrack_tuple *tuple); + +void nf_conncount_cache_free(struct hlist_head *hhead); + #endif diff --git a/include/net/netfilter/nf_socket.h b/include/net/netfilter/nf_socket.h index 29b6313f0557..f9d7bee9bd4e 100644 --- a/include/net/netfilter/nf_socket.h +++ b/include/net/netfilter/nf_socket.h @@ -3,19 +3,6 @@ #define _NF_SOCK_H_ #include <net/sock.h> -#include <net/inet_timewait_sock.h> - -static inline bool nf_sk_is_transparent(struct sock *sk) -{ - switch (sk->sk_state) { - case TCP_TIME_WAIT: - return inet_twsk(sk)->tw_transparent; - case TCP_NEW_SYN_RECV: - return inet_rsk(inet_reqsk(sk))->no_srccheck; - default: - return inet_sk(sk)->transparent; - } -} struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb, const struct net_device *indev); diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 435c32d8a995..08c005ce56e9 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -9,6 +9,7 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter/nf_tables.h> #include <linux/u64_stats_sync.h> +#include <linux/rhashtable.h> #include <net/netfilter/nf_flow_table.h> #include <net/netlink.h> @@ -342,6 +343,7 @@ struct nft_set_ops { const struct nft_set_desc *desc, const struct nlattr * const nla[]); void (*destroy)(const struct nft_set 
*set); + void (*gc_init)(const struct nft_set *set); unsigned int elemsize; }; @@ -370,6 +372,8 @@ void nft_unregister_set(struct nft_set_type *type); * * @list: table set list node * @bindings: list of set bindings + * @table: table this set belongs to + * @net: netnamespace this set belongs to * @name: name of the set * @handle: unique handle of the set * @ktype: key type (numeric type defined by userspace, not used in the kernel) @@ -393,6 +397,8 @@ void nft_unregister_set(struct nft_set_type *type); struct nft_set { struct list_head list; struct list_head bindings; + struct nft_table *table; + possible_net_t net; char *name; u64 handle; u32 ktype; @@ -708,6 +714,7 @@ struct nft_expr_type { }; #define NFT_EXPR_STATEFUL 0x1 +#define NFT_EXPR_GC 0x2 /** * struct nft_expr_ops - nf_tables expression operations @@ -739,11 +746,15 @@ struct nft_expr_ops { const struct nft_expr *expr); void (*destroy)(const struct nft_ctx *ctx, const struct nft_expr *expr); + void (*destroy_clone)(const struct nft_ctx *ctx, + const struct nft_expr *expr); int (*dump)(struct sk_buff *skb, const struct nft_expr *expr); int (*validate)(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data); + bool (*gc)(struct net *net, + const struct nft_expr *expr); const struct nft_expr_type *type; void *data; }; @@ -850,6 +861,7 @@ enum nft_chain_flags { * * @rules: list of rules in the chain * @list: used internally + * @rhlhead: used internally * @table: table that this chain belongs to * @handle: chain handle * @use: number of jump references to this chain @@ -862,6 +874,7 @@ struct nft_chain { struct nft_rule *__rcu *rules_gen_1; struct list_head rules; struct list_head list; + struct rhlist_head rhlhead; struct nft_table *table; u64 handle; u32 use; @@ -955,7 +968,8 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); * struct nft_table - nf_tables table * * @list: used internally - * @chains: chains in the table + * @chains_ht: chains in the table + * 
@chains: same, for stable walks * @sets: sets in the table * @objects: stateful objects in the table * @flowtables: flow tables in the table @@ -969,6 +983,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); */ struct nft_table { struct list_head list; + struct rhltable chains_ht; struct list_head chains; struct list_head sets; struct list_head objects; @@ -1070,7 +1085,8 @@ struct nft_object_ops { int (*init)(const struct nft_ctx *ctx, const struct nlattr *const tb[], struct nft_object *obj); - void (*destroy)(struct nft_object *obj); + void (*destroy)(const struct nft_ctx *ctx, + struct nft_object *obj); int (*dump)(struct sk_buff *skb, struct nft_object *obj, bool reset); diff --git a/include/net/netfilter/nf_tproxy.h b/include/net/netfilter/nf_tproxy.h new file mode 100644 index 000000000000..9754a50ecde9 --- /dev/null +++ b/include/net/netfilter/nf_tproxy.h @@ -0,0 +1,113 @@ +#ifndef _NF_TPROXY_H_ +#define _NF_TPROXY_H_ + +#include <net/tcp.h> + +enum nf_tproxy_lookup_t { + NF_TPROXY_LOOKUP_LISTENER, + NF_TPROXY_LOOKUP_ESTABLISHED, +}; + +static inline bool nf_tproxy_sk_is_transparent(struct sock *sk) +{ + if (inet_sk_transparent(sk)) + return true; + + sock_gen_put(sk); + return false; +} + +__be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr); + +/** + * nf_tproxy_handle_time_wait4 - handle IPv4 TCP TIME_WAIT reopen redirections + * @skb: The skb being processed. + * @laddr: IPv4 address to redirect to or zero. + * @lport: TCP port to redirect to or zero. + * @sk: The TIME_WAIT TCP socket found by the lookup. + * + * We have to handle SYN packets arriving to TIME_WAIT sockets + * differently: instead of reopening the connection we should rather + * redirect the new connection to the proxy if there's a listener + * socket present. + * + * nf_tproxy_handle_time_wait4() consumes the socket reference passed in. 
+ * + * Returns the listener socket if there's one, the TIME_WAIT socket if + * no such listener is found, or NULL if the TCP header is incomplete. + */ +struct sock * +nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb, + __be32 laddr, __be16 lport, struct sock *sk); + +/* + * This is used when the user wants to intercept a connection matching + * an explicit iptables rule. In this case the sockets are assumed + * matching in preference order: + * + * - match: if there's a fully established connection matching the + * _packet_ tuple, it is returned, assuming the redirection + * already took place and we process a packet belonging to an + * established connection + * + * - match: if there's a listening socket matching the redirection + * (e.g. on-port & on-ip of the connection), it is returned, + * regardless if it was bound to 0.0.0.0 or an explicit + * address. The reasoning is that if there's an explicit rule, it + * does not really matter if the listener is bound to an interface + * or to 0. The user already stated that he wants redirection + * (since he added the rule). + * + * Please note that there's an overlap between what a TPROXY target + * and a socket match will match. Normally if you have both rules the + * "socket" match will be the first one, effectively all packets + * belonging to established connections going through that one. + */ +struct sock * +nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp, + const u8 protocol, + const __be32 saddr, const __be32 daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in, + const enum nf_tproxy_lookup_t lookup_type); + +const struct in6_addr * +nf_tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr, + const struct in6_addr *daddr); + +/** + * nf_tproxy_handle_time_wait6 - handle IPv6 TCP TIME_WAIT reopen redirections + * @skb: The skb being processed. + * @tproto: Transport protocol. + * @thoff: Transport protocol header offset. 
+ * @net: Network namespace. + * @laddr: IPv6 address to redirect to. + * @lport: TCP port to redirect to or zero. + * @sk: The TIME_WAIT TCP socket found by the lookup. + * + * We have to handle SYN packets arriving to TIME_WAIT sockets + * differently: instead of reopening the connection we should rather + * redirect the new connection to the proxy if there's a listener + * socket present. + * + * nf_tproxy_handle_time_wait6() consumes the socket reference passed in. + * + * Returns the listener socket if there's one, the TIME_WAIT socket if + * no such listener is found, or NULL if the TCP header is incomplete. + */ +struct sock * +nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff, + struct net *net, + const struct in6_addr *laddr, + const __be16 lport, + struct sock *sk); + +struct sock * +nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp, + const u8 protocol, + const struct in6_addr *saddr, const struct in6_addr *daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in, + const enum nf_tproxy_lookup_t lookup_type); + +#endif /* _NF_TPROXY_H_ */ diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index a089af092a29..ae00a3c49b8a 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1043,6 +1043,24 @@ enum nft_limit_attributes { }; #define NFTA_LIMIT_MAX (__NFTA_LIMIT_MAX - 1) +enum nft_connlimit_flags { + NFT_CONNLIMIT_F_INV = (1 << 0), +}; + +/** + * enum nft_connlimit_attributes - nf_tables connlimit expression netlink attributes + * + * @NFTA_CONNLIMIT_COUNT: number of connections (NLA_U32) + * @NFTA_CONNLIMIT_FLAGS: flags (NLA_U32: enum nft_connlimit_flags) + */ +enum nft_connlimit_attributes { + NFTA_CONNLIMIT_UNSPEC, + NFTA_CONNLIMIT_COUNT, + NFTA_CONNLIMIT_FLAGS, + __NFTA_CONNLIMIT_MAX +}; +#define NFTA_CONNLIMIT_MAX (__NFTA_CONNLIMIT_MAX - 1) + /** * enum nft_counter_attributes - 
nf_tables counter expression netlink attributes * @@ -1357,7 +1375,8 @@ enum nft_ct_helper_attributes { #define NFT_OBJECT_QUOTA 2 #define NFT_OBJECT_CT_HELPER 3 #define NFT_OBJECT_LIMIT 4 -#define __NFT_OBJECT_MAX 5 +#define NFT_OBJECT_CONNLIMIT 5 +#define __NFT_OBJECT_MAX 6 #define NFT_OBJECT_MAX (__NFT_OBJECT_MAX - 1) /** diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index d03bc5a01a70..bbfc356cb1b5 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -29,7 +29,10 @@ config NF_SOCKET_IPV4 tristate "IPv4 socket lookup support" help This option enables the IPv4 socket lookup infrastructure. This is - is required by the iptables socket match. + is required by the {ip,nf}tables socket match. + +config NF_TPROXY_IPV4 + tristate "IPv4 tproxy support" if NF_TABLES diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index c4b05b174091..8394c17c269f 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -17,6 +17,7 @@ obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o obj-$(CONFIG_NF_SOCKET_IPV4) += nf_socket_ipv4.o +obj-$(CONFIG_NF_TPROXY_IPV4) += nf_tproxy_ipv4.o # logging obj-$(CONFIG_NF_LOG_ARP) += nf_log_arp.o diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c new file mode 100644 index 000000000000..805e83ec3ad9 --- /dev/null +++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c @@ -0,0 +1,147 @@ +/* + * Copyright (C) 2007-2008 BalaBit IT Ltd. + * Author: Krisztian Kovacs + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#include <net/netfilter/nf_tproxy.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <net/inet_sock.h> +#include <linux/ip.h> +#include <net/checksum.h> +#include <net/udp.h> +#include <net/tcp.h> +#include <linux/inetdevice.h> + +struct sock * +nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb, + __be32 laddr, __be16 lport, struct sock *sk) +{ + const struct iphdr *iph = ip_hdr(skb); + struct tcphdr _hdr, *hp; + + hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr); + if (hp == NULL) { + inet_twsk_put(inet_twsk(sk)); + return NULL; + } + + if (hp->syn && !hp->rst && !hp->ack && !hp->fin) { + /* SYN to a TIME_WAIT socket, we'd rather redirect it + * to a listener socket if there's one */ + struct sock *sk2; + + sk2 = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol, + iph->saddr, laddr ? laddr : iph->daddr, + hp->source, lport ? lport : hp->dest, + skb->dev, NF_TPROXY_LOOKUP_LISTENER); + if (sk2) { + inet_twsk_deschedule_put(inet_twsk(sk)); + sk = sk2; + } + } + + return sk; +} +EXPORT_SYMBOL_GPL(nf_tproxy_handle_time_wait4); + +__be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr) +{ + struct in_device *indev; + __be32 laddr; + + if (user_laddr) + return user_laddr; + + laddr = 0; + indev = __in_dev_get_rcu(skb->dev); + for_primary_ifa(indev) { + laddr = ifa->ifa_local; + break; + } endfor_ifa(indev); + + return laddr ? 
laddr : daddr; +} +EXPORT_SYMBOL_GPL(nf_tproxy_laddr4); + +struct sock * +nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp, + const u8 protocol, + const __be32 saddr, const __be32 daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in, + const enum nf_tproxy_lookup_t lookup_type) +{ + struct sock *sk; + struct tcphdr *tcph; + + switch (protocol) { + case IPPROTO_TCP: + switch (lookup_type) { + case NF_TPROXY_LOOKUP_LISTENER: + tcph = hp; + sk = inet_lookup_listener(net, &tcp_hashinfo, skb, + ip_hdrlen(skb) + + __tcp_hdrlen(tcph), + saddr, sport, + daddr, dport, + in->ifindex, 0); + + if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) + sk = NULL; + /* NOTE: we return listeners even if bound to + * 0.0.0.0, those are filtered out in + * xt_socket, since xt_TPROXY needs 0 bound + * listeners too + */ + break; + case NF_TPROXY_LOOKUP_ESTABLISHED: + sk = inet_lookup_established(net, &tcp_hashinfo, + saddr, sport, daddr, dport, + in->ifindex); + break; + default: + BUG(); + } + break; + case IPPROTO_UDP: + sk = udp4_lib_lookup(net, saddr, sport, daddr, dport, + in->ifindex); + if (sk) { + int connected = (sk->sk_state == TCP_ESTABLISHED); + int wildcard = (inet_sk(sk)->inet_rcv_saddr == 0); + + /* NOTE: we return listeners even if bound to + * 0.0.0.0, those are filtered out in + * xt_socket, since xt_TPROXY needs 0 bound + * listeners too + */ + if ((lookup_type == NF_TPROXY_LOOKUP_ESTABLISHED && + (!connected || wildcard)) || + (lookup_type == NF_TPROXY_LOOKUP_LISTENER && connected)) { + sock_put(sk); + sk = NULL; + } + } + break; + default: + WARN_ON(1); + sk = NULL; + } + + pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n", + protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), lookup_type, sk); + + return sk; +} +EXPORT_SYMBOL_GPL(nf_tproxy_get_sock_v4); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Balazs Scheidler, Krisztian Kovacs"); +MODULE_DESCRIPTION("Netfilter IPv4 
transparent proxy support"); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 9f5b00a39adf..37b14dc9d863 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -29,7 +29,10 @@ config NF_SOCKET_IPV6 tristate "IPv6 socket lookup support" help This option enables the IPv6 socket lookup infrastructure. This - is used by the ip6tables socket match. + is used by the {ip6,nf}tables socket match. + +config NF_TPROXY_IPV6 + tristate "IPv6 tproxy support" if NF_TABLES diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 71518f22ae39..10a5a1c87320 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -26,6 +26,7 @@ nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o obj-$(CONFIG_NF_SOCKET_IPV6) += nf_socket_ipv6.o +obj-$(CONFIG_NF_TPROXY_IPV6) += nf_tproxy_ipv6.o # logging obj-$(CONFIG_NF_LOG_IPV6) += nf_log_ipv6.o diff --git a/net/ipv6/netfilter/nf_tproxy_ipv6.c b/net/ipv6/netfilter/nf_tproxy_ipv6.c new file mode 100644 index 000000000000..bf1d6c421e3b --- /dev/null +++ b/net/ipv6/netfilter/nf_tproxy_ipv6.c @@ -0,0 +1,146 @@ +#include <net/netfilter/nf_tproxy.h> +#include <linux/module.h> +#include <net/inet6_hashtables.h> +#include <net/addrconf.h> +#include <net/udp.h> +#include <net/tcp.h> + +const struct in6_addr * +nf_tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr, + const struct in6_addr *daddr) +{ + struct inet6_dev *indev; + struct inet6_ifaddr *ifa; + struct in6_addr *laddr; + + if (!ipv6_addr_any(user_laddr)) + return user_laddr; + laddr = NULL; + + indev = __in6_dev_get(skb->dev); + if (indev) { + read_lock_bh(&indev->lock); + list_for_each_entry(ifa, &indev->addr_list, if_list) { + if (ifa->flags & (IFA_F_TENTATIVE | IFA_F_DEPRECATED)) + continue; + + laddr = &ifa->addr; + break; + } + read_unlock_bh(&indev->lock); + } + + return laddr ? 
laddr : daddr; +} +EXPORT_SYMBOL_GPL(nf_tproxy_laddr6); + +struct sock * +nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff, + struct net *net, + const struct in6_addr *laddr, + const __be16 lport, + struct sock *sk) +{ + const struct ipv6hdr *iph = ipv6_hdr(skb); + struct tcphdr _hdr, *hp; + + hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr); + if (hp == NULL) { + inet_twsk_put(inet_twsk(sk)); + return NULL; + } + + if (hp->syn && !hp->rst && !hp->ack && !hp->fin) { + /* SYN to a TIME_WAIT socket, we'd rather redirect it + * to a listener socket if there's one */ + struct sock *sk2; + + sk2 = nf_tproxy_get_sock_v6(net, skb, thoff, hp, tproto, + &iph->saddr, + nf_tproxy_laddr6(skb, laddr, &iph->daddr), + hp->source, + lport ? lport : hp->dest, + skb->dev, NF_TPROXY_LOOKUP_LISTENER); + if (sk2) { + inet_twsk_deschedule_put(inet_twsk(sk)); + sk = sk2; + } + } + + return sk; +} +EXPORT_SYMBOL_GPL(nf_tproxy_handle_time_wait6); + +struct sock * +nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp, + const u8 protocol, + const struct in6_addr *saddr, const struct in6_addr *daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in, + const enum nf_tproxy_lookup_t lookup_type) +{ + struct sock *sk; + struct tcphdr *tcph; + + switch (protocol) { + case IPPROTO_TCP: + switch (lookup_type) { + case NF_TPROXY_LOOKUP_LISTENER: + tcph = hp; + sk = inet6_lookup_listener(net, &tcp_hashinfo, skb, + thoff + __tcp_hdrlen(tcph), + saddr, sport, + daddr, ntohs(dport), + in->ifindex, 0); + + if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) + sk = NULL; + /* NOTE: we return listeners even if bound to + * 0.0.0.0, those are filtered out in + * xt_socket, since xt_TPROXY needs 0 bound + * listeners too + */ + break; + case NF_TPROXY_LOOKUP_ESTABLISHED: + sk = __inet6_lookup_established(net, &tcp_hashinfo, + saddr, sport, daddr, ntohs(dport), + in->ifindex, 0); + break; + default: + BUG(); + } + break; + case 
IPPROTO_UDP: + sk = udp6_lib_lookup(net, saddr, sport, daddr, dport, + in->ifindex); + if (sk) { + int connected = (sk->sk_state == TCP_ESTABLISHED); + int wildcard = ipv6_addr_any(&sk->sk_v6_rcv_saddr); + + /* NOTE: we return listeners even if bound to + * 0.0.0.0, those are filtered out in + * xt_socket, since xt_TPROXY needs 0 bound + * listeners too + */ + if ((lookup_type == NF_TPROXY_LOOKUP_ESTABLISHED && (!connected || wildcard)) || + (lookup_type == NF_TPROXY_LOOKUP_LISTENER && connected)) { + sock_put(sk); + sk = NULL; + } + } + break; + default: + WARN_ON(1); + sk = NULL; + } + + pr_debug("tproxy socket lookup: proto %u %pI6:%u -> %pI6:%u, lookup type: %d, sock %p\n", + protocol, saddr, ntohs(sport), daddr, ntohs(dport), lookup_type, sk); + + return sk; +} +EXPORT_SYMBOL_GPL(nf_tproxy_get_sock_v6); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Balazs Scheidler, Krisztian Kovacs"); +MODULE_DESCRIPTION("Netfilter IPv4 transparent proxy support"); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 276e1e32f44e..dbd7d1fad277 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -517,6 +517,15 @@ config NFT_COUNTER This option adds the "counter" expression that you can use to include packet and byte counters in a rule. +config NFT_CONNLIMIT + tristate "Netfilter nf_tables connlimit module" + depends on NF_CONNTRACK + depends on NETFILTER_ADVANCED + select NETFILTER_CONNCOUNT + help + This option adds the "connlimit" expression that you can use to + ratelimit rule matchings per connections. + config NFT_LOG tristate "Netfilter nf_tables log module" help @@ -989,6 +998,8 @@ config NETFILTER_XT_TARGET_TPROXY depends on IP_NF_MANGLE select NF_DEFRAG_IPV4 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n + select NF_TPROXY_IPV4 + select NF_TPROXY_IPV6 if IP6_NF_IPTABLES help This option adds a `TPROXY' target, which is somewhat similar to REDIRECT. 
It can only be used in the mangle table and is useful diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index eec169555731..44449389e527 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -80,6 +80,7 @@ nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \ obj-$(CONFIG_NF_TABLES) += nf_tables.o obj-$(CONFIG_NFT_COMPAT) += nft_compat.o +obj-$(CONFIG_NFT_CONNLIMIT) += nft_connlimit.o obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o obj-$(CONFIG_NFT_CT) += nft_ct.o obj-$(CONFIG_NFT_FLOW_OFFLOAD) += nft_flow_offload.o diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 153e690e2893..3b5059a8dcdd 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -79,7 +79,7 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen) return memcmp(a, b, klen * sizeof(u32)); } -static bool add_hlist(struct hlist_head *head, +bool nf_conncount_add(struct hlist_head *head, const struct nf_conntrack_tuple *tuple) { struct nf_conncount_tuple *conn; @@ -91,12 +91,12 @@ static bool add_hlist(struct hlist_head *head, hlist_add_head(&conn->node, head); return true; } +EXPORT_SYMBOL_GPL(nf_conncount_add); -static unsigned int check_hlist(struct net *net, - struct hlist_head *head, - const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_zone *zone, - bool *addit) +unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone, + bool *addit) { const struct nf_conntrack_tuple_hash *found; struct nf_conncount_tuple *conn; @@ -141,6 +141,7 @@ static unsigned int check_hlist(struct net *net, return length; } +EXPORT_SYMBOL_GPL(nf_conncount_lookup); static void tree_nodes_free(struct rb_root *root, struct nf_conncount_rb *gc_nodes[], @@ -187,13 +188,15 @@ count_tree(struct net *net, struct rb_root *root, } else { /* same source network -> be counted! 
*/ unsigned int count; - count = check_hlist(net, &rbconn->hhead, tuple, zone, &addit); + + count = nf_conncount_lookup(net, &rbconn->hhead, tuple, + zone, &addit); tree_nodes_free(root, gc_nodes, gc_count); if (!addit) return count; - if (!add_hlist(&rbconn->hhead, tuple)) + if (!nf_conncount_add(&rbconn->hhead, tuple)) return 0; /* hotdrop */ return count + 1; @@ -203,7 +206,7 @@ count_tree(struct net *net, struct rb_root *root, continue; /* only used for GC on hhead, retval and 'addit' ignored */ - check_hlist(net, &rbconn->hhead, tuple, zone, &addit); + nf_conncount_lookup(net, &rbconn->hhead, tuple, zone, &addit); if (hlist_empty(&rbconn->hhead)) gc_nodes[gc_count++] = rbconn; } @@ -303,11 +306,19 @@ struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family } EXPORT_SYMBOL_GPL(nf_conncount_init); -static void destroy_tree(struct rb_root *r) +void nf_conncount_cache_free(struct hlist_head *hhead) { struct nf_conncount_tuple *conn; - struct nf_conncount_rb *rbconn; struct hlist_node *n; + + hlist_for_each_entry_safe(conn, n, hhead, node) + kmem_cache_free(conncount_conn_cachep, conn); +} +EXPORT_SYMBOL_GPL(nf_conncount_cache_free); + +static void destroy_tree(struct rb_root *r) +{ + struct nf_conncount_rb *rbconn; struct rb_node *node; while ((node = rb_first(r)) != NULL) { @@ -315,8 +326,7 @@ static void destroy_tree(struct rb_root *r) rb_erase(node, r); - hlist_for_each_entry_safe(conn, n, &rbconn->hhead, node) - kmem_cache_free(conncount_conn_cachep, conn); + nf_conncount_cache_free(&rbconn->hhead); kmem_cache_free(conncount_rb_cachep, rbconn); } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c785bc5a66f1..2e8fd961746d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -34,6 +34,20 @@ enum { NFT_VALIDATE_DO, }; +static u32 nft_chain_hash(const void *data, u32 len, u32 seed); +static u32 nft_chain_hash_obj(const void *data, u32 len, u32 seed); +static int 
nft_chain_hash_cmp(struct rhashtable_compare_arg *, const void *); + +static const struct rhashtable_params nft_chain_ht_params = { + .head_offset = offsetof(struct nft_chain, rhlhead), + .key_offset = offsetof(struct nft_chain, name), + .hashfn = nft_chain_hash, + .obj_hashfn = nft_chain_hash_obj, + .obj_cmpfn = nft_chain_hash_cmp, + .locks_mul = 1, + .automatic_shrinking = true, +}; + static void nft_validate_state_update(struct net *net, u8 new_validate_state) { switch (net->nft.validate_state) { @@ -720,6 +734,29 @@ err: return ret; } +static u32 nft_chain_hash(const void *data, u32 len, u32 seed) +{ + const char *name = data; + + return jhash(name, strlen(name), seed); +} + +static u32 nft_chain_hash_obj(const void *data, u32 len, u32 seed) +{ + const struct nft_chain *chain = data; + + return nft_chain_hash(chain->name, 0, seed); +} + +static int nft_chain_hash_cmp(struct rhashtable_compare_arg *arg, + const void *ptr) +{ + const struct nft_chain *chain = ptr; + const char *name = arg->key; + + return strcmp(chain->name, name); +} + static int nf_tables_newtable(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[], @@ -766,6 +803,10 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, if (table->name == NULL) goto err_strdup; + err = rhltable_init(&table->chains_ht, &nft_chain_ht_params); + if (err) + goto err_chain_ht; + INIT_LIST_HEAD(&table->chains); INIT_LIST_HEAD(&table->sets); INIT_LIST_HEAD(&table->objects); @@ -782,6 +823,8 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, list_add_tail_rcu(&table->list, &net->nft.tables); return 0; err_trans: + rhltable_destroy(&table->chains_ht); +err_chain_ht: kfree(table->name); err_strdup: kfree(table); @@ -922,6 +965,7 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx) { BUG_ON(ctx->table->use > 0); + rhltable_destroy(&ctx->table->chains_ht); kfree(ctx->table->name); kfree(ctx->table); } @@ -967,21 
+1011,35 @@ nft_chain_lookup_byhandle(const struct nft_table *table, u64 handle, u8 genmask) return ERR_PTR(-ENOENT); } -static struct nft_chain *nft_chain_lookup(const struct nft_table *table, +static struct nft_chain *nft_chain_lookup(struct nft_table *table, const struct nlattr *nla, u8 genmask) { + char search[NFT_CHAIN_MAXNAMELEN + 1]; + struct rhlist_head *tmp, *list; struct nft_chain *chain; if (nla == NULL) return ERR_PTR(-EINVAL); - list_for_each_entry_rcu(chain, &table->chains, list) { - if (!nla_strcmp(nla, chain->name) && - nft_active_genmask(chain, genmask)) - return chain; - } + nla_strlcpy(search, nla, sizeof(search)); - return ERR_PTR(-ENOENT); + WARN_ON(!rcu_read_lock_held() && + !lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES)); + + chain = ERR_PTR(-ENOENT); + rcu_read_lock(); + list = rhltable_lookup(&table->chains_ht, search, nft_chain_ht_params); + if (!list) + goto out_unlock; + + rhl_for_each_entry_rcu(chain, tmp, list, rhlhead) { + if (nft_active_genmask(chain, genmask)) + goto out_unlock; + } + chain = ERR_PTR(-ENOENT); +out_unlock: + rcu_read_unlock(); + return chain; } static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = { @@ -1185,8 +1243,8 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_cur(net); - const struct nft_table *table; const struct nft_chain *chain; + struct nft_table *table; struct sk_buff *skb2; int family = nfmsg->nfgen_family; int err; @@ -1504,9 +1562,17 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, if (err < 0) goto err1; + err = rhltable_insert_key(&table->chains_ht, chain->name, + &chain->rhlhead, nft_chain_ht_params); + if (err) + goto err2; + err = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN); - if (err < 0) + if (err < 0) { + rhltable_remove(&table->chains_ht, &chain->rhlhead, + nft_chain_ht_params); goto err2; + } table->use++; list_add_tail_rcu(&chain->list, &table->chains); @@ 
-2206,9 +2272,9 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_cur(net); - const struct nft_table *table; const struct nft_chain *chain; const struct nft_rule *rule; + struct nft_table *table; struct sk_buff *skb2; int family = nfmsg->nfgen_family; int err; @@ -3359,6 +3425,8 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, } INIT_LIST_HEAD(&set->bindings); + set->table = table; + write_pnet(&set->net, net); set->ops = ops; set->ktype = ktype; set->klen = desc.klen; @@ -4036,12 +4104,24 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem, bool destroy_expr) { struct nft_set_ext *ext = nft_set_elem_ext(set, elem); + struct nft_ctx ctx = { + .net = read_pnet(&set->net), + .family = set->table->family, + }; nft_data_release(nft_set_ext_key(ext), NFT_DATA_VALUE); if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) nft_data_release(nft_set_ext_data(ext), set->dtype); - if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) - nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext)); + if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) { + struct nft_expr *expr = nft_set_ext_expr(ext); + + if (expr->ops->destroy_clone) { + expr->ops->destroy_clone(&ctx, expr); + module_put(expr->ops->type->owner); + } else { + nf_tables_expr_destroy(&ctx, expr); + } + } if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) (*nft_set_ext_obj(ext))->use--; kfree(elem); @@ -4051,12 +4131,13 @@ EXPORT_SYMBOL_GPL(nft_set_elem_destroy); /* Only called from commit path, nft_set_elem_deactivate() already deals with * the refcounting from the preparation phase. 
*/ -static void nf_tables_set_elem_destroy(const struct nft_set *set, void *elem) +static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, + const struct nft_set *set, void *elem) { struct nft_set_ext *ext = nft_set_elem_ext(set, elem); if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) - nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext)); + nf_tables_expr_destroy(ctx, nft_set_ext_expr(ext)); kfree(elem); } @@ -4787,7 +4868,7 @@ err3: kfree(obj->name); err2: if (obj->ops->destroy) - obj->ops->destroy(obj); + obj->ops->destroy(&ctx, obj); kfree(obj); err1: module_put(type->owner); @@ -4997,10 +5078,10 @@ err: return err; } -static void nft_obj_destroy(struct nft_object *obj) +static void nft_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj) { if (obj->ops->destroy) - obj->ops->destroy(obj); + obj->ops->destroy(ctx, obj); module_put(obj->ops->type->owner); kfree(obj->name); @@ -5966,8 +6047,16 @@ static void nft_chain_commit_update(struct nft_trans *trans) { struct nft_base_chain *basechain; - if (nft_trans_chain_name(trans)) + if (nft_trans_chain_name(trans)) { + rhltable_remove(&trans->ctx.table->chains_ht, + &trans->ctx.chain->rhlhead, + nft_chain_ht_params); swap(trans->ctx.chain->name, nft_trans_chain_name(trans)); + rhltable_insert_key(&trans->ctx.table->chains_ht, + trans->ctx.chain->name, + &trans->ctx.chain->rhlhead, + nft_chain_ht_params); + } if (!nft_is_base_chain(trans->ctx.chain)) return; @@ -5999,11 +6088,12 @@ static void nft_commit_release(struct nft_trans *trans) nft_set_destroy(nft_trans_set(trans)); break; case NFT_MSG_DELSETELEM: - nf_tables_set_elem_destroy(nft_trans_elem_set(trans), + nf_tables_set_elem_destroy(&trans->ctx, + nft_trans_elem_set(trans), nft_trans_elem(trans).priv); break; case NFT_MSG_DELOBJ: - nft_obj_destroy(nft_trans_obj(trans)); + nft_obj_destroy(&trans->ctx, nft_trans_obj(trans)); break; case NFT_MSG_DELFLOWTABLE: nf_tables_flowtable_destroy(nft_trans_flowtable(trans)); @@ -6143,6 +6233,15 @@ static void 
nf_tables_commit_chain_active(struct net *net, struct nft_chain *cha nf_tables_commit_chain_free_rules_old(g0); } +static void nft_chain_del(struct nft_chain *chain) +{ + struct nft_table *table = chain->table; + + WARN_ON_ONCE(rhltable_remove(&table->chains_ht, &chain->rhlhead, + nft_chain_ht_params)); + list_del_rcu(&chain->list); +} + static int nf_tables_commit(struct net *net, struct sk_buff *skb) { struct nft_trans *trans, *next; @@ -6217,7 +6316,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_trans_destroy(trans); break; case NFT_MSG_DELCHAIN: - list_del_rcu(&trans->ctx.chain->list); + nft_chain_del(trans->ctx.chain); nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN); nf_tables_unregister_hook(trans->ctx.net, trans->ctx.table, @@ -6328,7 +6427,7 @@ static void nf_tables_abort_release(struct nft_trans *trans) nft_trans_elem(trans).priv, true); break; case NFT_MSG_NEWOBJ: - nft_obj_destroy(nft_trans_obj(trans)); + nft_obj_destroy(&trans->ctx, nft_trans_obj(trans)); break; case NFT_MSG_NEWFLOWTABLE: nf_tables_flowtable_destroy(nft_trans_flowtable(trans)); @@ -6368,7 +6467,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) nft_trans_destroy(trans); } else { trans->ctx.table->use--; - list_del_rcu(&trans->ctx.chain->list); + nft_chain_del(trans->ctx.chain); nf_tables_unregister_hook(trans->ctx.net, trans->ctx.table, trans->ctx.chain); @@ -6970,7 +7069,7 @@ int __nft_release_basechain(struct nft_ctx *ctx) ctx->chain->use--; nf_tables_rule_release(ctx, rule); } - list_del(&ctx->chain->list); + nft_chain_del(ctx->chain); ctx->table->use--; nf_tables_chain_destroy(ctx); @@ -7022,11 +7121,11 @@ static void __nft_release_tables(struct net *net) list_for_each_entry_safe(obj, ne, &table->objects, list) { list_del(&obj->list); table->use--; - nft_obj_destroy(obj); + nft_obj_destroy(&ctx, obj); } list_for_each_entry_safe(chain, nc, &table->chains, list) { ctx.chain = chain; - list_del(&chain->list); + nft_chain_del(chain); 
table->use--; nf_tables_chain_destroy(&ctx); } diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c new file mode 100644 index 000000000000..50c068d660e5 --- /dev/null +++ b/net/netfilter/nft_connlimit.c @@ -0,0 +1,297 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_count.h> +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_tuple.h> +#include <net/netfilter/nf_conntrack_zones.h> + +struct nft_connlimit { + spinlock_t lock; + struct hlist_head hhead; + u32 limit; + bool invert; +}; + +static inline void nft_connlimit_do_eval(struct nft_connlimit *priv, + struct nft_regs *regs, + const struct nft_pktinfo *pkt, + const struct nft_set_ext *ext) +{ + const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt; + const struct nf_conntrack_tuple *tuple_ptr; + struct nf_conntrack_tuple tuple; + enum ip_conntrack_info ctinfo; + const struct nf_conn *ct; + unsigned int count; + bool addit; + + tuple_ptr = &tuple; + + ct = nf_ct_get(pkt->skb, &ctinfo); + if (ct != NULL) { + tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + zone = nf_ct_zone(ct); + } else if (!nf_ct_get_tuplepr(pkt->skb, skb_network_offset(pkt->skb), + nft_pf(pkt), nft_net(pkt), &tuple)) { + regs->verdict.code = NF_DROP; + return; + } + + spin_lock_bh(&priv->lock); + count = nf_conncount_lookup(nft_net(pkt), &priv->hhead, tuple_ptr, zone, + &addit); + + if (!addit) + goto out; + + if (!nf_conncount_add(&priv->hhead, tuple_ptr)) { + regs->verdict.code = NF_DROP; + spin_unlock_bh(&priv->lock); + return; + } + count++; +out: + spin_unlock_bh(&priv->lock); + + if ((count > priv->limit) ^ priv->invert) { + regs->verdict.code = NFT_BREAK; + 
return; + } +} + +static int nft_connlimit_do_init(const struct nft_ctx *ctx, + const struct nlattr * const tb[], + struct nft_connlimit *priv) +{ + bool invert = false; + u32 flags, limit; + + if (!tb[NFTA_CONNLIMIT_COUNT]) + return -EINVAL; + + limit = ntohl(nla_get_be32(tb[NFTA_CONNLIMIT_COUNT])); + + if (tb[NFTA_CONNLIMIT_FLAGS]) { + flags = ntohl(nla_get_be32(tb[NFTA_CONNLIMIT_FLAGS])); + if (flags & ~NFT_CONNLIMIT_F_INV) + return -EOPNOTSUPP; + if (flags & NFT_CONNLIMIT_F_INV) + invert = true; + } + + spin_lock_init(&priv->lock); + INIT_HLIST_HEAD(&priv->hhead); + priv->limit = limit; + priv->invert = invert; + + return nf_ct_netns_get(ctx->net, ctx->family); +} + +static void nft_connlimit_do_destroy(const struct nft_ctx *ctx, + struct nft_connlimit *priv) +{ + nf_ct_netns_put(ctx->net, ctx->family); + nf_conncount_cache_free(&priv->hhead); +} + +static int nft_connlimit_do_dump(struct sk_buff *skb, + struct nft_connlimit *priv) +{ + if (nla_put_be32(skb, NFTA_CONNLIMIT_COUNT, htonl(priv->limit))) + goto nla_put_failure; + if (priv->invert && + nla_put_be32(skb, NFTA_CONNLIMIT_FLAGS, htonl(NFT_CONNLIMIT_F_INV))) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static inline void nft_connlimit_obj_eval(struct nft_object *obj, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_connlimit *priv = nft_obj_data(obj); + + nft_connlimit_do_eval(priv, regs, pkt, NULL); +} + +static int nft_connlimit_obj_init(const struct nft_ctx *ctx, + const struct nlattr * const tb[], + struct nft_object *obj) +{ + struct nft_connlimit *priv = nft_obj_data(obj); + + return nft_connlimit_do_init(ctx, tb, priv); +} + +static void nft_connlimit_obj_destroy(const struct nft_ctx *ctx, + struct nft_object *obj) +{ + struct nft_connlimit *priv = nft_obj_data(obj); + + nft_connlimit_do_destroy(ctx, priv); +} + +static int nft_connlimit_obj_dump(struct sk_buff *skb, + struct nft_object *obj, bool reset) +{ + struct nft_connlimit *priv = 
nft_obj_data(obj); + + return nft_connlimit_do_dump(skb, priv); +} + +static const struct nla_policy nft_connlimit_policy[NFTA_CONNLIMIT_MAX + 1] = { + [NFTA_CONNLIMIT_COUNT] = { .type = NLA_U32 }, + [NFTA_CONNLIMIT_FLAGS] = { .type = NLA_U32 }, +}; + +static struct nft_object_type nft_connlimit_obj_type; +static const struct nft_object_ops nft_connlimit_obj_ops = { + .type = &nft_connlimit_obj_type, + .size = sizeof(struct nft_connlimit), + .eval = nft_connlimit_obj_eval, + .init = nft_connlimit_obj_init, + .destroy = nft_connlimit_obj_destroy, + .dump = nft_connlimit_obj_dump, +}; + +static struct nft_object_type nft_connlimit_obj_type __read_mostly = { + .type = NFT_OBJECT_CONNLIMIT, + .ops = &nft_connlimit_obj_ops, + .maxattr = NFTA_CONNLIMIT_MAX, + .policy = nft_connlimit_policy, + .owner = THIS_MODULE, +}; + +static void nft_connlimit_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_connlimit *priv = nft_expr_priv(expr); + + nft_connlimit_do_eval(priv, regs, pkt, NULL); +} + +static int nft_connlimit_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_connlimit *priv = nft_expr_priv(expr); + + return nft_connlimit_do_dump(skb, priv); +} + +static int nft_connlimit_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_connlimit *priv = nft_expr_priv(expr); + + return nft_connlimit_do_init(ctx, tb, priv); +} + +static void nft_connlimit_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_connlimit *priv = nft_expr_priv(expr); + + nft_connlimit_do_destroy(ctx, priv); +} + +static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src) +{ + struct nft_connlimit *priv_dst = nft_expr_priv(dst); + struct nft_connlimit *priv_src = nft_expr_priv(src); + + spin_lock_init(&priv_dst->lock); + INIT_HLIST_HEAD(&priv_dst->hhead); + priv_dst->limit = priv_src->limit; + priv_dst->invert = 
priv_src->invert; + + return 0; +} + +static void nft_connlimit_destroy_clone(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_connlimit *priv = nft_expr_priv(expr); + + nf_conncount_cache_free(&priv->hhead); +} + +static bool nft_connlimit_gc(struct net *net, const struct nft_expr *expr) +{ + struct nft_connlimit *priv = nft_expr_priv(expr); + bool addit, ret; + + spin_lock_bh(&priv->lock); + nf_conncount_lookup(net, &priv->hhead, NULL, &nf_ct_zone_dflt, &addit); + + ret = hlist_empty(&priv->hhead); + spin_unlock_bh(&priv->lock); + + return ret; +} + +static struct nft_expr_type nft_connlimit_type; +static const struct nft_expr_ops nft_connlimit_ops = { + .type = &nft_connlimit_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_connlimit)), + .eval = nft_connlimit_eval, + .init = nft_connlimit_init, + .destroy = nft_connlimit_destroy, + .clone = nft_connlimit_clone, + .destroy_clone = nft_connlimit_destroy_clone, + .dump = nft_connlimit_dump, + .gc = nft_connlimit_gc, +}; + +static struct nft_expr_type nft_connlimit_type __read_mostly = { + .name = "connlimit", + .ops = &nft_connlimit_ops, + .policy = nft_connlimit_policy, + .maxattr = NFTA_CONNLIMIT_MAX, + .flags = NFT_EXPR_STATEFUL | NFT_EXPR_GC, + .owner = THIS_MODULE, +}; + +static int __init nft_connlimit_module_init(void) +{ + int err; + + err = nft_register_obj(&nft_connlimit_obj_type); + if (err < 0) + return err; + + err = nft_register_expr(&nft_connlimit_type); + if (err < 0) + goto err1; + + return 0; +err1: + nft_unregister_obj(&nft_connlimit_obj_type); + return err; +} + +static void __exit nft_connlimit_module_exit(void) +{ + nft_unregister_expr(&nft_connlimit_type); + nft_unregister_obj(&nft_connlimit_obj_type); +} + +module_init(nft_connlimit_module_init); +module_exit(nft_connlimit_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso"); +MODULE_ALIAS_NFT_EXPR("connlimit"); +MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CONNLIMIT); diff --git 
a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index eefe3b409925..a61d7edfc290 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -96,7 +96,8 @@ static void nft_counter_do_destroy(struct nft_counter_percpu_priv *priv) free_percpu(priv->counter); } -static void nft_counter_obj_destroy(struct nft_object *obj) +static void nft_counter_obj_destroy(const struct nft_ctx *ctx, + struct nft_object *obj) { struct nft_counter_percpu_priv *priv = nft_obj_data(obj); @@ -257,6 +258,7 @@ static const struct nft_expr_ops nft_counter_ops = { .eval = nft_counter_eval, .init = nft_counter_init, .destroy = nft_counter_destroy, + .destroy_clone = nft_counter_destroy, .dump = nft_counter_dump, .clone = nft_counter_clone, }; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index ea737fd789e8..f8b19eacfa0c 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -826,7 +826,8 @@ static int nft_ct_helper_obj_init(const struct nft_ctx *ctx, return 0; } -static void nft_ct_helper_obj_destroy(struct nft_object *obj) +static void nft_ct_helper_obj_destroy(const struct nft_ctx *ctx, + struct nft_object *obj) { struct nft_ct_helper_obj *priv = nft_obj_data(obj); diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index b07a3fd9eeea..4d49529cff61 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -195,6 +195,15 @@ static int nft_dynset_init(const struct nft_ctx *ctx, err = -EOPNOTSUPP; if (!(priv->expr->ops->type->flags & NFT_EXPR_STATEFUL)) goto err1; + + if (priv->expr->ops->type->flags & NFT_EXPR_GC) { + if (set->flags & NFT_SET_TIMEOUT) + goto err1; + if (!set->ops->gc_init) + goto err1; + set->ops->gc_init(set); + } + } else if (set->flags & NFT_SET_EVAL) return -EINVAL; diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index dbf1f4ad077c..6f9a1365a09f 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -311,8 +311,16 @@ static void 
nft_rhash_gc(struct work_struct *work) continue; } + if (nft_set_ext_exists(&he->ext, NFT_SET_EXT_EXPR)) { + struct nft_expr *expr = nft_set_ext_expr(&he->ext); + + if (expr->ops->gc && + expr->ops->gc(read_pnet(&set->net), expr)) + goto gc; + } if (!nft_set_elem_expired(&he->ext)) continue; +gc: if (nft_set_elem_mark_busy(&he->ext)) continue; @@ -339,6 +347,14 @@ static unsigned int nft_rhash_privsize(const struct nlattr * const nla[], return sizeof(struct nft_rhash); } +static void nft_rhash_gc_init(const struct nft_set *set) +{ + struct nft_rhash *priv = nft_set_priv(set); + + queue_delayed_work(system_power_efficient_wq, &priv->gc_work, + nft_set_gc_interval(set)); +} + static int nft_rhash_init(const struct nft_set *set, const struct nft_set_desc *desc, const struct nlattr * const tb[]) @@ -356,8 +372,8 @@ static int nft_rhash_init(const struct nft_set *set, INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rhash_gc); if (set->flags & NFT_SET_TIMEOUT) - queue_delayed_work(system_power_efficient_wq, &priv->gc_work, - nft_set_gc_interval(set)); + nft_rhash_gc_init(set); + return 0; } @@ -647,6 +663,7 @@ static struct nft_set_type nft_rhash_type __read_mostly = { .elemsize = offsetof(struct nft_rhash_elem, ext), .estimate = nft_rhash_estimate, .init = nft_rhash_init, + .gc_init = nft_rhash_gc_init, .destroy = nft_rhash_destroy, .insert = nft_rhash_insert, .activate = nft_rhash_activate, diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index d86337068ecb..f28a0b944087 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -5,6 +5,7 @@ #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_socket.h> #include <net/inet_sock.h> +#include <net/tcp.h> struct nft_socket { enum nft_socket_keys key:8; @@ -48,7 +49,7 @@ static void nft_socket_eval(const struct nft_expr *expr, switch(priv->key) { case NFT_SOCKET_TRANSPARENT: - nft_reg_store8(dest, nf_sk_is_transparent(sk)); + nft_reg_store8(dest, inet_sk_transparent(sk)); break; 
default: WARN_ON(1); diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 8c89323c06af..58fce4e749a9 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -33,264 +33,9 @@ #include <net/netfilter/ipv6/nf_defrag_ipv6.h> #endif +#include <net/netfilter/nf_tproxy.h> #include <linux/netfilter/xt_TPROXY.h> -enum nf_tproxy_lookup_t { - NFT_LOOKUP_LISTENER, - NFT_LOOKUP_ESTABLISHED, -}; - -static bool tproxy_sk_is_transparent(struct sock *sk) -{ - switch (sk->sk_state) { - case TCP_TIME_WAIT: - if (inet_twsk(sk)->tw_transparent) - return true; - break; - case TCP_NEW_SYN_RECV: - if (inet_rsk(inet_reqsk(sk))->no_srccheck) - return true; - break; - default: - if (inet_sk(sk)->transparent) - return true; - } - - sock_gen_put(sk); - return false; -} - -static inline __be32 -tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr) -{ - struct in_device *indev; - __be32 laddr; - - if (user_laddr) - return user_laddr; - - laddr = 0; - indev = __in_dev_get_rcu(skb->dev); - for_primary_ifa(indev) { - laddr = ifa->ifa_local; - break; - } endfor_ifa(indev); - - return laddr ? laddr : daddr; -} - -/* - * This is used when the user wants to intercept a connection matching - * an explicit iptables rule. In this case the sockets are assumed - * matching in preference order: - * - * - match: if there's a fully established connection matching the - * _packet_ tuple, it is returned, assuming the redirection - * already took place and we process a packet belonging to an - * established connection - * - * - match: if there's a listening socket matching the redirection - * (e.g. on-port & on-ip of the connection), it is returned, - * regardless if it was bound to 0.0.0.0 or an explicit - * address. The reasoning is that if there's an explicit rule, it - * does not really matter if the listener is bound to an interface - * or to 0. The user already stated that he wants redirection - * (since he added the rule). 
- * - * Please note that there's an overlap between what a TPROXY target - * and a socket match will match. Normally if you have both rules the - * "socket" match will be the first one, effectively all packets - * belonging to established connections going through that one. - */ -static inline struct sock * -nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp, - const u8 protocol, - const __be32 saddr, const __be32 daddr, - const __be16 sport, const __be16 dport, - const struct net_device *in, - const enum nf_tproxy_lookup_t lookup_type) -{ - struct sock *sk; - struct tcphdr *tcph; - - switch (protocol) { - case IPPROTO_TCP: - switch (lookup_type) { - case NFT_LOOKUP_LISTENER: - tcph = hp; - sk = inet_lookup_listener(net, &tcp_hashinfo, skb, - ip_hdrlen(skb) + - __tcp_hdrlen(tcph), - saddr, sport, - daddr, dport, - in->ifindex, 0); - - if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) - sk = NULL; - /* NOTE: we return listeners even if bound to - * 0.0.0.0, those are filtered out in - * xt_socket, since xt_TPROXY needs 0 bound - * listeners too - */ - break; - case NFT_LOOKUP_ESTABLISHED: - sk = inet_lookup_established(net, &tcp_hashinfo, - saddr, sport, daddr, dport, - in->ifindex); - break; - default: - BUG(); - } - break; - case IPPROTO_UDP: - sk = udp4_lib_lookup(net, saddr, sport, daddr, dport, - in->ifindex); - if (sk) { - int connected = (sk->sk_state == TCP_ESTABLISHED); - int wildcard = (inet_sk(sk)->inet_rcv_saddr == 0); - - /* NOTE: we return listeners even if bound to - * 0.0.0.0, those are filtered out in - * xt_socket, since xt_TPROXY needs 0 bound - * listeners too - */ - if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) || - (lookup_type == NFT_LOOKUP_LISTENER && connected)) { - sock_put(sk); - sk = NULL; - } - } - break; - default: - WARN_ON(1); - sk = NULL; - } - - pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n", - protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), 
ntohs(dport), lookup_type, sk); - - return sk; -} - -#ifdef XT_TPROXY_HAVE_IPV6 -static inline struct sock * -nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp, - const u8 protocol, - const struct in6_addr *saddr, const struct in6_addr *daddr, - const __be16 sport, const __be16 dport, - const struct net_device *in, - const enum nf_tproxy_lookup_t lookup_type) -{ - struct sock *sk; - struct tcphdr *tcph; - - switch (protocol) { - case IPPROTO_TCP: - switch (lookup_type) { - case NFT_LOOKUP_LISTENER: - tcph = hp; - sk = inet6_lookup_listener(net, &tcp_hashinfo, skb, - thoff + __tcp_hdrlen(tcph), - saddr, sport, - daddr, ntohs(dport), - in->ifindex, 0); - - if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) - sk = NULL; - /* NOTE: we return listeners even if bound to - * 0.0.0.0, those are filtered out in - * xt_socket, since xt_TPROXY needs 0 bound - * listeners too - */ - break; - case NFT_LOOKUP_ESTABLISHED: - sk = __inet6_lookup_established(net, &tcp_hashinfo, - saddr, sport, daddr, ntohs(dport), - in->ifindex, 0); - break; - default: - BUG(); - } - break; - case IPPROTO_UDP: - sk = udp6_lib_lookup(net, saddr, sport, daddr, dport, - in->ifindex); - if (sk) { - int connected = (sk->sk_state == TCP_ESTABLISHED); - int wildcard = ipv6_addr_any(&sk->sk_v6_rcv_saddr); - - /* NOTE: we return listeners even if bound to - * 0.0.0.0, those are filtered out in - * xt_socket, since xt_TPROXY needs 0 bound - * listeners too - */ - if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) || - (lookup_type == NFT_LOOKUP_LISTENER && connected)) { - sock_put(sk); - sk = NULL; - } - } - break; - default: - WARN_ON(1); - sk = NULL; - } - - pr_debug("tproxy socket lookup: proto %u %pI6:%u -> %pI6:%u, lookup type: %d, sock %p\n", - protocol, saddr, ntohs(sport), daddr, ntohs(dport), lookup_type, sk); - - return sk; -} -#endif - -/** - * tproxy_handle_time_wait4 - handle IPv4 TCP TIME_WAIT reopen redirections - * @skb: The skb being 
processed. - * @laddr: IPv4 address to redirect to or zero. - * @lport: TCP port to redirect to or zero. - * @sk: The TIME_WAIT TCP socket found by the lookup. - * - * We have to handle SYN packets arriving to TIME_WAIT sockets - * differently: instead of reopening the connection we should rather - * redirect the new connection to the proxy if there's a listener - * socket present. - * - * tproxy_handle_time_wait4() consumes the socket reference passed in. - * - * Returns the listener socket if there's one, the TIME_WAIT socket if - * no such listener is found, or NULL if the TCP header is incomplete. - */ -static struct sock * -tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb, - __be32 laddr, __be16 lport, struct sock *sk) -{ - const struct iphdr *iph = ip_hdr(skb); - struct tcphdr _hdr, *hp; - - hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr); - if (hp == NULL) { - inet_twsk_put(inet_twsk(sk)); - return NULL; - } - - if (hp->syn && !hp->rst && !hp->ack && !hp->fin) { - /* SYN to a TIME_WAIT socket, we'd rather redirect it - * to a listener socket if there's one */ - struct sock *sk2; - - sk2 = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol, - iph->saddr, laddr ? laddr : iph->daddr, - hp->source, lport ? 
lport : hp->dest, - skb->dev, NFT_LOOKUP_LISTENER); - if (sk2) { - inet_twsk_deschedule_put(inet_twsk(sk)); - sk = sk2; - } - } - - return sk; -} - /* assign a socket to the skb -- consumes sk */ static void nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) @@ -319,26 +64,26 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport, sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol, iph->saddr, iph->daddr, hp->source, hp->dest, - skb->dev, NFT_LOOKUP_ESTABLISHED); + skb->dev, NF_TPROXY_LOOKUP_ESTABLISHED); - laddr = tproxy_laddr4(skb, laddr, iph->daddr); + laddr = nf_tproxy_laddr4(skb, laddr, iph->daddr); if (!lport) lport = hp->dest; /* UDP has no TCP_TIME_WAIT state, so we never enter here */ if (sk && sk->sk_state == TCP_TIME_WAIT) /* reopening a TIME_WAIT connection needs special handling */ - sk = tproxy_handle_time_wait4(net, skb, laddr, lport, sk); + sk = nf_tproxy_handle_time_wait4(net, skb, laddr, lport, sk); else if (!sk) /* no, there's no established connection, check if * there's a listener on the redirected addr/port */ sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol, iph->saddr, laddr, hp->source, lport, - skb->dev, NFT_LOOKUP_LISTENER); + skb->dev, NF_TPROXY_LOOKUP_LISTENER); /* NOTE: assign_sock consumes our sk reference */ - if (sk && tproxy_sk_is_transparent(sk)) { + if (sk && nf_tproxy_sk_is_transparent(sk)) { /* This should be in a separate target, but we don't do multiple targets on the same rule yet */ skb->mark = (skb->mark & ~mark_mask) ^ mark_value; @@ -377,87 +122,6 @@ tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par) #ifdef XT_TPROXY_HAVE_IPV6 -static inline const struct in6_addr * -tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr, - const struct in6_addr *daddr) -{ - struct inet6_dev *indev; - struct inet6_ifaddr *ifa; - struct in6_addr *laddr; - - if (!ipv6_addr_any(user_laddr)) - return user_laddr; - laddr = NULL; - - indev = __in6_dev_get(skb->dev); - 
if (indev) { - read_lock_bh(&indev->lock); - list_for_each_entry(ifa, &indev->addr_list, if_list) { - if (ifa->flags & (IFA_F_TENTATIVE | IFA_F_DEPRECATED)) - continue; - - laddr = &ifa->addr; - break; - } - read_unlock_bh(&indev->lock); - } - - return laddr ? laddr : daddr; -} - -/** - * tproxy_handle_time_wait6 - handle IPv6 TCP TIME_WAIT reopen redirections - * @skb: The skb being processed. - * @tproto: Transport protocol. - * @thoff: Transport protocol header offset. - * @par: Iptables target parameters. - * @sk: The TIME_WAIT TCP socket found by the lookup. - * - * We have to handle SYN packets arriving to TIME_WAIT sockets - * differently: instead of reopening the connection we should rather - * redirect the new connection to the proxy if there's a listener - * socket present. - * - * tproxy_handle_time_wait6() consumes the socket reference passed in. - * - * Returns the listener socket if there's one, the TIME_WAIT socket if - * no such listener is found, or NULL if the TCP header is incomplete. - */ -static struct sock * -tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff, - const struct xt_action_param *par, - struct sock *sk) -{ - const struct ipv6hdr *iph = ipv6_hdr(skb); - struct tcphdr _hdr, *hp; - const struct xt_tproxy_target_info_v1 *tgi = par->targinfo; - - hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr); - if (hp == NULL) { - inet_twsk_put(inet_twsk(sk)); - return NULL; - } - - if (hp->syn && !hp->rst && !hp->ack && !hp->fin) { - /* SYN to a TIME_WAIT socket, we'd rather redirect it - * to a listener socket if there's one */ - struct sock *sk2; - - sk2 = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto, - &iph->saddr, - tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr), - hp->source, - tgi->lport ? 
tgi->lport : hp->dest, - skb->dev, NFT_LOOKUP_LISTENER); - if (sk2) { - inet_twsk_deschedule_put(inet_twsk(sk)); - sk = sk2; - } - } - - return sk; -} - static unsigned int tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) { @@ -489,25 +153,31 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto, &iph->saddr, &iph->daddr, hp->source, hp->dest, - xt_in(par), NFT_LOOKUP_ESTABLISHED); + xt_in(par), NF_TPROXY_LOOKUP_ESTABLISHED); - laddr = tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr); + laddr = nf_tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr); lport = tgi->lport ? tgi->lport : hp->dest; /* UDP has no TCP_TIME_WAIT state, so we never enter here */ - if (sk && sk->sk_state == TCP_TIME_WAIT) + if (sk && sk->sk_state == TCP_TIME_WAIT) { + const struct xt_tproxy_target_info_v1 *tgi = par->targinfo; /* reopening a TIME_WAIT connection needs special handling */ - sk = tproxy_handle_time_wait6(skb, tproto, thoff, par, sk); + sk = nf_tproxy_handle_time_wait6(skb, tproto, thoff, + xt_net(par), + &tgi->laddr.in6, + tgi->lport, + sk); + } else if (!sk) /* no there's no established connection, check if * there's a listener on the redirected addr/port */ sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto, &iph->saddr, laddr, hp->source, lport, - xt_in(par), NFT_LOOKUP_LISTENER); + xt_in(par), NF_TPROXY_LOOKUP_LISTENER); /* NOTE: assign_sock consumes our sk reference */ - if (sk && tproxy_sk_is_transparent(sk)) { + if (sk && nf_tproxy_sk_is_transparent(sk)) { /* This should be in a separate target, but we don't do multiple targets on the same rule yet */ skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value; diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 2ac7f674d19b..5c0779c4fa3c 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -73,7 +73,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param 
*par, * if XT_SOCKET_TRANSPARENT is used */ if (info->flags & XT_SOCKET_TRANSPARENT) - transparent = nf_sk_is_transparent(sk); + transparent = inet_sk_transparent(sk); if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && transparent && sk_fullsock(sk)) @@ -130,7 +130,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) * if XT_SOCKET_TRANSPARENT is used */ if (info->flags & XT_SOCKET_TRANSPARENT) - transparent = nf_sk_is_transparent(sk); + transparent = inet_sk_transparent(sk); if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && transparent && sk_fullsock(sk)) |