diff options
57 files changed, 1166 insertions, 1852 deletions
diff --git a/Documentation/networking/nf_conntrack-sysctl.txt b/Documentation/networking/nf_conntrack-sysctl.txt index 1669dc2419fd..f75c2ce6e136 100644 --- a/Documentation/networking/nf_conntrack-sysctl.txt +++ b/Documentation/networking/nf_conntrack-sysctl.txt @@ -157,7 +157,16 @@ nf_conntrack_udp_timeout - INTEGER (seconds) default 30 nf_conntrack_udp_timeout_stream - INTEGER (seconds) - default 180 + default 120 This extended timeout will be used in case there is an UDP stream detected. + +nf_conntrack_gre_timeout - INTEGER (seconds) + default 30 + +nf_conntrack_gre_timeout_stream - INTEGER (seconds) + default 180 + + This extended timeout will be used in case there is an GRE stream + detected. diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 1d100efe74ec..f2e1e6b13ca4 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -303,11 +303,11 @@ ip_set_put_flags(struct sk_buff *skb, struct ip_set *set) /* Netlink CB args */ enum { IPSET_CB_NET = 0, /* net namespace */ + IPSET_CB_PROTO, /* ipset protocol */ IPSET_CB_DUMP, /* dump single set/all sets */ IPSET_CB_INDEX, /* set index */ IPSET_CB_PRIVATE, /* set private data */ IPSET_CB_ARG0, /* type specific */ - IPSET_CB_ARG1, }; /* register and unregister set references */ diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h index 14edb795ab43..6989e2e4eabf 100644 --- a/include/linux/netfilter/nf_conntrack_proto_gre.h +++ b/include/linux/netfilter/nf_conntrack_proto_gre.h @@ -41,7 +41,5 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, /* delete keymap entries */ void nf_ct_gre_keymap_destroy(struct nf_conn *ct); -void nf_nat_need_gre(void); - #endif /* __KERNEL__ */ #endif /* _CONNTRACK_PROTO_GRE_H */ diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 7e012312cd61..249d0a5b12b8 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -27,12 +27,17 @@ #include <net/netfilter/nf_conntrack_tuple.h> +struct nf_ct_udp { + unsigned long stream_ts; +}; + /* per conntrack: protocol private data */ union nf_conntrack_proto { /* insert conntrack proto private data here */ struct nf_ct_dccp dccp; struct ip_ct_sctp sctp; struct ip_ct_tcp tcp; + struct nf_ct_udp udp; struct nf_ct_gre gre; unsigned int tmpl_padto; }; diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h index 79d8d16732b4..bc6745d3010e 100644 --- a/include/net/netfilter/nf_conntrack_acct.h +++ b/include/net/netfilter/nf_conntrack_acct.h @@ -46,9 +46,6 @@ struct nf_conn_acct *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp) return acct; }; -unsigned int seq_print_acct(struct seq_file *s, const struct nf_conn *ct, - int dir); - /* Check if connection tracking accounting is enabled */ static inline bool nf_ct_acct_enabled(struct net *net) { @@ -61,8 +58,7 @@ static inline void nf_ct_set_acct(struct net *net, bool enable) net->ct.sysctl_acct = enable; } -int nf_conntrack_acct_pernet_init(struct net *net); -void nf_conntrack_acct_pernet_fini(struct net *net); +void nf_conntrack_acct_pernet_init(struct net *net); int nf_conntrack_acct_init(void); void nf_conntrack_acct_fini(void); diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 3f1ce9a8776e..52b44192b43f 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -142,7 +142,7 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, struct nf_conntrack_expect *exp, u32 portid, int report); -int nf_conntrack_ecache_pernet_init(struct net *net); +void nf_conntrack_ecache_pernet_init(struct net *net); void nf_conntrack_ecache_pernet_fini(struct net *net); int nf_conntrack_ecache_init(void); @@ -182,10 +182,7 @@ static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e, u32 portid, int report) {} -static inline int nf_conntrack_ecache_pernet_init(struct net *net) -{ - return 0; -} +static inline void nf_conntrack_ecache_pernet_init(struct net *net) {} static inline void nf_conntrack_ecache_pernet_fini(struct net *net) { diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 2492120b8097..ec52a8dc32fd 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -124,8 +124,7 @@ static inline void *nfct_help_data(const struct nf_conn *ct) return (void *)help->data; } -int nf_conntrack_helper_pernet_init(struct net *net); -void nf_conntrack_helper_pernet_fini(struct net *net); +void nf_conntrack_helper_pernet_init(struct net *net); int nf_conntrack_helper_init(void); void nf_conntrack_helper_fini(void); diff --git a/include/net/netfilter/nf_conntrack_timestamp.h b/include/net/netfilter/nf_conntrack_timestamp.h index 3b661986be8f..0ed617bf0a3d 100644 --- a/include/net/netfilter/nf_conntrack_timestamp.h +++ b/include/net/netfilter/nf_conntrack_timestamp.h @@ -49,21 +49,12 @@ static inline void nf_ct_set_tstamp(struct net *net, bool enable) } #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP -int nf_conntrack_tstamp_pernet_init(struct net *net); -void nf_conntrack_tstamp_pernet_fini(struct net *net); +void nf_conntrack_tstamp_pernet_init(struct net *net); int nf_conntrack_tstamp_init(void); void nf_conntrack_tstamp_fini(void); #else -static inline int nf_conntrack_tstamp_pernet_init(struct net *net) -{ - return 0; -} - -static inline void nf_conntrack_tstamp_pernet_fini(struct net *net) -{ - return; -} +static inline void nf_conntrack_tstamp_pernet_init(struct net *net) {} static inline int nf_conntrack_tstamp_init(void) { diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 77e2761d4f2f..7d5cda7ce32a 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -95,10 +95,6 @@ void flow_offload_free(struct flow_offload *flow); int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow); struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table, struct flow_offload_tuple *tuple); -int nf_flow_table_iterate(struct nf_flowtable *flow_table, - void (*iter)(struct flow_offload *flow, void *data), - void *data); - void nf_flow_table_cleanup(struct net_device *dev); int nf_flow_table_init(struct nf_flowtable *flow_table); diff --git a/include/net/netfilter/nf_nat_l3proto.h b/include/net/netfilter/nf_nat_l3proto.h index d300b8f03972..d774ca0c4c5e 100644 --- a/include/net/netfilter/nf_nat_l3proto.h +++ b/include/net/netfilter/nf_nat_l3proto.h @@ -2,18 +2,11 @@ #ifndef _NF_NAT_L3PROTO_H #define _NF_NAT_L3PROTO_H -struct nf_nat_l4proto; struct nf_nat_l3proto { u8 l3proto; - bool (*in_range)(const struct nf_conntrack_tuple *t, - const struct nf_nat_range2 *range); - - u32 (*secure_port)(const struct nf_conntrack_tuple *t, __be16); - bool (*manip_pkt)(struct sk_buff *skb, unsigned int iphdroff, - const struct nf_nat_l4proto *l4proto, const struct nf_conntrack_tuple *target, enum nf_nat_manip_type maniptype); diff --git a/include/net/netfilter/nf_nat_l4proto.h b/include/net/netfilter/nf_nat_l4proto.h index b4d6b29bca62..95a4655bd1ad 100644 --- a/include/net/netfilter/nf_nat_l4proto.h +++ b/include/net/netfilter/nf_nat_l4proto.h @@ -5,78 +5,12 @@ #include <net/netfilter/nf_nat.h> #include <linux/netfilter/nfnetlink_conntrack.h> -struct nf_nat_range; struct nf_nat_l3proto; -struct nf_nat_l4proto { - /* Protocol number. */ - u8 l4proto; - - /* Translate a packet to the target according to manip type. - * Return true if succeeded. - */ - bool (*manip_pkt)(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, - unsigned int iphdroff, unsigned int hdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype); - - /* Is the manipable part of the tuple between min and max incl? */ - bool (*in_range)(const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype, - const union nf_conntrack_man_proto *min, - const union nf_conntrack_man_proto *max); - - /* Alter the per-proto part of the tuple (depending on - * maniptype), to give a unique tuple in the given range if - * possible. Per-protocol part of tuple is initialized to the - * incoming packet. - */ - void (*unique_tuple)(const struct nf_nat_l3proto *l3proto, - struct nf_conntrack_tuple *tuple, - const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct); - - int (*nlattr_to_range)(struct nlattr *tb[], - struct nf_nat_range2 *range); -}; - -/* Protocol registration. */ -int nf_nat_l4proto_register(u8 l3proto, const struct nf_nat_l4proto *l4proto); -void nf_nat_l4proto_unregister(u8 l3proto, - const struct nf_nat_l4proto *l4proto); - -const struct nf_nat_l4proto *__nf_nat_l4proto_find(u8 l3proto, u8 l4proto); - -/* Built-in protocols. */ -extern const struct nf_nat_l4proto nf_nat_l4proto_tcp; -extern const struct nf_nat_l4proto nf_nat_l4proto_udp; -extern const struct nf_nat_l4proto nf_nat_l4proto_icmp; -extern const struct nf_nat_l4proto nf_nat_l4proto_icmpv6; -extern const struct nf_nat_l4proto nf_nat_l4proto_unknown; -#ifdef CONFIG_NF_NAT_PROTO_DCCP -extern const struct nf_nat_l4proto nf_nat_l4proto_dccp; -#endif -#ifdef CONFIG_NF_NAT_PROTO_SCTP -extern const struct nf_nat_l4proto nf_nat_l4proto_sctp; -#endif -#ifdef CONFIG_NF_NAT_PROTO_UDPLITE -extern const struct nf_nat_l4proto nf_nat_l4proto_udplite; -#endif - -bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype, - const union nf_conntrack_man_proto *min, - const union nf_conntrack_man_proto *max); - -void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto, - struct nf_conntrack_tuple *tuple, - const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct, u16 *rover); - -int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[], - struct nf_nat_range2 *range); - +/* Translate a packet to the target according to manip type. Return on success. */ +bool nf_nat_l4proto_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype); #endif /*_NF_NAT_L4PROTO_H*/ diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 9795d628a127..51cba0b8adf5 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -97,18 +97,14 @@ struct netns_ct { struct delayed_work ecache_dwork; bool ecache_dwork_pending; #endif + bool auto_assign_helper_warned; #ifdef CONFIG_SYSCTL struct ctl_table_header *sysctl_header; - struct ctl_table_header *acct_sysctl_header; - struct ctl_table_header *tstamp_sysctl_header; - struct ctl_table_header *event_sysctl_header; - struct ctl_table_header *helper_sysctl_header; #endif unsigned int sysctl_log_invalid; /* Log invalid packets */ int sysctl_events; int sysctl_acct; int sysctl_auto_assign_helper; - bool auto_assign_helper_warned; int sysctl_tstamp; int sysctl_checksum; diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h index cca10e767cd8..ca9e63d6e0e4 100644 --- a/include/uapi/linux/netfilter.h +++ b/include/uapi/linux/netfilter.h @@ -34,10 +34,6 @@ /* only for userspace compatibility */ #ifndef __KERNEL__ -/* Generic cache responses from hook functions. - <= 0x2000 is used for protocol-flags. */ -#define NFC_UNKNOWN 0x4000 -#define NFC_ALTERED 0x8000 /* NF_VERDICT_BITS should be 8 now, but userspace might break if this changes */ #define NF_VERDICT_BITS 16 diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index 60236f694143..ea69ca21ff23 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -13,8 +13,9 @@ #include <linux/types.h> -/* The protocol version */ -#define IPSET_PROTOCOL 6 +/* The protocol versions */ +#define IPSET_PROTOCOL 7 +#define IPSET_PROTOCOL_MIN 6 /* The max length of strings including NUL: set and type identifiers */ #define IPSET_MAXNAMELEN 32 @@ -38,17 +39,19 @@ enum ipset_cmd { IPSET_CMD_TEST, /* 11: Test an element in a set */ IPSET_CMD_HEADER, /* 12: Get set header data only */ IPSET_CMD_TYPE, /* 13: Get set type */ + IPSET_CMD_GET_BYNAME, /* 14: Get set index by name */ + IPSET_CMD_GET_BYINDEX, /* 15: Get set name by index */ IPSET_MSG_MAX, /* Netlink message commands */ /* Commands in userspace: */ - IPSET_CMD_RESTORE = IPSET_MSG_MAX, /* 14: Enter restore mode */ - IPSET_CMD_HELP, /* 15: Get help */ - IPSET_CMD_VERSION, /* 16: Get program version */ - IPSET_CMD_QUIT, /* 17: Quit from interactive mode */ + IPSET_CMD_RESTORE = IPSET_MSG_MAX, /* 16: Enter restore mode */ + IPSET_CMD_HELP, /* 17: Get help */ + IPSET_CMD_VERSION, /* 18: Get program version */ + IPSET_CMD_QUIT, /* 19: Quit from interactive mode */ IPSET_CMD_MAX, - IPSET_CMD_COMMIT = IPSET_CMD_MAX, /* 18: Commit buffered commands */ + IPSET_CMD_COMMIT = IPSET_CMD_MAX, /* 20: Commit buffered commands */ }; /* Attributes at command level */ @@ -66,6 +69,7 @@ enum { IPSET_ATTR_LINENO, /* 9: Restore lineno */ IPSET_ATTR_PROTOCOL_MIN, /* 10: Minimal supported version number */ IPSET_ATTR_REVISION_MIN = IPSET_ATTR_PROTOCOL_MIN, /* type rev min */ + IPSET_ATTR_INDEX, /* 11: Kernel index of set */ __IPSET_ATTR_CMD_MAX, }; #define IPSET_ATTR_CMD_MAX (__IPSET_ATTR_CMD_MAX - 1) @@ -223,6 +227,7 @@ enum ipset_adt { /* Sets are identified by an index in kernel space. Tweak with ip_set_id_t * and IPSET_INVALID_ID if you want to increase the max number of sets. + * Also, IPSET_ATTR_INDEX must be changed. */ typedef __u16 ip_set_id_t; diff --git a/include/uapi/linux/netfilter_decnet.h b/include/uapi/linux/netfilter_decnet.h index 61f1c7dfd033..3c77f54560f2 100644 --- a/include/uapi/linux/netfilter_decnet.h +++ b/include/uapi/linux/netfilter_decnet.h @@ -15,16 +15,6 @@ #include <limits.h> /* for INT_MIN, INT_MAX */ -/* IP Cache bits. */ -/* Src IP address. */ -#define NFC_DN_SRC 0x0001 -/* Dest IP address. */ -#define NFC_DN_DST 0x0002 -/* Input device. */ -#define NFC_DN_IF_IN 0x0004 -/* Output device. */ -#define NFC_DN_IF_OUT 0x0008 - /* kernel define is in netfilter_defs.h */ #define NF_DN_NUMHOOKS 7 #endif /* ! __KERNEL__ */ diff --git a/include/uapi/linux/netfilter_ipv4.h b/include/uapi/linux/netfilter_ipv4.h index c3b060775e13..155e77d6a42d 100644 --- a/include/uapi/linux/netfilter_ipv4.h +++ b/include/uapi/linux/netfilter_ipv4.h @@ -13,34 +13,6 @@ #include <limits.h> /* for INT_MIN, INT_MAX */ -/* IP Cache bits. */ -/* Src IP address. */ -#define NFC_IP_SRC 0x0001 -/* Dest IP address. */ -#define NFC_IP_DST 0x0002 -/* Input device. */ -#define NFC_IP_IF_IN 0x0004 -/* Output device. */ -#define NFC_IP_IF_OUT 0x0008 -/* TOS. */ -#define NFC_IP_TOS 0x0010 -/* Protocol. */ -#define NFC_IP_PROTO 0x0020 -/* IP options. */ -#define NFC_IP_OPTIONS 0x0040 -/* Frag & flags. */ -#define NFC_IP_FRAG 0x0080 - -/* Per-protocol information: only matters if proto match. */ -/* TCP flags. */ -#define NFC_IP_TCPFLAGS 0x0100 -/* Source port. */ -#define NFC_IP_SRC_PT 0x0200 -/* Dest port. */ -#define NFC_IP_DST_PT 0x0400 -/* Something else about the proto */ -#define NFC_IP_PROTO_UNKNOWN 0x2000 - /* IP Hooks */ /* After promisc drops, checksum checks. */ #define NF_IP_PRE_ROUTING 0 diff --git a/include/uapi/linux/netfilter_ipv6.h b/include/uapi/linux/netfilter_ipv6.h index dc624fd24d25..80aa9b0799af 100644 --- a/include/uapi/linux/netfilter_ipv6.h +++ b/include/uapi/linux/netfilter_ipv6.h @@ -16,35 +16,6 @@ #include <limits.h> /* for INT_MIN, INT_MAX */ -/* IP Cache bits. */ -/* Src IP address. */ -#define NFC_IP6_SRC 0x0001 -/* Dest IP address. */ -#define NFC_IP6_DST 0x0002 -/* Input device. */ -#define NFC_IP6_IF_IN 0x0004 -/* Output device. */ -#define NFC_IP6_IF_OUT 0x0008 -/* TOS. */ -#define NFC_IP6_TOS 0x0010 -/* Protocol. */ -#define NFC_IP6_PROTO 0x0020 -/* IP options. */ -#define NFC_IP6_OPTIONS 0x0040 -/* Frag & flags. */ -#define NFC_IP6_FRAG 0x0080 - - -/* Per-protocol information: only matters if proto match. */ -/* TCP flags. */ -#define NFC_IP6_TCPFLAGS 0x0100 -/* Source port. */ -#define NFC_IP6_SRC_PT 0x0200 -/* Dest port. */ -#define NFC_IP6_DST_PT 0x0400 -/* Something else about the proto */ -#define NFC_IP6_PROTO_UNKNOWN 0x2000 - /* IP6 Hooks */ /* After promisc drops, checksum checks. */ #define NF_IP6_PRE_ROUTING 0 diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 184bf2e0a1ed..80f72cc5ca8d 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -156,15 +156,10 @@ config NF_NAT_SNMP_BASIC To compile it as a module, choose M here. If unsure, say N. -config NF_NAT_PROTO_GRE - tristate - depends on NF_CT_PROTO_GRE - config NF_NAT_PPTP tristate depends on NF_CONNTRACK default NF_CONNTRACK_PPTP - select NF_NAT_PROTO_GRE config NF_NAT_H323 tristate diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 367993adf4d3..fd7122e0e2c9 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -3,7 +3,7 @@ # Makefile for the netfilter modules on top of IPv4. # -nf_nat_ipv4-y := nf_nat_l3proto_ipv4.o nf_nat_proto_icmp.o +nf_nat_ipv4-y := nf_nat_l3proto_ipv4.o nf_nat_ipv4-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o @@ -28,9 +28,6 @@ nf_nat_snmp_basic-y := nf_nat_snmp_basic.asn1.o nf_nat_snmp_basic_main.o $(obj)/nf_nat_snmp_basic_main.o: $(obj)/nf_nat_snmp_basic.asn1.h obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o -# NAT protocols (nf_nat) -obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o - obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 2c8d313ae216..b61977db9b7f 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -56,18 +56,15 @@ struct clusterip_config { #endif enum clusterip_hashmode hash_mode; /* which hashing mode */ u_int32_t hash_initval; /* hash initialization */ - struct rcu_head rcu; - + struct rcu_head rcu; /* for call_rcu_bh */ + struct net *net; /* netns for pernet list */ char ifname[IFNAMSIZ]; /* device ifname */ - struct notifier_block notifier; /* refresh c->ifindex in it */ }; #ifdef CONFIG_PROC_FS static const struct file_operations clusterip_proc_fops; #endif -static unsigned int clusterip_net_id __read_mostly; - struct clusterip_net { struct list_head configs; /* lock protects the configs list */ @@ -75,51 +72,66 @@ struct clusterip_net { #ifdef CONFIG_PROC_FS struct proc_dir_entry *procdir; + /* mutex protects the config->pde*/ + struct mutex mutex; #endif }; +static unsigned int clusterip_net_id __read_mostly; +static inline struct clusterip_net *clusterip_pernet(struct net *net) +{ + return net_generic(net, clusterip_net_id); +} + static inline void clusterip_config_get(struct clusterip_config *c) { refcount_inc(&c->refcount); } - static void clusterip_config_rcu_free(struct rcu_head *head) { - kfree(container_of(head, struct clusterip_config, rcu)); + struct clusterip_config *config; + struct net_device *dev; + + config = container_of(head, struct clusterip_config, rcu); + dev = dev_get_by_name(config->net, config->ifname); + if (dev) { + dev_mc_del(dev, config->clustermac); + dev_put(dev); + } + kfree(config); } static inline void clusterip_config_put(struct clusterip_config *c) { if (refcount_dec_and_test(&c->refcount)) - call_rcu_bh(&c->rcu, clusterip_config_rcu_free); + call_rcu(&c->rcu, clusterip_config_rcu_free); } /* decrease the count of entries using/referencing this config. If last * entry(rule) is removed, remove the config from lists, but don't free it * yet, since proc-files could still be holding references */ static inline void -clusterip_config_entry_put(struct net *net, struct clusterip_config *c) +clusterip_config_entry_put(struct clusterip_config *c) { - struct clusterip_net *cn = net_generic(net, clusterip_net_id); + struct clusterip_net *cn = clusterip_pernet(c->net); local_bh_disable(); if (refcount_dec_and_lock(&c->entries, &cn->lock)) { + list_del_rcu(&c->list); + spin_unlock(&cn->lock); + local_bh_enable(); /* In case anyone still accesses the file, the open/close * functions are also incrementing the refcount on their own, * so it's safe to remove the entry even if it's in use. */ #ifdef CONFIG_PROC_FS + mutex_lock(&cn->mutex); if (cn->procdir) proc_remove(c->pde); + mutex_unlock(&cn->mutex); #endif - list_del_rcu(&c->list); - spin_unlock(&cn->lock); - local_bh_enable(); - - unregister_netdevice_notifier(&c->notifier); - return; } local_bh_enable(); @@ -129,7 +141,7 @@ static struct clusterip_config * __clusterip_config_find(struct net *net, __be32 clusterip) { struct clusterip_config *c; - struct clusterip_net *cn = net_generic(net, clusterip_net_id); + struct clusterip_net *cn = clusterip_pernet(net); list_for_each_entry_rcu(c, &cn->configs, list) { if (c->clusterip == clusterip) @@ -181,32 +193,37 @@ clusterip_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct net *net = dev_net(dev); + struct clusterip_net *cn = clusterip_pernet(net); struct clusterip_config *c; - c = container_of(this, struct clusterip_config, notifier); - switch (event) { - case NETDEV_REGISTER: - if (!strcmp(dev->name, c->ifname)) { - c->ifindex = dev->ifindex; - dev_mc_add(dev, c->clustermac); - } - break; - case NETDEV_UNREGISTER: - if (dev->ifindex == c->ifindex) { - dev_mc_del(dev, c->clustermac); - c->ifindex = -1; - } - break; - case NETDEV_CHANGENAME: - if (!strcmp(dev->name, c->ifname)) { - c->ifindex = dev->ifindex; - dev_mc_add(dev, c->clustermac); - } else if (dev->ifindex == c->ifindex) { - dev_mc_del(dev, c->clustermac); - c->ifindex = -1; + spin_lock_bh(&cn->lock); + list_for_each_entry_rcu(c, &cn->configs, list) { + switch (event) { + case NETDEV_REGISTER: + if (!strcmp(dev->name, c->ifname)) { + c->ifindex = dev->ifindex; + dev_mc_add(dev, c->clustermac); + } + break; + case NETDEV_UNREGISTER: + if (dev->ifindex == c->ifindex) { + dev_mc_del(dev, c->clustermac); + c->ifindex = -1; + } + break; + case NETDEV_CHANGENAME: + if (!strcmp(dev->name, c->ifname)) { + c->ifindex = dev->ifindex; + dev_mc_add(dev, c->clustermac); + } else if (dev->ifindex == c->ifindex) { + dev_mc_del(dev, c->clustermac); + c->ifindex = -1; + } + break; } - break; } + spin_unlock_bh(&cn->lock); return NOTIFY_DONE; } @@ -215,30 +232,44 @@ static struct clusterip_config * clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i, __be32 ip, const char *iniface) { - struct clusterip_net *cn = net_generic(net, clusterip_net_id); + struct clusterip_net *cn = clusterip_pernet(net); struct clusterip_config *c; + struct net_device *dev; int err; + if (iniface[0] == '\0') { + pr_info("Please specify an interface name\n"); + return ERR_PTR(-EINVAL); + } + c = kzalloc(sizeof(*c), GFP_ATOMIC); if (!c) return ERR_PTR(-ENOMEM); - strcpy(c->ifname, iniface); - c->ifindex = -1; - c->clusterip = ip; + dev = dev_get_by_name(net, iniface); + if (!dev) { + pr_info("no such interface %s\n", iniface); + kfree(c); + return ERR_PTR(-ENOENT); + } + c->ifindex = dev->ifindex; + strcpy(c->ifname, dev->name); memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); + dev_mc_add(dev, c->clustermac); + dev_put(dev); + + c->clusterip = ip; c->num_total_nodes = i->num_total_nodes; clusterip_config_init_nodelist(c, i); c->hash_mode = i->hash_mode; c->hash_initval = i->hash_initval; + c->net = net; refcount_set(&c->refcount, 1); spin_lock_bh(&cn->lock); if (__clusterip_config_find(net, ip)) { - spin_unlock_bh(&cn->lock); - kfree(c); - - return ERR_PTR(-EBUSY); + err = -EBUSY; + goto out_config_put; } list_add_rcu(&c->list, &cn->configs); @@ -250,9 +281,11 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i, /* create proc dir entry */ sprintf(buffer, "%pI4", &ip); + mutex_lock(&cn->mutex); c->pde = proc_create_data(buffer, 0600, cn->procdir, &clusterip_proc_fops, c); + mutex_unlock(&cn->mutex); if (!c->pde) { err = -ENOMEM; goto err; @@ -260,22 +293,17 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i, } #endif - c->notifier.notifier_call = clusterip_netdev_event; - err = register_netdevice_notifier(&c->notifier); - if (!err) { - refcount_set(&c->entries, 1); - return c; - } + refcount_set(&c->entries, 1); + return c; #ifdef CONFIG_PROC_FS - proc_remove(c->pde); err: #endif spin_lock_bh(&cn->lock); list_del_rcu(&c->list); +out_config_put: spin_unlock_bh(&cn->lock); clusterip_config_put(c); - return ERR_PTR(err); } @@ -475,34 +503,20 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) &e->ip.dst.s_addr); return -EINVAL; } else { - struct net_device *dev; - - if (e->ip.iniface[0] == '\0') { - pr_info("Please specify an interface name\n"); - return -EINVAL; - } - - dev = dev_get_by_name(par->net, e->ip.iniface); - if (!dev) { - pr_info("no such interface %s\n", - e->ip.iniface); - return -ENOENT; - } - dev_put(dev); - config = clusterip_config_init(par->net, cipinfo, e->ip.dst.s_addr, e->ip.iniface); if (IS_ERR(config)) return PTR_ERR(config); } - } + } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN)) + return -EINVAL; ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) { pr_info("cannot load conntrack support for proto=%u\n", par->family); - clusterip_config_entry_put(par->net, config); + clusterip_config_entry_put(config); clusterip_config_put(config); return ret; } @@ -524,7 +538,7 @@ static void clusterip_tg_destroy(const struct xt_tgdtor_param *par) /* if no more entries are referencing the config, remove it * from the list and destroy the proc entry */ - clusterip_config_entry_put(par->net, cipinfo->config); + clusterip_config_entry_put(cipinfo->config); clusterip_config_put(cipinfo->config); @@ -806,7 +820,7 @@ static const struct file_operations clusterip_proc_fops = { static int clusterip_net_init(struct net *net) { - struct clusterip_net *cn = net_generic(net, clusterip_net_id); + struct clusterip_net *cn = clusterip_pernet(net); int ret; INIT_LIST_HEAD(&cn->configs); @@ -824,6 +838,7 @@ static int clusterip_net_init(struct net *net) pr_err("Unable to proc dir entry\n"); return -ENOMEM; } + mutex_init(&cn->mutex); #endif /* CONFIG_PROC_FS */ return 0; @@ -831,13 +846,15 @@ static int clusterip_net_init(struct net *net) static void clusterip_net_exit(struct net *net) { - struct clusterip_net *cn = net_generic(net, clusterip_net_id); + struct clusterip_net *cn = clusterip_pernet(net); + #ifdef CONFIG_PROC_FS + mutex_lock(&cn->mutex); proc_remove(cn->procdir); cn->procdir = NULL; + mutex_unlock(&cn->mutex); #endif nf_unregister_net_hook(net, &cip_arp_ops); - WARN_ON_ONCE(!list_empty(&cn->configs)); } static struct pernet_operations clusterip_net_ops = { @@ -847,6 +864,10 @@ static struct pernet_operations clusterip_net_ops = { .size = sizeof(struct clusterip_net), }; +struct notifier_block cip_netdev_notifier = { + .notifier_call = clusterip_netdev_event +}; + static int __init clusterip_tg_init(void) { int ret; @@ -859,11 +880,17 @@ static int __init clusterip_tg_init(void) if (ret < 0) goto cleanup_subsys; + ret = register_netdevice_notifier(&cip_netdev_notifier); + if (ret < 0) + goto unregister_target; + pr_info("ClusterIP Version %s loaded successfully\n", CLUSTERIP_VERSION); return 0; +unregister_target: + xt_unregister_target(&clusterip_tg_reg); cleanup_subsys: unregister_pernet_subsys(&clusterip_net_ops); return ret; @@ -873,11 +900,12 @@ static void __exit clusterip_tg_exit(void) { pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION); + unregister_netdevice_notifier(&cip_netdev_notifier); xt_unregister_target(&clusterip_tg_reg); unregister_pernet_subsys(&clusterip_net_ops); - /* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */ - rcu_barrier_bh(); + /* Wait for completion of call_rcu()'s (clusterip_config_rcu_free) */ + rcu_barrier(); } module_init(clusterip_tg_init); diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index 78a67f961d86..2687db015b6f 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -62,22 +62,8 @@ static void nf_nat_ipv4_decode_session(struct sk_buff *skb, } #endif /* CONFIG_XFRM */ -static bool nf_nat_ipv4_in_range(const struct nf_conntrack_tuple *t, - const struct nf_nat_range2 *range) -{ - return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) && - ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip); -} - -static u32 nf_nat_ipv4_secure_port(const struct nf_conntrack_tuple *t, - __be16 dport) -{ - return secure_ipv4_port_ephemeral(t->src.u3.ip, t->dst.u3.ip, dport); -} - static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, - const struct nf_nat_l4proto *l4proto, const struct nf_conntrack_tuple *target, enum nf_nat_manip_type maniptype) { @@ -90,8 +76,8 @@ static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb, iph = (void *)skb->data + iphdroff; hdroff = iphdroff + iph->ihl * 4; - if (!l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv4, iphdroff, hdroff, - target, maniptype)) + if (!nf_nat_l4proto_manip_pkt(skb, &nf_nat_l3proto_ipv4, iphdroff, + hdroff, target, maniptype)) return false; iph = (void *)skb->data + iphdroff; @@ -161,8 +147,6 @@ static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[], static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = { .l3proto = NFPROTO_IPV4, - .in_range = nf_nat_ipv4_in_range, - .secure_port = nf_nat_ipv4_secure_port, .manip_pkt = nf_nat_ipv4_manip_pkt, .csum_update = nf_nat_ipv4_csum_update, .csum_recalc = nf_nat_ipv4_csum_recalc, @@ -186,7 +170,6 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb, enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); unsigned int hdrlen = ip_hdrlen(skb); - const struct nf_nat_l4proto *l4proto; struct nf_conntrack_tuple target; unsigned long statusbit; @@ -217,9 +200,8 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb, if (!(ct->status & statusbit)) return 1; - l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, inside->ip.protocol); if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp), - l4proto, &ct->tuplehash[!dir].tuple, !manip)) + &ct->tuplehash[!dir].tuple, !manip)) return 0; if (skb->ip_summed != CHECKSUM_PARTIAL) { @@ -233,8 +215,7 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb, /* Change outer to look like the reply to an incoming packet */ nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); - l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, 0); - if (!nf_nat_ipv4_manip_pkt(skb, 0, l4proto, &target, manip)) + if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip)) return 0; return 1; @@ -391,26 +372,12 @@ EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv4_unregister_fn); static int __init nf_nat_l3proto_ipv4_init(void) { - int err; - - err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_icmp); - if (err < 0) - goto err1; - err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv4); - if (err < 0) - goto err2; - return err; - -err2: - nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp); -err1: - return err; + return nf_nat_l3proto_register(&nf_nat_l3proto_ipv4); } static void __exit nf_nat_l3proto_ipv4_exit(void) { nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv4); - nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp); } MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index 5d259a12e25f..68b4d450391b 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -299,8 +299,6 @@ pptp_inbound_pkt(struct sk_buff *skb, static int __init nf_nat_helper_pptp_init(void) { - nf_nat_need_gre(); - BUG_ON(nf_nat_pptp_hook_outbound != NULL); RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, pptp_outbound_pkt); diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c deleted file mode 100644 index 00fda6331ce5..000000000000 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ /dev/null @@ -1,150 +0,0 @@ -/* - * nf_nat_proto_gre.c - * - * NAT protocol helper module for GRE. - * - * GRE is a generic encapsulation protocol, which is generally not very - * suited for NAT, as it has no protocol-specific part as port numbers. - * - * It has an optional key field, which may help us distinguishing two - * connections between the same two hosts. - * - * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 - * - * PPTP is built on top of a modified version of GRE, and has a mandatory - * field called "CallID", which serves us for the same purpose as the key - * field in plain GRE. - * - * Documentation about PPTP can be found in RFC 2637 - * - * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> - * - * Development of this code funded by Astaro AG (http://www.astaro.com/) - * - * (C) 2006-2012 Patrick McHardy <kaber@trash.net> - * - */ - -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/ip.h> - -#include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_l4proto.h> -#include <linux/netfilter/nf_conntrack_proto_gre.h> - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); -MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); - -/* generate unique tuple ... */ -static void -gre_unique_tuple(const struct nf_nat_l3proto *l3proto, - struct nf_conntrack_tuple *tuple, - const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct) -{ - static u_int16_t key; - __be16 *keyptr; - unsigned int min, i, range_size; - - /* If there is no master conntrack we are not PPTP, - do not change tuples */ - if (!ct->master) - return; - - if (maniptype == NF_NAT_MANIP_SRC) - keyptr = &tuple->src.u.gre.key; - else - keyptr = &tuple->dst.u.gre.key; - - if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) { - pr_debug("%p: NATing GRE PPTP\n", ct); - min = 1; - range_size = 0xffff; - } else { - min = ntohs(range->min_proto.gre.key); - range_size = ntohs(range->max_proto.gre.key) - min + 1; - } - - pr_debug("min = %u, range_size = %u\n", min, range_size); - - for (i = 0; ; ++key) { - *keyptr = htons(min + key % range_size); - if (++i == range_size || !nf_nat_used_tuple(tuple, ct)) - return; - } - - pr_debug("%p: no NAT mapping\n", ct); - return; -} - -/* manipulate a GRE packet according to maniptype */ -static bool -gre_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, - unsigned int iphdroff, unsigned int hdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ - const struct gre_base_hdr *greh; - struct pptp_gre_header *pgreh; - - /* pgreh includes two optional 32bit fields which are not required - * to be there. That's where the magic '8' comes from */ - if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8)) - return false; - - greh = (void *)skb->data + hdroff; - pgreh = (struct pptp_gre_header *)greh; - - /* we only have destination manip of a packet, since 'source key' - * is not present in the packet itself */ - if (maniptype != NF_NAT_MANIP_DST) - return true; - - switch (greh->flags & GRE_VERSION) { - case GRE_VERSION_0: - /* We do not currently NAT any GREv0 packets. - * Try to behave like "nf_nat_proto_unknown" */ - break; - case GRE_VERSION_1: - pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key)); - pgreh->call_id = tuple->dst.u.gre.key; - break; - default: - pr_debug("can't nat unknown GRE version\n"); - return false; - } - return true; -} - -static const struct nf_nat_l4proto gre = { - .l4proto = IPPROTO_GRE, - .manip_pkt = gre_manip_pkt, - .in_range = nf_nat_l4proto_in_range, - .unique_tuple = gre_unique_tuple, -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) - .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, -#endif -}; - -static int __init nf_nat_proto_gre_init(void) -{ - return nf_nat_l4proto_register(NFPROTO_IPV4, &gre); -} - -static void __exit nf_nat_proto_gre_fini(void) -{ - nf_nat_l4proto_unregister(NFPROTO_IPV4, &gre); -} - -module_init(nf_nat_proto_gre_init); -module_exit(nf_nat_proto_gre_fini); - -void nf_nat_need_gre(void) -{ - return; -} -EXPORT_SYMBOL_GPL(nf_nat_need_gre); diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c deleted file mode 100644 index 6d7cf1d79baf..000000000000 --- a/net/ipv4/netfilter/nf_nat_proto_icmp.c +++ /dev/null @@ -1,83 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/types.h> -#include <linux/init.h> -#include <linux/export.h> -#include <linux/ip.h> -#include <linux/icmp.h> - -#include <linux/netfilter.h> -#include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_core.h> -#include <net/netfilter/nf_nat_l4proto.h> - -static bool -icmp_in_range(const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype, - const union nf_conntrack_man_proto *min, - const union nf_conntrack_man_proto *max) -{ - return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) && - ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id); -} - -static void -icmp_unique_tuple(const struct nf_nat_l3proto *l3proto, - struct nf_conntrack_tuple *tuple, - const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct) -{ - static u_int16_t id; - unsigned int range_size; - unsigned int i; - - range_size = ntohs(range->max_proto.icmp.id) - - ntohs(range->min_proto.icmp.id) + 1; - /* If no range specified... */ - if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) - range_size = 0xFFFF; - - for (i = 0; ; ++id) { - tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) + - (id % range_size)); - if (++i == range_size || !nf_nat_used_tuple(tuple, ct)) - return; - } - return; -} - -static bool -icmp_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, - unsigned int iphdroff, unsigned int hdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ - struct icmphdr *hdr; - - if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) - return false; - - hdr = (struct icmphdr *)(skb->data + hdroff); - inet_proto_csum_replace2(&hdr->checksum, skb, - hdr->un.echo.id, tuple->src.u.icmp.id, false); - hdr->un.echo.id = tuple->src.u.icmp.id; - return true; -} - -const struct nf_nat_l4proto nf_nat_l4proto_icmp = { - .l4proto = IPPROTO_ICMP, - .manip_pkt = icmp_manip_pkt, - .in_range = icmp_in_range, - .unique_tuple = icmp_unique_tuple, -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) - .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, -#endif -}; diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 200c0c235565..9ea43d5256e0 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -11,7 +11,7 @@ obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o obj-$(CONFIG_IP6_NF_NAT) += ip6table_nat.o -nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o +nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_ipv6-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index ca6d38698b1a..23022447eb49 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -61,22 +61,8 @@ static void nf_nat_ipv6_decode_session(struct sk_buff *skb, } #endif -static bool nf_nat_ipv6_in_range(const struct nf_conntrack_tuple *t, - const struct nf_nat_range2 *range) -{ - return ipv6_addr_cmp(&t->src.u3.in6, &range->min_addr.in6) >= 0 && - ipv6_addr_cmp(&t->src.u3.in6, &range->max_addr.in6) <= 0; -} - -static u32 nf_nat_ipv6_secure_port(const struct nf_conntrack_tuple *t, - __be16 dport) -{ - return secure_ipv6_port_ephemeral(t->src.u3.ip6, t->dst.u3.ip6, dport); -} - static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, - const struct nf_nat_l4proto *l4proto, const struct nf_conntrack_tuple *target, enum nf_nat_manip_type maniptype) { @@ -96,8 +82,8 @@ static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb, goto manip_addr; if ((frag_off & htons(~0x7)) == 0 && - !l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff, - target, maniptype)) + !nf_nat_l4proto_manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff, + target, maniptype)) return false; /* must reload, offset might have changed */ @@ -171,8 +157,6 @@ static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[], static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = { .l3proto = NFPROTO_IPV6, - .secure_port = nf_nat_ipv6_secure_port, - .in_range = nf_nat_ipv6_in_range, .manip_pkt = nf_nat_ipv6_manip_pkt, .csum_update = nf_nat_ipv6_csum_update, .csum_recalc = nf_nat_ipv6_csum_recalc, @@ -196,7 +180,6 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, } *inside; enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); - const struct nf_nat_l4proto *l4proto; struct nf_conntrack_tuple target; unsigned long statusbit; @@ -227,9 +210,8 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, if (!(ct->status & statusbit)) return 1; - l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, inside->ip6.nexthdr); if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6), - l4proto, &ct->tuplehash[!dir].tuple, !manip)) + &ct->tuplehash[!dir].tuple, !manip)) return 0; if (skb->ip_summed != CHECKSUM_PARTIAL) { @@ -244,8 +226,7 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, } nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); - l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, IPPROTO_ICMPV6); - if (!nf_nat_ipv6_manip_pkt(skb, 0, l4proto, &target, manip)) + if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip)) return 0; return 1; @@ -415,26 +396,12 @@ EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv6_unregister_fn); static int __init nf_nat_l3proto_ipv6_init(void) { - int err; - - err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6); - if (err < 0) - goto err1; - err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv6); - if (err < 0) - goto err2; - return err; - -err2: - nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6); -err1: - return err; + return nf_nat_l3proto_register(&nf_nat_l3proto_ipv6); } static void __exit nf_nat_l3proto_ipv6_exit(void) { nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv6); - nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6); } MODULE_LICENSE("GPL"); diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c deleted file mode 100644 index d9bf42ba44fa..000000000000 --- a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2011 Patrick Mchardy <kaber@trash.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Based on Rusty Russell's IPv4 ICMP NAT code. Development of IPv6 - * NAT funded by Astaro. - */ - -#include <linux/types.h> -#include <linux/init.h> -#include <linux/icmpv6.h> - -#include <linux/netfilter.h> -#include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_core.h> -#include <net/netfilter/nf_nat_l3proto.h> -#include <net/netfilter/nf_nat_l4proto.h> - -static bool -icmpv6_in_range(const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype, - const union nf_conntrack_man_proto *min, - const union nf_conntrack_man_proto *max) -{ - return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) && - ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id); -} - -static void -icmpv6_unique_tuple(const struct nf_nat_l3proto *l3proto, - struct nf_conntrack_tuple *tuple, - const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct) -{ - static u16 id; - unsigned int range_size; - unsigned int i; - - range_size = ntohs(range->max_proto.icmp.id) - - ntohs(range->min_proto.icmp.id) + 1; - - if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) - range_size = 0xffff; - - for (i = 0; ; ++id) { - tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) + - (id % range_size)); - if (++i == range_size || !nf_nat_used_tuple(tuple, ct)) - return; - } -} - -static bool -icmpv6_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, - unsigned int iphdroff, unsigned int hdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ - struct icmp6hdr *hdr; - - if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) - return false; - - hdr = (struct icmp6hdr *)(skb->data + hdroff); - l3proto->csum_update(skb, iphdroff, &hdr->icmp6_cksum, - tuple, maniptype); - if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST || - hdr->icmp6_type == ICMPV6_ECHO_REPLY) { - inet_proto_csum_replace2(&hdr->icmp6_cksum, skb, - hdr->icmp6_identifier, - tuple->src.u.icmp.id, false); - hdr->icmp6_identifier = tuple->src.u.icmp.id; - } - return true; -} - -const struct nf_nat_l4proto nf_nat_l4proto_icmpv6 = { - .l4proto = IPPROTO_ICMPV6, - .manip_pkt = icmpv6_manip_pkt, - .in_range = icmpv6_in_range, - .unique_tuple = icmpv6_unique_tuple, -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) - .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, -#endif -}; diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 2ab870ef233a..beb3a69ce1d4 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -403,21 +403,6 @@ config NF_NAT_NEEDED depends on NF_NAT default y -config NF_NAT_PROTO_DCCP - bool - depends on NF_NAT && NF_CT_PROTO_DCCP - default NF_NAT && NF_CT_PROTO_DCCP - -config NF_NAT_PROTO_UDPLITE - bool - depends on NF_NAT && NF_CT_PROTO_UDPLITE - default NF_NAT && NF_CT_PROTO_UDPLITE - -config NF_NAT_PROTO_SCTP - bool - default NF_NAT && NF_CT_PROTO_SCTP - depends on NF_NAT && NF_CT_PROTO_SCTP - config NF_NAT_AMANDA tristate depends on NF_CONNTRACK && NF_NAT diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 4ddf3ef51ece..1ae65a314d7a 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -47,12 +47,7 @@ obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o -nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \ - nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o - -# NAT protocols (nf_nat) -nf_nat-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o -nf_nat-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o +nf_nat-y := nf_nat_core.o nf_nat_proto.o nf_nat_helper.o # generic transport layer logging obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index c00b6a2e8e3c..980000fc3b50 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -219,10 +219,6 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb, struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); u32 ip; - /* MAC can be src only */ - if (!(opt->flags & IPSET_DIM_TWO_SRC)) - return 0; - ip = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC)); if (ip < map->first_ip || ip > map->last_ip) return -IPSET_ERR_BITMAP_RANGE; @@ -233,7 +229,14 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb, return -EINVAL; e.id = ip_to_id(map, ip); - memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN); + + if (opt->flags & IPSET_DIM_ONE_SRC) + ether_addr_copy(e.ether, eth_hdr(skb)->h_source); + else + ether_addr_copy(e.ether, eth_hdr(skb)->h_dest); + + if (is_zero_ether_addr(e.ether)) + return -EINVAL; return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 1577f2f76060..45a257695bef 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -771,11 +771,21 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_put); * The commands are serialized by the nfnl mutex. */ +static inline u8 protocol(const struct nlattr * const tb[]) +{ + return nla_get_u8(tb[IPSET_ATTR_PROTOCOL]); +} + static inline bool protocol_failed(const struct nlattr * const tb[]) { - return !tb[IPSET_ATTR_PROTOCOL] || - nla_get_u8(tb[IPSET_ATTR_PROTOCOL]) != IPSET_PROTOCOL; + return !tb[IPSET_ATTR_PROTOCOL] || protocol(tb) != IPSET_PROTOCOL; +} + +static inline bool +protocol_min_failed(const struct nlattr * const tb[]) +{ + return !tb[IPSET_ATTR_PROTOCOL] || protocol(tb) < IPSET_PROTOCOL_MIN; } static inline u32 @@ -889,7 +899,7 @@ static int ip_set_create(struct net *net, struct sock *ctnl, u32 flags = flag_exist(nlh); int ret = 0; - if (unlikely(protocol_failed(attr) || + if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_SETNAME] || !attr[IPSET_ATTR_TYPENAME] || !attr[IPSET_ATTR_REVISION] || @@ -1027,7 +1037,7 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl, ip_set_id_t i; int ret = 0; - if (unlikely(protocol_failed(attr))) + if (unlikely(protocol_min_failed(attr))) return -IPSET_ERR_PROTOCOL; /* Must wait for flush to be really finished in list:set */ @@ -1105,7 +1115,7 @@ static int ip_set_flush(struct net *net, struct sock *ctnl, struct sk_buff *skb, struct ip_set *s; ip_set_id_t i; - if (unlikely(protocol_failed(attr))) + if (unlikely(protocol_min_failed(attr))) return -IPSET_ERR_PROTOCOL; if (!attr[IPSET_ATTR_SETNAME]) { @@ -1147,7 +1157,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl, ip_set_id_t i; int ret = 0; - if (unlikely(protocol_failed(attr) || + if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_SETNAME] || !attr[IPSET_ATTR_SETNAME2])) return -IPSET_ERR_PROTOCOL; @@ -1196,7 +1206,7 @@ static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb, ip_set_id_t from_id, to_id; char from_name[IPSET_MAXNAMELEN]; - if (unlikely(protocol_failed(attr) || + if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_SETNAME] || !attr[IPSET_ATTR_SETNAME2])) return -IPSET_ERR_PROTOCOL; @@ -1291,6 +1301,7 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst) nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, nlh->nlmsg_len - min_len, ip_set_setname_policy, NULL); + cb->args[IPSET_CB_PROTO] = nla_get_u8(cda[IPSET_ATTR_PROTOCOL]); if (cda[IPSET_ATTR_SETNAME]) { struct ip_set *set; @@ -1392,7 +1403,8 @@ dump_last: ret = -EMSGSIZE; goto release_refcount; } - if (nla_put_u8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) || + if (nla_put_u8(skb, IPSET_ATTR_PROTOCOL, + cb->args[IPSET_CB_PROTO]) || nla_put_string(skb, IPSET_ATTR_SETNAME, set->name)) goto nla_put_failure; if (dump_flags & IPSET_FLAG_LIST_SETNAME) @@ -1407,6 +1419,9 @@ dump_last: nla_put_u8(skb, IPSET_ATTR_REVISION, set->revision)) goto nla_put_failure; + if (cb->args[IPSET_CB_PROTO] > IPSET_PROTOCOL_MIN && + nla_put_net16(skb, IPSET_ATTR_INDEX, htons(index))) + goto nla_put_failure; ret = set->variant->head(set, skb); if (ret < 0) goto release_refcount; @@ -1466,7 +1481,7 @@ static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlattr * const attr[], struct netlink_ext_ack *extack) { - if (unlikely(protocol_failed(attr))) + if (unlikely(protocol_min_failed(attr))) return -IPSET_ERR_PROTOCOL; { @@ -1560,7 +1575,7 @@ static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb, bool use_lineno; int ret = 0; - if (unlikely(protocol_failed(attr) || + if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_SETNAME] || !((attr[IPSET_ATTR_DATA] != NULL) ^ (attr[IPSET_ATTR_ADT] != NULL)) || @@ -1615,7 +1630,7 @@ static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb, bool use_lineno; int ret = 0; - if (unlikely(protocol_failed(attr) || + if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_SETNAME] || !((attr[IPSET_ATTR_DATA] != NULL) ^ (attr[IPSET_ATTR_ADT] != NULL)) || @@ -1667,7 +1682,7 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb, struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; int ret = 0; - if (unlikely(protocol_failed(attr) || + if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_SETNAME] || !attr[IPSET_ATTR_DATA] || !flag_nested(attr[IPSET_ATTR_DATA]))) @@ -1704,7 +1719,7 @@ static int ip_set_header(struct net *net, struct sock *ctnl, struct nlmsghdr *nlh2; int ret = 0; - if (unlikely(protocol_failed(attr) || + if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_SETNAME])) return -IPSET_ERR_PROTOCOL; @@ -1720,7 +1735,7 @@ static int ip_set_header(struct net *net, struct sock *ctnl, IPSET_CMD_HEADER); if (!nlh2) goto nlmsg_failure; - if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) || + if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) || nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name) || nla_put_string(skb2, IPSET_ATTR_TYPENAME, set->type->name) || nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) || @@ -1761,7 +1776,7 @@ static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb, const char *typename; int ret = 0; - if (unlikely(protocol_failed(attr) || + if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_TYPENAME] || !attr[IPSET_ATTR_FAMILY])) return -IPSET_ERR_PROTOCOL; @@ -1780,7 +1795,7 @@ static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb, IPSET_CMD_TYPE); if (!nlh2) goto nlmsg_failure; - if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) || + if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) || nla_put_string(skb2, IPSET_ATTR_TYPENAME, typename) || nla_put_u8(skb2, IPSET_ATTR_FAMILY, family) || nla_put_u8(skb2, IPSET_ATTR_REVISION, max) || @@ -1831,6 +1846,111 @@ static int ip_set_protocol(struct net *net, struct sock *ctnl, goto nlmsg_failure; if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL)) goto nla_put_failure; + if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL_MIN, IPSET_PROTOCOL_MIN)) + goto nla_put_failure; + nlmsg_end(skb2, nlh2); + + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); + if (ret < 0) + return ret; + + return 0; + +nla_put_failure: + nlmsg_cancel(skb2, nlh2); +nlmsg_failure: + kfree_skb(skb2); + return -EMSGSIZE; +} + +/* Get set by name or index, from userspace */ + +static int ip_set_byname(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const attr[], + struct netlink_ext_ack *extack) +{ + struct ip_set_net *inst = ip_set_pernet(net); + struct sk_buff *skb2; + struct nlmsghdr *nlh2; + ip_set_id_t id = IPSET_INVALID_ID; + const struct ip_set *set; + int ret = 0; + + if (unlikely(protocol_failed(attr) || + !attr[IPSET_ATTR_SETNAME])) + return -IPSET_ERR_PROTOCOL; + + set = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &id); + if (id == IPSET_INVALID_ID) + return -ENOENT; + + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb2) + return -ENOMEM; + + nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, + IPSET_CMD_GET_BYNAME); + if (!nlh2) + goto nlmsg_failure; + if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) || + nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) || + nla_put_net16(skb2, IPSET_ATTR_INDEX, htons(id))) + goto nla_put_failure; + nlmsg_end(skb2, nlh2); + + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); + if (ret < 0) + return ret; + + return 0; + +nla_put_failure: + nlmsg_cancel(skb2, nlh2); +nlmsg_failure: + kfree_skb(skb2); + return -EMSGSIZE; +} + +static const struct nla_policy ip_set_index_policy[IPSET_ATTR_CMD_MAX + 1] = { + [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, + [IPSET_ATTR_INDEX] = { .type = NLA_U16 }, +}; + +static int ip_set_byindex(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const attr[], + struct netlink_ext_ack *extack) +{ + struct ip_set_net *inst = ip_set_pernet(net); + struct sk_buff *skb2; + struct nlmsghdr *nlh2; + ip_set_id_t id = IPSET_INVALID_ID; + const struct ip_set *set; + int ret = 0; + + if (unlikely(protocol_failed(attr) || + !attr[IPSET_ATTR_INDEX])) + return -IPSET_ERR_PROTOCOL; + + id = ip_set_get_h16(attr[IPSET_ATTR_INDEX]); + if (id >= inst->ip_set_max) + return -ENOENT; + set = ip_set(inst, id); + if (set == NULL) + return -ENOENT; + + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb2) + return -ENOMEM; + + nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, + IPSET_CMD_GET_BYINDEX); + if (!nlh2) + goto nlmsg_failure; + if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) || + nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name)) + goto nla_put_failure; nlmsg_end(skb2, nlh2); ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); @@ -1916,6 +2036,16 @@ static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = { .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_protocol_policy, }, + [IPSET_CMD_GET_BYNAME] = { + .call = ip_set_byname, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_setname_policy, + }, + [IPSET_CMD_GET_BYINDEX] = { + .call = ip_set_byindex, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_index_policy, + }, }; static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = { @@ -1961,7 +2091,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) goto done; } - if (req_version->version != IPSET_PROTOCOL) { + if (req_version->version < IPSET_PROTOCOL_MIN) { ret = -EPROTO; goto done; } @@ -2024,9 +2154,11 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) } nfnl_lock(NFNL_SUBSYS_IPSET); set = ip_set(inst, req_get->set.index); - strncpy(req_get->set.name, set ? set->name : "", - IPSET_MAXNAMELEN); + ret = strscpy(req_get->set.name, set ? set->name : "", + IPSET_MAXNAMELEN); nfnl_unlock(NFNL_SUBSYS_IPSET); + if (ret < 0) + goto done; goto copy; } default: diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index e287da68d5fa..2c9609929c71 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -67,7 +67,7 @@ tune_ahash_max(u8 curr, u32 multi) /* A hash bucket */ struct hbucket { - struct rcu_head rcu; /* for call_rcu_bh */ + struct rcu_head rcu; /* for call_rcu */ /* Which positions are used in the array */ DECLARE_BITMAP(used, AHASH_MAX_TUNED); u8 size; /* size of the array */ @@ -664,7 +664,7 @@ retry: spin_unlock_bh(&set->lock); /* Give time to other readers of the set */ - synchronize_rcu_bh(); + synchronize_rcu(); pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name, orig->htable_bits, orig, t->htable_bits, t); diff --git a/net/netfilter/ipset/ip_set_hash_ipmac.c b/net/netfilter/ipset/ip_set_hash_ipmac.c index 1ab5ed2f6839..c830c68142ff 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmac.c +++ b/net/netfilter/ipset/ip_set_hash_ipmac.c @@ -36,9 +36,6 @@ MODULE_ALIAS("ip_set_hash:ip,mac"); /* Type specific function prefix */ #define HTYPE hash_ipmac -/* Zero valued element is not supported */ -static const unsigned char invalid_ether[ETH_ALEN] = { 0 }; - /* IPv4 variant */ /* Member elements */ @@ -103,8 +100,12 @@ hash_ipmac4_kadt(struct ip_set *set, const struct sk_buff *skb, (skb_mac_header(skb) + ETH_HLEN) > skb->data) return -EINVAL; - memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN); - if (ether_addr_equal(e.ether, invalid_ether)) + if (opt->flags & IPSET_DIM_ONE_SRC) + ether_addr_copy(e.ether, eth_hdr(skb)->h_source); + else + ether_addr_copy(e.ether, eth_hdr(skb)->h_dest); + + if (is_zero_ether_addr(e.ether)) return -EINVAL; ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); @@ -140,7 +141,7 @@ hash_ipmac4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret) return ret; memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN); - if (ether_addr_equal(e.ether, invalid_ether)) + if (is_zero_ether_addr(e.ether)) return -IPSET_ERR_HASH_ELEM; return adtfn(set, &e, &ext, &ext, flags); @@ -211,16 +212,16 @@ hash_ipmac6_kadt(struct ip_set *set, const struct sk_buff *skb, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); - /* MAC can be src only */ - if (!(opt->flags & IPSET_DIM_TWO_SRC)) - return 0; - if (skb_mac_header(skb) < skb->head || (skb_mac_header(skb) + ETH_HLEN) > skb->data) return -EINVAL; - memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN); - if (ether_addr_equal(e.ether, invalid_ether)) + if (opt->flags & IPSET_DIM_ONE_SRC) + ether_addr_copy(e.ether, eth_hdr(skb)->h_source); + else + ether_addr_copy(e.ether, eth_hdr(skb)->h_dest); + + if (is_zero_ether_addr(e.ether)) return -EINVAL; ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); @@ -260,7 +261,7 @@ hash_ipmac6_uadt(struct ip_set *set, struct nlattr *tb[], return ret; memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN); - if (ether_addr_equal(e.ether, invalid_ether)) + if (is_zero_ether_addr(e.ether)) return -IPSET_ERR_HASH_ELEM; return adtfn(set, &e, &ext, &ext, flags); diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c index f9d5a2a1e3d0..4fe5f243d0a3 100644 --- a/net/netfilter/ipset/ip_set_hash_mac.c +++ b/net/netfilter/ipset/ip_set_hash_mac.c @@ -81,15 +81,15 @@ hash_mac4_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_mac4_elem e = { { .foo[0] = 0, .foo[1] = 0 } }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); - /* MAC can be src only */ - if (!(opt->flags & IPSET_DIM_ONE_SRC)) - return 0; - if (skb_mac_header(skb) < skb->head || (skb_mac_header(skb) + ETH_HLEN) > skb->data) return -EINVAL; - ether_addr_copy(e.ether, eth_hdr(skb)->h_source); + if (opt->flags & IPSET_DIM_ONE_SRC) + ether_addr_copy(e.ether, eth_hdr(skb)->h_source); + else + ether_addr_copy(e.ether, eth_hdr(skb)->h_dest); + if (is_zero_ether_addr(e.ether)) return -EINVAL; return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index 1d66de5151b2..49e523cc49d0 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -25,102 +25,15 @@ static bool nf_ct_acct __read_mostly; module_param_named(acct, nf_ct_acct, bool, 0644); MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting."); -#ifdef CONFIG_SYSCTL -static struct ctl_table acct_sysctl_table[] = { - { - .procname = "nf_conntrack_acct", - .data = &init_net.ct.sysctl_acct, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - {} -}; -#endif /* CONFIG_SYSCTL */ - -unsigned int -seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir) -{ - struct nf_conn_acct *acct; - struct nf_conn_counter *counter; - - acct = nf_conn_acct_find(ct); - if (!acct) - return 0; - - counter = acct->counter; - seq_printf(s, "packets=%llu bytes=%llu ", - (unsigned long long)atomic64_read(&counter[dir].packets), - (unsigned long long)atomic64_read(&counter[dir].bytes)); - - return 0; -}; -EXPORT_SYMBOL_GPL(seq_print_acct); - static const struct nf_ct_ext_type acct_extend = { .len = sizeof(struct nf_conn_acct), .align = __alignof__(struct nf_conn_acct), .id = NF_CT_EXT_ACCT, }; -#ifdef CONFIG_SYSCTL -static int nf_conntrack_acct_init_sysctl(struct net *net) -{ - struct ctl_table *table; - - table = kmemdup(acct_sysctl_table, sizeof(acct_sysctl_table), - GFP_KERNEL); - if (!table) - goto out; - - table[0].data = &net->ct.sysctl_acct; - - /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) - table[0].procname = NULL; - - net->ct.acct_sysctl_header = register_net_sysctl(net, "net/netfilter", - table); - if (!net->ct.acct_sysctl_header) { - pr_err("can't register to sysctl\n"); - goto out_register; - } - return 0; - -out_register: - kfree(table); -out: - return -ENOMEM; -} - -static void nf_conntrack_acct_fini_sysctl(struct net *net) -{ - struct ctl_table *table; - - table = net->ct.acct_sysctl_header->ctl_table_arg; - unregister_net_sysctl_table(net->ct.acct_sysctl_header); - kfree(table); -} -#else -static int nf_conntrack_acct_init_sysctl(struct net *net) -{ - return 0; -} - -static void nf_conntrack_acct_fini_sysctl(struct net *net) -{ -} -#endif - -int nf_conntrack_acct_pernet_init(struct net *net) +void nf_conntrack_acct_pernet_init(struct net *net) { net->ct.sysctl_acct = nf_ct_acct; - return nf_conntrack_acct_init_sysctl(net); -} - -void nf_conntrack_acct_pernet_fini(struct net *net) -{ - nf_conntrack_acct_fini_sysctl(net); } int nf_conntrack_acct_init(void) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index e92e749aff53..e87c21e47efe 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2110,10 +2110,7 @@ i_see_dead_people: list_for_each_entry(net, net_exit_list, exit_list) { nf_conntrack_proto_pernet_fini(net); - nf_conntrack_helper_pernet_fini(net); nf_conntrack_ecache_pernet_fini(net); - nf_conntrack_tstamp_pernet_fini(net); - nf_conntrack_acct_pernet_fini(net); nf_conntrack_expect_pernet_fini(net); free_percpu(net->ct.stat); free_percpu(net->ct.pcpu_lists); @@ -2410,32 +2407,19 @@ int nf_conntrack_init_net(struct net *net) ret = nf_conntrack_expect_pernet_init(net); if (ret < 0) goto err_expect; - ret = nf_conntrack_acct_pernet_init(net); - if (ret < 0) - goto err_acct; - ret = nf_conntrack_tstamp_pernet_init(net); - if (ret < 0) - goto err_tstamp; - ret = nf_conntrack_ecache_pernet_init(net); - if (ret < 0) - goto err_ecache; - ret = nf_conntrack_helper_pernet_init(net); - if (ret < 0) - goto err_helper; + + nf_conntrack_acct_pernet_init(net); + nf_conntrack_tstamp_pernet_init(net); + nf_conntrack_ecache_pernet_init(net); + nf_conntrack_helper_pernet_init(net); + ret = nf_conntrack_proto_pernet_init(net); if (ret < 0) goto err_proto; return 0; err_proto: - nf_conntrack_helper_pernet_fini(net); -err_helper: nf_conntrack_ecache_pernet_fini(net); -err_ecache: - nf_conntrack_tstamp_pernet_fini(net); -err_tstamp: - nf_conntrack_acct_pernet_fini(net); -err_acct: nf_conntrack_expect_pernet_fini(net); err_expect: free_percpu(net->ct.stat); diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index c11822a7d2bf..3d042f8ff183 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -336,85 +336,21 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); #define NF_CT_EVENTS_DEFAULT 1 static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT; -#ifdef CONFIG_SYSCTL -static struct ctl_table event_sysctl_table[] = { - { - .procname = "nf_conntrack_events", - .data = &init_net.ct.sysctl_events, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - {} -}; -#endif /* CONFIG_SYSCTL */ - static const struct nf_ct_ext_type event_extend = { .len = sizeof(struct nf_conntrack_ecache), .align = __alignof__(struct nf_conntrack_ecache), .id = NF_CT_EXT_ECACHE, }; -#ifdef CONFIG_SYSCTL -static int nf_conntrack_event_init_sysctl(struct net *net) -{ - struct ctl_table *table; - - table = kmemdup(event_sysctl_table, sizeof(event_sysctl_table), - GFP_KERNEL); - if (!table) - goto out; - - table[0].data = &net->ct.sysctl_events; - - /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) - table[0].procname = NULL; - - net->ct.event_sysctl_header = - register_net_sysctl(net, "net/netfilter", table); - if (!net->ct.event_sysctl_header) { - pr_err("can't register to sysctl\n"); - goto out_register; - } - return 0; - -out_register: - kfree(table); -out: - return -ENOMEM; -} - -static void nf_conntrack_event_fini_sysctl(struct net *net) -{ - struct ctl_table *table; - - table = net->ct.event_sysctl_header->ctl_table_arg; - unregister_net_sysctl_table(net->ct.event_sysctl_header); - kfree(table); -} -#else -static int nf_conntrack_event_init_sysctl(struct net *net) -{ - return 0; -} - -static void nf_conntrack_event_fini_sysctl(struct net *net) -{ -} -#endif /* CONFIG_SYSCTL */ - -int nf_conntrack_ecache_pernet_init(struct net *net) +void nf_conntrack_ecache_pernet_init(struct net *net) { net->ct.sysctl_events = nf_ct_events; INIT_DELAYED_WORK(&net->ct.ecache_dwork, ecache_work); - return nf_conntrack_event_init_sysctl(net); } void nf_conntrack_ecache_pernet_fini(struct net *net) { cancel_delayed_work_sync(&net->ct.ecache_dwork); - nf_conntrack_event_fini_sysctl(net); } int nf_conntrack_ecache_init(void) diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index e24b762ffa1d..274baf1dab87 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -42,67 +42,6 @@ module_param_named(nf_conntrack_helper, nf_ct_auto_assign_helper, bool, 0644); MODULE_PARM_DESC(nf_conntrack_helper, "Enable automatic conntrack helper assignment (default 0)"); -#ifdef CONFIG_SYSCTL -static struct ctl_table helper_sysctl_table[] = { - { - .procname = "nf_conntrack_helper", - .data = &init_net.ct.sysctl_auto_assign_helper, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - {} -}; - -static int nf_conntrack_helper_init_sysctl(struct net *net) -{ - struct ctl_table *table; - - table = kmemdup(helper_sysctl_table, sizeof(helper_sysctl_table), - GFP_KERNEL); - if (!table) - goto out; - - table[0].data = &net->ct.sysctl_auto_assign_helper; - - /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) - table[0].procname = NULL; - - net->ct.helper_sysctl_header = - register_net_sysctl(net, "net/netfilter", table); - - if (!net->ct.helper_sysctl_header) { - pr_err("nf_conntrack_helper: can't register to sysctl.\n"); - goto out_register; - } - return 0; - -out_register: - kfree(table); -out: - return -ENOMEM; -} - -static void nf_conntrack_helper_fini_sysctl(struct net *net) -{ - struct ctl_table *table; - - table = net->ct.helper_sysctl_header->ctl_table_arg; - unregister_net_sysctl_table(net->ct.helper_sysctl_header); - kfree(table); -} -#else -static int nf_conntrack_helper_init_sysctl(struct net *net) -{ - return 0; -} - -static void nf_conntrack_helper_fini_sysctl(struct net *net) -{ -} -#endif /* CONFIG_SYSCTL */ - /* Stupid hash, but collision free for the default registrations of the * helpers currently in the kernel. */ static unsigned int helper_hash(const struct nf_conntrack_tuple *tuple) @@ -533,16 +472,10 @@ static const struct nf_ct_ext_type helper_extend = { .id = NF_CT_EXT_HELPER, }; -int nf_conntrack_helper_pernet_init(struct net *net) +void nf_conntrack_helper_pernet_init(struct net *net) { net->ct.auto_assign_helper_warned = false; net->ct.sysctl_auto_assign_helper = nf_ct_auto_assign_helper; - return nf_conntrack_helper_init_sysctl(net); -} - -void nf_conntrack_helper_pernet_fini(struct net *net) -{ - nf_conntrack_helper_fini_sysctl(net); } int nf_conntrack_helper_init(void) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 4ae8e528943a..1213beb5a714 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -47,7 +47,6 @@ #include <net/netfilter/nf_conntrack_synproxy.h> #ifdef CONFIG_NF_NAT_NEEDED #include <net/netfilter/nf_nat_core.h> -#include <net/netfilter/nf_nat_l4proto.h> #include <net/netfilter/nf_nat_helper.h> #endif @@ -1688,6 +1687,22 @@ static int ctnetlink_change_timeout(struct nf_conn *ct, return 0; } +#if defined(CONFIG_NF_CONNTRACK_MARK) +static void ctnetlink_change_mark(struct nf_conn *ct, + const struct nlattr * const cda[]) +{ + u32 mark, newmark, mask = 0; + + if (cda[CTA_MARK_MASK]) + mask = ~ntohl(nla_get_be32(cda[CTA_MARK_MASK])); + + mark = ntohl(nla_get_be32(cda[CTA_MARK])); + newmark = (ct->mark & mask) ^ mark; + if (newmark != ct->mark) + ct->mark = newmark; +} +#endif + static const struct nla_policy protoinfo_policy[CTA_PROTOINFO_MAX+1] = { [CTA_PROTOINFO_TCP] = { .type = NLA_NESTED }, [CTA_PROTOINFO_DCCP] = { .type = NLA_NESTED }, @@ -1883,7 +1898,7 @@ ctnetlink_change_conntrack(struct nf_conn *ct, #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK]) - ct->mark = ntohl(nla_get_be32(cda[CTA_MARK])); + ctnetlink_change_mark(ct, cda); #endif if (cda[CTA_SEQ_ADJ_ORIG] || cda[CTA_SEQ_ADJ_REPLY]) { @@ -2027,7 +2042,7 @@ ctnetlink_create_conntrack(struct net *net, #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK]) - ct->mark = ntohl(nla_get_be32(cda[CTA_MARK])); + ctnetlink_change_mark(ct, cda); #endif /* setup master conntrack: this is a confirmed expectation */ @@ -2524,14 +2539,7 @@ ctnetlink_glue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct) } #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK]) { - u32 mask = 0, mark, newmark; - if (cda[CTA_MARK_MASK]) - mask = ~ntohl(nla_get_be32(cda[CTA_MARK_MASK])); - - mark = ntohl(nla_get_be32(cda[CTA_MARK])); - newmark = (ct->mark & mask) ^ mark; - if (newmark != ct->mark) - ct->mark = newmark; + ctnetlink_change_mark(ct, cda); } #endif return 0; diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 40643af7137e..859f5d07a915 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -175,8 +175,7 @@ static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, static int nf_ct_l4proto_register_sysctl(struct net *net, - struct nf_proto_net *pn, - const struct nf_conntrack_l4proto *l4proto) + struct nf_proto_net *pn) { int err = 0; @@ -198,9 +197,7 @@ int nf_ct_l4proto_register_sysctl(struct net *net, } static -void nf_ct_l4proto_unregister_sysctl(struct net *net, - struct nf_proto_net *pn, - const struct nf_conntrack_l4proto *l4proto) +void nf_ct_l4proto_unregister_sysctl(struct nf_proto_net *pn) { #ifdef CONFIG_SYSCTL if (pn->ctl_table_header != NULL) @@ -252,7 +249,7 @@ int nf_ct_l4proto_pernet_register_one(struct net *net, if (pn == NULL) goto out; - ret = nf_ct_l4proto_register_sysctl(net, pn, l4proto); + ret = nf_ct_l4proto_register_sysctl(net, pn); if (ret < 0) goto out; @@ -296,7 +293,7 @@ void nf_ct_l4proto_pernet_unregister_one(struct net *net, return; pn->users--; - nf_ct_l4proto_unregister_sysctl(net, pn, l4proto); + nf_ct_l4proto_unregister_sysctl(pn); } EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one); @@ -946,16 +943,14 @@ int nf_conntrack_proto_pernet_init(struct net *net) if (err < 0) return err; err = nf_ct_l4proto_register_sysctl(net, - pn, - &nf_conntrack_l4proto_generic); + pn); if (err < 0) return err; err = nf_ct_l4proto_pernet_register(net, builtin_l4proto, ARRAY_SIZE(builtin_l4proto)); if (err < 0) { - nf_ct_l4proto_unregister_sysctl(net, pn, - &nf_conntrack_l4proto_generic); + nf_ct_l4proto_unregister_sysctl(pn); return err; } @@ -971,9 +966,7 @@ void nf_conntrack_proto_pernet_fini(struct net *net) nf_ct_l4proto_pernet_unregister(net, builtin_l4proto, ARRAY_SIZE(builtin_l4proto)); pn->users--; - nf_ct_l4proto_unregister_sysctl(net, - pn, - &nf_conntrack_l4proto_generic); + nf_ct_l4proto_unregister_sysctl(pn); } diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 2a5e56c6d8d9..8899b51aad44 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -320,9 +320,49 @@ gre_timeout_nla_policy[CTA_TIMEOUT_GRE_MAX+1] = { }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ +#ifdef CONFIG_SYSCTL +static struct ctl_table gre_sysctl_table[] = { + { + .procname = "nf_conntrack_gre_timeout", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { + .procname = "nf_conntrack_gre_timeout_stream", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + {} +}; +#endif + +static int gre_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *nf, + struct netns_proto_gre *net_gre) +{ +#ifdef CONFIG_SYSCTL + int i; + + if (nf->ctl_table) + return 0; + + nf->ctl_table = kmemdup(gre_sysctl_table, + sizeof(gre_sysctl_table), + GFP_KERNEL); + if (!nf->ctl_table) + return -ENOMEM; + + for (i = 0; i < GRE_CT_MAX; i++) + nf->ctl_table[i].data = &net_gre->gre_timeouts[i]; +#endif + return 0; +} + static int gre_init_net(struct net *net) { struct netns_proto_gre *net_gre = gre_pernet(net); + struct nf_proto_net *nf = &net_gre->nf; int i; rwlock_init(&net_gre->keymap_lock); @@ -330,7 +370,7 @@ static int gre_init_net(struct net *net) for (i = 0; i < GRE_CT_MAX; i++) net_gre->gre_timeouts[i] = gre_timeouts[i]; - return 0; + return gre_kmemdup_sysctl_table(net, nf, net_gre); } /* protocol helper struct */ diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index c879d8d78cfd..b4f5d5e82031 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -29,7 +29,7 @@ static const unsigned int udp_timeouts[UDP_CT_MAX] = { [UDP_CT_UNREPLIED] = 30*HZ, - [UDP_CT_REPLIED] = 180*HZ, + [UDP_CT_REPLIED] = 120*HZ, }; static unsigned int *udp_get_timeouts(struct net *net) @@ -100,11 +100,21 @@ static int udp_packet(struct nf_conn *ct, if (!timeouts) timeouts = udp_get_timeouts(nf_ct_net(ct)); + if (!nf_ct_is_confirmed(ct)) + ct->proto.udp.stream_ts = 2 * HZ + jiffies; + /* If we've seen traffic both ways, this is some kind of UDP - stream. Extend timeout. */ + * stream. Set Assured. + */ if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { - nf_ct_refresh_acct(ct, ctinfo, skb, - timeouts[UDP_CT_REPLIED]); + unsigned long extra = timeouts[UDP_CT_UNREPLIED]; + + /* Still active after two seconds? Extend timeout. */ + if (time_after(jiffies, ct->proto.udp.stream_ts)) + extra = timeouts[UDP_CT_REPLIED]; + + nf_ct_refresh_acct(ct, ctinfo, skb, extra); + /* Also, more likely to be important, and not a probe */ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_ASSURED, ct); diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 463d17d349c1..b6177fd73304 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -267,6 +267,24 @@ static const char* l4proto_name(u16 proto) return "unknown"; } +static unsigned int +seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir) +{ + struct nf_conn_acct *acct; + struct nf_conn_counter *counter; + + acct = nf_conn_acct_find(ct); + if (!acct) + return 0; + + counter = acct->counter; + seq_printf(s, "packets=%llu bytes=%llu ", + (unsigned long long)atomic64_read(&counter[dir].packets), + (unsigned long long)atomic64_read(&counter[dir].bytes)); + + return 0; +} + /* return 0 on success, 1 in case of error */ static int ct_seq_show(struct seq_file *s, void *v) { @@ -514,36 +532,53 @@ nf_conntrack_hash_sysctl(struct ctl_table *table, int write, static struct ctl_table_header *nf_ct_netfilter_header; +enum nf_ct_sysctl_index { + NF_SYSCTL_CT_MAX, + NF_SYSCTL_CT_COUNT, + NF_SYSCTL_CT_BUCKETS, + NF_SYSCTL_CT_CHECKSUM, + NF_SYSCTL_CT_LOG_INVALID, + NF_SYSCTL_CT_EXPECT_MAX, + NF_SYSCTL_CT_ACCT, + NF_SYSCTL_CT_HELPER, +#ifdef CONFIG_NF_CONNTRACK_EVENTS + NF_SYSCTL_CT_EVENTS, +#endif +#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP + NF_SYSCTL_CT_TIMESTAMP, +#endif +}; + static struct ctl_table nf_ct_sysctl_table[] = { - { + [NF_SYSCTL_CT_MAX] = { .procname = "nf_conntrack_max", .data = &nf_conntrack_max, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, - { + [NF_SYSCTL_CT_COUNT] = { .procname = "nf_conntrack_count", .data = &init_net.ct.count, .maxlen = sizeof(int), .mode = 0444, .proc_handler = proc_dointvec, }, - { + [NF_SYSCTL_CT_BUCKETS] = { .procname = "nf_conntrack_buckets", .data = &nf_conntrack_htable_size_user, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = nf_conntrack_hash_sysctl, }, - { + [NF_SYSCTL_CT_CHECKSUM] = { .procname = "nf_conntrack_checksum", .data = &init_net.ct.sysctl_checksum, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, }, - { + [NF_SYSCTL_CT_LOG_INVALID] = { .procname = "nf_conntrack_log_invalid", .data = &init_net.ct.sysctl_log_invalid, .maxlen = sizeof(unsigned int), @@ -552,13 +587,45 @@ static struct ctl_table nf_ct_sysctl_table[] = { .extra1 = &log_invalid_proto_min, .extra2 = &log_invalid_proto_max, }, - { + [NF_SYSCTL_CT_EXPECT_MAX] = { .procname = "nf_conntrack_expect_max", .data = &nf_ct_expect_max, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, + [NF_SYSCTL_CT_ACCT] = { + .procname = "nf_conntrack_acct", + .data = &init_net.ct.sysctl_acct, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + [NF_SYSCTL_CT_HELPER] = { + .procname = "nf_conntrack_helper", + .data = &init_net.ct.sysctl_auto_assign_helper, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#ifdef CONFIG_NF_CONNTRACK_EVENTS + [NF_SYSCTL_CT_EVENTS] = { + .procname = "nf_conntrack_events", + .data = &init_net.ct.sysctl_events, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif +#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP + [NF_SYSCTL_CT_TIMESTAMP] = { + .procname = "nf_conntrack_timestamp", + .data = &init_net.ct.sysctl_tstamp, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif { } }; @@ -582,16 +649,28 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) if (!table) goto out_kmemdup; - table[1].data = &net->ct.count; - table[3].data = &net->ct.sysctl_checksum; - table[4].data = &net->ct.sysctl_log_invalid; + table[NF_SYSCTL_CT_COUNT].data = &net->ct.count; + table[NF_SYSCTL_CT_CHECKSUM].data = &net->ct.sysctl_checksum; + table[NF_SYSCTL_CT_LOG_INVALID].data = &net->ct.sysctl_log_invalid; +#ifdef CONFIG_NF_CONNTRACK_EVENTS + table[NF_SYSCTL_CT_EVENTS].data = &net->ct.sysctl_events; +#endif /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) - table[0].procname = NULL; + if (net->user_ns != &init_user_ns) { + table[NF_SYSCTL_CT_MAX].procname = NULL; + table[NF_SYSCTL_CT_ACCT].procname = NULL; + table[NF_SYSCTL_CT_HELPER].procname = NULL; +#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP + table[NF_SYSCTL_CT_TIMESTAMP].procname = NULL; +#endif +#ifdef CONFIG_NF_CONNTRACK_EVENTS + table[NF_SYSCTL_CT_EVENTS].procname = NULL; +#endif + } if (!net_eq(&init_net, net)) - table[2].mode = 0444; + table[NF_SYSCTL_CT_BUCKETS].mode = 0444; net->ct.sysctl_header = register_net_sysctl(net, "net/netfilter", table); if (!net->ct.sysctl_header) diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c index 56766cb26e40..705b912bd91f 100644 --- a/net/netfilter/nf_conntrack_timestamp.c +++ b/net/netfilter/nf_conntrack_timestamp.c @@ -22,83 +22,15 @@ static bool nf_ct_tstamp __read_mostly; module_param_named(tstamp, nf_ct_tstamp, bool, 0644); MODULE_PARM_DESC(tstamp, "Enable connection tracking flow timestamping."); -#ifdef CONFIG_SYSCTL -static struct ctl_table tstamp_sysctl_table[] = { - { - .procname = "nf_conntrack_timestamp", - .data = &init_net.ct.sysctl_tstamp, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - {} -}; -#endif /* CONFIG_SYSCTL */ - static const struct nf_ct_ext_type tstamp_extend = { .len = sizeof(struct nf_conn_tstamp), .align = __alignof__(struct nf_conn_tstamp), .id = NF_CT_EXT_TSTAMP, }; -#ifdef CONFIG_SYSCTL -static int nf_conntrack_tstamp_init_sysctl(struct net *net) -{ - struct ctl_table *table; - - table = kmemdup(tstamp_sysctl_table, sizeof(tstamp_sysctl_table), - GFP_KERNEL); - if (!table) - goto out; - - table[0].data = &net->ct.sysctl_tstamp; - - /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) - table[0].procname = NULL; - - net->ct.tstamp_sysctl_header = register_net_sysctl(net, "net/netfilter", - table); - if (!net->ct.tstamp_sysctl_header) { - pr_err("can't register to sysctl\n"); - goto out_register; - } - return 0; - -out_register: - kfree(table); -out: - return -ENOMEM; -} - -static void nf_conntrack_tstamp_fini_sysctl(struct net *net) -{ - struct ctl_table *table; - - table = net->ct.tstamp_sysctl_header->ctl_table_arg; - unregister_net_sysctl_table(net->ct.tstamp_sysctl_header); - kfree(table); -} -#else -static int nf_conntrack_tstamp_init_sysctl(struct net *net) -{ - return 0; -} - -static void nf_conntrack_tstamp_fini_sysctl(struct net *net) -{ -} -#endif - -int nf_conntrack_tstamp_pernet_init(struct net *net) +void nf_conntrack_tstamp_pernet_init(struct net *net) { net->ct.sysctl_tstamp = nf_ct_tstamp; - return nf_conntrack_tstamp_init_sysctl(net); -} - -void nf_conntrack_tstamp_pernet_fini(struct net *net) -{ - nf_conntrack_tstamp_fini_sysctl(net); } int nf_conntrack_tstamp_init(void) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index b7a4816add76..fa0844e2a68d 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -247,9 +247,10 @@ flow_offload_lookup(struct nf_flowtable *flow_table, } EXPORT_SYMBOL_GPL(flow_offload_lookup); -int nf_flow_table_iterate(struct nf_flowtable *flow_table, - void (*iter)(struct flow_offload *flow, void *data), - void *data) +static int +nf_flow_table_iterate(struct nf_flowtable *flow_table, + void (*iter)(struct flow_offload *flow, void *data), + void *data) { struct flow_offload_tuple_rhash *tuplehash; struct rhashtable_iter hti; @@ -279,40 +280,19 @@ int nf_flow_table_iterate(struct nf_flowtable *flow_table, return err; } -EXPORT_SYMBOL_GPL(nf_flow_table_iterate); static inline bool nf_flow_has_expired(const struct flow_offload *flow) { return (__s32)(flow->timeout - (u32)jiffies) <= 0; } -static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table) +static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data) { - struct flow_offload_tuple_rhash *tuplehash; - struct rhashtable_iter hti; - struct flow_offload *flow; - - rhashtable_walk_enter(&flow_table->rhashtable, &hti); - rhashtable_walk_start(&hti); + struct nf_flowtable *flow_table = data; - while ((tuplehash = rhashtable_walk_next(&hti))) { - if (IS_ERR(tuplehash)) { - if (PTR_ERR(tuplehash) != -EAGAIN) - break; - continue; - } - if (tuplehash->tuple.dir) - continue; - - flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); - - if (nf_flow_has_expired(flow) || - (flow->flags & (FLOW_OFFLOAD_DYING | - FLOW_OFFLOAD_TEARDOWN))) - flow_offload_del(flow_table, flow); - } - rhashtable_walk_stop(&hti); - rhashtable_walk_exit(&hti); + if (nf_flow_has_expired(flow) || + (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))) + flow_offload_del(flow_table, flow); } static void nf_flow_offload_work_gc(struct work_struct *work) @@ -320,7 +300,7 @@ static void nf_flow_offload_work_gc(struct work_struct *work) struct nf_flowtable *flow_table; flow_table = container_of(work, struct nf_flowtable, gc_work.work); - nf_flow_offload_gc_step(flow_table); + nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table); queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); } @@ -504,7 +484,7 @@ void nf_flow_table_free(struct nf_flowtable *flow_table) mutex_unlock(&flowtable_lock); cancel_delayed_work_sync(&flow_table->gc_work); nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); - nf_flow_offload_gc_step(flow_table); + nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table); rhashtable_destroy(&flow_table->rhashtable); } EXPORT_SYMBOL_GPL(nf_flow_table_free); diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 2268b10a9dcf..d159e9e7835b 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -23,7 +23,6 @@ #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_l3proto.h> -#include <net/netfilter/nf_nat_l4proto.h> #include <net/netfilter/nf_nat_core.h> #include <net/netfilter/nf_nat_helper.h> #include <net/netfilter/nf_conntrack_helper.h> @@ -38,8 +37,6 @@ static spinlock_t nf_nat_locks[CONNTRACK_LOCKS]; static DEFINE_MUTEX(nf_nat_proto_mutex); static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO] __read_mostly; -static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO] - __read_mostly; static unsigned int nat_net_id __read_mostly; static struct hlist_head *nf_nat_bysource __read_mostly; @@ -67,13 +64,6 @@ __nf_nat_l3proto_find(u8 family) return rcu_dereference(nf_nat_l3protos[family]); } -inline const struct nf_nat_l4proto * -__nf_nat_l4proto_find(u8 family, u8 protonum) -{ - return rcu_dereference(nf_nat_l4protos[family][protonum]); -} -EXPORT_SYMBOL_GPL(__nf_nat_l4proto_find); - #ifdef CONFIG_XFRM static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl) { @@ -173,27 +163,66 @@ nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, } EXPORT_SYMBOL(nf_nat_used_tuple); +static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t, + const struct nf_nat_range2 *range) +{ + if (t->src.l3num == NFPROTO_IPV4) + return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) && + ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip); + + return ipv6_addr_cmp(&t->src.u3.in6, &range->min_addr.in6) >= 0 && + ipv6_addr_cmp(&t->src.u3.in6, &range->max_addr.in6) <= 0; +} + +/* Is the manipable part of the tuple between min and max incl? */ +static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype, + const union nf_conntrack_man_proto *min, + const union nf_conntrack_man_proto *max) +{ + __be16 port; + + switch (tuple->dst.protonum) { + case IPPROTO_ICMP: /* fallthrough */ + case IPPROTO_ICMPV6: + return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) && + ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id); + case IPPROTO_GRE: /* all fall though */ + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + case IPPROTO_DCCP: + case IPPROTO_SCTP: + if (maniptype == NF_NAT_MANIP_SRC) + port = tuple->src.u.all; + else + port = tuple->dst.u.all; + + return ntohs(port) >= ntohs(min->all) && + ntohs(port) <= ntohs(max->all); + default: + return true; + } +} + /* If we source map this tuple so reply looks like reply_tuple, will * that meet the constraints of range. */ -static int in_range(const struct nf_nat_l3proto *l3proto, - const struct nf_nat_l4proto *l4proto, - const struct nf_conntrack_tuple *tuple, +static int in_range(const struct nf_conntrack_tuple *tuple, const struct nf_nat_range2 *range) { /* If we are supposed to map IPs, then we must be in the * range specified, otherwise let this drag us onto a new src IP. */ if (range->flags & NF_NAT_RANGE_MAP_IPS && - !l3proto->in_range(tuple, range)) + !nf_nat_inet_in_range(tuple, range)) return 0; - if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) || - l4proto->in_range(tuple, NF_NAT_MANIP_SRC, - &range->min_proto, &range->max_proto)) + if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) return 1; - return 0; + return l4proto_in_range(tuple, NF_NAT_MANIP_SRC, + &range->min_proto, &range->max_proto); } static inline int @@ -212,8 +241,6 @@ same_src(const struct nf_conn *ct, static int find_appropriate_src(struct net *net, const struct nf_conntrack_zone *zone, - const struct nf_nat_l3proto *l3proto, - const struct nf_nat_l4proto *l4proto, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *result, const struct nf_nat_range2 *range) @@ -230,7 +257,7 @@ find_appropriate_src(struct net *net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); result->dst = tuple->dst; - if (in_range(l3proto, l4proto, result, range)) + if (in_range(result, range)) return 1; } } @@ -311,6 +338,123 @@ find_best_ips_proto(const struct nf_conntrack_zone *zone, } } +/* Alter the per-proto part of the tuple (depending on maniptype), to + * give a unique tuple in the given range if possible. + * + * Per-protocol part of tuple is initialized to the incoming packet. + */ +static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_nat_range2 *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) +{ + unsigned int range_size, min, max, i, attempts; + __be16 *keyptr; + u16 off; + static const unsigned int max_attempts = 128; + + switch (tuple->dst.protonum) { + case IPPROTO_ICMP: /* fallthrough */ + case IPPROTO_ICMPV6: + /* id is same for either direction... */ + keyptr = &tuple->src.u.icmp.id; + min = range->min_proto.icmp.id; + range_size = ntohs(range->max_proto.icmp.id) - + ntohs(range->min_proto.icmp.id) + 1; + goto find_free_id; +#if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE) + case IPPROTO_GRE: + /* If there is no master conntrack we are not PPTP, + do not change tuples */ + if (!ct->master) + return; + + if (maniptype == NF_NAT_MANIP_SRC) + keyptr = &tuple->src.u.gre.key; + else + keyptr = &tuple->dst.u.gre.key; + + if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) { + min = 1; + range_size = 65535; + } else { + min = ntohs(range->min_proto.gre.key); + range_size = ntohs(range->max_proto.gre.key) - min + 1; + } + goto find_free_id; +#endif + case IPPROTO_UDP: /* fallthrough */ + case IPPROTO_UDPLITE: /* fallthrough */ + case IPPROTO_TCP: /* fallthrough */ + case IPPROTO_SCTP: /* fallthrough */ + case IPPROTO_DCCP: /* fallthrough */ + if (maniptype == NF_NAT_MANIP_SRC) + keyptr = &tuple->src.u.all; + else + keyptr = &tuple->dst.u.all; + + break; + default: + return; + } + + /* If no range specified... */ + if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) { + /* If it's dst rewrite, can't change port */ + if (maniptype == NF_NAT_MANIP_DST) + return; + + if (ntohs(*keyptr) < 1024) { + /* Loose convention: >> 512 is credential passing */ + if (ntohs(*keyptr) < 512) { + min = 1; + range_size = 511 - min + 1; + } else { + min = 600; + range_size = 1023 - min + 1; + } + } else { + min = 1024; + range_size = 65535 - 1024 + 1; + } + } else { + min = ntohs(range->min_proto.all); + max = ntohs(range->max_proto.all); + if (unlikely(max < min)) + swap(max, min); + range_size = max - min + 1; + } + +find_free_id: + if (range->flags & NF_NAT_RANGE_PROTO_OFFSET) + off = (ntohs(*keyptr) - ntohs(range->base_proto.all)); + else + off = prandom_u32(); + + attempts = range_size; + if (attempts > max_attempts) + attempts = max_attempts; + + /* We are in softirq; doing a search of the entire range risks + * soft lockup when all tuples are already used. + * + * If we can't find any free port from first offset, pick a new + * one and try again, with ever smaller search window. + */ +another_round: + for (i = 0; i < attempts; i++, off++) { + *keyptr = htons(min + off % range_size); + if (!nf_nat_used_tuple(tuple, ct)) + return; + } + + if (attempts >= range_size || attempts < 16) + return; + attempts /= 2; + off = prandom_u32(); + goto another_round; +} + /* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING, * we change the source to map into the range. For NF_INET_PRE_ROUTING * and NF_INET_LOCAL_OUT, we change the destination to map into the @@ -325,17 +469,10 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { const struct nf_conntrack_zone *zone; - const struct nf_nat_l3proto *l3proto; - const struct nf_nat_l4proto *l4proto; struct net *net = nf_ct_net(ct); zone = nf_ct_zone(ct); - rcu_read_lock(); - l3proto = __nf_nat_l3proto_find(orig_tuple->src.l3num); - l4proto = __nf_nat_l4proto_find(orig_tuple->src.l3num, - orig_tuple->dst.protonum); - /* 1) If this srcip/proto/src-proto-part is currently mapped, * and that same mapping gives a unique tuple within the given * range, use that. @@ -347,16 +484,16 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, if (maniptype == NF_NAT_MANIP_SRC && !(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) { /* try the original tuple first */ - if (in_range(l3proto, l4proto, orig_tuple, range)) { + if (in_range(orig_tuple, range)) { if (!nf_nat_used_tuple(orig_tuple, ct)) { *tuple = *orig_tuple; - goto out; + return; } - } else if (find_appropriate_src(net, zone, l3proto, l4proto, + } else if (find_appropriate_src(net, zone, orig_tuple, tuple, range)) { pr_debug("get_unique_tuple: Found current src map\n"); if (!nf_nat_used_tuple(tuple, ct)) - goto out; + return; } } @@ -372,21 +509,19 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) { if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) && - l4proto->in_range(tuple, maniptype, + l4proto_in_range(tuple, maniptype, &range->min_proto, &range->max_proto) && (range->min_proto.all == range->max_proto.all || !nf_nat_used_tuple(tuple, ct))) - goto out; + return; } else if (!nf_nat_used_tuple(tuple, ct)) { - goto out; + return; } } /* Last chance: get protocol to try to obtain unique tuple. */ - l4proto->unique_tuple(l3proto, tuple, range, maniptype, ct); -out: - rcu_read_unlock(); + nf_nat_l4proto_unique_tuple(tuple, range, maniptype, ct); } struct nf_conn_nat *nf_ct_nat_ext_add(struct nf_conn *ct) @@ -502,16 +637,13 @@ static unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_dir dir) { const struct nf_nat_l3proto *l3proto; - const struct nf_nat_l4proto *l4proto; struct nf_conntrack_tuple target; /* We are aiming to look like inverse of other direction. */ nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); l3proto = __nf_nat_l3proto_find(target.src.l3num); - l4proto = __nf_nat_l4proto_find(target.src.l3num, - target.dst.protonum); - if (!l3proto->manip_pkt(skb, 0, l4proto, &target, mtype)) + if (!l3proto->manip_pkt(skb, 0, &target, mtype)) return NF_DROP; return NF_ACCEPT; @@ -667,16 +799,6 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data) return 0; } -static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto) -{ - struct nf_nat_proto_clean clean = { - .l3proto = l3proto, - .l4proto = l4proto, - }; - - nf_ct_iterate_destroy(nf_nat_proto_remove, &clean); -} - static void nf_nat_l3proto_clean(u8 l3proto) { struct nf_nat_proto_clean clean = { @@ -686,82 +808,8 @@ static void nf_nat_l3proto_clean(u8 l3proto) nf_ct_iterate_destroy(nf_nat_proto_remove, &clean); } -/* Protocol registration. */ -int nf_nat_l4proto_register(u8 l3proto, const struct nf_nat_l4proto *l4proto) -{ - const struct nf_nat_l4proto **l4protos; - unsigned int i; - int ret = 0; - - mutex_lock(&nf_nat_proto_mutex); - if (nf_nat_l4protos[l3proto] == NULL) { - l4protos = kmalloc_array(IPPROTO_MAX, - sizeof(struct nf_nat_l4proto *), - GFP_KERNEL); - if (l4protos == NULL) { - ret = -ENOMEM; - goto out; - } - - for (i = 0; i < IPPROTO_MAX; i++) - RCU_INIT_POINTER(l4protos[i], &nf_nat_l4proto_unknown); - - /* Before making proto_array visible to lockless readers, - * we must make sure its content is committed to memory. - */ - smp_wmb(); - - nf_nat_l4protos[l3proto] = l4protos; - } - - if (rcu_dereference_protected( - nf_nat_l4protos[l3proto][l4proto->l4proto], - lockdep_is_held(&nf_nat_proto_mutex) - ) != &nf_nat_l4proto_unknown) { - ret = -EBUSY; - goto out; - } - RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto], l4proto); - out: - mutex_unlock(&nf_nat_proto_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(nf_nat_l4proto_register); - -/* No one stores the protocol anywhere; simply delete it. */ -void nf_nat_l4proto_unregister(u8 l3proto, const struct nf_nat_l4proto *l4proto) -{ - mutex_lock(&nf_nat_proto_mutex); - RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto], - &nf_nat_l4proto_unknown); - mutex_unlock(&nf_nat_proto_mutex); - synchronize_rcu(); - - nf_nat_l4proto_clean(l3proto, l4proto->l4proto); -} -EXPORT_SYMBOL_GPL(nf_nat_l4proto_unregister); - int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto) { - mutex_lock(&nf_nat_proto_mutex); - RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_TCP], - &nf_nat_l4proto_tcp); - RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDP], - &nf_nat_l4proto_udp); -#ifdef CONFIG_NF_NAT_PROTO_DCCP - RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_DCCP], - &nf_nat_l4proto_dccp); -#endif -#ifdef CONFIG_NF_NAT_PROTO_SCTP - RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_SCTP], - &nf_nat_l4proto_sctp); -#endif -#ifdef CONFIG_NF_NAT_PROTO_UDPLITE - RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDPLITE], - &nf_nat_l4proto_udplite); -#endif - mutex_unlock(&nf_nat_proto_mutex); - RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], l3proto); return 0; } @@ -802,12 +850,26 @@ static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = { [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 }, }; +static int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[], + struct nf_nat_range2 *range) +{ + if (tb[CTA_PROTONAT_PORT_MIN]) { + range->min_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]); + range->max_proto.all = range->min_proto.all; + range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + } + if (tb[CTA_PROTONAT_PORT_MAX]) { + range->max_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]); + range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + } + return 0; +} + static int nfnetlink_parse_nat_proto(struct nlattr *attr, const struct nf_conn *ct, struct nf_nat_range2 *range) { struct nlattr *tb[CTA_PROTONAT_MAX+1]; - const struct nf_nat_l4proto *l4proto; int err; err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, @@ -815,11 +877,7 @@ static int nfnetlink_parse_nat_proto(struct nlattr *attr, if (err < 0) return err; - l4proto = __nf_nat_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); - if (l4proto->nlattr_to_range) - err = l4proto->nlattr_to_range(tb, range); - - return err; + return nf_nat_l4proto_nlattr_to_range(tb, range); } static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { @@ -1082,7 +1140,6 @@ static int __init nf_nat_init(void) static void __exit nf_nat_cleanup(void) { struct nf_nat_proto_clean clean = {}; - unsigned int i; nf_ct_iterate_destroy(nf_nat_proto_clean, &clean); @@ -1090,10 +1147,6 @@ static void __exit nf_nat_cleanup(void) nf_ct_helper_expectfn_unregister(&follow_master_nat); RCU_INIT_POINTER(nf_nat_hook, NULL); - synchronize_rcu(); - - for (i = 0; i < NFPROTO_NUMPROTO; i++) - kfree(nf_nat_l4protos[i]); synchronize_net(); kvfree(nf_nat_bysource); unregister_pernet_subsys(&nat_net_ops); diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c new file mode 100644 index 000000000000..f83bf9d8c9f5 --- /dev/null +++ b/net/netfilter/nf_nat_proto.c @@ -0,0 +1,343 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/types.h> +#include <linux/export.h> +#include <linux/init.h> +#include <linux/udp.h> +#include <linux/tcp.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> + +#include <linux/dccp.h> +#include <linux/sctp.h> +#include <net/sctp/checksum.h> + +#include <linux/netfilter.h> +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_core.h> +#include <net/netfilter/nf_nat_l3proto.h> +#include <net/netfilter/nf_nat_l4proto.h> + +static void +__udp_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, struct udphdr *hdr, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype, bool do_csum) +{ + __be16 *portptr, newport; + + if (maniptype == NF_NAT_MANIP_SRC) { + /* Get rid of src port */ + newport = tuple->src.u.udp.port; + portptr = &hdr->source; + } else { + /* Get rid of dst port */ + newport = tuple->dst.u.udp.port; + portptr = &hdr->dest; + } + if (do_csum) { + l3proto->csum_update(skb, iphdroff, &hdr->check, + tuple, maniptype); + inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, + false); + if (!hdr->check) + hdr->check = CSUM_MANGLED_0; + } + *portptr = newport; +} + +static bool udp_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct udphdr *hdr; + bool do_csum; + + if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) + return false; + + hdr = (struct udphdr *)(skb->data + hdroff); + do_csum = hdr->check || skb->ip_summed == CHECKSUM_PARTIAL; + + __udp_manip_pkt(skb, l3proto, iphdroff, hdr, tuple, maniptype, do_csum); + return true; +} + +static bool udplite_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ +#ifdef CONFIG_NF_CT_PROTO_UDPLITE + struct udphdr *hdr; + + if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) + return false; + + hdr = (struct udphdr *)(skb->data + hdroff); + __udp_manip_pkt(skb, l3proto, iphdroff, hdr, tuple, maniptype, true); +#endif + return true; +} + +static bool +sctp_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ +#ifdef CONFIG_NF_CT_PROTO_SCTP + struct sctphdr *hdr; + int hdrsize = 8; + + /* This could be an inner header returned in imcp packet; in such + * cases we cannot update the checksum field since it is outside + * of the 8 bytes of transport layer headers we are guaranteed. + */ + if (skb->len >= hdroff + sizeof(*hdr)) + hdrsize = sizeof(*hdr); + + if (!skb_make_writable(skb, hdroff + hdrsize)) + return false; + + hdr = (struct sctphdr *)(skb->data + hdroff); + + if (maniptype == NF_NAT_MANIP_SRC) { + /* Get rid of src port */ + hdr->source = tuple->src.u.sctp.port; + } else { + /* Get rid of dst port */ + hdr->dest = tuple->dst.u.sctp.port; + } + + if (hdrsize < sizeof(*hdr)) + return true; + + if (skb->ip_summed != CHECKSUM_PARTIAL) { + hdr->checksum = sctp_compute_cksum(skb, hdroff); + skb->ip_summed = CHECKSUM_NONE; + } + +#endif + return true; +} + +static bool +tcp_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct tcphdr *hdr; + __be16 *portptr, newport, oldport; + int hdrsize = 8; /* TCP connection tracking guarantees this much */ + + /* this could be a inner header returned in icmp packet; in such + cases we cannot update the checksum field since it is outside of + the 8 bytes of transport layer headers we are guaranteed */ + if (skb->len >= hdroff + sizeof(struct tcphdr)) + hdrsize = sizeof(struct tcphdr); + + if (!skb_make_writable(skb, hdroff + hdrsize)) + return false; + + hdr = (struct tcphdr *)(skb->data + hdroff); + + if (maniptype == NF_NAT_MANIP_SRC) { + /* Get rid of src port */ + newport = tuple->src.u.tcp.port; + portptr = &hdr->source; + } else { + /* Get rid of dst port */ + newport = tuple->dst.u.tcp.port; + portptr = &hdr->dest; + } + + oldport = *portptr; + *portptr = newport; + + if (hdrsize < sizeof(*hdr)) + return true; + + l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); + inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false); + return true; +} + +static bool +dccp_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ +#ifdef CONFIG_NF_CT_PROTO_DCCP + struct dccp_hdr *hdr; + __be16 *portptr, oldport, newport; + int hdrsize = 8; /* DCCP connection tracking guarantees this much */ + + if (skb->len >= hdroff + sizeof(struct dccp_hdr)) + hdrsize = sizeof(struct dccp_hdr); + + if (!skb_make_writable(skb, hdroff + hdrsize)) + return false; + + hdr = (struct dccp_hdr *)(skb->data + hdroff); + + if (maniptype == NF_NAT_MANIP_SRC) { + newport = tuple->src.u.dccp.port; + portptr = &hdr->dccph_sport; + } else { + newport = tuple->dst.u.dccp.port; + portptr = &hdr->dccph_dport; + } + + oldport = *portptr; + *portptr = newport; + + if (hdrsize < sizeof(*hdr)) + return true; + + l3proto->csum_update(skb, iphdroff, &hdr->dccph_checksum, + tuple, maniptype); + inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport, + false); +#endif + return true; +} + +static bool +icmp_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct icmphdr *hdr; + + if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) + return false; + + hdr = (struct icmphdr *)(skb->data + hdroff); + inet_proto_csum_replace2(&hdr->checksum, skb, + hdr->un.echo.id, tuple->src.u.icmp.id, false); + hdr->un.echo.id = tuple->src.u.icmp.id; + return true; +} + +static bool +icmpv6_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct icmp6hdr *hdr; + + if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) + return false; + + hdr = (struct icmp6hdr *)(skb->data + hdroff); + l3proto->csum_update(skb, iphdroff, &hdr->icmp6_cksum, + tuple, maniptype); + if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST || + hdr->icmp6_type == ICMPV6_ECHO_REPLY) { + inet_proto_csum_replace2(&hdr->icmp6_cksum, skb, + hdr->icmp6_identifier, + tuple->src.u.icmp.id, false); + hdr->icmp6_identifier = tuple->src.u.icmp.id; + } + return true; +} + +/* manipulate a GRE packet according to maniptype */ +static bool +gre_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ +#if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE) + const struct gre_base_hdr *greh; + struct pptp_gre_header *pgreh; + + /* pgreh includes two optional 32bit fields which are not required + * to be there. That's where the magic '8' comes from */ + if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8)) + return false; + + greh = (void *)skb->data + hdroff; + pgreh = (struct pptp_gre_header *)greh; + + /* we only have destination manip of a packet, since 'source key' + * is not present in the packet itself */ + if (maniptype != NF_NAT_MANIP_DST) + return true; + + switch (greh->flags & GRE_VERSION) { + case GRE_VERSION_0: + /* We do not currently NAT any GREv0 packets. + * Try to behave like "nf_nat_proto_unknown" */ + break; + case GRE_VERSION_1: + pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key)); + pgreh->call_id = tuple->dst.u.gre.key; + break; + default: + pr_debug("can't nat unknown GRE version\n"); + return false; + } +#endif + return true; +} + +bool nf_nat_l4proto_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + switch (tuple->dst.protonum) { + case IPPROTO_TCP: + return tcp_manip_pkt(skb, l3proto, iphdroff, hdroff, + tuple, maniptype); + case IPPROTO_UDP: + return udp_manip_pkt(skb, l3proto, iphdroff, hdroff, + tuple, maniptype); + case IPPROTO_UDPLITE: + return udplite_manip_pkt(skb, l3proto, iphdroff, hdroff, + tuple, maniptype); + case IPPROTO_SCTP: + return sctp_manip_pkt(skb, l3proto, iphdroff, hdroff, + tuple, maniptype); + case IPPROTO_ICMP: + return icmp_manip_pkt(skb, l3proto, iphdroff, hdroff, + tuple, maniptype); + case IPPROTO_ICMPV6: + return icmpv6_manip_pkt(skb, l3proto, iphdroff, hdroff, + tuple, maniptype); + case IPPROTO_DCCP: + return dccp_manip_pkt(skb, l3proto, iphdroff, hdroff, + tuple, maniptype); + case IPPROTO_GRE: + return gre_manip_pkt(skb, l3proto, iphdroff, hdroff, + tuple, maniptype); + } + + /* If we don't know protocol -- no error, pass it unmodified. */ + return true; +} +EXPORT_SYMBOL_GPL(nf_nat_l4proto_manip_pkt); diff --git a/net/netfilter/nf_nat_proto_common.c b/net/netfilter/nf_nat_proto_common.c deleted file mode 100644 index 5d849d835561..000000000000 --- a/net/netfilter/nf_nat_proto_common.c +++ /dev/null @@ -1,120 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> - * (C) 2008 Patrick McHardy <kaber@trash.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/types.h> -#include <linux/random.h> -#include <linux/netfilter.h> -#include <linux/export.h> - -#include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_core.h> -#include <net/netfilter/nf_nat_l3proto.h> -#include <net/netfilter/nf_nat_l4proto.h> - -bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype, - const union nf_conntrack_man_proto *min, - const union nf_conntrack_man_proto *max) -{ - __be16 port; - - if (maniptype == NF_NAT_MANIP_SRC) - port = tuple->src.u.all; - else - port = tuple->dst.u.all; - - return ntohs(port) >= ntohs(min->all) && - ntohs(port) <= ntohs(max->all); -} -EXPORT_SYMBOL_GPL(nf_nat_l4proto_in_range); - -void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto, - struct nf_conntrack_tuple *tuple, - const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct, - u16 *rover) -{ - unsigned int range_size, min, max, i; - __be16 *portptr; - u_int16_t off; - - if (maniptype == NF_NAT_MANIP_SRC) - portptr = &tuple->src.u.all; - else - portptr = &tuple->dst.u.all; - - /* If no range specified... */ - if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) { - /* If it's dst rewrite, can't change port */ - if (maniptype == NF_NAT_MANIP_DST) - return; - - if (ntohs(*portptr) < 1024) { - /* Loose convention: >> 512 is credential passing */ - if (ntohs(*portptr) < 512) { - min = 1; - range_size = 511 - min + 1; - } else { - min = 600; - range_size = 1023 - min + 1; - } - } else { - min = 1024; - range_size = 65535 - 1024 + 1; - } - } else { - min = ntohs(range->min_proto.all); - max = ntohs(range->max_proto.all); - if (unlikely(max < min)) - swap(max, min); - range_size = max - min + 1; - } - - if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) { - off = l3proto->secure_port(tuple, maniptype == NF_NAT_MANIP_SRC - ? tuple->dst.u.all - : tuple->src.u.all); - } else if (range->flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY) { - off = prandom_u32(); - } else if (range->flags & NF_NAT_RANGE_PROTO_OFFSET) { - off = (ntohs(*portptr) - ntohs(range->base_proto.all)); - } else { - off = *rover; - } - - for (i = 0; ; ++off) { - *portptr = htons(min + off % range_size); - if (++i != range_size && nf_nat_used_tuple(tuple, ct)) - continue; - if (!(range->flags & (NF_NAT_RANGE_PROTO_RANDOM_ALL| - NF_NAT_RANGE_PROTO_OFFSET))) - *rover = off; - return; - } -} -EXPORT_SYMBOL_GPL(nf_nat_l4proto_unique_tuple); - -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) -int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[], - struct nf_nat_range2 *range) -{ - if (tb[CTA_PROTONAT_PORT_MIN]) { - range->min_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]); - range->max_proto.all = range->min_proto.all; - range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; - } - if (tb[CTA_PROTONAT_PORT_MAX]) { - range->max_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]); - range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; - } - return 0; -} -EXPORT_SYMBOL_GPL(nf_nat_l4proto_nlattr_to_range); -#endif diff --git a/net/netfilter/nf_nat_proto_dccp.c b/net/netfilter/nf_nat_proto_dccp.c deleted file mode 100644 index 67ea0d83aa5a..000000000000 --- a/net/netfilter/nf_nat_proto_dccp.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - * DCCP NAT protocol helper - * - * Copyright (c) 2005, 2006, 2008 Patrick McHardy <kaber@trash.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ - -#include <linux/kernel.h> -#include <linux/skbuff.h> -#include <linux/dccp.h> - -#include <net/netfilter/nf_conntrack.h> -#include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_l3proto.h> -#include <net/netfilter/nf_nat_l4proto.h> - -static u_int16_t dccp_port_rover; - -static void -dccp_unique_tuple(const struct nf_nat_l3proto *l3proto, - struct nf_conntrack_tuple *tuple, - const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct) -{ - nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, - &dccp_port_rover); -} - -static bool -dccp_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, - unsigned int iphdroff, unsigned int hdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ - struct dccp_hdr *hdr; - __be16 *portptr, oldport, newport; - int hdrsize = 8; /* DCCP connection tracking guarantees this much */ - - if (skb->len >= hdroff + sizeof(struct dccp_hdr)) - hdrsize = sizeof(struct dccp_hdr); - - if (!skb_make_writable(skb, hdroff + hdrsize)) - return false; - - hdr = (struct dccp_hdr *)(skb->data + hdroff); - - if (maniptype == NF_NAT_MANIP_SRC) { - newport = tuple->src.u.dccp.port; - portptr = &hdr->dccph_sport; - } else { - newport = tuple->dst.u.dccp.port; - portptr = &hdr->dccph_dport; - } - - oldport = *portptr; - *portptr = newport; - - if (hdrsize < sizeof(*hdr)) - return true; - - l3proto->csum_update(skb, iphdroff, &hdr->dccph_checksum, - tuple, maniptype); - inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport, - false); - return true; -} - -const struct nf_nat_l4proto nf_nat_l4proto_dccp = { - .l4proto = IPPROTO_DCCP, - .manip_pkt = dccp_manip_pkt, - .in_range = nf_nat_l4proto_in_range, - .unique_tuple = dccp_unique_tuple, -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) - .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, -#endif -}; diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c deleted file mode 100644 index 1c5d9b65fbba..000000000000 --- a/net/netfilter/nf_nat_proto_sctp.c +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/types.h> -#include <linux/sctp.h> -#include <net/sctp/checksum.h> - -#include <net/netfilter/nf_nat_l4proto.h> - -static u_int16_t nf_sctp_port_rover; - -static void -sctp_unique_tuple(const struct nf_nat_l3proto *l3proto, - struct nf_conntrack_tuple *tuple, - const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct) -{ - nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, - &nf_sctp_port_rover); -} - -static bool -sctp_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, - unsigned int iphdroff, unsigned int hdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ - struct sctphdr *hdr; - int hdrsize = 8; - - /* This could be an inner header returned in imcp packet; in such - * cases we cannot update the checksum field since it is outside - * of the 8 bytes of transport layer headers we are guaranteed. - */ - if (skb->len >= hdroff + sizeof(*hdr)) - hdrsize = sizeof(*hdr); - - if (!skb_make_writable(skb, hdroff + hdrsize)) - return false; - - hdr = (struct sctphdr *)(skb->data + hdroff); - - if (maniptype == NF_NAT_MANIP_SRC) { - /* Get rid of src port */ - hdr->source = tuple->src.u.sctp.port; - } else { - /* Get rid of dst port */ - hdr->dest = tuple->dst.u.sctp.port; - } - - if (hdrsize < sizeof(*hdr)) - return true; - - if (skb->ip_summed != CHECKSUM_PARTIAL) { - hdr->checksum = sctp_compute_cksum(skb, hdroff); - skb->ip_summed = CHECKSUM_NONE; - } - - return true; -} - -const struct nf_nat_l4proto nf_nat_l4proto_sctp = { - .l4proto = IPPROTO_SCTP, - .manip_pkt = sctp_manip_pkt, - .in_range = nf_nat_l4proto_in_range, - .unique_tuple = sctp_unique_tuple, -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) - .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, -#endif -}; diff --git a/net/netfilter/nf_nat_proto_tcp.c b/net/netfilter/nf_nat_proto_tcp.c deleted file mode 100644 index f15fcd475f98..000000000000 --- a/net/netfilter/nf_nat_proto_tcp.c +++ /dev/null @@ -1,85 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/types.h> -#include <linux/init.h> -#include <linux/export.h> -#include <linux/tcp.h> - -#include <linux/netfilter.h> -#include <linux/netfilter/nfnetlink_conntrack.h> -#include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_l3proto.h> -#include <net/netfilter/nf_nat_l4proto.h> -#include <net/netfilter/nf_nat_core.h> - -static u16 tcp_port_rover; - -static void -tcp_unique_tuple(const struct nf_nat_l3proto *l3proto, - struct nf_conntrack_tuple *tuple, - const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct) -{ - nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, - &tcp_port_rover); -} - -static bool -tcp_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, - unsigned int iphdroff, unsigned int hdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ - struct tcphdr *hdr; - __be16 *portptr, newport, oldport; - int hdrsize = 8; /* TCP connection tracking guarantees this much */ - - /* this could be a inner header returned in icmp packet; in such - cases we cannot update the checksum field since it is outside of - the 8 bytes of transport layer headers we are guaranteed */ - if (skb->len >= hdroff + sizeof(struct tcphdr)) - hdrsize = sizeof(struct tcphdr); - - if (!skb_make_writable(skb, hdroff + hdrsize)) - return false; - - hdr = (struct tcphdr *)(skb->data + hdroff); - - if (maniptype == NF_NAT_MANIP_SRC) { - /* Get rid of src port */ - newport = tuple->src.u.tcp.port; - portptr = &hdr->source; - } else { - /* Get rid of dst port */ - newport = tuple->dst.u.tcp.port; - portptr = &hdr->dest; - } - - oldport = *portptr; - *portptr = newport; - - if (hdrsize < sizeof(*hdr)) - return true; - - l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); - inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false); - return true; -} - -const struct nf_nat_l4proto nf_nat_l4proto_tcp = { - .l4proto = IPPROTO_TCP, - .manip_pkt = tcp_manip_pkt, - .in_range = nf_nat_l4proto_in_range, - .unique_tuple = tcp_unique_tuple, -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) - .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, -#endif -}; diff --git a/net/netfilter/nf_nat_proto_udp.c b/net/netfilter/nf_nat_proto_udp.c deleted file mode 100644 index 5790f70a83b2..000000000000 --- a/net/netfilter/nf_nat_proto_udp.c +++ /dev/null @@ -1,130 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/types.h> -#include <linux/export.h> -#include <linux/init.h> -#include <linux/udp.h> - -#include <linux/netfilter.h> -#include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_core.h> -#include <net/netfilter/nf_nat_l3proto.h> -#include <net/netfilter/nf_nat_l4proto.h> - -static u16 udp_port_rover; - -static void -udp_unique_tuple(const struct nf_nat_l3proto *l3proto, - struct nf_conntrack_tuple *tuple, - const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct) -{ - nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, - &udp_port_rover); -} - -static void -__udp_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, - unsigned int iphdroff, struct udphdr *hdr, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype, bool do_csum) -{ - __be16 *portptr, newport; - - if (maniptype == NF_NAT_MANIP_SRC) { - /* Get rid of src port */ - newport = tuple->src.u.udp.port; - portptr = &hdr->source; - } else { - /* Get rid of dst port */ - newport = tuple->dst.u.udp.port; - portptr = &hdr->dest; - } - if (do_csum) { - l3proto->csum_update(skb, iphdroff, &hdr->check, - tuple, maniptype); - inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, - false); - if (!hdr->check) - hdr->check = CSUM_MANGLED_0; - } - *portptr = newport; -} - -static bool udp_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, - unsigned int iphdroff, unsigned int hdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ - struct udphdr *hdr; - bool do_csum; - - if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) - return false; - - hdr = (struct udphdr *)(skb->data + hdroff); - do_csum = hdr->check || skb->ip_summed == CHECKSUM_PARTIAL; - - __udp_manip_pkt(skb, l3proto, iphdroff, hdr, tuple, maniptype, do_csum); - return true; -} - -#ifdef CONFIG_NF_NAT_PROTO_UDPLITE -static u16 udplite_port_rover; - -static bool udplite_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, - unsigned int iphdroff, unsigned int hdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ - struct udphdr *hdr; - - if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) - return false; - - hdr = (struct udphdr *)(skb->data + hdroff); - __udp_manip_pkt(skb, l3proto, iphdroff, hdr, tuple, maniptype, true); - return true; -} - -static void -udplite_unique_tuple(const struct nf_nat_l3proto *l3proto, - struct nf_conntrack_tuple *tuple, - const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct) -{ - nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, - &udplite_port_rover); -} - -const struct nf_nat_l4proto nf_nat_l4proto_udplite = { - .l4proto = IPPROTO_UDPLITE, - .manip_pkt = udplite_manip_pkt, - .in_range = nf_nat_l4proto_in_range, - .unique_tuple = udplite_unique_tuple, -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) - .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, -#endif -}; -#endif /* CONFIG_NF_NAT_PROTO_UDPLITE */ - -const struct nf_nat_l4proto nf_nat_l4proto_udp = { - .l4proto = IPPROTO_UDP, - .manip_pkt = udp_manip_pkt, - .in_range = nf_nat_l4proto_in_range, - .unique_tuple = udp_unique_tuple, -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) - .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, -#endif -}; diff --git a/net/netfilter/nf_nat_proto_unknown.c b/net/netfilter/nf_nat_proto_unknown.c deleted file mode 100644 index c5db3e251232..000000000000 --- a/net/netfilter/nf_nat_proto_unknown.c +++ /dev/null @@ -1,54 +0,0 @@ -/* The "unknown" protocol. This is what is used for protocols we - * don't understand. It's returned by ip_ct_find_proto(). - */ - -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/types.h> -#include <linux/init.h> - -#include <linux/netfilter.h> -#include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_l4proto.h> - -static bool unknown_in_range(const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type manip_type, - const union nf_conntrack_man_proto *min, - const union nf_conntrack_man_proto *max) -{ - return true; -} - -static void unknown_unique_tuple(const struct nf_nat_l3proto *l3proto, - struct nf_conntrack_tuple *tuple, - const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct) -{ - /* Sorry: we can't help you; if it's not unique, we can't frob - * anything. - */ - return; -} - -static bool -unknown_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, - unsigned int iphdroff, unsigned int hdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ - return true; -} - -const struct nf_nat_l4proto nf_nat_l4proto_unknown = { - .manip_pkt = unknown_manip_pkt, - .in_range = unknown_in_range, - .unique_tuple = unknown_unique_tuple, -}; diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c index 1f3086074981..aa1be643d7a0 100644 --- a/net/netfilter/nf_nat_sip.c +++ b/net/netfilter/nf_nat_sip.c @@ -18,6 +18,7 @@ #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_helper.h> +#include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_conntrack_seqadj.h> @@ -316,6 +317,9 @@ static void nf_nat_sip_seq_adjust(struct sk_buff *skb, unsigned int protoff, static void nf_nat_sip_expected(struct nf_conn *ct, struct nf_conntrack_expect *exp) { + struct nf_conn_help *help = nfct_help(ct->master); + struct nf_conntrack_expect *pair_exp; + int range_set_for_snat = 0; struct nf_nat_range2 range; /* This must be a fresh one. */ @@ -327,15 +331,42 @@ static void nf_nat_sip_expected(struct nf_conn *ct, range.min_addr = range.max_addr = exp->saved_addr; nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); - /* Change src to where master sends to, but only if the connection - * actually came from the same source. */ - if (nf_inet_addr_cmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3, + /* Do media streams SRC manip according with the parameters + * found in the paired expectation. + */ + if (exp->class != SIP_EXPECT_SIGNALLING) { + spin_lock_bh(&nf_conntrack_expect_lock); + hlist_for_each_entry(pair_exp, &help->expectations, lnode) { + if (pair_exp->tuple.src.l3num == nf_ct_l3num(ct) && + pair_exp->tuple.dst.protonum == ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum && + nf_inet_addr_cmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3, &pair_exp->saved_addr) && + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all == pair_exp->saved_proto.all) { + range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); + range.min_proto.all = range.max_proto.all = pair_exp->tuple.dst.u.all; + range.min_addr = range.max_addr = pair_exp->tuple.dst.u3; + range_set_for_snat = 1; + break; + } + } + spin_unlock_bh(&nf_conntrack_expect_lock); + } + + /* When no paired expectation has been found, change src to + * where master sends to, but only if the connection actually came + * from the same source. + */ + if (!range_set_for_snat && + nf_inet_addr_cmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3, &ct->master->tuplehash[exp->dir].tuple.src.u3)) { range.flags = NF_NAT_RANGE_MAP_IPS; range.min_addr = range.max_addr = ct->master->tuplehash[!exp->dir].tuple.dst.u3; - nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); + range_set_for_snat = 1; } + + /* Perform SRC manip. */ + if (range_set_for_snat) + nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); } static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 6e548d7c9f67..fec814dace5a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2295,15 +2295,52 @@ struct nft_rule_dump_ctx { char *chain; }; +static int __nf_tables_dump_rules(struct sk_buff *skb, + unsigned int *idx, + struct netlink_callback *cb, + const struct nft_table *table, + const struct nft_chain *chain) +{ + struct net *net = sock_net(skb->sk); + unsigned int s_idx = cb->args[0]; + const struct nft_rule *rule; + int rc = 1; + + list_for_each_entry_rcu(rule, &chain->rules, list) { + if (!nft_is_active(net, rule)) + goto cont; + if (*idx < s_idx) + goto cont; + if (*idx > s_idx) { + memset(&cb->args[1], 0, + sizeof(cb->args) - sizeof(cb->args[0])); + } + if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFT_MSG_NEWRULE, + NLM_F_MULTI | NLM_F_APPEND, + table->family, + table, chain, rule) < 0) + goto out_unfinished; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +cont: + (*idx)++; + } + rc = 0; +out_unfinished: + cb->args[0] = *idx; + return rc; +} + static int nf_tables_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) { const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); const struct nft_rule_dump_ctx *ctx = cb->data; - const struct nft_table *table; + struct nft_table *table; const struct nft_chain *chain; - const struct nft_rule *rule; - unsigned int idx = 0, s_idx = cb->args[0]; + unsigned int idx = 0; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; @@ -2317,37 +2354,34 @@ static int nf_tables_dump_rules(struct sk_buff *skb, if (ctx && ctx->table && strcmp(ctx->table, table->name) != 0) continue; - list_for_each_entry_rcu(chain, &table->chains, list) { - if (ctx && ctx->chain && - strcmp(ctx->chain, chain->name) != 0) - continue; + if (ctx && ctx->chain) { + struct rhlist_head *list, *tmp; - list_for_each_entry_rcu(rule, &chain->rules, list) { - if (!nft_is_active(net, rule)) - goto cont; - if (idx < s_idx) - goto cont; - if (idx > s_idx) - memset(&cb->args[1], 0, - sizeof(cb->args) - sizeof(cb->args[0])); - if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NFT_MSG_NEWRULE, - NLM_F_MULTI | NLM_F_APPEND, - table->family, - table, chain, rule) < 0) - goto done; - - nl_dump_check_consistent(cb, nlmsg_hdr(skb)); -cont: - idx++; + list = rhltable_lookup(&table->chains_ht, ctx->chain, + nft_chain_ht_params); + if (!list) + goto done; + + rhl_for_each_entry_rcu(chain, tmp, list, rhlhead) { + if (!nft_is_active(net, chain)) + continue; + __nf_tables_dump_rules(skb, &idx, + cb, table, chain); + break; } + goto done; } + + list_for_each_entry_rcu(chain, &table->chains, list) { + if (__nf_tables_dump_rules(skb, &idx, cb, table, chain)) + goto done; + } + + if (ctx && ctx->table) + break; } done: rcu_read_unlock(); - - cb->args[0] = idx; return skb->len; } diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 332c69d27b47..b1f9c5303f02 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -148,7 +148,7 @@ static void instance_put(struct nfulnl_instance *inst) { if (inst && refcount_dec_and_test(&inst->use)) - call_rcu_bh(&inst->rcu, nfulnl_instance_free_rcu); + call_rcu(&inst->rcu, nfulnl_instance_free_rcu); } static void nfulnl_timer(struct timer_list *t); diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 1ad4017f9b73..28e27a32d9b9 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -260,7 +260,7 @@ static inline void dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent) { hlist_del_rcu(&ent->node); - call_rcu_bh(&ent->rcu, dsthash_free_rcu); + call_rcu(&ent->rcu, dsthash_free_rcu); ht->count--; } static void htable_gc(struct work_struct *work); @@ -1326,7 +1326,7 @@ static void __exit hashlimit_mt_exit(void) xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg)); unregister_pernet_subsys(&hashlimit_net_ops); - rcu_barrier_bh(); + rcu_barrier(); kmem_cache_destroy(hashlimit_cachep); } |