diff options
Diffstat (limited to 'net')
25 files changed, 1188 insertions, 580 deletions
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 820252aee81f..37928d5f2840 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -142,6 +142,12 @@ static inline int ebt_basic_match(struct ebt_entry *e, struct ethhdr *h, return 0; } +static inline __pure +struct ebt_entry *ebt_next_entry(const struct ebt_entry *entry) +{ + return (void *)entry + entry->next_offset; +} + /* Do some firewalling */ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -164,7 +170,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, mtpar.in = tgpar.in = in; mtpar.out = tgpar.out = out; mtpar.hotdrop = &hotdrop; - tgpar.hooknum = hook; + mtpar.hooknum = tgpar.hooknum = hook; read_lock_bh(&table->lock); private = table->private; @@ -249,8 +255,7 @@ letsreturn: /* jump to a udc */ cs[sp].n = i + 1; cs[sp].chaininfo = chaininfo; - cs[sp].e = (struct ebt_entry *) - (((char *)point) + point->next_offset); + cs[sp].e = ebt_next_entry(point); i = 0; chaininfo = (struct ebt_entries *) (base + verdict); #ifdef CONFIG_NETFILTER_DEBUG @@ -266,8 +271,7 @@ letsreturn: sp++; continue; letscontinue: - point = (struct ebt_entry *) - (((char *)point) + point->next_offset); + point = ebt_next_entry(point); i++; } @@ -787,7 +791,7 @@ static int check_chainloops(struct ebt_entries *chain, struct ebt_cl_stack *cl_s /* this can't be 0, so the loop test is correct */ cl_s[i].cs.n = pos + 1; pos = 0; - cl_s[i].cs.e = ((void *)e + e->next_offset); + cl_s[i].cs.e = ebt_next_entry(e); e = (struct ebt_entry *)(hlp2->data); nentries = hlp2->nentries; cl_s[i].from = chain_nr; @@ -797,7 +801,7 @@ static int check_chainloops(struct ebt_entries *chain, struct ebt_cl_stack *cl_s continue; } letscontinue: - e = (void *)e + e->next_offset; + e = ebt_next_entry(e); pos++; } return 0; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 831fe1879dc0..7505dff4ffdf 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -231,6 +231,12 @@ static inline struct arpt_entry *get_entry(void *base, unsigned int offset) return (struct arpt_entry *)(base + offset); } +static inline __pure +struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry) +{ + return (void *)entry + entry->next_offset; +} + unsigned int arpt_do_table(struct sk_buff *skb, unsigned int hook, const struct net_device *in, @@ -267,67 +273,64 @@ unsigned int arpt_do_table(struct sk_buff *skb, arp = arp_hdr(skb); do { - if (arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { - struct arpt_entry_target *t; - int hdr_len; - - hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) + - (2 * skb->dev->addr_len); + struct arpt_entry_target *t; + int hdr_len; - ADD_COUNTER(e->counters, hdr_len, 1); + if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { + e = arpt_next_entry(e); + continue; + } - t = arpt_get_target(e); + hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) + + (2 * skb->dev->addr_len); + ADD_COUNTER(e->counters, hdr_len, 1); - /* Standard target? */ - if (!t->u.kernel.target->target) { - int v; + t = arpt_get_target(e); - v = ((struct arpt_standard_target *)t)->verdict; - if (v < 0) { - /* Pop from stack? */ - if (v != ARPT_RETURN) { - verdict = (unsigned)(-v) - 1; - break; - } - e = back; - back = get_entry(table_base, - back->comefrom); - continue; - } - if (table_base + v - != (void *)e + e->next_offset) { - /* Save old back ptr in next entry */ - struct arpt_entry *next - = (void *)e + e->next_offset; - next->comefrom = - (void *)back - table_base; - - /* set back pointer to next entry */ - back = next; - } + /* Standard target? */ + if (!t->u.kernel.target->target) { + int v; - e = get_entry(table_base, v); - } else { - /* Targets which reenter must return - * abs. verdicts - */ - tgpar.target = t->u.kernel.target; - tgpar.targinfo = t->data; - verdict = t->u.kernel.target->target(skb, - &tgpar); - - /* Target might have changed stuff. */ - arp = arp_hdr(skb); - - if (verdict == ARPT_CONTINUE) - e = (void *)e + e->next_offset; - else - /* Verdict */ + v = ((struct arpt_standard_target *)t)->verdict; + if (v < 0) { + /* Pop from stack? */ + if (v != ARPT_RETURN) { + verdict = (unsigned)(-v) - 1; break; + } + e = back; + back = get_entry(table_base, back->comefrom); + continue; } - } else { - e = (void *)e + e->next_offset; + if (table_base + v + != arpt_next_entry(e)) { + /* Save old back ptr in next entry */ + struct arpt_entry *next = arpt_next_entry(e); + next->comefrom = (void *)back - table_base; + + /* set back pointer to next entry */ + back = next; + } + + e = get_entry(table_base, v); + continue; } + + /* Targets which reenter must return + * abs. verdicts + */ + tgpar.target = t->u.kernel.target; + tgpar.targinfo = t->data; + verdict = t->u.kernel.target->target(skb, &tgpar); + + /* Target might have changed stuff. */ + arp = arp_hdr(skb); + + if (verdict == ARPT_CONTINUE) + e = arpt_next_entry(e); + else + /* Verdict */ + break; } while (!hotdrop); xt_info_rdunlock_bh(); diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 5f22c91c6e15..c156db215987 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -596,7 +596,7 @@ static int __init ip_queue_init(void) #ifdef CONFIG_SYSCTL ipq_sysctl_header = register_sysctl_paths(net_ipv4_ctl_path, ipq_table); #endif - status = nf_register_queue_handler(PF_INET, &nfqh); + status = nf_register_queue_handler(NFPROTO_IPV4, &nfqh); if (status < 0) { printk(KERN_ERR "ip_queue: failed to register queue handler\n"); goto cleanup_sysctl; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 2ec8d7290c40..5bf7c3f185da 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -238,8 +238,8 @@ static struct nf_loginfo trace_loginfo = { /* Mildly perf critical (only if packet tracing is on) */ static inline int get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e, - char *hookname, char **chainname, - char **comment, unsigned int *rulenum) + const char *hookname, const char **chainname, + const char **comment, unsigned int *rulenum) { struct ipt_standard_target *t = (void *)ipt_get_target(s); @@ -257,8 +257,8 @@ get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e, && unconditional(&s->ip)) { /* Tail of chains: STANDARD target (return/policy) */ *comment = *chainname == hookname - ? (char *)comments[NF_IP_TRACE_COMMENT_POLICY] - : (char *)comments[NF_IP_TRACE_COMMENT_RETURN]; + ? comments[NF_IP_TRACE_COMMENT_POLICY] + : comments[NF_IP_TRACE_COMMENT_RETURN]; } return 1; } else @@ -277,14 +277,14 @@ static void trace_packet(struct sk_buff *skb, { void *table_base; const struct ipt_entry *root; - char *hookname, *chainname, *comment; + const char *hookname, *chainname, *comment; unsigned int rulenum = 0; - table_base = (void *)private->entries[smp_processor_id()]; + table_base = private->entries[smp_processor_id()]; root = get_entry(table_base, private->hook_entry[hook]); - hookname = chainname = (char *)hooknames[hook]; - comment = (char *)comments[NF_IP_TRACE_COMMENT_RULE]; + hookname = chainname = hooknames[hook]; + comment = comments[NF_IP_TRACE_COMMENT_RULE]; IPT_ENTRY_ITERATE(root, private->size - private->hook_entry[hook], @@ -297,6 +297,12 @@ static void trace_packet(struct sk_buff *skb, } #endif +static inline __pure +struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry) +{ + return (void *)entry + entry->next_offset; +} + /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int ipt_do_table(struct sk_buff *skb, @@ -305,6 +311,8 @@ ipt_do_table(struct sk_buff *skb, const struct net_device *out, struct xt_table *table) { +#define tb_comefrom ((struct ipt_entry *)table_base)->comefrom + static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); const struct iphdr *ip; u_int16_t datalen; @@ -335,7 +343,7 @@ ipt_do_table(struct sk_buff *skb, mtpar.in = tgpar.in = in; mtpar.out = tgpar.out = out; mtpar.family = tgpar.family = NFPROTO_IPV4; - tgpar.hooknum = hook; + mtpar.hooknum = tgpar.hooknum = hook; IP_NF_ASSERT(table->valid_hooks & (1 << hook)); xt_info_rdlock_bh(); @@ -348,92 +356,84 @@ ipt_do_table(struct sk_buff *skb, back = get_entry(table_base, private->underflow[hook]); do { + struct ipt_entry_target *t; + IP_NF_ASSERT(e); IP_NF_ASSERT(back); - if (ip_packet_match(ip, indev, outdev, - &e->ip, mtpar.fragoff)) { - struct ipt_entry_target *t; - - if (IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) - goto no_match; + if (!ip_packet_match(ip, indev, outdev, + &e->ip, mtpar.fragoff) || + IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) { + e = ipt_next_entry(e); + continue; + } - ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); + ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); - t = ipt_get_target(e); - IP_NF_ASSERT(t->u.kernel.target); + t = ipt_get_target(e); + IP_NF_ASSERT(t->u.kernel.target); #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) - /* The packet is traced: log it */ - if (unlikely(skb->nf_trace)) - trace_packet(skb, hook, in, out, - table->name, private, e); + /* The packet is traced: log it */ + if (unlikely(skb->nf_trace)) + trace_packet(skb, hook, in, out, + table->name, private, e); #endif - /* Standard target? */ - if (!t->u.kernel.target->target) { - int v; - - v = ((struct ipt_standard_target *)t)->verdict; - if (v < 0) { - /* Pop from stack? */ - if (v != IPT_RETURN) { - verdict = (unsigned)(-v) - 1; - break; - } - e = back; - back = get_entry(table_base, - back->comefrom); - continue; - } - if (table_base + v != (void *)e + e->next_offset - && !(e->ip.flags & IPT_F_GOTO)) { - /* Save old back ptr in next entry */ - struct ipt_entry *next - = (void *)e + e->next_offset; - next->comefrom - = (void *)back - table_base; - /* set back pointer to next entry */ - back = next; + /* Standard target? */ + if (!t->u.kernel.target->target) { + int v; + + v = ((struct ipt_standard_target *)t)->verdict; + if (v < 0) { + /* Pop from stack? */ + if (v != IPT_RETURN) { + verdict = (unsigned)(-v) - 1; + break; } + e = back; + back = get_entry(table_base, back->comefrom); + continue; + } + if (table_base + v != ipt_next_entry(e) + && !(e->ip.flags & IPT_F_GOTO)) { + /* Save old back ptr in next entry */ + struct ipt_entry *next = ipt_next_entry(e); + next->comefrom = (void *)back - table_base; + /* set back pointer to next entry */ + back = next; + } + + e = get_entry(table_base, v); + continue; + } + + /* Targets which reenter must return + abs. verdicts */ + tgpar.target = t->u.kernel.target; + tgpar.targinfo = t->data; + - e = get_entry(table_base, v); - } else { - /* Targets which reenter must return - abs. verdicts */ - tgpar.target = t->u.kernel.target; - tgpar.targinfo = t->data; #ifdef CONFIG_NETFILTER_DEBUG - ((struct ipt_entry *)table_base)->comefrom - = 0xeeeeeeec; + tb_comefrom = 0xeeeeeeec; #endif - verdict = t->u.kernel.target->target(skb, - &tgpar); + verdict = t->u.kernel.target->target(skb, &tgpar); #ifdef CONFIG_NETFILTER_DEBUG - if (((struct ipt_entry *)table_base)->comefrom - != 0xeeeeeeec - && verdict == IPT_CONTINUE) { - printk("Target %s reentered!\n", - t->u.kernel.target->name); - verdict = NF_DROP; - } - ((struct ipt_entry *)table_base)->comefrom - = 0x57acc001; + if (comefrom != 0xeeeeeeec && verdict == IPT_CONTINUE) { + printk("Target %s reentered!\n", + t->u.kernel.target->name); + verdict = NF_DROP; + } + tb_comefrom = 0x57acc001; #endif - /* Target might have changed stuff. */ - ip = ip_hdr(skb); - datalen = skb->len - ip->ihl * 4; - - if (verdict == IPT_CONTINUE) - e = (void *)e + e->next_offset; - else - /* Verdict */ - break; - } - } else { + /* Target might have changed stuff. */ + ip = ip_hdr(skb); + datalen = skb->len - ip->ihl * 4; - no_match: - e = (void *)e + e->next_offset; - } + if (verdict == IPT_CONTINUE) + e = ipt_next_entry(e); + else + /* Verdict */ + break; } while (!hotdrop); xt_info_rdunlock_bh(); @@ -444,6 +444,8 @@ ipt_do_table(struct sk_buff *skb, return NF_DROP; else return verdict; #endif + +#undef tb_comefrom } /* Figures out from what hook each rule can be called: returns 0 if @@ -2158,7 +2160,7 @@ static bool icmp_checkentry(const struct xt_mtchk_param *par) static struct xt_target ipt_standard_target __read_mostly = { .name = IPT_STANDARD_TARGET, .targetsize = sizeof(int), - .family = AF_INET, + .family = NFPROTO_IPV4, #ifdef CONFIG_COMPAT .compatsize = sizeof(compat_int_t), .compat_from_user = compat_standard_from_user, @@ -2170,7 +2172,7 @@ static struct xt_target ipt_error_target __read_mostly = { .name = IPT_ERROR_TARGET, .target = ipt_error, .targetsize = IPT_FUNCTION_MAXNAMELEN, - .family = AF_INET, + .family = NFPROTO_IPV4, }; static struct nf_sockopt_ops ipt_sockopts = { @@ -2196,17 +2198,17 @@ static struct xt_match icmp_matchstruct __read_mostly = { .matchsize = sizeof(struct ipt_icmp), .checkentry = icmp_checkentry, .proto = IPPROTO_ICMP, - .family = AF_INET, + .family = NFPROTO_IPV4, }; static int __net_init ip_tables_net_init(struct net *net) { - return xt_proto_init(net, AF_INET); + return xt_proto_init(net, NFPROTO_IPV4); } static void __net_exit ip_tables_net_exit(struct net *net) { - xt_proto_fini(net, AF_INET); + xt_proto_fini(net, NFPROTO_IPV4); } static struct pernet_operations ip_tables_net_ops = { diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index c0992c75bdac..dada0863946d 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -27,9 +27,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("Xtables: automatic-address SNAT"); -/* Lock protects masq region inside conntrack */ -static DEFINE_RWLOCK(masq_lock); - /* FIXME: Multiple targets. --RR */ static bool masquerade_tg_check(const struct xt_tgchk_param *par) { @@ -79,9 +76,7 @@ masquerade_tg(struct sk_buff *skb, const struct xt_target_param *par) return NF_DROP; } - write_lock_bh(&masq_lock); nat->masq_index = par->out->ifindex; - write_unlock_bh(&masq_lock); /* Transfer from original range. */ newrange = ((struct nf_nat_range) @@ -97,16 +92,11 @@ static int device_cmp(struct nf_conn *i, void *ifindex) { const struct nf_conn_nat *nat = nfct_nat(i); - int ret; if (!nat) return 0; - read_lock_bh(&masq_lock); - ret = (nat->masq_index == (int)(long)ifindex); - read_unlock_bh(&masq_lock); - - return ret; + return nat->masq_index == (int)(long)ifindex; } static int masq_device_event(struct notifier_block *this, diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 23b2c2ee869a..d71ba7677344 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -82,18 +82,10 @@ static int icmp_packet(struct nf_conn *ct, u_int8_t pf, unsigned int hooknum) { - /* Try to delete connection immediately after all replies: - won't actually vanish as we still have skb, and del_timer - means this will only run once even if count hits zero twice - (theoretically possible with SMP) */ - if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) { - if (atomic_dec_and_test(&ct->proto.icmp.count)) - nf_ct_kill_acct(ct, ctinfo, skb); - } else { - atomic_inc(&ct->proto.icmp.count); - nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct); - nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout); - } + /* Do not immediately delete the connection after the first + successful reply to avoid excessive conntrackd traffic + and also to handle correctly ICMP echo reply duplicates. */ + nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout); return NF_ACCEPT; } @@ -117,7 +109,6 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple); return false; } - atomic_set(&ct->proto.icmp.count, 0); return true; } diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index b693f841aeb4..1cf3f0c6a959 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -598,7 +598,7 @@ static int __init ip6_queue_init(void) #ifdef CONFIG_SYSCTL ipq_sysctl_header = register_sysctl_paths(net_ipv6_ctl_path, ipq_table); #endif - status = nf_register_queue_handler(PF_INET6, &nfqh); + status = nf_register_queue_handler(NFPROTO_IPV6, &nfqh); if (status < 0) { printk(KERN_ERR "ip6_queue: failed to register queue handler\n"); goto cleanup_sysctl; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 219e165aea10..ced1f2c0cb65 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -270,8 +270,8 @@ static struct nf_loginfo trace_loginfo = { /* Mildly perf critical (only if packet tracing is on) */ static inline int get_chainname_rulenum(struct ip6t_entry *s, struct ip6t_entry *e, - char *hookname, char **chainname, - char **comment, unsigned int *rulenum) + const char *hookname, const char **chainname, + const char **comment, unsigned int *rulenum) { struct ip6t_standard_target *t = (void *)ip6t_get_target(s); @@ -289,8 +289,8 @@ get_chainname_rulenum(struct ip6t_entry *s, struct ip6t_entry *e, && unconditional(&s->ipv6)) { /* Tail of chains: STANDARD target (return/policy) */ *comment = *chainname == hookname - ? (char *)comments[NF_IP6_TRACE_COMMENT_POLICY] - : (char *)comments[NF_IP6_TRACE_COMMENT_RETURN]; + ? comments[NF_IP6_TRACE_COMMENT_POLICY] + : comments[NF_IP6_TRACE_COMMENT_RETURN]; } return 1; } else @@ -309,14 +309,14 @@ static void trace_packet(struct sk_buff *skb, { void *table_base; const struct ip6t_entry *root; - char *hookname, *chainname, *comment; + const char *hookname, *chainname, *comment; unsigned int rulenum = 0; - table_base = (void *)private->entries[smp_processor_id()]; + table_base = private->entries[smp_processor_id()]; root = get_entry(table_base, private->hook_entry[hook]); - hookname = chainname = (char *)hooknames[hook]; - comment = (char *)comments[NF_IP6_TRACE_COMMENT_RULE]; + hookname = chainname = hooknames[hook]; + comment = comments[NF_IP6_TRACE_COMMENT_RULE]; IP6T_ENTRY_ITERATE(root, private->size - private->hook_entry[hook], @@ -329,6 +329,12 @@ static void trace_packet(struct sk_buff *skb, } #endif +static inline __pure struct ip6t_entry * +ip6t_next_entry(const struct ip6t_entry *entry) +{ + return (void *)entry + entry->next_offset; +} + /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int ip6t_do_table(struct sk_buff *skb, @@ -337,6 +343,8 @@ ip6t_do_table(struct sk_buff *skb, const struct net_device *out, struct xt_table *table) { +#define tb_comefrom ((struct ip6t_entry *)table_base)->comefrom + static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); bool hotdrop = false; /* Initializing verdict to NF_DROP keeps gcc happy. */ @@ -361,7 +369,7 @@ ip6t_do_table(struct sk_buff *skb, mtpar.in = tgpar.in = in; mtpar.out = tgpar.out = out; mtpar.family = tgpar.family = NFPROTO_IPV6; - tgpar.hooknum = hook; + mtpar.hooknum = tgpar.hooknum = hook; IP_NF_ASSERT(table->valid_hooks & (1 << hook)); @@ -375,96 +383,86 @@ ip6t_do_table(struct sk_buff *skb, back = get_entry(table_base, private->underflow[hook]); do { + struct ip6t_entry_target *t; + IP_NF_ASSERT(e); IP_NF_ASSERT(back); - if (ip6_packet_match(skb, indev, outdev, &e->ipv6, - &mtpar.thoff, &mtpar.fragoff, &hotdrop)) { - struct ip6t_entry_target *t; - - if (IP6T_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) - goto no_match; + if (!ip6_packet_match(skb, indev, outdev, &e->ipv6, + &mtpar.thoff, &mtpar.fragoff, &hotdrop) || + IP6T_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) { + e = ip6t_next_entry(e); + continue; + } - ADD_COUNTER(e->counters, - ntohs(ipv6_hdr(skb)->payload_len) + - sizeof(struct ipv6hdr), 1); + ADD_COUNTER(e->counters, + ntohs(ipv6_hdr(skb)->payload_len) + + sizeof(struct ipv6hdr), 1); - t = ip6t_get_target(e); - IP_NF_ASSERT(t->u.kernel.target); + t = ip6t_get_target(e); + IP_NF_ASSERT(t->u.kernel.target); #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) - /* The packet is traced: log it */ - if (unlikely(skb->nf_trace)) - trace_packet(skb, hook, in, out, - table->name, private, e); + /* The packet is traced: log it */ + if (unlikely(skb->nf_trace)) + trace_packet(skb, hook, in, out, + table->name, private, e); #endif - /* Standard target? */ - if (!t->u.kernel.target->target) { - int v; - - v = ((struct ip6t_standard_target *)t)->verdict; - if (v < 0) { - /* Pop from stack? */ - if (v != IP6T_RETURN) { - verdict = (unsigned)(-v) - 1; - break; - } - e = back; - back = get_entry(table_base, - back->comefrom); - continue; - } - if (table_base + v != (void *)e + e->next_offset - && !(e->ipv6.flags & IP6T_F_GOTO)) { - /* Save old back ptr in next entry */ - struct ip6t_entry *next - = (void *)e + e->next_offset; - next->comefrom - = (void *)back - table_base; - /* set back pointer to next entry */ - back = next; + /* Standard target? */ + if (!t->u.kernel.target->target) { + int v; + + v = ((struct ip6t_standard_target *)t)->verdict; + if (v < 0) { + /* Pop from stack? */ + if (v != IP6T_RETURN) { + verdict = (unsigned)(-v) - 1; + break; } + e = back; + back = get_entry(table_base, back->comefrom); + continue; + } + if (table_base + v != ip6t_next_entry(e) + && !(e->ipv6.flags & IP6T_F_GOTO)) { + /* Save old back ptr in next entry */ + struct ip6t_entry *next = ip6t_next_entry(e); + next->comefrom = (void *)back - table_base; + /* set back pointer to next entry */ + back = next; + } - e = get_entry(table_base, v); - } else { - /* Targets which reenter must return - abs. verdicts */ - tgpar.target = t->u.kernel.target; - tgpar.targinfo = t->data; + e = get_entry(table_base, v); + continue; + } -#ifdef CONFIG_NETFILTER_DEBUG - ((struct ip6t_entry *)table_base)->comefrom - = 0xeeeeeeec; -#endif - verdict = t->u.kernel.target->target(skb, - &tgpar); + /* Targets which reenter must return + abs. verdicts */ + tgpar.target = t->u.kernel.target; + tgpar.targinfo = t->data; #ifdef CONFIG_NETFILTER_DEBUG - if (((struct ip6t_entry *)table_base)->comefrom - != 0xeeeeeeec - && verdict == IP6T_CONTINUE) { - printk("Target %s reentered!\n", - t->u.kernel.target->name); - verdict = NF_DROP; - } - ((struct ip6t_entry *)table_base)->comefrom - = 0x57acc001; + tb_comefrom = 0xeeeeeeec; #endif - if (verdict == IP6T_CONTINUE) - e = (void *)e + e->next_offset; - else - /* Verdict */ - break; - } - } else { + verdict = t->u.kernel.target->target(skb, &tgpar); - no_match: - e = (void *)e + e->next_offset; +#ifdef CONFIG_NETFILTER_DEBUG + if (tb_comefrom != 0xeeeeeeec && verdict == IP6T_CONTINUE) { + printk("Target %s reentered!\n", + t->u.kernel.target->name); + verdict = NF_DROP; } + tb_comefrom = 0x57acc001; +#endif + if (verdict == IP6T_CONTINUE) + e = ip6t_next_entry(e); + else + /* Verdict */ + break; } while (!hotdrop); #ifdef CONFIG_NETFILTER_DEBUG - ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON; + tb_comefrom = NETFILTER_LINK_POISON; #endif xt_info_rdunlock_bh(); @@ -475,6 +473,8 @@ ip6t_do_table(struct sk_buff *skb, return NF_DROP; else return verdict; #endif + +#undef tb_comefrom } /* Figures out from what hook each rule can be called: returns 0 if @@ -2191,7 +2191,7 @@ static bool icmp6_checkentry(const struct xt_mtchk_param *par) static struct xt_target ip6t_standard_target __read_mostly = { .name = IP6T_STANDARD_TARGET, .targetsize = sizeof(int), - .family = AF_INET6, + .family = NFPROTO_IPV6, #ifdef CONFIG_COMPAT .compatsize = sizeof(compat_int_t), .compat_from_user = compat_standard_from_user, @@ -2203,7 +2203,7 @@ static struct xt_target ip6t_error_target __read_mostly = { .name = IP6T_ERROR_TARGET, .target = ip6t_error, .targetsize = IP6T_FUNCTION_MAXNAMELEN, - .family = AF_INET6, + .family = NFPROTO_IPV6, }; static struct nf_sockopt_ops ip6t_sockopts = { @@ -2229,17 +2229,17 @@ static struct xt_match icmp6_matchstruct __read_mostly = { .matchsize = sizeof(struct ip6t_icmp), .checkentry = icmp6_checkentry, .proto = IPPROTO_ICMPV6, - .family = AF_INET6, + .family = NFPROTO_IPV6, }; static int __net_init ip6_tables_net_init(struct net *net) { - return xt_proto_init(net, AF_INET6); + return xt_proto_init(net, NFPROTO_IPV6); } static void __net_exit ip6_tables_net_exit(struct net *net) { - xt_proto_fini(net, AF_INET6); + xt_proto_fini(net, NFPROTO_IPV6); } static struct pernet_operations ip6_tables_net_ops = { diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 9903227bf37c..642dcb127bab 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -95,18 +95,10 @@ static int icmpv6_packet(struct nf_conn *ct, u_int8_t pf, unsigned int hooknum) { - /* Try to delete connection immediately after all replies: - won't actually vanish as we still have skb, and del_timer - means this will only run once even if count hits zero twice - (theoretically possible with SMP) */ - if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) { - if (atomic_dec_and_test(&ct->proto.icmp.count)) - nf_ct_kill_acct(ct, ctinfo, skb); - } else { - atomic_inc(&ct->proto.icmp.count); - nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct); - nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout); - } + /* Do not immediately delete the connection after the first + successful reply to avoid excessive conntrackd traffic + and also to handle correctly ICMP echo reply duplicates. */ + nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout); return NF_ACCEPT; } @@ -132,7 +124,6 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, type + 128); return false; } - atomic_set(&ct->proto.icmp.count, 0); return true; } diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index cb3ad741ebf8..79ba47f042c0 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -917,6 +917,19 @@ config NETFILTER_XT_MATCH_U32 Details and examples are in the kernel module source. +config NETFILTER_XT_MATCH_OSF + tristate '"osf" Passive OS fingerprint match' + depends on NETFILTER_ADVANCED && NETFILTER_NETLINK + help + This option selects the Passive OS Fingerprinting match module + that allows to passively match the remote operating system by + analyzing incoming TCP SYN packets. + + Rules and loading software can be downloaded from + http://www.ioremap.net/projects/osf + + To compile it as a module, choose M here. If unsure, say N. + endif # NETFILTER_XTABLES endmenu diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 6282060fbda9..49f62ee4e9ff 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -77,6 +77,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o obj-$(CONFIG_NETFILTER_XT_MATCH_MARK) += xt_mark.o obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o +obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 8020db6274b8..edf95695e0aa 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -398,11 +398,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) help = nfct_help(ct); if (help && help->helper) nf_conntrack_event_cache(IPCT_HELPER, ct); -#ifdef CONFIG_NF_NAT_NEEDED - if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || - test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_NATINFO, ct); -#endif + nf_conntrack_event_cache(master_ct(ct) ? IPCT_RELATED : IPCT_NEW, ct); return NF_ACCEPT; @@ -523,6 +519,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, return ERR_PTR(-ENOMEM); } + spin_lock_init(&ct->lock); atomic_set(&ct->ct_general.use, 1); ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; @@ -807,8 +804,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, unsigned long extra_jiffies, int do_acct) { - int event = 0; - NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct); NF_CT_ASSERT(skb); @@ -821,7 +816,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, /* If not in hash table, timer will not be active yet */ if (!nf_ct_is_confirmed(ct)) { ct->timeout.expires = extra_jiffies; - event = IPCT_REFRESH; } else { unsigned long newtime = jiffies + extra_jiffies; @@ -832,7 +826,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, && del_timer(&ct->timeout)) { ct->timeout.expires = newtime; add_timer(&ct->timeout); - event = IPCT_REFRESH; } } @@ -849,10 +842,6 @@ acct: } spin_unlock_bh(&nf_conntrack_lock); - - /* must be unlocked when calling event cache */ - if (event) - nf_conntrack_event_cache(event, ct); } EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct); @@ -1001,7 +990,7 @@ struct __nf_ct_flush_report { int report; }; -static int kill_all(struct nf_conn *i, void *data) +static int kill_report(struct nf_conn *i, void *data) { struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data; @@ -1013,6 +1002,11 @@ static int kill_all(struct nf_conn *i, void *data) return 1; } +static int kill_all(struct nf_conn *i, void *data) +{ + return 1; +} + void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size) { if (vmalloced) @@ -1023,15 +1017,15 @@ void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size) } EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); -void nf_conntrack_flush(struct net *net, u32 pid, int report) +void nf_conntrack_flush_report(struct net *net, u32 pid, int report) { struct __nf_ct_flush_report fr = { .pid = pid, .report = report, }; - nf_ct_iterate_cleanup(net, kill_all, &fr); + nf_ct_iterate_cleanup(net, kill_report, &fr); } -EXPORT_SYMBOL_GPL(nf_conntrack_flush); +EXPORT_SYMBOL_GPL(nf_conntrack_flush_report); static void nf_conntrack_cleanup_init_net(void) { @@ -1045,7 +1039,7 @@ static void nf_conntrack_cleanup_net(struct net *net) nf_ct_event_cache_flush(net); nf_conntrack_ecache_fini(net); i_see_dead_people: - nf_conntrack_flush(net, 0, 0); + nf_ct_iterate_cleanup(net, kill_all, NULL); if (atomic_read(&net->ct.count) != 0) { schedule(); goto i_see_dead_people; diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index dee4190209cc..5516b3e64b43 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -16,24 +16,32 @@ #include <linux/stddef.h> #include <linux/err.h> #include <linux/percpu.h> -#include <linux/notifier.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> -ATOMIC_NOTIFIER_HEAD(nf_conntrack_chain); -EXPORT_SYMBOL_GPL(nf_conntrack_chain); +static DEFINE_MUTEX(nf_ct_ecache_mutex); -ATOMIC_NOTIFIER_HEAD(nf_ct_expect_chain); -EXPORT_SYMBOL_GPL(nf_ct_expect_chain); +struct nf_ct_event_notifier *nf_conntrack_event_cb __read_mostly; +EXPORT_SYMBOL_GPL(nf_conntrack_event_cb); + +struct nf_exp_event_notifier *nf_expect_event_cb __read_mostly; +EXPORT_SYMBOL_GPL(nf_expect_event_cb); /* deliver cached events and clear cache entry - must be called with locally * disabled softirqs */ static inline void __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache) { + struct nf_ct_event_notifier *notify; + + rcu_read_lock(); + notify = rcu_dereference(nf_conntrack_event_cb); + if (notify == NULL) + goto out_unlock; + if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct) && ecache->events) { struct nf_ct_event item = { @@ -42,14 +50,15 @@ __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache) .report = 0 }; - atomic_notifier_call_chain(&nf_conntrack_chain, - ecache->events, - &item); + notify->fcn(ecache->events, &item); } ecache->events = 0; nf_ct_put(ecache->ct); ecache->ct = NULL; + +out_unlock: + rcu_read_unlock(); } /* Deliver all cached events for a particular conntrack. This is called @@ -111,26 +120,68 @@ void nf_conntrack_ecache_fini(struct net *net) free_percpu(net->ct.ecache); } -int nf_conntrack_register_notifier(struct notifier_block *nb) +int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new) { - return atomic_notifier_chain_register(&nf_conntrack_chain, nb); + int ret = 0; + struct nf_ct_event_notifier *notify; + + mutex_lock(&nf_ct_ecache_mutex); + notify = rcu_dereference(nf_conntrack_event_cb); + if (notify != NULL) { + ret = -EBUSY; + goto out_unlock; + } + rcu_assign_pointer(nf_conntrack_event_cb, new); + mutex_unlock(&nf_ct_ecache_mutex); + return ret; + +out_unlock: + mutex_unlock(&nf_ct_ecache_mutex); + return ret; } EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier); -int nf_conntrack_unregister_notifier(struct notifier_block *nb) +void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *new) { - return atomic_notifier_chain_unregister(&nf_conntrack_chain, nb); + struct nf_ct_event_notifier *notify; + + mutex_lock(&nf_ct_ecache_mutex); + notify = rcu_dereference(nf_conntrack_event_cb); + BUG_ON(notify != new); + rcu_assign_pointer(nf_conntrack_event_cb, NULL); + mutex_unlock(&nf_ct_ecache_mutex); } EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); -int nf_ct_expect_register_notifier(struct notifier_block *nb) +int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *new) { - return atomic_notifier_chain_register(&nf_ct_expect_chain, nb); + int ret = 0; + struct nf_exp_event_notifier *notify; + + mutex_lock(&nf_ct_ecache_mutex); + notify = rcu_dereference(nf_expect_event_cb); + if (notify != NULL) { + ret = -EBUSY; + goto out_unlock; + } + rcu_assign_pointer(nf_expect_event_cb, new); + mutex_unlock(&nf_ct_ecache_mutex); + return ret; + +out_unlock: + mutex_unlock(&nf_ct_ecache_mutex); + return ret; } EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier); -int nf_ct_expect_unregister_notifier(struct notifier_block *nb) +void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *new) { - return atomic_notifier_chain_unregister(&nf_ct_expect_chain, nb); + struct nf_exp_event_notifier *notify; + + mutex_lock(&nf_ct_ecache_mutex); + notify = rcu_dereference(nf_expect_event_cb); + BUG_ON(notify != new); + rcu_assign_pointer(nf_expect_event_cb, NULL); + mutex_unlock(&nf_ct_ecache_mutex); } EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 00fecc385f9b..5509dd1f14cf 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -338,11 +338,9 @@ static void update_nl_seq(struct nf_conn *ct, u32 nl_seq, if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) { info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq; - nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct); } else if (oldest != NUM_SEQ_TO_REMEMBER && after(nl_seq, info->seq_aft_nl[dir][oldest])) { info->seq_aft_nl[dir][oldest] = nl_seq; - nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct); } } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index c523f0b8cee5..4e503ada5728 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -27,7 +27,6 @@ #include <linux/netlink.h> #include <linux/spinlock.h> #include <linux/interrupt.h> -#include <linux/notifier.h> #include <linux/netfilter.h> #include <net/netlink.h> @@ -144,7 +143,7 @@ nla_put_failure: } static inline int -ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct nf_conn *ct) +ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct) { struct nf_conntrack_l4proto *l4proto; struct nlattr *nest_proto; @@ -346,23 +345,21 @@ nla_put_failure: return -1; } -#define tuple(ct, dir) (&(ct)->tuplehash[dir].tuple) - static int ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, - int event, int nowait, - const struct nf_conn *ct) + int event, struct nf_conn *ct) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; struct nlattr *nest_parms; - unsigned char *b = skb_tail_pointer(skb); + unsigned int flags = pid ? NLM_F_MULTI : 0; event |= NFNL_SUBSYS_CTNETLINK << 8; - nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg)); - nfmsg = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + if (nlh == NULL) + goto nlmsg_failure; - nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0; + nfmsg = nlmsg_data(nlh); nfmsg->nfgen_family = nf_ct_l3num(ct); nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 0; @@ -370,14 +367,14 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; - if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) + if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) goto nla_put_failure; nla_nest_end(skb, nest_parms); nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; - if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0) + if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0) goto nla_put_failure; nla_nest_end(skb, nest_parms); @@ -395,104 +392,81 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, ctnetlink_dump_nat_seq_adj(skb, ct) < 0) goto nla_put_failure; - nlh->nlmsg_len = skb_tail_pointer(skb) - b; + nlmsg_end(skb, nlh); return skb->len; nlmsg_failure: nla_put_failure: - nlmsg_trim(skb, b); + nlmsg_cancel(skb, nlh); return -1; } #ifdef CONFIG_NF_CONNTRACK_EVENTS -/* - * The general structure of a ctnetlink event is - * - * CTA_TUPLE_ORIG - * <l3/l4-proto-attributes> - * CTA_TUPLE_REPLY - * <l3/l4-proto-attributes> - * CTA_ID - * ... - * CTA_PROTOINFO - * <l4-proto-attributes> - * CTA_TUPLE_MASTER - * <l3/l4-proto-attributes> - * - * Therefore the formular is - * - * size = sizeof(headers) + sizeof(generic_nlas) + 3 * sizeof(tuple_nlas) - * + sizeof(protoinfo_nlas) - */ -static struct sk_buff * -ctnetlink_alloc_skb(const struct nf_conntrack_tuple *tuple, gfp_t gfp) +static inline size_t +ctnetlink_proto_size(const struct nf_conn *ct) { struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l4proto *l4proto; - int len; - -#define NLA_TYPE_SIZE(type) nla_total_size(sizeof(type)) - - /* proto independant part */ - len = NLMSG_SPACE(sizeof(struct nfgenmsg)) - + 3 * nla_total_size(0) /* CTA_TUPLE_ORIG|REPL|MASTER */ - + 3 * nla_total_size(0) /* CTA_TUPLE_IP */ - + 3 * nla_total_size(0) /* CTA_TUPLE_PROTO */ - + 3 * NLA_TYPE_SIZE(u_int8_t) /* CTA_PROTO_NUM */ - + NLA_TYPE_SIZE(u_int32_t) /* CTA_ID */ - + NLA_TYPE_SIZE(u_int32_t) /* CTA_STATUS */ + size_t len = 0; + + rcu_read_lock(); + l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); + len += l3proto->nla_size; + + l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); + len += l4proto->nla_size; + rcu_read_unlock(); + + return len; +} + +static inline size_t +ctnetlink_nlmsg_size(const struct nf_conn *ct) +{ + return NLMSG_ALIGN(sizeof(struct nfgenmsg)) + + 3 * nla_total_size(0) /* CTA_TUPLE_ORIG|REPL|MASTER */ + + 3 * nla_total_size(0) /* CTA_TUPLE_IP */ + + 3 * nla_total_size(0) /* CTA_TUPLE_PROTO */ + + 3 * nla_total_size(sizeof(u_int8_t)) /* CTA_PROTO_NUM */ + + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */ + + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */ #ifdef CONFIG_NF_CT_ACCT - + 2 * nla_total_size(0) /* CTA_COUNTERS_ORIG|REPL */ - + 2 * NLA_TYPE_SIZE(uint64_t) /* CTA_COUNTERS_PACKETS */ - + 2 * NLA_TYPE_SIZE(uint64_t) /* CTA_COUNTERS_BYTES */ + + 2 * nla_total_size(0) /* CTA_COUNTERS_ORIG|REPL */ + + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_PACKETS */ + + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_BYTES */ #endif - + NLA_TYPE_SIZE(u_int32_t) /* CTA_TIMEOUT */ - + nla_total_size(0) /* CTA_PROTOINFO */ - + nla_total_size(0) /* CTA_HELP */ - + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */ + + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */ + + nla_total_size(0) /* CTA_PROTOINFO */ + + nla_total_size(0) /* CTA_HELP */ + + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */ #ifdef CONFIG_NF_CONNTRACK_SECMARK - + NLA_TYPE_SIZE(u_int32_t) /* CTA_SECMARK */ + + nla_total_size(sizeof(u_int32_t)) /* CTA_SECMARK */ #endif #ifdef CONFIG_NF_NAT_NEEDED - + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */ - + 2 * NLA_TYPE_SIZE(u_int32_t) /* CTA_NAT_SEQ_CORRECTION_POS */ - + 2 * NLA_TYPE_SIZE(u_int32_t) /* CTA_NAT_SEQ_CORRECTION_BEFORE */ - + 2 * NLA_TYPE_SIZE(u_int32_t) /* CTA_NAT_SEQ_CORRECTION_AFTER */ + + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */ + + 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */ #endif #ifdef CONFIG_NF_CONNTRACK_MARK - + NLA_TYPE_SIZE(u_int32_t) /* CTA_MARK */ + + nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */ #endif - ; - -#undef NLA_TYPE_SIZE - - rcu_read_lock(); - l3proto = __nf_ct_l3proto_find(tuple->src.l3num); - len += l3proto->nla_size; - - l4proto = __nf_ct_l4proto_find(tuple->src.l3num, tuple->dst.protonum); - len += l4proto->nla_size; - rcu_read_unlock(); - - return alloc_skb(len, gfp); + + ctnetlink_proto_size(ct) + ; } -static int ctnetlink_conntrack_event(struct notifier_block *this, - unsigned long events, void *ptr) +static int +ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; struct nlattr *nest_parms; - struct nf_ct_event *item = (struct nf_ct_event *)ptr; struct nf_conn *ct = item->ct; struct sk_buff *skb; unsigned int type; - sk_buff_data_t b; unsigned int flags = 0, group; /* ignore our fake conntrack entry */ if (ct == &nf_conntrack_untracked) - return NOTIFY_DONE; + return 0; if (events & IPCT_DESTROY) { type = IPCTNL_MSG_CT_DELETE; @@ -501,26 +475,25 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, type = IPCTNL_MSG_CT_NEW; flags = NLM_F_CREATE|NLM_F_EXCL; group = NFNLGRP_CONNTRACK_NEW; - } else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) { + } else if (events) { type = IPCTNL_MSG_CT_NEW; group = NFNLGRP_CONNTRACK_UPDATE; } else - return NOTIFY_DONE; + return 0; if (!item->report && !nfnetlink_has_listeners(group)) - return NOTIFY_DONE; + return 0; - skb = ctnetlink_alloc_skb(tuple(ct, IP_CT_DIR_ORIGINAL), GFP_ATOMIC); - if (!skb) + skb = nlmsg_new(ctnetlink_nlmsg_size(ct), GFP_ATOMIC); + if (skb == NULL) goto errout; - b = skb->tail; - type |= NFNL_SUBSYS_CTNETLINK << 8; - nlh = NLMSG_PUT(skb, item->pid, 0, type, sizeof(struct nfgenmsg)); - nfmsg = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, item->pid, 0, type, sizeof(*nfmsg), flags); + if (nlh == NULL) + goto nlmsg_failure; - nlh->nlmsg_flags = flags; + nfmsg = nlmsg_data(nlh); nfmsg->nfgen_family = nf_ct_l3num(ct); nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 0; @@ -529,14 +502,14 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; - if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) + if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) goto nla_put_failure; nla_nest_end(skb, nest_parms); nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; - if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0) + if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0) goto nla_put_failure; nla_nest_end(skb, nest_parms); @@ -584,17 +557,18 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, #endif rcu_read_unlock(); - nlh->nlmsg_len = skb->tail - b; - nfnetlink_send(skb, item->pid, group, item->report); - return NOTIFY_DONE; + nlmsg_end(skb, nlh); + nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC); + return 0; nla_put_failure: rcu_read_unlock(); + nlmsg_cancel(skb, nlh); nlmsg_failure: kfree_skb(skb); errout: nfnetlink_set_err(0, group, -ENOBUFS); - return NOTIFY_DONE; + return 0; } #endif /* CONFIG_NF_CONNTRACK_EVENTS */ @@ -611,7 +585,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) struct nf_conn *ct, *last; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh); + struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); u_int8_t l3proto = nfmsg->nfgen_family; rcu_read_lock(); @@ -637,8 +611,7 @@ restart: } if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, - IPCTNL_MSG_CT_NEW, - 1, ct) < 0) { + IPCTNL_MSG_CT_NEW, ct) < 0) { cb->args[1] = (unsigned long)ct; goto out; } @@ -792,7 +765,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; struct nf_conn *ct; - struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; int err = 0; @@ -802,9 +775,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); else { /* Flush the whole table */ - nf_conntrack_flush(&init_net, - NETLINK_CB(skb).pid, - nlmsg_report(nlh)); + nf_conntrack_flush_report(&init_net, + NETLINK_CB(skb).pid, + nlmsg_report(nlh)); return 0; } @@ -847,7 +820,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, struct nf_conntrack_tuple tuple; struct nf_conn *ct; struct sk_buff *skb2 = NULL; - struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; int err = 0; @@ -872,15 +845,15 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, ct = nf_ct_tuplehash_to_ctrack(h); err = -ENOMEM; - skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb2) { + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) { nf_ct_put(ct); return -ENOMEM; } rcu_read_lock(); err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, - IPCTNL_MSG_CT_NEW, 1, ct); + IPCTNL_MSG_CT_NEW, ct); rcu_read_unlock(); nf_ct_put(ct); if (err <= 0) @@ -1325,7 +1298,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, { struct nf_conntrack_tuple otuple, rtuple; struct nf_conntrack_tuple_hash *h = NULL; - struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; int err = 0; @@ -1503,19 +1476,18 @@ nla_put_failure: static int ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq, - int event, - int nowait, - const struct nf_conntrack_expect *exp) + int event, const struct nf_conntrack_expect *exp) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned char *b = skb_tail_pointer(skb); + unsigned int flags = pid ? NLM_F_MULTI : 0; event |= NFNL_SUBSYS_CTNETLINK_EXP << 8; - nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg)); - nfmsg = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + if (nlh == NULL) + goto nlmsg_failure; - nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0; + nfmsg = nlmsg_data(nlh); nfmsg->nfgen_family = exp->tuple.src.l3num; nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 0; @@ -1523,49 +1495,46 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq, if (ctnetlink_exp_dump_expect(skb, exp) < 0) goto nla_put_failure; - nlh->nlmsg_len = skb_tail_pointer(skb) - b; + nlmsg_end(skb, nlh); return skb->len; nlmsg_failure: nla_put_failure: - nlmsg_trim(skb, b); + nlmsg_cancel(skb, nlh); return -1; } #ifdef CONFIG_NF_CONNTRACK_EVENTS -static int ctnetlink_expect_event(struct notifier_block *this, - unsigned long events, void *ptr) +static int +ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - struct nf_exp_event *item = (struct nf_exp_event *)ptr; struct nf_conntrack_expect *exp = item->exp; struct sk_buff *skb; unsigned int type; - sk_buff_data_t b; int flags = 0; if (events & IPEXP_NEW) { type = IPCTNL_MSG_EXP_NEW; flags = NLM_F_CREATE|NLM_F_EXCL; } else - return NOTIFY_DONE; + return 0; if (!item->report && !nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW)) - return NOTIFY_DONE; + return 0; - skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); - if (!skb) + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); + if (skb == NULL) goto errout; - b = skb->tail; - type |= NFNL_SUBSYS_CTNETLINK_EXP << 8; - nlh = NLMSG_PUT(skb, item->pid, 0, type, sizeof(struct nfgenmsg)); - nfmsg = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, item->pid, 0, type, sizeof(*nfmsg), flags); + if (nlh == NULL) + goto nlmsg_failure; - nlh->nlmsg_flags = flags; + nfmsg = nlmsg_data(nlh); nfmsg->nfgen_family = exp->tuple.src.l3num; nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 0; @@ -1575,17 +1544,19 @@ static int ctnetlink_expect_event(struct notifier_block *this, goto nla_put_failure; rcu_read_unlock(); - nlh->nlmsg_len = skb->tail - b; - nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, item->report); - return NOTIFY_DONE; + nlmsg_end(skb, nlh); + nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, + item->report, GFP_ATOMIC); + return 0; nla_put_failure: rcu_read_unlock(); + nlmsg_cancel(skb, nlh); nlmsg_failure: kfree_skb(skb); errout: nfnetlink_set_err(0, 0, -ENOBUFS); - return NOTIFY_DONE; + return 0; } #endif static int ctnetlink_exp_done(struct netlink_callback *cb) @@ -1600,7 +1571,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = &init_net; struct nf_conntrack_expect *exp, *last; - struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh); + struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); struct hlist_node *n; u_int8_t l3proto = nfmsg->nfgen_family; @@ -1617,10 +1588,11 @@ restart: continue; cb->args[1] = 0; } - if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid, + if (ctnetlink_exp_fill_info(skb, + NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, - 1, exp) < 0) { + exp) < 0) { if (!atomic_inc_not_zero(&exp->use)) continue; cb->args[1] = (unsigned long)exp; @@ -1652,7 +1624,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; struct sk_buff *skb2; - struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; int err = 0; @@ -1683,14 +1655,13 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, } err = -ENOMEM; - skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb2) + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) goto out; rcu_read_lock(); err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid, - nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, - 1, exp); + nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp); rcu_read_unlock(); if (err <= 0) goto free; @@ -1713,7 +1684,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, struct nf_conntrack_expect *exp; struct nf_conntrack_tuple tuple; struct nf_conntrack_helper *h; - struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct hlist_node *n, *next; u_int8_t u3 = nfmsg->nfgen_family; unsigned int i; @@ -1854,7 +1825,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, { struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; - struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; int err = 0; @@ -1891,12 +1862,12 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, } #ifdef CONFIG_NF_CONNTRACK_EVENTS -static struct notifier_block ctnl_notifier = { - .notifier_call = ctnetlink_conntrack_event, +static struct nf_ct_event_notifier ctnl_notifier = { + .fcn = ctnetlink_conntrack_event, }; -static struct notifier_block ctnl_notifier_exp = { - .notifier_call = ctnetlink_expect_event, +static struct nf_exp_event_notifier ctnl_notifier_exp = { + .fcn = ctnetlink_expect_event, }; #endif diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index aee0d6bea309..1b816a2ea813 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -25,8 +25,6 @@ #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_log.h> -static DEFINE_RWLOCK(dccp_lock); - /* Timeouts are based on values from RFC4340: * * - REQUEST: @@ -492,7 +490,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, return NF_ACCEPT; } - write_lock_bh(&dccp_lock); + spin_lock_bh(&ct->lock); role = ct->proto.dccp.role[dir]; old_state = ct->proto.dccp.state; @@ -536,13 +534,13 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, ct->proto.dccp.last_dir = dir; ct->proto.dccp.last_pkt = type; - write_unlock_bh(&dccp_lock); + spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid packet ignored "); return NF_ACCEPT; case CT_DCCP_INVALID: - write_unlock_bh(&dccp_lock); + spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid state transition "); @@ -552,7 +550,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, ct->proto.dccp.last_dir = dir; ct->proto.dccp.last_pkt = type; ct->proto.dccp.state = new_state; - write_unlock_bh(&dccp_lock); + spin_unlock_bh(&ct->lock); if (new_state != old_state) nf_conntrack_event_cache(IPCT_PROTOINFO, ct); @@ -621,36 +619,39 @@ static int dccp_print_tuple(struct seq_file *s, ntohs(tuple->dst.u.dccp.port)); } -static int dccp_print_conntrack(struct seq_file *s, const struct nf_conn *ct) +static int dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct) { return seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]); } #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, - const struct nf_conn *ct) + struct nf_conn *ct) { struct nlattr *nest_parms; - read_lock_bh(&dccp_lock); + spin_lock_bh(&ct->lock); nest_parms = nla_nest_start(skb, CTA_PROTOINFO_DCCP | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state); NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_ROLE, ct->proto.dccp.role[IP_CT_DIR_ORIGINAL]); + NLA_PUT_BE64(skb, CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ, + cpu_to_be64(ct->proto.dccp.handshake_seq)); nla_nest_end(skb, nest_parms); - read_unlock_bh(&dccp_lock); + spin_unlock_bh(&ct->lock); return 0; nla_put_failure: - read_unlock_bh(&dccp_lock); + spin_unlock_bh(&ct->lock); return -1; } static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = { [CTA_PROTOINFO_DCCP_STATE] = { .type = NLA_U8 }, [CTA_PROTOINFO_DCCP_ROLE] = { .type = NLA_U8 }, + [CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ] = { .type = NLA_U64 }, }; static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) @@ -674,7 +675,7 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) return -EINVAL; } - write_lock_bh(&dccp_lock); + spin_lock_bh(&ct->lock); ct->proto.dccp.state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]); if (nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) == CT_DCCP_ROLE_CLIENT) { ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; @@ -683,7 +684,11 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_SERVER; ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_CLIENT; } - write_unlock_bh(&dccp_lock); + if (tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ]) { + ct->proto.dccp.handshake_seq = + be64_to_cpu(nla_get_be64(tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ])); + } + spin_unlock_bh(&ct->lock); return 0; } diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index a6d6ec320fbc..a54a0af0edba 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -219,8 +219,7 @@ static int gre_print_tuple(struct seq_file *s, } /* print private data for conntrack */ -static int gre_print_conntrack(struct seq_file *s, - const struct nf_conn *ct) +static int gre_print_conntrack(struct seq_file *s, struct nf_conn *ct) { return seq_printf(s, "timeout=%u, stream_timeout=%u ", (ct->proto.gre.timeout / HZ), diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 101b4ad9e817..c10e6f36e31e 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -25,9 +25,6 @@ #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_ecache.h> -/* Protects ct->proto.sctp */ -static DEFINE_RWLOCK(sctp_lock); - /* FIXME: Examine ipfilter's timeouts and conntrack transitions more closely. They're more complex. --RR @@ -164,13 +161,13 @@ static int sctp_print_tuple(struct seq_file *s, } /* Print out the private part of the conntrack. */ -static int sctp_print_conntrack(struct seq_file *s, const struct nf_conn *ct) +static int sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct) { enum sctp_conntrack state; - read_lock_bh(&sctp_lock); + spin_lock_bh(&ct->lock); state = ct->proto.sctp.state; - read_unlock_bh(&sctp_lock); + spin_unlock_bh(&ct->lock); return seq_printf(s, "%s ", sctp_conntrack_names[state]); } @@ -318,7 +315,7 @@ static int sctp_packet(struct nf_conn *ct, } old_state = new_state = SCTP_CONNTRACK_NONE; - write_lock_bh(&sctp_lock); + spin_lock_bh(&ct->lock); for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { /* Special cases of Verification tag check (Sec 8.5.1) */ if (sch->type == SCTP_CID_INIT) { @@ -371,7 +368,7 @@ static int sctp_packet(struct nf_conn *ct, if (old_state != new_state) nf_conntrack_event_cache(IPCT_PROTOINFO, ct); } - write_unlock_bh(&sctp_lock); + spin_unlock_bh(&ct->lock); nf_ct_refresh_acct(ct, ctinfo, skb, sctp_timeouts[new_state]); @@ -386,7 +383,7 @@ static int sctp_packet(struct nf_conn *ct, return NF_ACCEPT; out_unlock: - write_unlock_bh(&sctp_lock); + spin_unlock_bh(&ct->lock); out: return -NF_ACCEPT; } @@ -469,11 +466,11 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb, #include <linux/netfilter/nfnetlink_conntrack.h> static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, - const struct nf_conn *ct) + struct nf_conn *ct) { struct nlattr *nest_parms; - read_lock_bh(&sctp_lock); + spin_lock_bh(&ct->lock); nest_parms = nla_nest_start(skb, CTA_PROTOINFO_SCTP | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; @@ -488,14 +485,14 @@ static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, CTA_PROTOINFO_SCTP_VTAG_REPLY, ct->proto.sctp.vtag[IP_CT_DIR_REPLY]); - read_unlock_bh(&sctp_lock); + spin_unlock_bh(&ct->lock); nla_nest_end(skb, nest_parms); return 0; nla_put_failure: - read_unlock_bh(&sctp_lock); + spin_unlock_bh(&ct->lock); return -1; } @@ -527,13 +524,13 @@ static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct) !tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]) return -EINVAL; - write_lock_bh(&sctp_lock); + spin_lock_bh(&ct->lock); ct->proto.sctp.state = nla_get_u8(tb[CTA_PROTOINFO_SCTP_STATE]); ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL]); ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]); - write_unlock_bh(&sctp_lock); + spin_unlock_bh(&ct->lock); return 0; } diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 97a6e93d742e..5142e60af540 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -29,9 +29,6 @@ #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> -/* Protects ct->proto.tcp */ -static DEFINE_RWLOCK(tcp_lock); - /* "Be conservative in what you do, be liberal in what you accept from others." If it's non-zero, we mark only out of window RST segments as INVALID. */ @@ -59,7 +56,7 @@ static const char *const tcp_conntrack_names[] = { "LAST_ACK", "TIME_WAIT", "CLOSE", - "LISTEN" + "SYN_SENT2", }; #define SECS * HZ @@ -82,6 +79,7 @@ static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = { [TCP_CONNTRACK_LAST_ACK] = 30 SECS, [TCP_CONNTRACK_TIME_WAIT] = 2 MINS, [TCP_CONNTRACK_CLOSE] = 10 SECS, + [TCP_CONNTRACK_SYN_SENT2] = 2 MINS, }; #define sNO TCP_CONNTRACK_NONE @@ -93,7 +91,7 @@ static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = { #define sLA TCP_CONNTRACK_LAST_ACK #define sTW TCP_CONNTRACK_TIME_WAIT #define sCL TCP_CONNTRACK_CLOSE -#define sLI TCP_CONNTRACK_LISTEN +#define sS2 TCP_CONNTRACK_SYN_SENT2 #define sIV TCP_CONNTRACK_MAX #define sIG TCP_CONNTRACK_IGNORE @@ -123,6 +121,7 @@ enum tcp_bit_set { * * NONE: initial state * SYN_SENT: SYN-only packet seen + * SYN_SENT2: SYN-only packet seen from reply dir, simultaneous open * SYN_RECV: SYN-ACK packet seen * ESTABLISHED: ACK packet seen * FIN_WAIT: FIN packet seen @@ -131,26 +130,24 @@ enum tcp_bit_set { * TIME_WAIT: last ACK seen * CLOSE: closed connection (RST) * - * LISTEN state is not used. - * * Packets marked as IGNORED (sIG): * if they may be either invalid or valid * and the receiver may send back a connection * closing RST or a SYN/ACK. * * Packets marked as INVALID (sIV): - * if they are invalid - * or we do not support the request (simultaneous open) + * if we regard them as truly invalid packets */ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { { /* ORIGINAL */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV }, +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ +/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 }, /* * sNO -> sSS Initialize a new connection * sSS -> sSS Retransmitted SYN - * sSR -> sIG Late retransmitted SYN? + * sS2 -> sS2 Late retransmitted SYN + * sSR -> sIG * sES -> sIG Error: SYNs in window outside the SYN_SENT state * are errors. Receiver will reply with RST * and close the connection. @@ -161,22 +158,30 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sTW -> sSS Reopened connection (RFC 1122). * sCL -> sSS */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }, +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ +/*synack*/ { sIV, sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR }, /* - * A SYN/ACK from the client is always invalid: - * - either it tries to set up a simultaneous open, which is - * not supported; - * - or the firewall has just been inserted between the two hosts - * during the session set-up. The SYN will be retransmitted - * by the true client (or it'll time out). + * sNO -> sIV Too late and no reason to do anything + * sSS -> sIV Client can't send SYN and then SYN/ACK + * sS2 -> sSR SYN/ACK sent to SYN2 in simultaneous open + * sSR -> sIG + * sES -> sIG Error: SYNs in window outside the SYN_SENT state + * are errors. Receiver will reply with RST + * and close the connection. + * Or we are not in sync and hold a dead connection. + * sFW -> sIG + * sCW -> sIG + * sLA -> sIG + * sTW -> sIG + * sCL -> sIG */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, /* * sNO -> sIV Too late and no reason to do anything... * sSS -> sIV Client migth not send FIN in this state: * we enforce waiting for a SYN/ACK reply first. + * sS2 -> sIV * sSR -> sFW Close started. * sES -> sFW * sFW -> sLA FIN seen in both directions, waiting for @@ -187,11 +192,12 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sTW -> sTW * sCL -> sCL */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ /*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV }, /* * sNO -> sES Assumed. * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet. + * sS2 -> sIV * sSR -> sES Established state is reached. * sES -> sES :-) * sFW -> sCW Normal close request answered by ACK. @@ -200,29 +206,31 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sTW -> sTW Retransmitted last ACK. Remain in the same state. * sCL -> sCL */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV }, +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ +/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL }, /*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } }, { /* REPLY */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*syn*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }, +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ +/*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sS2 }, /* * sNO -> sIV Never reached. - * sSS -> sIV Simultaneous open, not supported - * sSR -> sIV Simultaneous open, not supported. - * sES -> sIV Server may not initiate a connection. + * sSS -> sS2 Simultaneous open + * sS2 -> sS2 Retransmitted simultaneous SYN + * sSR -> sIV Invalid SYN packets sent by the server + * sES -> sIV * sFW -> sIV * sCW -> sIV * sLA -> sIV * sTW -> sIV Reopened connection, but server may not do it. * sCL -> sIV */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV }, +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ +/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sSR }, /* * sSS -> sSR Standard open. + * sS2 -> sSR Simultaneous open * sSR -> sSR Retransmitted SYN/ACK. * sES -> sIG Late retransmitted SYN/ACK? * sFW -> sIG Might be SYN/ACK answering ignored SYN @@ -231,10 +239,11 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sTW -> sIG * sCL -> sIG */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, /* * sSS -> sIV Server might not send FIN in this state. + * sS2 -> sIV * sSR -> sFW Close started. * sES -> sFW * sFW -> sLA FIN seen in both directions. @@ -243,10 +252,11 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sTW -> sTW * sCL -> sCL */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV }, +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ +/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG }, /* * sSS -> sIG Might be a half-open connection. + * sS2 -> sIG * sSR -> sSR Might answer late resent SYN. * sES -> sES :-) * sFW -> sCW Normal close request answered by ACK. @@ -255,8 +265,8 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sTW -> sTW Retransmitted last ACK. * sCL -> sCL */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV }, +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ +/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL }, /*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } } }; @@ -296,13 +306,13 @@ static int tcp_print_tuple(struct seq_file *s, } /* Print out the private part of the conntrack. */ -static int tcp_print_conntrack(struct seq_file *s, const struct nf_conn *ct) +static int tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct) { enum tcp_conntrack state; - read_lock_bh(&tcp_lock); + spin_lock_bh(&ct->lock); state = ct->proto.tcp.state; - read_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); return seq_printf(s, "%s ", tcp_conntrack_names[state]); } @@ -521,13 +531,14 @@ static bool tcp_in_window(const struct nf_conn *ct, receiver->td_end, receiver->td_maxend, receiver->td_maxwin, receiver->td_scale); - if (sender->td_end == 0) { + if (sender->td_maxwin == 0) { /* * Initialize sender data. */ - if (tcph->syn && tcph->ack) { + if (tcph->syn) { /* - * Outgoing SYN-ACK in reply to a SYN. + * SYN-ACK in reply to a SYN + * or SYN from reply direction in simultaneous open. */ sender->td_end = sender->td_maxend = end; @@ -543,6 +554,9 @@ static bool tcp_in_window(const struct nf_conn *ct, && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) sender->td_scale = receiver->td_scale = 0; + if (!tcph->ack) + /* Simultaneous open */ + return true; } else { /* * We are in the middle of a connection, @@ -716,14 +730,14 @@ void nf_conntrack_tcp_update(const struct sk_buff *skb, end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph); - write_lock_bh(&tcp_lock); + spin_lock_bh(&ct->lock); /* * We have to worry for the ack in the reply packet only... */ if (after(end, ct->proto.tcp.seen[dir].td_end)) ct->proto.tcp.seen[dir].td_end = end; ct->proto.tcp.last_end = end; - write_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); pr_debug("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i " "receiver end=%u maxend=%u maxwin=%u scale=%i\n", sender->td_end, sender->td_maxend, sender->td_maxwin, @@ -832,7 +846,7 @@ static int tcp_packet(struct nf_conn *ct, th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); BUG_ON(th == NULL); - write_lock_bh(&tcp_lock); + spin_lock_bh(&ct->lock); old_state = ct->proto.tcp.state; dir = CTINFO2DIR(ctinfo); index = get_conntrack_index(th); @@ -862,7 +876,7 @@ static int tcp_packet(struct nf_conn *ct, && ct->proto.tcp.last_index == TCP_RST_SET)) { /* Attempt to reopen a closed/aborted connection. * Delete this connection and look up again. */ - write_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); /* Only repeat if we can actually remove the timer. * Destruction may already be in progress in process @@ -898,7 +912,7 @@ static int tcp_packet(struct nf_conn *ct, * that the client cannot but retransmit its SYN and * thus initiate a clean new session. */ - write_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: killing out of sync session "); @@ -911,7 +925,7 @@ static int tcp_packet(struct nf_conn *ct, ct->proto.tcp.last_end = segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); - write_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid packet ignored "); @@ -920,7 +934,7 @@ static int tcp_packet(struct nf_conn *ct, /* Invalid packet */ pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", dir, get_conntrack_index(th), old_state); - write_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid state "); @@ -961,7 +975,7 @@ static int tcp_packet(struct nf_conn *ct, if (!tcp_in_window(ct, &ct->proto.tcp, dir, index, skb, dataoff, th, pf)) { - write_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); return -NF_ACCEPT; } in_window: @@ -990,9 +1004,8 @@ static int tcp_packet(struct nf_conn *ct, timeout = nf_ct_tcp_timeout_unacknowledged; else timeout = tcp_timeouts[new_state]; - write_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); - nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct); if (new_state != old_state) nf_conntrack_event_cache(IPCT_PROTOINFO, ct); @@ -1086,7 +1099,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, ct->proto.tcp.seen[1].td_end = 0; ct->proto.tcp.seen[1].td_maxend = 0; - ct->proto.tcp.seen[1].td_maxwin = 1; + ct->proto.tcp.seen[1].td_maxwin = 0; ct->proto.tcp.seen[1].td_scale = 0; /* tcp_packet will set them */ @@ -1108,12 +1121,12 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, #include <linux/netfilter/nfnetlink_conntrack.h> static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, - const struct nf_conn *ct) + struct nf_conn *ct) { struct nlattr *nest_parms; struct nf_ct_tcp_flags tmp = {}; - read_lock_bh(&tcp_lock); + spin_lock_bh(&ct->lock); nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; @@ -1133,14 +1146,14 @@ static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, tmp.flags = ct->proto.tcp.seen[1].flags; NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY, sizeof(struct nf_ct_tcp_flags), &tmp); - read_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); nla_nest_end(skb, nest_parms); return 0; nla_put_failure: - read_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); return -1; } @@ -1171,7 +1184,7 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct) nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX) return -EINVAL; - write_lock_bh(&tcp_lock); + spin_lock_bh(&ct->lock); if (tb[CTA_PROTOINFO_TCP_STATE]) ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]); @@ -1198,7 +1211,7 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct) ct->proto.tcp.seen[1].td_scale = nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]); } - write_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); return 0; } @@ -1328,6 +1341,13 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .proc_handler = proc_dointvec_jiffies, }, { + .procname = "ip_conntrack_tcp_timeout_syn_sent2", + .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT2], + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { .procname = "ip_conntrack_tcp_timeout_syn_recv", .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV], .maxlen = sizeof(unsigned int), diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 4f2310c93e01..3a6fd77f7761 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -204,10 +204,10 @@ int nf_queue(struct sk_buff *skb, queuenum); switch (pf) { - case AF_INET: + case NFPROTO_IPV4: skb->protocol = htons(ETH_P_IP); break; - case AF_INET6: + case NFPROTO_IPV6: skb->protocol = htons(ETH_P_IPV6); break; } diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index b8ab37ad7ed5..92761a988375 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -107,9 +107,10 @@ int nfnetlink_has_listeners(unsigned int group) } EXPORT_SYMBOL_GPL(nfnetlink_has_listeners); -int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) +int nfnetlink_send(struct sk_buff *skb, u32 pid, + unsigned group, int echo, gfp_t flags) { - return nlmsg_notify(nfnl, skb, pid, group, echo, gfp_any()); + return nlmsg_notify(nfnl, skb, pid, group, echo, flags); } EXPORT_SYMBOL_GPL(nfnetlink_send); @@ -136,7 +137,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return -EPERM; /* All the messages must at least contain nfgenmsg */ - if (nlh->nlmsg_len < NLMSG_SPACE(sizeof(struct nfgenmsg))) + if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct nfgenmsg))) return 0; type = nlh->nlmsg_type; @@ -160,19 +161,14 @@ replay: { int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); - u_int16_t attr_count = ss->cb[cb_id].attr_count; - struct nlattr *cda[attr_count+1]; - - if (likely(nlh->nlmsg_len >= min_len)) { - struct nlattr *attr = (void *)nlh + NLMSG_ALIGN(min_len); - int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); - - err = nla_parse(cda, attr_count, attr, attrlen, - ss->cb[cb_id].policy); - if (err < 0) - return err; - } else - return -EINVAL; + struct nlattr *cda[ss->cb[cb_id].attr_count + 1]; + struct nlattr *attr = (void *)nlh + min_len; + int attrlen = nlh->nlmsg_len - min_len; + + err = nla_parse(cda, ss->cb[cb_id].attr_count, + attr, attrlen, ss->cb[cb_id].policy); + if (err < 0) + return err; err = nc->call(nfnl, skb, nlh, cda); if (err == -EAGAIN) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 150e5cf62f85..46dba5f043d5 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -329,6 +329,32 @@ int xt_find_revision(u8 af, const char *name, u8 revision, int target, } EXPORT_SYMBOL_GPL(xt_find_revision); +static char *textify_hooks(char *buf, size_t size, unsigned int mask) +{ + static const char *const names[] = { + "PREROUTING", "INPUT", "FORWARD", + "OUTPUT", "POSTROUTING", "BROUTING", + }; + unsigned int i; + char *p = buf; + bool np = false; + int res; + + *p = '\0'; + for (i = 0; i < ARRAY_SIZE(names); ++i) { + if (!(mask & (1 << i))) + continue; + res = snprintf(p, size, "%s%s", np ? "/" : "", names[i]); + if (res > 0) { + size -= res; + p += res; + } + np = true; + } + + return buf; +} + int xt_check_match(struct xt_mtchk_param *par, unsigned int size, u_int8_t proto, bool inv_proto) { @@ -351,9 +377,13 @@ int xt_check_match(struct xt_mtchk_param *par, return -EINVAL; } if (par->match->hooks && (par->hook_mask & ~par->match->hooks) != 0) { - printk("%s_tables: %s match: bad hook_mask %#x/%#x\n", + char used[64], allow[64]; + + printk("%s_tables: %s match: used from hooks %s, but only " + "valid from %s\n", xt_prefix[par->family], par->match->name, - par->hook_mask, par->match->hooks); + textify_hooks(used, sizeof(used), par->hook_mask), + textify_hooks(allow, sizeof(allow), par->match->hooks)); return -EINVAL; } if (par->match->proto && (par->match->proto != proto || inv_proto)) { @@ -497,9 +527,13 @@ int xt_check_target(struct xt_tgchk_param *par, return -EINVAL; } if (par->target->hooks && (par->hook_mask & ~par->target->hooks) != 0) { - printk("%s_tables: %s target: bad hook_mask %#x/%#x\n", + char used[64], allow[64]; + + printk("%s_tables: %s target: used from hooks %s, but only " + "usable from %s\n", xt_prefix[par->family], par->target->name, - par->hook_mask, par->target->hooks); + textify_hooks(used, sizeof(used), par->hook_mask), + textify_hooks(allow, sizeof(allow), par->target->hooks)); return -EINVAL; } if (par->target->proto && (par->target->proto != proto || inv_proto)) { diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index f9977b3311f7..498b45101df7 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -11,6 +11,10 @@ #include <linux/module.h> #include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/jhash.h> + #include <linux/netfilter.h> #include <linux/netfilter_arp.h> #include <linux/netfilter/x_tables.h> @@ -23,6 +27,8 @@ MODULE_ALIAS("ipt_NFQUEUE"); MODULE_ALIAS("ip6t_NFQUEUE"); MODULE_ALIAS("arpt_NFQUEUE"); +static u32 jhash_initval __read_mostly; + static unsigned int nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par) { @@ -31,32 +37,105 @@ nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par) return NF_QUEUE_NR(tinfo->queuenum); } +static u32 hash_v4(const struct sk_buff *skb) +{ + const struct iphdr *iph = ip_hdr(skb); + u32 ipaddr; + + /* packets in either direction go into same queue */ + ipaddr = iph->saddr ^ iph->daddr; + + return jhash_2words(ipaddr, iph->protocol, jhash_initval); +} + +static unsigned int +nfqueue_tg4_v1(struct sk_buff *skb, const struct xt_target_param *par) +{ + const struct xt_NFQ_info_v1 *info = par->targinfo; + u32 queue = info->queuenum; + + if (info->queues_total > 1) + queue = hash_v4(skb) % info->queues_total + queue; + return NF_QUEUE_NR(queue); +} + +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +static u32 hash_v6(const struct sk_buff *skb) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + u32 addr[4]; + + addr[0] = ip6h->saddr.s6_addr32[0] ^ ip6h->daddr.s6_addr32[0]; + addr[1] = ip6h->saddr.s6_addr32[1] ^ ip6h->daddr.s6_addr32[1]; + addr[2] = ip6h->saddr.s6_addr32[2] ^ ip6h->daddr.s6_addr32[2]; + addr[3] = ip6h->saddr.s6_addr32[3] ^ ip6h->daddr.s6_addr32[3]; + + return jhash2(addr, ARRAY_SIZE(addr), jhash_initval); +} + +static unsigned int +nfqueue_tg6_v1(struct sk_buff *skb, const struct xt_target_param *par) +{ + const struct xt_NFQ_info_v1 *info = par->targinfo; + u32 queue = info->queuenum; + + if (info->queues_total > 1) + queue = hash_v6(skb) % info->queues_total + queue; + return NF_QUEUE_NR(queue); +} +#endif + +static bool nfqueue_tg_v1_check(const struct xt_tgchk_param *par) +{ + const struct xt_NFQ_info_v1 *info = par->targinfo; + u32 maxid; + + if (info->queues_total == 0) { + pr_err("NFQUEUE: number of total queues is 0\n"); + return false; + } + maxid = info->queues_total - 1 + info->queuenum; + if (maxid > 0xffff) { + pr_err("NFQUEUE: number of queues (%u) out of range (got %u)\n", + info->queues_total, maxid); + return false; + } + return true; +} + static struct xt_target nfqueue_tg_reg[] __read_mostly = { { .name = "NFQUEUE", - .family = NFPROTO_IPV4, + .family = NFPROTO_UNSPEC, .target = nfqueue_tg, .targetsize = sizeof(struct xt_NFQ_info), .me = THIS_MODULE, }, { .name = "NFQUEUE", - .family = NFPROTO_IPV6, - .target = nfqueue_tg, - .targetsize = sizeof(struct xt_NFQ_info), + .revision = 1, + .family = NFPROTO_IPV4, + .checkentry = nfqueue_tg_v1_check, + .target = nfqueue_tg4_v1, + .targetsize = sizeof(struct xt_NFQ_info_v1), .me = THIS_MODULE, }, +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) { .name = "NFQUEUE", - .family = NFPROTO_ARP, - .target = nfqueue_tg, - .targetsize = sizeof(struct xt_NFQ_info), + .revision = 1, + .family = NFPROTO_IPV6, + .checkentry = nfqueue_tg_v1_check, + .target = nfqueue_tg6_v1, + .targetsize = sizeof(struct xt_NFQ_info_v1), .me = THIS_MODULE, }, +#endif }; static int __init nfqueue_tg_init(void) { + get_random_bytes(&jhash_initval, sizeof(jhash_initval)); return xt_register_targets(nfqueue_tg_reg, ARRAY_SIZE(nfqueue_tg_reg)); } diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c new file mode 100644 index 000000000000..863e40977a4d --- /dev/null +++ b/net/netfilter/xt_osf.c @@ -0,0 +1,428 @@ +/* + * Copyright (c) 2003+ Evgeniy Polyakov <zbr@ioremap.net> + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/module.h> +#include <linux/kernel.h> + +#include <linux/if.h> +#include <linux/inetdevice.h> +#include <linux/ip.h> +#include <linux/list.h> +#include <linux/rculist.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <linux/tcp.h> + +#include <net/ip.h> +#include <net/tcp.h> + +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/x_tables.h> +#include <net/netfilter/nf_log.h> +#include <linux/netfilter/xt_osf.h> + +struct xt_osf_finger { + struct rcu_head rcu_head; + struct list_head finger_entry; + struct xt_osf_user_finger finger; +}; + +enum osf_fmatch_states { + /* Packet does not match the fingerprint */ + FMATCH_WRONG = 0, + /* Packet matches the fingerprint */ + FMATCH_OK, + /* Options do not match the fingerprint, but header does */ + FMATCH_OPT_WRONG, +}; + +/* + * Indexed by dont-fragment bit. + * It is the only constant value in the fingerprint. + */ +static struct list_head xt_osf_fingers[2]; + +static const struct nla_policy xt_osf_policy[OSF_ATTR_MAX + 1] = { + [OSF_ATTR_FINGER] = { .len = sizeof(struct xt_osf_user_finger) }, +}; + +static void xt_osf_finger_free_rcu(struct rcu_head *rcu_head) +{ + struct xt_osf_finger *f = container_of(rcu_head, struct xt_osf_finger, rcu_head); + + kfree(f); +} + +static int xt_osf_add_callback(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nlattr *osf_attrs[]) +{ + struct xt_osf_user_finger *f; + struct xt_osf_finger *kf = NULL, *sf; + int err = 0; + + if (!osf_attrs[OSF_ATTR_FINGER]) + return -EINVAL; + + if (!(nlh->nlmsg_flags & NLM_F_CREATE)) + return -EINVAL; + + f = nla_data(osf_attrs[OSF_ATTR_FINGER]); + + kf = kmalloc(sizeof(struct xt_osf_finger), GFP_KERNEL); + if (!kf) + return -ENOMEM; + + memcpy(&kf->finger, f, sizeof(struct xt_osf_user_finger)); + + list_for_each_entry(sf, &xt_osf_fingers[!!f->df], finger_entry) { + if (memcmp(&sf->finger, f, sizeof(struct xt_osf_user_finger))) + continue; + + kfree(kf); + kf = NULL; + + if (nlh->nlmsg_flags & NLM_F_EXCL) + err = -EEXIST; + break; + } + + /* + * We are protected by nfnl mutex. + */ + if (kf) + list_add_tail_rcu(&kf->finger_entry, &xt_osf_fingers[!!f->df]); + + return err; +} + +static int xt_osf_remove_callback(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nlattr *osf_attrs[]) +{ + struct xt_osf_user_finger *f; + struct xt_osf_finger *sf; + int err = ENOENT; + + if (!osf_attrs[OSF_ATTR_FINGER]) + return -EINVAL; + + f = nla_data(osf_attrs[OSF_ATTR_FINGER]); + + list_for_each_entry(sf, &xt_osf_fingers[!!f->df], finger_entry) { + if (memcmp(&sf->finger, f, sizeof(struct xt_osf_user_finger))) + continue; + + /* + * We are protected by nfnl mutex. + */ + list_del_rcu(&sf->finger_entry); + call_rcu(&sf->rcu_head, xt_osf_finger_free_rcu); + + err = 0; + break; + } + + return err; +} + +static const struct nfnl_callback xt_osf_nfnetlink_callbacks[OSF_MSG_MAX] = { + [OSF_MSG_ADD] = { + .call = xt_osf_add_callback, + .attr_count = OSF_ATTR_MAX, + .policy = xt_osf_policy, + }, + [OSF_MSG_REMOVE] = { + .call = xt_osf_remove_callback, + .attr_count = OSF_ATTR_MAX, + .policy = xt_osf_policy, + }, +}; + +static const struct nfnetlink_subsystem xt_osf_nfnetlink = { + .name = "osf", + .subsys_id = NFNL_SUBSYS_OSF, + .cb_count = OSF_MSG_MAX, + .cb = xt_osf_nfnetlink_callbacks, +}; + +static inline int xt_osf_ttl(const struct sk_buff *skb, const struct xt_osf_info *info, + unsigned char f_ttl) +{ + const struct iphdr *ip = ip_hdr(skb); + + if (info->flags & XT_OSF_TTL) { + if (info->ttl == XT_OSF_TTL_TRUE) + return ip->ttl == f_ttl; + if (info->ttl == XT_OSF_TTL_NOCHECK) + return 1; + else if (ip->ttl <= f_ttl) + return 1; + else { + struct in_device *in_dev = __in_dev_get_rcu(skb->dev); + int ret = 0; + + for_ifa(in_dev) { + if (inet_ifa_match(ip->saddr, ifa)) { + ret = (ip->ttl == f_ttl); + break; + } + } + endfor_ifa(in_dev); + + return ret; + } + } + + return ip->ttl == f_ttl; +} + +static bool xt_osf_match_packet(const struct sk_buff *skb, + const struct xt_match_param *p) +{ + const struct xt_osf_info *info = p->matchinfo; + const struct iphdr *ip = ip_hdr(skb); + const struct tcphdr *tcp; + struct tcphdr _tcph; + int fmatch = FMATCH_WRONG, fcount = 0; + unsigned int optsize = 0, check_WSS = 0; + u16 window, totlen, mss = 0; + bool df; + const unsigned char *optp = NULL, *_optp = NULL; + unsigned char opts[MAX_IPOPTLEN]; + const struct xt_osf_finger *kf; + const struct xt_osf_user_finger *f; + + if (!info) + return false; + + tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph); + if (!tcp) + return false; + + if (!tcp->syn) + return false; + + totlen = ntohs(ip->tot_len); + df = ntohs(ip->frag_off) & IP_DF; + window = ntohs(tcp->window); + + if (tcp->doff * 4 > sizeof(struct tcphdr)) { + optsize = tcp->doff * 4 - sizeof(struct tcphdr); + + _optp = optp = skb_header_pointer(skb, ip_hdrlen(skb) + + sizeof(struct tcphdr), optsize, opts); + } + + rcu_read_lock(); + list_for_each_entry_rcu(kf, &xt_osf_fingers[df], finger_entry) { + f = &kf->finger; + + if (!(info->flags & XT_OSF_LOG) && strcmp(info->genre, f->genre)) + continue; + + optp = _optp; + fmatch = FMATCH_WRONG; + + if (totlen == f->ss && xt_osf_ttl(skb, info, f->ttl)) { + int foptsize, optnum; + + /* + * Should not happen if userspace parser was written correctly. + */ + if (f->wss.wc >= OSF_WSS_MAX) + continue; + + /* Check options */ + + foptsize = 0; + for (optnum = 0; optnum < f->opt_num; ++optnum) + foptsize += f->opt[optnum].length; + + if (foptsize > MAX_IPOPTLEN || + optsize > MAX_IPOPTLEN || + optsize != foptsize) + continue; + + check_WSS = f->wss.wc; + + for (optnum = 0; optnum < f->opt_num; ++optnum) { + if (f->opt[optnum].kind == (*optp)) { + __u32 len = f->opt[optnum].length; + const __u8 *optend = optp + len; + int loop_cont = 0; + + fmatch = FMATCH_OK; + + switch (*optp) { + case OSFOPT_MSS: + mss = optp[3]; + mss <<= 8; + mss |= optp[2]; + + mss = ntohs(mss); + break; + case OSFOPT_TS: + loop_cont = 1; + break; + } + + optp = optend; + } else + fmatch = FMATCH_OPT_WRONG; + + if (fmatch != FMATCH_OK) + break; + } + + if (fmatch != FMATCH_OPT_WRONG) { + fmatch = FMATCH_WRONG; + + switch (check_WSS) { + case OSF_WSS_PLAIN: + if (f->wss.val == 0 || window == f->wss.val) + fmatch = FMATCH_OK; + break; + case OSF_WSS_MSS: + /* + * Some smart modems decrease mangle MSS to + * SMART_MSS_2, so we check standard, decreased + * and the one provided in the fingerprint MSS + * values. + */ +#define SMART_MSS_1 1460 +#define SMART_MSS_2 1448 + if (window == f->wss.val * mss || + window == f->wss.val * SMART_MSS_1 || + window == f->wss.val * SMART_MSS_2) + fmatch = FMATCH_OK; + break; + case OSF_WSS_MTU: + if (window == f->wss.val * (mss + 40) || + window == f->wss.val * (SMART_MSS_1 + 40) || + window == f->wss.val * (SMART_MSS_2 + 40)) + fmatch = FMATCH_OK; + break; + case OSF_WSS_MODULO: + if ((window % f->wss.val) == 0) + fmatch = FMATCH_OK; + break; + } + } + + if (fmatch != FMATCH_OK) + continue; + + fcount++; + + if (info->flags & XT_OSF_LOG) + nf_log_packet(p->hooknum, 0, skb, p->in, p->out, NULL, + "%s [%s:%s] : %pi4:%d -> %pi4:%d hops=%d\n", + f->genre, f->version, f->subtype, + &ip->saddr, ntohs(tcp->source), + &ip->daddr, ntohs(tcp->dest), + f->ttl - ip->ttl); + + if ((info->flags & XT_OSF_LOG) && + info->loglevel == XT_OSF_LOGLEVEL_FIRST) + break; + } + } + rcu_read_unlock(); + + if (!fcount && (info->flags & XT_OSF_LOG)) + nf_log_packet(p->hooknum, 0, skb, p->in, p->out, NULL, + "Remote OS is not known: %pi4:%u -> %pi4:%u\n", + &ip->saddr, ntohs(tcp->source), + &ip->daddr, ntohs(tcp->dest)); + + if (fcount) + fmatch = FMATCH_OK; + + return fmatch == FMATCH_OK; +} + +static struct xt_match xt_osf_match = { + .name = "osf", + .revision = 0, + .family = NFPROTO_IPV4, + .proto = IPPROTO_TCP, + .hooks = (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_FORWARD), + .match = xt_osf_match_packet, + .matchsize = sizeof(struct xt_osf_info), + .me = THIS_MODULE, +}; + +static int __init xt_osf_init(void) +{ + int err = -EINVAL; + int i; + + for (i=0; i<ARRAY_SIZE(xt_osf_fingers); ++i) + INIT_LIST_HEAD(&xt_osf_fingers[i]); + + err = nfnetlink_subsys_register(&xt_osf_nfnetlink); + if (err < 0) { + printk(KERN_ERR "Failed (%d) to register OSF nsfnetlink helper.\n", err); + goto err_out_exit; + } + + err = xt_register_match(&xt_osf_match); + if (err) { + printk(KERN_ERR "Failed (%d) to register OS fingerprint " + "matching module.\n", err); + goto err_out_remove; + } + + return 0; + +err_out_remove: + nfnetlink_subsys_unregister(&xt_osf_nfnetlink); +err_out_exit: + return err; +} + +static void __exit xt_osf_fini(void) +{ + struct xt_osf_finger *f; + int i; + + nfnetlink_subsys_unregister(&xt_osf_nfnetlink); + xt_unregister_match(&xt_osf_match); + + rcu_read_lock(); + for (i=0; i<ARRAY_SIZE(xt_osf_fingers); ++i) { + + list_for_each_entry_rcu(f, &xt_osf_fingers[i], finger_entry) { + list_del_rcu(&f->finger_entry); + call_rcu(&f->rcu_head, xt_osf_finger_free_rcu); + } + } + rcu_read_unlock(); + + rcu_barrier(); +} + +module_init(xt_osf_init); +module_exit(xt_osf_fini); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>"); +MODULE_DESCRIPTION("Passive OS fingerprint matching."); +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF); diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 1acc089be7e9..ebf00ad5b194 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -22,6 +22,8 @@ #include <net/netfilter/nf_tproxy_core.h> #include <net/netfilter/ipv4/nf_defrag_ipv4.h> +#include <linux/netfilter/xt_socket.h> + #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #define XT_SOCKET_HAVE_CONNTRACK 1 #include <net/netfilter/nf_conntrack.h> @@ -86,7 +88,8 @@ extract_icmp_fields(const struct sk_buff *skb, static bool -socket_mt(const struct sk_buff *skb, const struct xt_match_param *par) +socket_match(const struct sk_buff *skb, const struct xt_match_param *par, + const struct xt_socket_mtinfo1 *info) { const struct iphdr *iph = ip_hdr(skb); struct udphdr _hdr, *hp = NULL; @@ -141,10 +144,24 @@ socket_mt(const struct sk_buff *skb, const struct xt_match_param *par) sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol, saddr, daddr, sport, dport, par->in, false); if (sk != NULL) { - bool wildcard = (sk->sk_state != TCP_TIME_WAIT && inet_sk(sk)->rcv_saddr == 0); + bool wildcard; + bool transparent = true; + + /* Ignore sockets listening on INADDR_ANY */ + wildcard = (sk->sk_state != TCP_TIME_WAIT && + inet_sk(sk)->rcv_saddr == 0); + + /* Ignore non-transparent sockets, + if XT_SOCKET_TRANSPARENT is used */ + if (info && info->flags & XT_SOCKET_TRANSPARENT) + transparent = ((sk->sk_state != TCP_TIME_WAIT && + inet_sk(sk)->transparent) || + (sk->sk_state == TCP_TIME_WAIT && + inet_twsk(sk)->tw_transparent)); nf_tproxy_put_sock(sk); - if (wildcard) + + if (wildcard || !transparent) sk = NULL; } @@ -157,23 +174,47 @@ socket_mt(const struct sk_buff *skb, const struct xt_match_param *par) return (sk != NULL); } -static struct xt_match socket_mt_reg __read_mostly = { - .name = "socket", - .family = AF_INET, - .match = socket_mt, - .hooks = 1 << NF_INET_PRE_ROUTING, - .me = THIS_MODULE, +static bool +socket_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) +{ + return socket_match(skb, par, NULL); +} + +static bool +socket_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) +{ + return socket_match(skb, par, par->matchinfo); +} + +static struct xt_match socket_mt_reg[] __read_mostly = { + { + .name = "socket", + .revision = 0, + .family = NFPROTO_IPV4, + .match = socket_mt_v0, + .hooks = 1 << NF_INET_PRE_ROUTING, + .me = THIS_MODULE, + }, + { + .name = "socket", + .revision = 1, + .family = NFPROTO_IPV4, + .match = socket_mt_v1, + .matchsize = sizeof(struct xt_socket_mtinfo1), + .hooks = 1 << NF_INET_PRE_ROUTING, + .me = THIS_MODULE, + }, }; static int __init socket_mt_init(void) { nf_defrag_ipv4_enable(); - return xt_register_match(&socket_mt_reg); + return xt_register_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg)); } static void __exit socket_mt_exit(void) { - xt_unregister_match(&socket_mt_reg); + xt_unregister_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg)); } module_init(socket_mt_init); |