Diffstat (limited to 'net/netfilter')
27 files changed, 1077 insertions, 536 deletions
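For context on the largest addition below (net/netfilter/nf_bpf_link.c): nf_hook_run_bpf() runs an attached BPF program with a struct bpf_nf_ctx carrying the skb and the nf_hook_state, and bpf_nf_link_attach() registers the corresponding netfilter hook with nf_register_net_hook() when a link is created. The following is a minimal, hypothetical BPF-program-side sketch of what such a hook could look like; only the bpf_nf_ctx layout and the read-only BTF pointer semantics come from the diff, while the SEC("netfilter") section name and the drop policy are illustrative assumptions.

```c
/* Hypothetical BPF-side counterpart to nf_hook_run_bpf() below: a sketch,
 * not part of this diff.  The two-pointer context layout mirrors
 * struct bpf_nf_ctx from nf_bpf_link.c; the section name and the
 * drop policy are illustrative assumptions.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

#define NF_DROP		0	/* verdicts from uapi/linux/netfilter.h */
#define NF_ACCEPT	1

SEC("netfilter")
int nf_skb_filter(struct bpf_nf_ctx *ctx)
{
	/* Both context members are read-only PTR_TO_BTF_ID | PTR_TRUSTED
	 * pointers, as enforced by nf_is_valid_access()/nf_ptr_to_btf_id()
	 * in the diff below.
	 */
	const struct nf_hook_state *state = ctx->state;
	const struct sk_buff *skb = ctx->skb;

	/* Example policy: drop zero-length IPv4 skbs seen on this hook,
	 * accept everything else.
	 */
	if (state->pf == 2 /* NFPROTO_IPV4 */ && skb->len == 0)
		return NF_DROP;

	return NF_ACCEPT;
}

char _license[] SEC("license") = "GPL";
```

On the attach side, the link_create.netfilter attributes (pf, hooknum, priority, flags) consumed by bpf_nf_link_attach() would be supplied through a BPF_LINK_CREATE request; per bpf_nf_check_pf_and_hooks() only IPv4/IPv6 hooks are accepted, NF_IP_PRI_FIRST and NF_IP_PRI_LAST priorities are rejected, and the core.c change below additionally refuses a second BPF hook at an already-taken priority (-EBUSY).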
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 4d6737160857..441d1f134110 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -30,6 +30,9 @@ config NETFILTER_FAMILY_BRIDGE config NETFILTER_FAMILY_ARP bool +config NETFILTER_BPF_LINK + def_bool BPF_SYSCALL + config NETFILTER_NETLINK_HOOK tristate "Netfilter base hook dump support" depends on NETFILTER_ADVANCED @@ -753,7 +756,6 @@ if NETFILTER_XTABLES config NETFILTER_XTABLES_COMPAT bool "Netfilter Xtables 32bit support" depends on COMPAT - default y help This option provides a translation layer to run 32bit arp,ip(6),ebtables binaries on 64bit kernels. diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 5ffef1cd6143..d4958e7e7631 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -22,6 +22,7 @@ nf_conntrack-$(CONFIG_DEBUG_INFO_BTF) += nf_conntrack_bpf.o endif obj-$(CONFIG_NETFILTER) = netfilter.o +obj-$(CONFIG_NETFILTER_BPF_LINK) += nf_bpf_link.o obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 358220b58521..f0783e42108b 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -119,6 +119,18 @@ nf_hook_entries_grow(const struct nf_hook_entries *old, for (i = 0; i < old_entries; i++) { if (orig_ops[i] != &dummy_ops) alloc_entries++; + + /* Restrict BPF hook type to force a unique priority, not + * shared at attach time. + * + * This is mainly to avoid ordering issues between two + * different bpf programs, this doesn't prevent a normal + * hook at same priority as a bpf one (we don't want to + * prevent defrag, conntrack, iptables etc from attaching). + */ + if (reg->priority == orig_ops[i]->priority && + reg->hook_ops_type == NF_HOOK_OP_BPF) + return ERR_PTR(-EBUSY); } } diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 13534e02346c..928e64653837 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1481,6 +1481,7 @@ void __net_exit ip_vs_conn_net_cleanup(struct netns_ipvs *ipvs) int __init ip_vs_conn_init(void) { + size_t tab_array_size; int idx; /* Compute size and mask */ @@ -1494,8 +1495,9 @@ int __init ip_vs_conn_init(void) /* * Allocate the connection hash table and initialize its list heads */ - ip_vs_conn_tab = vmalloc(array_size(ip_vs_conn_tab_size, - sizeof(*ip_vs_conn_tab))); + tab_array_size = array_size(ip_vs_conn_tab_size, + sizeof(*ip_vs_conn_tab)); + ip_vs_conn_tab = vmalloc(tab_array_size); if (!ip_vs_conn_tab) return -ENOMEM; @@ -1508,10 +1510,8 @@ int __init ip_vs_conn_init(void) return -ENOMEM; } - pr_info("Connection hash table configured " - "(size=%d, memory=%ldKbytes)\n", - ip_vs_conn_tab_size, - (long)(ip_vs_conn_tab_size*sizeof(*ip_vs_conn_tab))/1024); + pr_info("Connection hash table configured (size=%d, memory=%zdKbytes)\n", + ip_vs_conn_tab_size, tab_array_size / 1024); IP_VS_DBG(0, "Each connection entry needs %zd bytes at least\n", sizeof(struct ip_vs_conn)); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 2fcc26507d69..cb83ca506c5c 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1140,7 +1140,6 @@ struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc, __be16 vport; unsigned int flags; - EnterFunction(12); vaddr = &svc->addr; vport = svc->port; daddr = &iph->saddr; @@ -1208,7 +1207,6 @@ struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc, IP_VS_DBG_ADDR(cp->af, 
&cp->vaddr), ntohs(cp->vport), IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport), cp->flags, refcount_read(&cp->refcnt)); - LeaveFunction(12); return cp; } @@ -1316,13 +1314,11 @@ after_nat: ip_vs_update_conntrack(skb, cp, 0); ip_vs_conn_put(cp); - LeaveFunction(11); return NF_ACCEPT; drop: ip_vs_conn_put(cp); kfree_skb(skb); - LeaveFunction(11); return NF_STOLEN; } @@ -1341,8 +1337,6 @@ ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *stat int af = state->pf; struct sock *sk; - EnterFunction(11); - /* Already marked as IPVS request or reply? */ if (skb->ipvs_property) return NF_ACCEPT; @@ -2365,7 +2359,6 @@ static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list) struct netns_ipvs *ipvs; struct net *net; - EnterFunction(2); list_for_each_entry(net, net_list, exit_list) { ipvs = net_ipvs(net); ip_vs_unregister_hooks(ipvs, AF_INET); @@ -2374,7 +2367,6 @@ static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list) smp_wmb(); ip_vs_sync_net_cleanup(ipvs); } - LeaveFunction(2); } static struct pernet_operations ipvs_core_ops = { diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 2a5ed71c82c3..62606fb44d02 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1061,8 +1061,6 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) unsigned int atype; int ret; - EnterFunction(2); - #ifdef CONFIG_IP_VS_IPV6 if (udest->af == AF_INET6) { atype = ipv6_addr_type(&udest->addr.in6); @@ -1111,7 +1109,6 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) spin_lock_init(&dest->dst_lock); __ip_vs_update_dest(svc, dest, udest, 1); - LeaveFunction(2); return 0; err_stats: @@ -1134,8 +1131,6 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) __be16 dport = udest->port; int ret; - EnterFunction(2); - if (udest->weight < 0) { pr_err("%s(): server weight less than zero\n", __func__); return -ERANGE; @@ -1183,7 +1178,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) ret = ip_vs_start_estimator(svc->ipvs, &dest->stats); if (ret < 0) - goto err; + return ret; __ip_vs_update_dest(svc, dest, udest, 1); } else { /* @@ -1192,9 +1187,6 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) ret = ip_vs_new_dest(svc, udest); } -err: - LeaveFunction(2); - return ret; } @@ -1209,8 +1201,6 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) union nf_inet_addr daddr; __be16 dport = udest->port; - EnterFunction(2); - if (udest->weight < 0) { pr_err("%s(): server weight less than zero\n", __func__); return -ERANGE; @@ -1242,7 +1232,6 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) } __ip_vs_update_dest(svc, dest, udest, 0); - LeaveFunction(2); return 0; } @@ -1317,8 +1306,6 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) struct ip_vs_dest *dest; __be16 dport = udest->port; - EnterFunction(2); - /* We use function that requires RCU lock */ rcu_read_lock(); dest = ip_vs_lookup_dest(svc, udest->af, &udest->addr, dport); @@ -1339,8 +1326,6 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) */ __ip_vs_del_dest(svc->ipvs, dest, false); - LeaveFunction(2); - return 0; } @@ -1746,7 +1731,6 @@ void ip_vs_service_nets_cleanup(struct list_head *net_list) struct netns_ipvs *ipvs; struct net *net; - EnterFunction(2); /* Check for "full" addressed 
entries */ mutex_lock(&__ip_vs_mutex); list_for_each_entry(net, net_list, exit_list) { @@ -1754,7 +1738,6 @@ void ip_vs_service_nets_cleanup(struct list_head *net_list) ip_vs_flush(ipvs, true); } mutex_unlock(&__ip_vs_mutex); - LeaveFunction(2); } /* Put all references for device (dst_cache) */ @@ -1792,7 +1775,6 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, if (event != NETDEV_DOWN || !ipvs) return NOTIFY_DONE; IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name); - EnterFunction(2); mutex_lock(&__ip_vs_mutex); for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { @@ -1821,7 +1803,6 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, } spin_unlock_bh(&ipvs->dest_trash_lock); mutex_unlock(&__ip_vs_mutex); - LeaveFunction(2); return NOTIFY_DONE; } @@ -4537,8 +4518,6 @@ int __init ip_vs_control_init(void) int idx; int ret; - EnterFunction(2); - /* Initialize svc_table, ip_vs_svc_fwm_table */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { INIT_HLIST_HEAD(&ip_vs_svc_table[idx]); @@ -4551,15 +4530,12 @@ int __init ip_vs_control_init(void) if (ret < 0) return ret; - LeaveFunction(2); return 0; } void ip_vs_control_cleanup(void) { - EnterFunction(2); unregister_netdevice_notifier(&ip_vs_dst_notifier); /* relying on common rcu_barrier() in ip_vs_cleanup() */ - LeaveFunction(2); } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 4963fec815da..264f2f87a437 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -603,7 +603,7 @@ static void ip_vs_sync_conn_v0(struct netns_ipvs *ipvs, struct ip_vs_conn *cp, if (cp->flags & IP_VS_CONN_F_SEQ_MASK) { struct ip_vs_sync_conn_options *opt = (struct ip_vs_sync_conn_options *)&s[1]; - memcpy(opt, &cp->in_seq, sizeof(*opt)); + memcpy(opt, &cp->sync_conn_opt, sizeof(*opt)); } m->nr_conns++; @@ -1582,13 +1582,11 @@ ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length) struct kvec iov; int len; - EnterFunction(7); iov.iov_base = (void *)buffer; iov.iov_len = length; len = kernel_sendmsg(sock, &msg, &iov, 1, (size_t)(length)); - LeaveFunction(7); return len; } @@ -1614,15 +1612,12 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen) struct kvec iov = {buffer, buflen}; int len; - EnterFunction(7); - /* Receive a packet */ iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, buflen); len = sock_recvmsg(sock, &msg, MSG_DONTWAIT); if (len < 0) return len; - LeaveFunction(7); return len; } diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 80448885c3d7..feb1d7fcb09f 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -339,7 +339,7 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, spin_unlock_bh(&dest->dst_lock); IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n", &dest->addr.ip, &dest_dst->dst_saddr.ip, - atomic_read(&rt->dst.__refcnt)); + rcuref_read(&rt->dst.__rcuref)); } if (ret_saddr) *ret_saddr = dest_dst->dst_saddr.ip; @@ -507,7 +507,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, spin_unlock_bh(&dest->dst_lock); IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", &dest->addr.in6, &dest_dst->dst_saddr.in6, - atomic_read(&rt->dst.__refcnt)); + rcuref_read(&rt->dst.__rcuref)); } if (ret_saddr) *ret_saddr = dest_dst->dst_saddr.in6; @@ -706,8 +706,6 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 
{ struct iphdr *iph = ip_hdr(skb); - EnterFunction(10); - if (__ip_vs_get_out_rt(cp->ipvs, cp->af, skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0) goto tx_error; @@ -719,12 +717,10 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); - LeaveFunction(10); return NF_STOLEN; tx_error: kfree_skb(skb); - LeaveFunction(10); return NF_STOLEN; } @@ -735,8 +731,6 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, { struct ipv6hdr *iph = ipv6_hdr(skb); - EnterFunction(10); - if (__ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, NULL, &iph->daddr, NULL, ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0) @@ -747,12 +741,10 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); - LeaveFunction(10); return NF_STOLEN; tx_error: kfree_skb(skb); - LeaveFunction(10); return NF_STOLEN; } #endif @@ -768,8 +760,6 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct rtable *rt; /* Route to the other host */ int local, rc, was_input; - EnterFunction(10); - /* check if it is a connection of no-client-port */ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { __be16 _pt, *p; @@ -839,12 +829,10 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); - LeaveFunction(10); return rc; tx_error: kfree_skb(skb); - LeaveFunction(10); return NF_STOLEN; } @@ -856,8 +844,6 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct rt6_info *rt; /* Route to the other host */ int local, rc; - EnterFunction(10); - /* check if it is a connection of no-client-port */ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !ipvsh->fragoffs)) { __be16 _pt, *p; @@ -927,11 +913,9 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); - LeaveFunction(10); return rc; tx_error: - LeaveFunction(10); kfree_skb(skb); return NF_STOLEN; } @@ -1149,8 +1133,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, int tun_type, gso_type; int tun_flags; - EnterFunction(10); - local = __ip_vs_get_out_rt(ipvs, cp->af, skb, cp->dest, cp->daddr.ip, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | @@ -1199,7 +1181,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, &next_protocol, NULL, &dsfield, &ttl, dfp); if (IS_ERR(skb)) - goto tx_error; + return NF_STOLEN; gso_type = __tun_gso_type_mask(AF_INET, cp->af); if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { @@ -1267,14 +1249,10 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, else if (ret == NF_DROP) kfree_skb(skb); - LeaveFunction(10); - return NF_STOLEN; tx_error: - if (!IS_ERR(skb)) - kfree_skb(skb); - LeaveFunction(10); + kfree_skb(skb); return NF_STOLEN; } @@ -1298,8 +1276,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, int tun_type, gso_type; int tun_flags; - EnterFunction(10); - local = __ip_vs_get_out_rt_v6(ipvs, cp->af, skb, cp->dest, &cp->daddr.in6, &saddr, ipvsh, 1, @@ -1347,7 +1323,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, &next_protocol, &payload_len, &dsfield, &ttl, NULL); if (IS_ERR(skb)) - goto tx_error; + return NF_STOLEN; gso_type = __tun_gso_type_mask(AF_INET6, cp->af); if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { @@ -1414,14 +1390,10 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, else if (ret == NF_DROP) kfree_skb(skb); - LeaveFunction(10); - return NF_STOLEN; tx_error: - if 
(!IS_ERR(skb)) - kfree_skb(skb); - LeaveFunction(10); + kfree_skb(skb); return NF_STOLEN; } #endif @@ -1437,8 +1409,6 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, { int local; - EnterFunction(10); - local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | @@ -1455,12 +1425,10 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); - LeaveFunction(10); return NF_STOLEN; tx_error: kfree_skb(skb); - LeaveFunction(10); return NF_STOLEN; } @@ -1471,8 +1439,6 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, { int local; - EnterFunction(10); - local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest, &cp->daddr.in6, NULL, ipvsh, 0, @@ -1489,12 +1455,10 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); - LeaveFunction(10); return NF_STOLEN; tx_error: kfree_skb(skb); - LeaveFunction(10); return NF_STOLEN; } #endif @@ -1514,8 +1478,6 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, int local; int rt_mode, was_input; - EnterFunction(10); - /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be forwarded directly here, because there is no need to translate address/port back */ @@ -1526,7 +1488,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, rc = NF_ACCEPT; /* do not touch skb anymore */ atomic_inc(&cp->in_pkts); - goto out; + return rc; } /* @@ -1582,14 +1544,11 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->ignore_df = 1; - rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); - goto out; + return ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); tx_error: kfree_skb(skb); rc = NF_STOLEN; - out: - LeaveFunction(10); return rc; } @@ -1604,8 +1563,6 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, int local; int rt_mode; - EnterFunction(10); - /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be forwarded directly here, because there is no need to translate address/port back */ @@ -1616,7 +1573,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, rc = NF_ACCEPT; /* do not touch skb anymore */ atomic_inc(&cp->in_pkts); - goto out; + return rc; } /* @@ -1671,14 +1628,11 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->ignore_df = 1; - rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); - goto out; + return ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); tx_error: kfree_skb(skb); rc = NF_STOLEN; -out: - LeaveFunction(10); return rc; } #endif diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c new file mode 100644 index 000000000000..c36da56d756f --- /dev/null +++ b/net/netfilter/nf_bpf_link.c @@ -0,0 +1,228 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <linux/filter.h> +#include <linux/netfilter.h> + +#include <net/netfilter/nf_bpf_link.h> +#include <uapi/linux/netfilter_ipv4.h> + +static unsigned int nf_hook_run_bpf(void *bpf_prog, struct sk_buff *skb, + const struct nf_hook_state *s) +{ + const struct bpf_prog *prog = bpf_prog; + struct bpf_nf_ctx ctx = { + .state = s, + .skb = skb, + }; + + return bpf_prog_run(prog, &ctx); +} + +struct bpf_nf_link { + struct bpf_link link; + struct nf_hook_ops hook_ops; + struct net *net; + u32 dead; +}; + +static void bpf_nf_link_release(struct bpf_link *link) +{ + struct 
bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link); + + if (nf_link->dead) + return; + + /* prevent hook-not-found warning splat from netfilter core when + * .detach was already called + */ + if (!cmpxchg(&nf_link->dead, 0, 1)) + nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops); +} + +static void bpf_nf_link_dealloc(struct bpf_link *link) +{ + struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link); + + kfree(nf_link); +} + +static int bpf_nf_link_detach(struct bpf_link *link) +{ + bpf_nf_link_release(link); + return 0; +} + +static void bpf_nf_link_show_info(const struct bpf_link *link, + struct seq_file *seq) +{ + struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link); + + seq_printf(seq, "pf:\t%u\thooknum:\t%u\tprio:\t%d\n", + nf_link->hook_ops.pf, nf_link->hook_ops.hooknum, + nf_link->hook_ops.priority); +} + +static int bpf_nf_link_fill_link_info(const struct bpf_link *link, + struct bpf_link_info *info) +{ + struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link); + + info->netfilter.pf = nf_link->hook_ops.pf; + info->netfilter.hooknum = nf_link->hook_ops.hooknum; + info->netfilter.priority = nf_link->hook_ops.priority; + info->netfilter.flags = 0; + + return 0; +} + +static int bpf_nf_link_update(struct bpf_link *link, struct bpf_prog *new_prog, + struct bpf_prog *old_prog) +{ + return -EOPNOTSUPP; +} + +static const struct bpf_link_ops bpf_nf_link_lops = { + .release = bpf_nf_link_release, + .dealloc = bpf_nf_link_dealloc, + .detach = bpf_nf_link_detach, + .show_fdinfo = bpf_nf_link_show_info, + .fill_link_info = bpf_nf_link_fill_link_info, + .update_prog = bpf_nf_link_update, +}; + +static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr) +{ + switch (attr->link_create.netfilter.pf) { + case NFPROTO_IPV4: + case NFPROTO_IPV6: + if (attr->link_create.netfilter.hooknum >= NF_INET_NUMHOOKS) + return -EPROTO; + break; + default: + return -EAFNOSUPPORT; + } + + if (attr->link_create.netfilter.flags) + return -EOPNOTSUPP; + + /* make sure conntrack confirm is always last. + * + * In the future, if userspace can e.g. request defrag, then + * "defrag_requested && prio before NF_IP_PRI_CONNTRACK_DEFRAG" + * should fail. + */ + switch (attr->link_create.netfilter.priority) { + case NF_IP_PRI_FIRST: return -ERANGE; /* sabotage_in and other warts */ + case NF_IP_PRI_LAST: return -ERANGE; /* e.g. 
conntrack confirm */ + } + + return 0; +} + +int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) +{ + struct net *net = current->nsproxy->net_ns; + struct bpf_link_primer link_primer; + struct bpf_nf_link *link; + int err; + + if (attr->link_create.flags) + return -EINVAL; + + err = bpf_nf_check_pf_and_hooks(attr); + if (err) + return err; + + link = kzalloc(sizeof(*link), GFP_USER); + if (!link) + return -ENOMEM; + + bpf_link_init(&link->link, BPF_LINK_TYPE_NETFILTER, &bpf_nf_link_lops, prog); + + link->hook_ops.hook = nf_hook_run_bpf; + link->hook_ops.hook_ops_type = NF_HOOK_OP_BPF; + link->hook_ops.priv = prog; + + link->hook_ops.pf = attr->link_create.netfilter.pf; + link->hook_ops.priority = attr->link_create.netfilter.priority; + link->hook_ops.hooknum = attr->link_create.netfilter.hooknum; + + link->net = net; + link->dead = false; + + err = bpf_link_prime(&link->link, &link_primer); + if (err) { + kfree(link); + return err; + } + + err = nf_register_net_hook(net, &link->hook_ops); + if (err) { + bpf_link_cleanup(&link_primer); + return err; + } + + return bpf_link_settle(&link_primer); +} + +const struct bpf_prog_ops netfilter_prog_ops = { + .test_run = bpf_prog_test_run_nf, +}; + +static bool nf_ptr_to_btf_id(struct bpf_insn_access_aux *info, const char *name) +{ + struct btf *btf; + s32 type_id; + + btf = bpf_get_btf_vmlinux(); + if (IS_ERR_OR_NULL(btf)) + return false; + + type_id = btf_find_by_name_kind(btf, name, BTF_KIND_STRUCT); + if (WARN_ON_ONCE(type_id < 0)) + return false; + + info->btf = btf; + info->btf_id = type_id; + info->reg_type = PTR_TO_BTF_ID | PTR_TRUSTED; + return true; +} + +static bool nf_is_valid_access(int off, int size, enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + if (off < 0 || off >= sizeof(struct bpf_nf_ctx)) + return false; + + if (type == BPF_WRITE) + return false; + + switch (off) { + case bpf_ctx_range(struct bpf_nf_ctx, skb): + if (size != sizeof_field(struct bpf_nf_ctx, skb)) + return false; + + return nf_ptr_to_btf_id(info, "sk_buff"); + case bpf_ctx_range(struct bpf_nf_ctx, state): + if (size != sizeof_field(struct bpf_nf_ctx, state)) + return false; + + return nf_ptr_to_btf_id(info, "nf_hook_state"); + default: + return false; + } + + return false; +} + +static const struct bpf_func_proto * +bpf_nf_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +{ + return bpf_base_func_proto(func_id); +} + +const struct bpf_verifier_ops netfilter_verifier_ops = { + .is_valid_access = nf_is_valid_access, + .get_func_proto = bpf_nf_func_proto, +}; diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index cd99e6dc1f35..0d36d7285e3f 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -192,8 +192,7 @@ BTF_ID(struct, nf_conn___init) /* Check writes into `struct nf_conn` */ static int _nf_conntrack_btf_struct_access(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, - int off, int size, enum bpf_access_type atype, - u32 *next_btf_id, enum bpf_type_flag *flag) + int off, int size) { const struct btf_type *ncit, *nct, *t; size_t end; @@ -381,6 +380,7 @@ __bpf_kfunc struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i) struct nf_conn *nfct = (struct nf_conn *)nfct_i; int err; + nfct->status |= IPS_CONFIRMED; err = nf_conntrack_hash_check_insert(nfct); if (err < 0) { nf_conntrack_free(nfct); @@ -401,8 +401,6 @@ __bpf_kfunc struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i) 
*/ __bpf_kfunc void bpf_ct_release(struct nf_conn *nfct) { - if (!nfct) - return; nf_ct_put(nfct); } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index c6a6a6099b4e..c4ccfec6cb98 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -932,7 +932,6 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) goto out; } - ct->status |= IPS_CONFIRMED; smp_wmb(); /* The caller holds a reference to this object */ refcount_set(&ct->ct_general.use, 2); @@ -1294,7 +1293,7 @@ dying: } EXPORT_SYMBOL_GPL(__nf_conntrack_confirm); -/* Returns true if a connection correspondings to the tuple (required +/* Returns true if a connection corresponds to the tuple (required for NAT). */ int nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index bfc3aaa2c872..d40544cd61a6 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -176,7 +176,12 @@ nla_put_failure: static int ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct, bool skip_zero) { - long timeout = nf_ct_expires(ct) / HZ; + long timeout; + + if (nf_ct_is_confirmed(ct)) + timeout = nf_ct_expires(ct) / HZ; + else + timeout = ct->timeout / HZ; if (skip_zero && timeout == 0) return 0; @@ -1554,9 +1559,6 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data) { - if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) - return 0; - return ctnetlink_filter_match(ct, data); } @@ -1626,11 +1628,6 @@ static int ctnetlink_del_conntrack(struct sk_buff *skb, ct = nf_ct_tuplehash_to_ctrack(h); - if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) { - nf_ct_put(ct); - return -EBUSY; - } - if (cda[CTA_ID]) { __be32 id = nla_get_be32(cda[CTA_ID]); @@ -2253,9 +2250,6 @@ ctnetlink_create_conntrack(struct net *net, if (!cda[CTA_TIMEOUT]) goto err1; - timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ; - __nf_ct_set_timeout(ct, timeout); - rcu_read_lock(); if (cda[CTA_HELP]) { char *helpname = NULL; @@ -2316,6 +2310,12 @@ ctnetlink_create_conntrack(struct net *net, nfct_seqadj_ext_add(ct); nfct_synproxy_ext_add(ct); + /* we must add conntrack extensions before confirmation. */ + ct->status |= IPS_CONFIRMED; + + timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ; + __nf_ct_set_timeout(ct, timeout); + if (cda[CTA_STATUS]) { err = ctnetlink_change_status(ct, cda); if (err < 0) diff --git a/net/netfilter/nf_conntrack_ovs.c b/net/netfilter/nf_conntrack_ovs.c index 52b776bdf526..068e9489e1c2 100644 --- a/net/netfilter/nf_conntrack_ovs.c +++ b/net/netfilter/nf_conntrack_ovs.c @@ -6,6 +6,7 @@ #include <net/netfilter/ipv6/nf_defrag_ipv6.h> #include <net/ipv6_frag.h> #include <net/ip.h> +#include <linux/netfilter_ipv6.h> /* 'skb' should already be pulled to nh_ofs. 
*/ int nf_ct_helper(struct sk_buff *skb, struct nf_conn *ct, @@ -120,8 +121,14 @@ int nf_ct_skb_network_trim(struct sk_buff *skb, int family) len = skb_ip_totlen(skb); break; case NFPROTO_IPV6: - len = sizeof(struct ipv6hdr) - + ntohs(ipv6_hdr(skb)->payload_len); + len = ntohs(ipv6_hdr(skb)->payload_len); + if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP) { + int err = nf_ip6_check_hbh_len(skb, &len); + + if (err) + return err; + } + len += sizeof(struct ipv6hdr); break; default: len = skb->len; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index e29e4ccb5c5a..ce829d434f13 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -549,8 +549,8 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) && l4proto_in_range(tuple, maniptype, - &range->min_proto, - &range->max_proto) && + &range->min_proto, + &range->max_proto) && (range->min_proto.all == range->max_proto.all || !nf_nat_used_tuple(tuple, ct))) return; diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c index f91579c821e9..6616ba5d0b04 100644 --- a/net/netfilter/nf_nat_redirect.c +++ b/net/netfilter/nf_nat_redirect.c @@ -10,6 +10,7 @@ #include <linux/if.h> #include <linux/inetdevice.h> +#include <linux/in.h> #include <linux/ip.h> #include <linux/kernel.h> #include <linux/netdevice.h> @@ -24,54 +25,56 @@ #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_redirect.h> +static unsigned int +nf_nat_redirect(struct sk_buff *skb, const struct nf_nat_range2 *range, + const union nf_inet_addr *newdst) +{ + struct nf_nat_range2 newrange; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + + memset(&newrange, 0, sizeof(newrange)); + + newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr = *newdst; + newrange.max_addr = *newdst; + newrange.min_proto = range->min_proto; + newrange.max_proto = range->max_proto; + + return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); +} + unsigned int -nf_nat_redirect_ipv4(struct sk_buff *skb, - const struct nf_nat_ipv4_multi_range_compat *mr, +nf_nat_redirect_ipv4(struct sk_buff *skb, const struct nf_nat_range2 *range, unsigned int hooknum) { - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - __be32 newdst; - struct nf_nat_range2 newrange; + union nf_inet_addr newdst = {}; WARN_ON(hooknum != NF_INET_PRE_ROUTING && hooknum != NF_INET_LOCAL_OUT); - ct = nf_ct_get(skb, &ctinfo); - WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED))); - /* Local packets: make them go to loopback */ if (hooknum == NF_INET_LOCAL_OUT) { - newdst = htonl(0x7F000001); + newdst.ip = htonl(INADDR_LOOPBACK); } else { const struct in_device *indev; - newdst = 0; - indev = __in_dev_get_rcu(skb->dev); if (indev) { const struct in_ifaddr *ifa; ifa = rcu_dereference(indev->ifa_list); if (ifa) - newdst = ifa->ifa_local; + newdst.ip = ifa->ifa_local; } - if (!newdst) + if (!newdst.ip) return NF_DROP; } - /* Transfer from original range. */ - memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); - memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); - newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS; - newrange.min_addr.ip = newdst; - newrange.max_addr.ip = newdst; - newrange.min_proto = mr->range[0].min; - newrange.max_proto = mr->range[0].max; - - /* Hand modified range to generic setup. 
*/ - return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); + return nf_nat_redirect(skb, range, &newdst); } EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv4); @@ -81,14 +84,10 @@ unsigned int nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, unsigned int hooknum) { - struct nf_nat_range2 newrange; - struct in6_addr newdst; - enum ip_conntrack_info ctinfo; - struct nf_conn *ct; + union nf_inet_addr newdst = {}; - ct = nf_ct_get(skb, &ctinfo); if (hooknum == NF_INET_LOCAL_OUT) { - newdst = loopback_addr; + newdst.in6 = loopback_addr; } else { struct inet6_dev *idev; struct inet6_ifaddr *ifa; @@ -98,7 +97,7 @@ nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, if (idev != NULL) { read_lock_bh(&idev->lock); list_for_each_entry(ifa, &idev->addr_list, if_list) { - newdst = ifa->addr; + newdst.in6 = ifa->addr; addr = true; break; } @@ -109,12 +108,6 @@ nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, return NF_DROP; } - newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; - newrange.min_addr.in6 = newdst; - newrange.max_addr.in6 = newdst; - newrange.min_proto = range->min_proto; - newrange.max_proto = range->max_proto; - - return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); + return nf_nat_redirect(skb, range, &newdst); } EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv6); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e48ab8dfb541..09542951656c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -102,11 +102,9 @@ static const u8 nft2audit_op[NFT_MSG_MAX] = { // enum nf_tables_msg_types [NFT_MSG_DELFLOWTABLE] = AUDIT_NFT_OP_FLOWTABLE_UNREGISTER, }; -static void nft_validate_state_update(struct net *net, u8 new_validate_state) +static void nft_validate_state_update(struct nft_table *table, u8 new_validate_state) { - struct nftables_pernet *nft_net = nft_pernet(net); - - switch (nft_net->validate_state) { + switch (table->validate_state) { case NFT_VALIDATE_SKIP: WARN_ON_ONCE(new_validate_state == NFT_VALIDATE_DO); break; @@ -117,7 +115,7 @@ static void nft_validate_state_update(struct net *net, u8 new_validate_state) return; } - nft_net->validate_state = new_validate_state; + table->validate_state = new_validate_state; } static void nf_tables_trans_destroy_work(struct work_struct *w); static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work); @@ -821,12 +819,20 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, goto nla_put_failure; if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) || - nla_put_be32(skb, NFTA_TABLE_FLAGS, - htonl(table->flags & NFT_TABLE_F_MASK)) || nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)) || nla_put_be64(skb, NFTA_TABLE_HANDLE, cpu_to_be64(table->handle), NFTA_TABLE_PAD)) goto nla_put_failure; + + if (event == NFT_MSG_DELTABLE) { + nlmsg_end(skb, nlh); + return 0; + } + + if (nla_put_be32(skb, NFTA_TABLE_FLAGS, + htonl(table->flags & NFT_TABLE_F_MASK))) + goto nla_put_failure; + if (nft_table_has_owner(table) && nla_put_be32(skb, NFTA_TABLE_OWNER, htonl(table->nlpid))) goto nla_put_failure; @@ -1224,6 +1230,7 @@ static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info, if (table == NULL) goto err_kzalloc; + table->validate_state = NFT_VALIDATE_SKIP; table->name = nla_strdup(attr, GFP_KERNEL_ACCOUNT); if (table->name == NULL) goto err_strdup; @@ -1575,7 +1582,8 @@ nla_put_failure: } static int nft_dump_basechain_hook(struct sk_buff *skb, int family, - const struct 
nft_base_chain *basechain) + const struct nft_base_chain *basechain, + const struct list_head *hook_list) { const struct nf_hook_ops *ops = &basechain->ops; struct nft_hook *hook, *first = NULL; @@ -1592,7 +1600,11 @@ static int nft_dump_basechain_hook(struct sk_buff *skb, int family, if (nft_base_chain_netdev(family, ops->hooknum)) { nest_devs = nla_nest_start_noflag(skb, NFTA_HOOK_DEVS); - list_for_each_entry(hook, &basechain->hook_list, list) { + + if (!hook_list) + hook_list = &basechain->hook_list; + + list_for_each_entry(hook, hook_list, list) { if (!first) first = hook; @@ -1617,7 +1629,8 @@ nla_put_failure: static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, u32 portid, u32 seq, int event, u32 flags, int family, const struct nft_table *table, - const struct nft_chain *chain) + const struct nft_chain *chain, + const struct list_head *hook_list) { struct nlmsghdr *nlh; @@ -1627,19 +1640,22 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, if (!nlh) goto nla_put_failure; - if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name)) - goto nla_put_failure; - if (nla_put_be64(skb, NFTA_CHAIN_HANDLE, cpu_to_be64(chain->handle), + if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name) || + nla_put_string(skb, NFTA_CHAIN_NAME, chain->name) || + nla_put_be64(skb, NFTA_CHAIN_HANDLE, cpu_to_be64(chain->handle), NFTA_CHAIN_PAD)) goto nla_put_failure; - if (nla_put_string(skb, NFTA_CHAIN_NAME, chain->name)) - goto nla_put_failure; + + if (event == NFT_MSG_DELCHAIN && !hook_list) { + nlmsg_end(skb, nlh); + return 0; + } if (nft_is_base_chain(chain)) { const struct nft_base_chain *basechain = nft_base_chain(chain); struct nft_stats __percpu *stats; - if (nft_dump_basechain_hook(skb, family, basechain)) + if (nft_dump_basechain_hook(skb, family, basechain, hook_list)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_CHAIN_POLICY, @@ -1674,7 +1690,8 @@ nla_put_failure: return -1; } -static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event) +static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event, + const struct list_head *hook_list) { struct nftables_pernet *nft_net; struct sk_buff *skb; @@ -1694,7 +1711,7 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event) err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq, event, flags, ctx->family, ctx->table, - ctx->chain); + ctx->chain, hook_list); if (err < 0) { kfree_skb(skb); goto err; @@ -1740,7 +1757,7 @@ static int nf_tables_dump_chains(struct sk_buff *skb, NFT_MSG_NEWCHAIN, NLM_F_MULTI, table->family, table, - chain) < 0) + chain, NULL) < 0) goto done; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); @@ -1794,7 +1811,7 @@ static int nf_tables_getchain(struct sk_buff *skb, const struct nfnl_info *info, err = nf_tables_fill_chain_info(skb2, net, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, - 0, family, table, chain); + 0, family, table, chain, NULL); if (err < 0) goto err_fill_chain_info; @@ -1955,7 +1972,8 @@ static struct nft_hook *nft_hook_list_find(struct list_head *hook_list, static int nf_tables_parse_netdev_hooks(struct net *net, const struct nlattr *attr, - struct list_head *hook_list) + struct list_head *hook_list, + struct netlink_ext_ack *extack) { struct nft_hook *hook, *next; const struct nlattr *tmp; @@ -1969,10 +1987,12 @@ static int nf_tables_parse_netdev_hooks(struct net *net, hook = nft_netdev_hook_alloc(net, tmp); if (IS_ERR(hook)) { + NL_SET_BAD_ATTR(extack, tmp); err = PTR_ERR(hook); goto err_hook; } 
if (nft_hook_list_find(hook_list, hook)) { + NL_SET_BAD_ATTR(extack, tmp); kfree(hook); err = -EEXIST; goto err_hook; @@ -2003,38 +2023,41 @@ struct nft_chain_hook { struct list_head list; }; -static int nft_chain_parse_netdev(struct net *net, - struct nlattr *tb[], - struct list_head *hook_list) +static int nft_chain_parse_netdev(struct net *net, struct nlattr *tb[], + struct list_head *hook_list, + struct netlink_ext_ack *extack, u32 flags) { struct nft_hook *hook; int err; if (tb[NFTA_HOOK_DEV]) { hook = nft_netdev_hook_alloc(net, tb[NFTA_HOOK_DEV]); - if (IS_ERR(hook)) + if (IS_ERR(hook)) { + NL_SET_BAD_ATTR(extack, tb[NFTA_HOOK_DEV]); return PTR_ERR(hook); + } list_add_tail(&hook->list, hook_list); } else if (tb[NFTA_HOOK_DEVS]) { err = nf_tables_parse_netdev_hooks(net, tb[NFTA_HOOK_DEVS], - hook_list); + hook_list, extack); if (err < 0) return err; - if (list_empty(hook_list)) - return -EINVAL; - } else { - return -EINVAL; } + if (flags & NFT_CHAIN_HW_OFFLOAD && + list_empty(hook_list)) + return -EINVAL; + return 0; } static int nft_chain_parse_hook(struct net *net, + struct nft_base_chain *basechain, const struct nlattr * const nla[], struct nft_chain_hook *hook, u8 family, - struct netlink_ext_ack *extack, bool autoload) + u32 flags, struct netlink_ext_ack *extack) { struct nftables_pernet *nft_net = nft_pernet(net); struct nlattr *ha[NFTA_HOOK_MAX + 1]; @@ -2050,31 +2073,46 @@ static int nft_chain_parse_hook(struct net *net, if (err < 0) return err; - if (ha[NFTA_HOOK_HOOKNUM] == NULL || - ha[NFTA_HOOK_PRIORITY] == NULL) - return -EINVAL; + if (!basechain) { + if (!ha[NFTA_HOOK_HOOKNUM] || + !ha[NFTA_HOOK_PRIORITY]) + return -EINVAL; - hook->num = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); - hook->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY])); + hook->num = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); + hook->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY])); - type = __nft_chain_type_get(family, NFT_CHAIN_T_DEFAULT); - if (!type) - return -EOPNOTSUPP; + type = __nft_chain_type_get(family, NFT_CHAIN_T_DEFAULT); + if (!type) + return -EOPNOTSUPP; - if (nla[NFTA_CHAIN_TYPE]) { - type = nf_tables_chain_type_lookup(net, nla[NFTA_CHAIN_TYPE], - family, autoload); - if (IS_ERR(type)) { - NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TYPE]); - return PTR_ERR(type); + if (nla[NFTA_CHAIN_TYPE]) { + type = nf_tables_chain_type_lookup(net, nla[NFTA_CHAIN_TYPE], + family, true); + if (IS_ERR(type)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TYPE]); + return PTR_ERR(type); + } } - } - if (hook->num >= NFT_MAX_HOOKS || !(type->hook_mask & (1 << hook->num))) - return -EOPNOTSUPP; + if (hook->num >= NFT_MAX_HOOKS || !(type->hook_mask & (1 << hook->num))) + return -EOPNOTSUPP; - if (type->type == NFT_CHAIN_T_NAT && - hook->priority <= NF_IP_PRI_CONNTRACK) - return -EOPNOTSUPP; + if (type->type == NFT_CHAIN_T_NAT && + hook->priority <= NF_IP_PRI_CONNTRACK) + return -EOPNOTSUPP; + } else { + if (ha[NFTA_HOOK_HOOKNUM]) { + hook->num = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); + if (hook->num != basechain->ops.hooknum) + return -EOPNOTSUPP; + } + if (ha[NFTA_HOOK_PRIORITY]) { + hook->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY])); + if (hook->priority != basechain->ops.priority) + return -EOPNOTSUPP; + } + + type = basechain->type; + } if (!try_module_get(type->owner)) { if (nla[NFTA_CHAIN_TYPE]) @@ -2086,7 +2124,7 @@ static int nft_chain_parse_hook(struct net *net, INIT_LIST_HEAD(&hook->list); if (nft_base_chain_netdev(family, hook->num)) { - err = nft_chain_parse_netdev(net, ha, 
&hook->list); + err = nft_chain_parse_netdev(net, ha, &hook->list, extack, flags); if (err < 0) { module_put(type->owner); return err; @@ -2110,38 +2148,34 @@ static void nft_chain_release_hook(struct nft_chain_hook *hook) module_put(hook->type->owner); } -struct nft_rules_old { - struct rcu_head h; - struct nft_rule_blob *blob; -}; - -static void nft_last_rule(struct nft_rule_blob *blob, const void *ptr) +static void nft_last_rule(const struct nft_chain *chain, const void *ptr) { - struct nft_rule_dp *prule; + struct nft_rule_dp_last *lrule; + + BUILD_BUG_ON(offsetof(struct nft_rule_dp_last, end) != 0); - prule = (struct nft_rule_dp *)ptr; - prule->is_last = 1; + lrule = (struct nft_rule_dp_last *)ptr; + lrule->end.is_last = 1; + lrule->chain = chain; /* blob size does not include the trailer rule */ } -static struct nft_rule_blob *nf_tables_chain_alloc_rules(unsigned int size) +static struct nft_rule_blob *nf_tables_chain_alloc_rules(const struct nft_chain *chain, + unsigned int size) { struct nft_rule_blob *blob; - /* size must include room for the last rule */ - if (size < offsetof(struct nft_rule_dp, data)) - return NULL; - - size += sizeof(struct nft_rule_blob) + sizeof(struct nft_rules_old); if (size > INT_MAX) return NULL; + size += sizeof(struct nft_rule_blob) + sizeof(struct nft_rule_dp_last); + blob = kvmalloc(size, GFP_KERNEL_ACCOUNT); if (!blob) return NULL; blob->size = 0; - nft_last_rule(blob, blob->data); + nft_last_rule(chain, blob->data); return blob; } @@ -2172,12 +2206,8 @@ static int nft_basechain_init(struct nft_base_chain *basechain, u8 family, list_splice_init(&hook->list, &basechain->hook_list); list_for_each_entry(h, &basechain->hook_list, list) nft_basechain_hook_init(&h->ops, family, hook, chain); - - basechain->ops.hooknum = hook->num; - basechain->ops.priority = hook->priority; - } else { - nft_basechain_hook_init(&basechain->ops, family, hook, chain); } + nft_basechain_hook_init(&basechain->ops, family, hook, chain); chain->flags |= NFT_CHAIN_BASE | flags; basechain->policy = NF_ACCEPT; @@ -2220,7 +2250,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, struct nft_rule_blob *blob; struct nft_trans *trans; struct nft_chain *chain; - unsigned int data_size; int err; if (table->use == UINT_MAX) @@ -2228,13 +2257,13 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, if (nla[NFTA_CHAIN_HOOK]) { struct nft_stats __percpu *stats = NULL; - struct nft_chain_hook hook; + struct nft_chain_hook hook = {}; if (flags & NFT_CHAIN_BINDING) return -EOPNOTSUPP; - err = nft_chain_parse_hook(net, nla, &hook, family, extack, - true); + err = nft_chain_parse_hook(net, NULL, nla, &hook, family, flags, + extack); if (err < 0) return err; @@ -2308,8 +2337,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, chain->udlen = nla_len(nla[NFTA_CHAIN_USERDATA]); } - data_size = offsetof(struct nft_rule_dp, data); /* last rule */ - blob = nf_tables_chain_alloc_rules(data_size); + blob = nf_tables_chain_alloc_rules(chain, 0); if (!blob) { err = -ENOMEM; goto err_destroy_chain; @@ -2349,65 +2377,57 @@ err_destroy_chain: return err; } -static bool nft_hook_list_equal(struct list_head *hook_list1, - struct list_head *hook_list2) -{ - struct nft_hook *hook; - int n = 0, m = 0; - - n = 0; - list_for_each_entry(hook, hook_list2, list) { - if (!nft_hook_list_find(hook_list1, hook)) - return false; - - n++; - } - list_for_each_entry(hook, hook_list1, list) - m++; - - return n == m; -} - static int 
nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, u32 flags, const struct nlattr *attr, struct netlink_ext_ack *extack) { const struct nlattr * const *nla = ctx->nla; + struct nft_base_chain *basechain = NULL; struct nft_table *table = ctx->table; struct nft_chain *chain = ctx->chain; - struct nft_base_chain *basechain; + struct nft_chain_hook hook = {}; struct nft_stats *stats = NULL; - struct nft_chain_hook hook; + struct nft_hook *h, *next; struct nf_hook_ops *ops; struct nft_trans *trans; + bool unregister = false; int err; if (chain->flags ^ flags) return -EOPNOTSUPP; + INIT_LIST_HEAD(&hook.list); + if (nla[NFTA_CHAIN_HOOK]) { if (!nft_is_base_chain(chain)) { NL_SET_BAD_ATTR(extack, attr); return -EEXIST; } - err = nft_chain_parse_hook(ctx->net, nla, &hook, ctx->family, - extack, false); + + basechain = nft_base_chain(chain); + err = nft_chain_parse_hook(ctx->net, basechain, nla, &hook, + ctx->family, flags, extack); if (err < 0) return err; - basechain = nft_base_chain(chain); if (basechain->type != hook.type) { nft_chain_release_hook(&hook); NL_SET_BAD_ATTR(extack, attr); return -EEXIST; } - if (nft_base_chain_netdev(ctx->family, hook.num)) { - if (!nft_hook_list_equal(&basechain->hook_list, - &hook.list)) { - nft_chain_release_hook(&hook); - NL_SET_BAD_ATTR(extack, attr); - return -EEXIST; + if (nft_base_chain_netdev(ctx->family, basechain->ops.hooknum)) { + list_for_each_entry_safe(h, next, &hook.list, list) { + h->ops.pf = basechain->ops.pf; + h->ops.hooknum = basechain->ops.hooknum; + h->ops.priority = basechain->ops.priority; + h->ops.priv = basechain->ops.priv; + h->ops.hook = basechain->ops.hook; + + if (nft_hook_list_find(&basechain->hook_list, h)) { + list_del(&h->list); + kfree(h); + } } } else { ops = &basechain->ops; @@ -2418,7 +2438,6 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, return -EEXIST; } } - nft_chain_release_hook(&hook); } if (nla[NFTA_CHAIN_HANDLE] && @@ -2429,24 +2448,43 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, nla[NFTA_CHAIN_NAME], genmask); if (!IS_ERR(chain2)) { NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]); - return -EEXIST; + err = -EEXIST; + goto err_hooks; } } if (nla[NFTA_CHAIN_COUNTERS]) { - if (!nft_is_base_chain(chain)) - return -EOPNOTSUPP; + if (!nft_is_base_chain(chain)) { + err = -EOPNOTSUPP; + goto err_hooks; + } stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]); - if (IS_ERR(stats)) - return PTR_ERR(stats); + if (IS_ERR(stats)) { + err = PTR_ERR(stats); + goto err_hooks; + } } + if (!(table->flags & NFT_TABLE_F_DORMANT) && + nft_is_base_chain(chain) && + !list_empty(&hook.list)) { + basechain = nft_base_chain(chain); + ops = &basechain->ops; + + if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) { + err = nft_netdev_register_hooks(ctx->net, &hook.list); + if (err < 0) + goto err_hooks; + } + } + + unregister = true; err = -ENOMEM; trans = nft_trans_alloc(ctx, NFT_MSG_NEWCHAIN, sizeof(struct nft_trans_chain)); if (trans == NULL) - goto err; + goto err_trans; nft_trans_chain_stats(trans) = stats; nft_trans_chain_update(trans) = true; @@ -2465,7 +2503,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, err = -ENOMEM; name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL_ACCOUNT); if (!name) - goto err; + goto err_trans; err = -EEXIST; list_for_each_entry(tmp, &nft_net->commit_list, list) { @@ -2476,18 +2514,35 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, strcmp(name, nft_trans_chain_name(tmp)) 
== 0) { NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]); kfree(name); - goto err; + goto err_trans; } } nft_trans_chain_name(trans) = name; } + + nft_trans_basechain(trans) = basechain; + INIT_LIST_HEAD(&nft_trans_chain_hooks(trans)); + list_splice(&hook.list, &nft_trans_chain_hooks(trans)); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; -err: + +err_trans: free_percpu(stats); kfree(trans); +err_hooks: + if (nla[NFTA_CHAIN_HOOK]) { + list_for_each_entry_safe(h, next, &hook.list, list) { + if (unregister) + nf_unregister_net_hook(ctx->net, &h->ops); + list_del(&h->list); + kfree_rcu(h, rcu); + } + module_put(hook.type->owner); + } + return err; } @@ -2611,6 +2666,59 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info, return nf_tables_addchain(&ctx, family, genmask, policy, flags, extack); } +static int nft_delchain_hook(struct nft_ctx *ctx, struct nft_chain *chain, + struct netlink_ext_ack *extack) +{ + const struct nlattr * const *nla = ctx->nla; + struct nft_chain_hook chain_hook = {}; + struct nft_base_chain *basechain; + struct nft_hook *this, *hook; + LIST_HEAD(chain_del_list); + struct nft_trans *trans; + int err; + + if (!nft_is_base_chain(chain)) + return -EOPNOTSUPP; + + basechain = nft_base_chain(chain); + err = nft_chain_parse_hook(ctx->net, basechain, nla, &chain_hook, + ctx->family, chain->flags, extack); + if (err < 0) + return err; + + list_for_each_entry(this, &chain_hook.list, list) { + hook = nft_hook_list_find(&basechain->hook_list, this); + if (!hook) { + err = -ENOENT; + goto err_chain_del_hook; + } + list_move(&hook->list, &chain_del_list); + } + + trans = nft_trans_alloc(ctx, NFT_MSG_DELCHAIN, + sizeof(struct nft_trans_chain)); + if (!trans) { + err = -ENOMEM; + goto err_chain_del_hook; + } + + nft_trans_basechain(trans) = basechain; + nft_trans_chain_update(trans) = true; + INIT_LIST_HEAD(&nft_trans_chain_hooks(trans)); + list_splice(&chain_del_list, &nft_trans_chain_hooks(trans)); + nft_chain_release_hook(&chain_hook); + + nft_trans_commit_list_add_tail(ctx->net, trans); + + return 0; + +err_chain_del_hook: + list_splice(&chain_del_list, &basechain->hook_list); + nft_chain_release_hook(&chain_hook); + + return err; +} + static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { @@ -2651,12 +2759,19 @@ static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info, return PTR_ERR(chain); } + nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla); + + if (nla[NFTA_CHAIN_HOOK]) { + if (chain->flags & NFT_CHAIN_HW_OFFLOAD) + return -EOPNOTSUPP; + + return nft_delchain_hook(&ctx, chain, extack); + } + if (info->nlh->nlmsg_flags & NLM_F_NONREC && chain->use > 0) return -EBUSY; - nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla); - use = chain->use; list_for_each_entry(rule, &chain->rules, list) { if (!nft_is_active_next(net, rule)) @@ -3666,7 +3781,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, } if (expr_info[i].ops->validate) - nft_validate_state_update(net, NFT_VALIDATE_NEED); + nft_validate_state_update(table, NFT_VALIDATE_NEED); expr_info[i].ops = NULL; expr = nft_expr_next(expr); @@ -3716,7 +3831,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, if (flow) nft_trans_flow_rule(trans) = flow; - if (nft_net->validate_state == NFT_VALIDATE_DO) + if (table->validate_state == NFT_VALIDATE_DO) return nft_table_validate(net, table); return 0; @@ -4151,6 
+4266,12 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, if (nla_put_be64(skb, NFTA_SET_HANDLE, cpu_to_be64(set->handle), NFTA_SET_PAD)) goto nla_put_failure; + + if (event == NFT_MSG_DELSET) { + nlmsg_end(skb, nlh); + return 0; + } + if (set->flags != 0) if (nla_put_be32(skb, NFTA_SET_FLAGS, htonl(set->flags))) goto nla_put_failure; @@ -6318,7 +6439,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (desc.type == NFT_DATA_VERDICT && (elem.data.val.verdict.code == NFT_GOTO || elem.data.val.verdict.code == NFT_JUMP)) - nft_validate_state_update(ctx->net, + nft_validate_state_update(ctx->table, NFT_VALIDATE_NEED); } @@ -6443,7 +6564,6 @@ static int nf_tables_newsetelem(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { - struct nftables_pernet *nft_net = nft_pernet(info->net); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); u8 family = info->nfmsg->nfgen_family; @@ -6482,7 +6602,7 @@ static int nf_tables_newsetelem(struct sk_buff *skb, } } - if (nft_net->validate_state == NFT_VALIDATE_DO) + if (table->validate_state == NFT_VALIDATE_DO) return nft_table_validate(net, table); return 0; @@ -7156,13 +7276,20 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net, if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) || nla_put_string(skb, NFTA_OBJ_NAME, obj->key.name) || - nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) || - nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) || - nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset) || nla_put_be64(skb, NFTA_OBJ_HANDLE, cpu_to_be64(obj->handle), NFTA_OBJ_PAD)) goto nla_put_failure; + if (event == NFT_MSG_DELOBJ) { + nlmsg_end(skb, nlh); + return 0; + } + + if (nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) || + nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) || + nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset)) + goto nla_put_failure; + if (obj->udata && nla_put(skb, NFTA_OBJ_USERDATA, obj->udlen, obj->udata)) goto nla_put_failure; @@ -7568,7 +7695,8 @@ static const struct nla_policy nft_flowtable_hook_policy[NFTA_FLOWTABLE_HOOK_MAX static int nft_flowtable_parse_hook(const struct nft_ctx *ctx, const struct nlattr *attr, struct nft_flowtable_hook *flowtable_hook, - struct nft_flowtable *flowtable, bool add) + struct nft_flowtable *flowtable, + struct netlink_ext_ack *extack, bool add) { struct nlattr *tb[NFTA_FLOWTABLE_HOOK_MAX + 1]; struct nft_hook *hook; @@ -7615,7 +7743,8 @@ static int nft_flowtable_parse_hook(const struct nft_ctx *ctx, if (tb[NFTA_FLOWTABLE_HOOK_DEVS]) { err = nf_tables_parse_netdev_hooks(ctx->net, tb[NFTA_FLOWTABLE_HOOK_DEVS], - &flowtable_hook->list); + &flowtable_hook->list, + extack); if (err < 0) return err; } @@ -7747,7 +7876,7 @@ err_unregister_net_hooks: return err; } -static void nft_flowtable_hooks_destroy(struct list_head *hook_list) +static void nft_hooks_destroy(struct list_head *hook_list) { struct nft_hook *hook, *next; @@ -7758,7 +7887,8 @@ static void nft_flowtable_hooks_destroy(struct list_head *hook_list) } static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh, - struct nft_flowtable *flowtable) + struct nft_flowtable *flowtable, + struct netlink_ext_ack *extack) { const struct nlattr * const *nla = ctx->nla; struct nft_flowtable_hook flowtable_hook; @@ -7769,7 +7899,7 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh, int err; err = nft_flowtable_parse_hook(ctx, 
nla[NFTA_FLOWTABLE_HOOK], - &flowtable_hook, flowtable, false); + &flowtable_hook, flowtable, extack, false); if (err < 0) return err; @@ -7874,7 +8004,7 @@ static int nf_tables_newflowtable(struct sk_buff *skb, nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); - return nft_flowtable_update(&ctx, info->nlh, flowtable); + return nft_flowtable_update(&ctx, info->nlh, flowtable, extack); } nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); @@ -7915,7 +8045,7 @@ static int nf_tables_newflowtable(struct sk_buff *skb, goto err3; err = nft_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK], - &flowtable_hook, flowtable, true); + &flowtable_hook, flowtable, extack, true); if (err < 0) goto err4; @@ -7927,7 +8057,7 @@ static int nf_tables_newflowtable(struct sk_buff *skb, &flowtable->hook_list, flowtable); if (err < 0) { - nft_flowtable_hooks_destroy(&flowtable->hook_list); + nft_hooks_destroy(&flowtable->hook_list); goto err4; } @@ -7967,7 +8097,8 @@ static void nft_flowtable_hook_release(struct nft_flowtable_hook *flowtable_hook } static int nft_delflowtable_hook(struct nft_ctx *ctx, - struct nft_flowtable *flowtable) + struct nft_flowtable *flowtable, + struct netlink_ext_ack *extack) { const struct nlattr * const *nla = ctx->nla; struct nft_flowtable_hook flowtable_hook; @@ -7977,7 +8108,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx, int err; err = nft_flowtable_parse_hook(ctx, nla[NFTA_FLOWTABLE_HOOK], - &flowtable_hook, flowtable, false); + &flowtable_hook, flowtable, extack, false); if (err < 0) return err; @@ -8059,7 +8190,7 @@ static int nf_tables_delflowtable(struct sk_buff *skb, nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); if (nla[NFTA_FLOWTABLE_HOOK]) - return nft_delflowtable_hook(&ctx, flowtable); + return nft_delflowtable_hook(&ctx, flowtable, extack); if (flowtable->use > 0) { NL_SET_BAD_ATTR(extack, attr); @@ -8087,9 +8218,16 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, if (nla_put_string(skb, NFTA_FLOWTABLE_TABLE, flowtable->table->name) || nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) || - nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) || nla_put_be64(skb, NFTA_FLOWTABLE_HANDLE, cpu_to_be64(flowtable->handle), - NFTA_FLOWTABLE_PAD) || + NFTA_FLOWTABLE_PAD)) + goto nla_put_failure; + + if (event == NFT_MSG_DELFLOWTABLE && !hook_list) { + nlmsg_end(skb, nlh); + return 0; + } + + if (nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) || nla_put_be32(skb, NFTA_FLOWTABLE_FLAGS, htonl(flowtable->data.flags))) goto nla_put_failure; @@ -8104,6 +8242,9 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, if (!nest_devs) goto nla_put_failure; + if (!hook_list) + hook_list = &flowtable->hook_list; + list_for_each_entry_rcu(hook, hook_list, list) { if (nla_put_string(skb, NFTA_DEVICE_NAME, hook->ops.dev->name)) goto nla_put_failure; @@ -8160,8 +8301,7 @@ static int nf_tables_dump_flowtable(struct sk_buff *skb, NFT_MSG_NEWFLOWTABLE, NLM_F_MULTI | NLM_F_APPEND, table->family, - flowtable, - &flowtable->hook_list) < 0) + flowtable, NULL) < 0) goto done; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); @@ -8256,7 +8396,7 @@ static int nf_tables_getflowtable(struct sk_buff *skb, err = nf_tables_fill_flowtable_info(skb2, net, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, NFT_MSG_NEWFLOWTABLE, 0, family, - flowtable, &flowtable->hook_list); + flowtable, NULL); if (err < 0) goto err_fill_flowtable_info; @@ -8269,8 +8409,7 @@ 
err_fill_flowtable_info: static void nf_tables_flowtable_notify(struct nft_ctx *ctx, struct nft_flowtable *flowtable, - struct list_head *hook_list, - int event) + struct list_head *hook_list, int event) { struct nftables_pernet *nft_net = nft_pernet(ctx->net); struct sk_buff *skb; @@ -8634,17 +8773,20 @@ static int nf_tables_validate(struct net *net) struct nftables_pernet *nft_net = nft_pernet(net); struct nft_table *table; - switch (nft_net->validate_state) { - case NFT_VALIDATE_SKIP: - break; - case NFT_VALIDATE_NEED: - nft_validate_state_update(net, NFT_VALIDATE_DO); - fallthrough; - case NFT_VALIDATE_DO: - list_for_each_entry(table, &nft_net->tables, list) { + list_for_each_entry(table, &nft_net->tables, list) { + switch (table->validate_state) { + case NFT_VALIDATE_SKIP: + continue; + case NFT_VALIDATE_NEED: + nft_validate_state_update(table, NFT_VALIDATE_DO); + fallthrough; + case NFT_VALIDATE_DO: if (nft_table_validate(net, table) < 0) return -EAGAIN; + + nft_validate_state_update(table, NFT_VALIDATE_SKIP); } + break; } @@ -8729,7 +8871,10 @@ static void nft_commit_release(struct nft_trans *trans) break; case NFT_MSG_DELCHAIN: case NFT_MSG_DESTROYCHAIN: - nf_tables_chain_destroy(&trans->ctx); + if (nft_trans_chain_update(trans)) + nft_hooks_destroy(&nft_trans_chain_hooks(trans)); + else + nf_tables_chain_destroy(&trans->ctx); break; case NFT_MSG_DELRULE: case NFT_MSG_DESTROYRULE: @@ -8752,7 +8897,7 @@ static void nft_commit_release(struct nft_trans *trans) case NFT_MSG_DELFLOWTABLE: case NFT_MSG_DESTROYFLOWTABLE: if (nft_trans_flowtable_update(trans)) - nft_flowtable_hooks_destroy(&nft_trans_flowtable_hooks(trans)); + nft_hooks_destroy(&nft_trans_flowtable_hooks(trans)); else nf_tables_flowtable_destroy(nft_trans_flowtable(trans)); break; @@ -8817,9 +8962,8 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha return -ENOMEM; } } - data_size += offsetof(struct nft_rule_dp, data); /* last rule */ - chain->blob_next = nf_tables_chain_alloc_rules(data_size); + chain->blob_next = nf_tables_chain_alloc_rules(chain, data_size); if (!chain->blob_next) return -ENOMEM; @@ -8864,12 +9008,11 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha chain->blob_next->size += (unsigned long)(data - (void *)prule); } - prule = (struct nft_rule_dp *)data; - data += offsetof(struct nft_rule_dp, data); if (WARN_ON_ONCE(data > data_boundary)) return -ENOMEM; - nft_last_rule(chain->blob_next, prule); + prule = (struct nft_rule_dp *)data; + nft_last_rule(chain, prule); return 0; } @@ -8890,22 +9033,22 @@ static void nf_tables_commit_chain_prepare_cancel(struct net *net) } } -static void __nf_tables_commit_chain_free_rules_old(struct rcu_head *h) +static void __nf_tables_commit_chain_free_rules(struct rcu_head *h) { - struct nft_rules_old *o = container_of(h, struct nft_rules_old, h); + struct nft_rule_dp_last *l = container_of(h, struct nft_rule_dp_last, h); - kvfree(o->blob); + kvfree(l->blob); } static void nf_tables_commit_chain_free_rules_old(struct nft_rule_blob *blob) { - struct nft_rules_old *old; + struct nft_rule_dp_last *last; - /* rcu_head is after end marker */ - old = (void *)blob + sizeof(*blob) + blob->size; - old->blob = blob; + /* last rule trailer is after end marker */ + last = (void *)blob + sizeof(*blob) + blob->size; + last->blob = blob; - call_rcu(&old->h, __nf_tables_commit_chain_free_rules_old); + call_rcu(&last->h, __nf_tables_commit_chain_free_rules); } static void nf_tables_commit_chain(struct net *net, struct 
nft_chain *chain) @@ -9209,22 +9352,34 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) { nft_chain_commit_update(trans); - nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN); + nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN, + &nft_trans_chain_hooks(trans)); + list_splice(&nft_trans_chain_hooks(trans), + &nft_trans_basechain(trans)->hook_list); /* trans destroyed after rcu grace period */ } else { nft_chain_commit_drop_policy(trans); nft_clear(net, trans->ctx.chain); - nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN); + nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN, NULL); nft_trans_destroy(trans); } break; case NFT_MSG_DELCHAIN: case NFT_MSG_DESTROYCHAIN: - nft_chain_del(trans->ctx.chain); - nf_tables_chain_notify(&trans->ctx, trans->msg_type); - nf_tables_unregister_hook(trans->ctx.net, - trans->ctx.table, - trans->ctx.chain); + if (nft_trans_chain_update(trans)) { + nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN, + &nft_trans_chain_hooks(trans)); + nft_netdev_unregister_hooks(net, + &nft_trans_chain_hooks(trans), + true); + } else { + nft_chain_del(trans->ctx.chain); + nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN, + NULL); + nf_tables_unregister_hook(trans->ctx.net, + trans->ctx.table, + trans->ctx.chain); + } break; case NFT_MSG_NEWRULE: nft_clear(trans->ctx.net, nft_trans_rule(trans)); @@ -9330,7 +9485,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_clear(net, nft_trans_flowtable(trans)); nf_tables_flowtable_notify(&trans->ctx, nft_trans_flowtable(trans), - &nft_trans_flowtable(trans)->hook_list, + NULL, NFT_MSG_NEWFLOWTABLE); } nft_trans_destroy(trans); @@ -9348,7 +9503,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) list_del_rcu(&nft_trans_flowtable(trans)->list); nf_tables_flowtable_notify(&trans->ctx, nft_trans_flowtable(trans), - &nft_trans_flowtable(trans)->hook_list, + NULL, trans->msg_type); nft_unregister_flowtable_net_hooks(net, &nft_trans_flowtable(trans)->hook_list); @@ -9388,7 +9543,10 @@ static void nf_tables_abort_release(struct nft_trans *trans) nf_tables_table_destroy(&trans->ctx); break; case NFT_MSG_NEWCHAIN: - nf_tables_chain_destroy(&trans->ctx); + if (nft_trans_chain_update(trans)) + nft_hooks_destroy(&nft_trans_chain_hooks(trans)); + else + nf_tables_chain_destroy(&trans->ctx); break; case NFT_MSG_NEWRULE: nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); @@ -9405,7 +9563,7 @@ static void nf_tables_abort_release(struct nft_trans *trans) break; case NFT_MSG_NEWFLOWTABLE: if (nft_trans_flowtable_update(trans)) - nft_flowtable_hooks_destroy(&nft_trans_flowtable_hooks(trans)); + nft_hooks_destroy(&nft_trans_flowtable_hooks(trans)); else nf_tables_flowtable_destroy(nft_trans_flowtable(trans)); break; @@ -9451,6 +9609,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) { + nft_netdev_unregister_hooks(net, + &nft_trans_chain_hooks(trans), + true); free_percpu(nft_trans_chain_stats(trans)); kfree(nft_trans_chain_name(trans)); nft_trans_destroy(trans); @@ -9468,8 +9629,13 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_DELCHAIN: case NFT_MSG_DESTROYCHAIN: - trans->ctx.table->use++; - nft_clear(trans->ctx.net, trans->ctx.chain); + if (nft_trans_chain_update(trans)) { + list_splice(&nft_trans_chain_hooks(trans), + &nft_trans_basechain(trans)->hook_list); + } 
else { + trans->ctx.table->use++; + nft_clear(trans->ctx.net, trans->ctx.chain); + } nft_trans_destroy(trans); break; case NFT_MSG_NEWRULE: @@ -9586,11 +9752,6 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) return 0; } -static void nf_tables_cleanup(struct net *net) -{ - nft_validate_state_update(net, NFT_VALIDATE_SKIP); -} - static int nf_tables_abort(struct net *net, struct sk_buff *skb, enum nfnl_abort_action action) { @@ -9624,7 +9785,6 @@ static const struct nfnetlink_subsystem nf_tables_subsys = { .cb = nf_tables_cb, .commit = nf_tables_commit, .abort = nf_tables_abort, - .cleanup = nf_tables_cleanup, .valid_genid = nf_tables_valid_genid, .owner = THIS_MODULE, }; @@ -10367,7 +10527,6 @@ static int __net_init nf_tables_init_net(struct net *net) INIT_LIST_HEAD(&nft_net->notify_list); mutex_init(&nft_net->commit_mutex); nft_net->base_seq = 1; - nft_net->validate_state = NFT_VALIDATE_SKIP; return 0; } diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 6ecd0ba2e546..4d0ce12221f6 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -41,39 +41,37 @@ static inline bool nf_skip_indirect_calls(void) { return false; } static inline void nf_skip_indirect_calls_enable(void) { } #endif -static noinline void __nft_trace_packet(struct nft_traceinfo *info, - const struct nft_chain *chain, +static noinline void __nft_trace_packet(const struct nft_pktinfo *pkt, + const struct nft_verdict *verdict, + const struct nft_rule_dp *rule, + struct nft_traceinfo *info, enum nft_trace_types type) { if (!info->trace || !info->nf_trace) return; - info->chain = chain; info->type = type; - nft_trace_notify(info); + nft_trace_notify(pkt, verdict, rule, info); } static inline void nft_trace_packet(const struct nft_pktinfo *pkt, + struct nft_verdict *verdict, struct nft_traceinfo *info, - const struct nft_chain *chain, const struct nft_rule_dp *rule, enum nft_trace_types type) { if (static_branch_unlikely(&nft_trace_enabled)) { info->nf_trace = pkt->skb->nf_trace; - info->rule = rule; - __nft_trace_packet(info, chain, type); + __nft_trace_packet(pkt, verdict, rule, info, type); } } static inline void nft_trace_copy_nftrace(const struct nft_pktinfo *pkt, struct nft_traceinfo *info) { - if (static_branch_unlikely(&nft_trace_enabled)) { - if (info->trace) - info->nf_trace = pkt->skb->nf_trace; - } + if (static_branch_unlikely(&nft_trace_enabled)) + info->nf_trace = pkt->skb->nf_trace; } static void nft_bitwise_fast_eval(const struct nft_expr *expr, @@ -110,8 +108,9 @@ static void nft_cmp16_fast_eval(const struct nft_expr *expr, regs->verdict.code = NFT_BREAK; } -static noinline void __nft_trace_verdict(struct nft_traceinfo *info, - const struct nft_chain *chain, +static noinline void __nft_trace_verdict(const struct nft_pktinfo *pkt, + struct nft_traceinfo *info, + const struct nft_rule_dp *rule, const struct nft_regs *regs) { enum nft_trace_types type; @@ -129,22 +128,20 @@ static noinline void __nft_trace_verdict(struct nft_traceinfo *info, type = NFT_TRACETYPE_RULE; if (info->trace) - info->nf_trace = info->pkt->skb->nf_trace; + info->nf_trace = pkt->skb->nf_trace; break; } - __nft_trace_packet(info, chain, type); + __nft_trace_packet(pkt, &regs->verdict, rule, info, type); } -static inline void nft_trace_verdict(struct nft_traceinfo *info, - const struct nft_chain *chain, +static inline void nft_trace_verdict(const struct nft_pktinfo *pkt, + struct nft_traceinfo *info, const struct nft_rule_dp *rule, const struct nft_regs 
*regs) { - if (static_branch_unlikely(&nft_trace_enabled)) { - info->rule = rule; - __nft_trace_verdict(info, chain, regs); - } + if (static_branch_unlikely(&nft_trace_enabled)) + __nft_trace_verdict(pkt, info, rule, regs); } static bool nft_payload_fast_eval(const struct nft_expr *expr, @@ -203,9 +200,7 @@ static noinline void nft_update_chain_stats(const struct nft_chain *chain, } struct nft_jumpstack { - const struct nft_chain *chain; const struct nft_rule_dp *rule; - const struct nft_rule_dp *last_rule; }; static void expr_call_ops_eval(const struct nft_expr *expr, @@ -248,7 +243,6 @@ indirect_call: #define nft_rule_expr_first(rule) (struct nft_expr *)&rule->data[0] #define nft_rule_expr_next(expr) ((void *)expr) + expr->ops->size #define nft_rule_expr_last(rule) (struct nft_expr *)&rule->data[rule->dlen] -#define nft_rule_next(rule) (void *)rule + sizeof(*rule) + rule->dlen #define nft_rule_dp_for_each_expr(expr, last, rule) \ for ((expr) = nft_rule_expr_first(rule), (last) = nft_rule_expr_last(rule); \ @@ -259,9 +253,9 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv) { const struct nft_chain *chain = priv, *basechain = chain; - const struct nft_rule_dp *rule, *last_rule; const struct net *net = nft_net(pkt); const struct nft_expr *expr, *last; + const struct nft_rule_dp *rule; struct nft_regs regs = {}; unsigned int stackptr = 0; struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; @@ -271,7 +265,7 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) info.trace = false; if (static_branch_unlikely(&nft_trace_enabled)) - nft_trace_init(&info, pkt, &regs.verdict, basechain); + nft_trace_init(&info, pkt, basechain); do_chain: if (genbit) blob = rcu_dereference(chain->blob_gen_1); @@ -279,10 +273,9 @@ do_chain: blob = rcu_dereference(chain->blob_gen_0); rule = (struct nft_rule_dp *)blob->data; - last_rule = (void *)blob->data + blob->size; next_rule: regs.verdict.code = NFT_CONTINUE; - for (; rule < last_rule; rule = nft_rule_next(rule)) { + for (; !rule->is_last ; rule = nft_rule_next(rule)) { nft_rule_dp_for_each_expr(expr, last, rule) { if (expr->ops == &nft_cmp_fast_ops) nft_cmp_fast_eval(expr, &regs); @@ -304,14 +297,14 @@ next_rule: nft_trace_copy_nftrace(pkt, &info); continue; case NFT_CONTINUE: - nft_trace_packet(pkt, &info, chain, rule, + nft_trace_packet(pkt, &regs.verdict, &info, rule, NFT_TRACETYPE_RULE); continue; } break; } - nft_trace_verdict(&info, chain, rule, &regs); + nft_trace_verdict(pkt, &info, rule, &regs); switch (regs.verdict.code & NF_VERDICT_MASK) { case NF_ACCEPT: @@ -325,9 +318,7 @@ next_rule: case NFT_JUMP: if (WARN_ON_ONCE(stackptr >= NFT_JUMP_STACK_SIZE)) return NF_DROP; - jumpstack[stackptr].chain = chain; jumpstack[stackptr].rule = nft_rule_next(rule); - jumpstack[stackptr].last_rule = last_rule; stackptr++; fallthrough; case NFT_GOTO: @@ -342,13 +333,11 @@ next_rule: if (stackptr > 0) { stackptr--; - chain = jumpstack[stackptr].chain; rule = jumpstack[stackptr].rule; - last_rule = jumpstack[stackptr].last_rule; goto next_rule; } - nft_trace_packet(pkt, &info, basechain, NULL, NFT_TRACETYPE_POLICY); + nft_trace_packet(pkt, &regs.verdict, &info, NULL, NFT_TRACETYPE_POLICY); if (static_branch_unlikely(&nft_counters_enabled)) nft_update_chain_stats(basechain, pkt); diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index 1163ba9c1401..6d41c0bd3d78 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -124,9 +124,11 @@ static int nf_trace_fill_pkt_info(struct sk_buff *nlskb, } static int 
nf_trace_fill_rule_info(struct sk_buff *nlskb, + const struct nft_verdict *verdict, + const struct nft_rule_dp *rule, const struct nft_traceinfo *info) { - if (!info->rule || info->rule->is_last) + if (!rule || rule->is_last) return 0; /* a continue verdict with ->type == RETURN means that this is @@ -135,15 +137,16 @@ static int nf_trace_fill_rule_info(struct sk_buff *nlskb, * Since no rule matched, the ->rule pointer is invalid. */ if (info->type == NFT_TRACETYPE_RETURN && - info->verdict->code == NFT_CONTINUE) + verdict->code == NFT_CONTINUE) return 0; return nla_put_be64(nlskb, NFTA_TRACE_RULE_HANDLE, - cpu_to_be64(info->rule->handle), + cpu_to_be64(rule->handle), NFTA_TRACE_PAD); } -static bool nft_trace_have_verdict_chain(struct nft_traceinfo *info) +static bool nft_trace_have_verdict_chain(const struct nft_verdict *verdict, + struct nft_traceinfo *info) { switch (info->type) { case NFT_TRACETYPE_RETURN: @@ -153,7 +156,7 @@ static bool nft_trace_have_verdict_chain(struct nft_traceinfo *info) return false; } - switch (info->verdict->code) { + switch (verdict->code) { case NFT_JUMP: case NFT_GOTO: break; @@ -164,9 +167,31 @@ static bool nft_trace_have_verdict_chain(struct nft_traceinfo *info) return true; } -void nft_trace_notify(struct nft_traceinfo *info) +static const struct nft_chain *nft_trace_get_chain(const struct nft_rule_dp *rule, + const struct nft_traceinfo *info) { - const struct nft_pktinfo *pkt = info->pkt; + const struct nft_rule_dp_last *last; + + if (!rule) + return &info->basechain->chain; + + while (!rule->is_last) + rule = nft_rule_next(rule); + + last = (const struct nft_rule_dp_last *)rule; + + if (WARN_ON_ONCE(!last->chain)) + return &info->basechain->chain; + + return last->chain; +} + +void nft_trace_notify(const struct nft_pktinfo *pkt, + const struct nft_verdict *verdict, + const struct nft_rule_dp *rule, + struct nft_traceinfo *info) +{ + const struct nft_chain *chain; struct nlmsghdr *nlh; struct sk_buff *skb; unsigned int size; @@ -176,9 +201,11 @@ void nft_trace_notify(struct nft_traceinfo *info) if (!nfnetlink_has_listeners(nft_net(pkt), NFNLGRP_NFTRACE)) return; + chain = nft_trace_get_chain(rule, info); + size = nlmsg_total_size(sizeof(struct nfgenmsg)) + - nla_total_size(strlen(info->chain->table->name)) + - nla_total_size(strlen(info->chain->name)) + + nla_total_size(strlen(chain->table->name)) + + nla_total_size(strlen(chain->name)) + nla_total_size_64bit(sizeof(__be64)) + /* rule handle */ nla_total_size(sizeof(__be32)) + /* trace type */ nla_total_size(0) + /* VERDICT, nested */ @@ -195,8 +222,8 @@ void nft_trace_notify(struct nft_traceinfo *info) nla_total_size(sizeof(u32)) + /* nfproto */ nla_total_size(sizeof(u32)); /* policy */ - if (nft_trace_have_verdict_chain(info)) - size += nla_total_size(strlen(info->verdict->chain->name)); /* jump target */ + if (nft_trace_have_verdict_chain(verdict, info)) + size += nla_total_size(strlen(verdict->chain->name)); /* jump target */ skb = nlmsg_new(size, GFP_ATOMIC); if (!skb) @@ -217,13 +244,13 @@ void nft_trace_notify(struct nft_traceinfo *info) if (nla_put_u32(skb, NFTA_TRACE_ID, info->skbid)) goto nla_put_failure; - if (nla_put_string(skb, NFTA_TRACE_CHAIN, info->chain->name)) + if (nla_put_string(skb, NFTA_TRACE_CHAIN, chain->name)) goto nla_put_failure; - if (nla_put_string(skb, NFTA_TRACE_TABLE, info->chain->table->name)) + if (nla_put_string(skb, NFTA_TRACE_TABLE, chain->table->name)) goto nla_put_failure; - if (nf_trace_fill_rule_info(skb, info)) + if (nf_trace_fill_rule_info(skb, verdict, rule, 
info)) goto nla_put_failure; switch (info->type) { @@ -232,11 +259,11 @@ void nft_trace_notify(struct nft_traceinfo *info) break; case NFT_TRACETYPE_RETURN: case NFT_TRACETYPE_RULE: - if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, info->verdict)) + if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, verdict)) goto nla_put_failure; /* pkt->skb undefined iff NF_STOLEN, disable dump */ - if (info->verdict->code == NF_STOLEN) + if (verdict->code == NF_STOLEN) info->packet_dumped = true; else mark = pkt->skb->mark; @@ -273,7 +300,6 @@ void nft_trace_notify(struct nft_traceinfo *info) } void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, - const struct nft_verdict *verdict, const struct nft_chain *chain) { static siphash_key_t trace_key __read_mostly; @@ -283,8 +309,6 @@ void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, info->trace = true; info->nf_trace = pkt->skb->nf_trace; info->packet_dumped = false; - info->pkt = pkt; - info->verdict = verdict; net_get_random_once(&trace_key, sizeof(trace_key)); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 81c7737c803a..ae7146475d17 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -590,8 +590,6 @@ done: goto replay_abort; } } - if (ss->cleanup) - ss->cleanup(net); nfnl_err_deliver(&err_list, oskb); kfree_skb(skb); diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c index 8120aadf6a0f..ade8ee1988b1 100644 --- a/net/netfilter/nfnetlink_hook.c +++ b/net/netfilter/nfnetlink_hook.c @@ -5,6 +5,7 @@ * Author: Florian Westphal <fw@strlen.de> */ +#include <linux/bpf.h> #include <linux/module.h> #include <linux/kallsyms.h> #include <linux/kernel.h> @@ -57,35 +58,76 @@ struct nfnl_dump_hook_data { u8 hook; }; +static struct nlattr *nfnl_start_info_type(struct sk_buff *nlskb, enum nfnl_hook_chaintype t) +{ + struct nlattr *nest = nla_nest_start(nlskb, NFNLA_HOOK_CHAIN_INFO); + int ret; + + if (!nest) + return NULL; + + ret = nla_put_be32(nlskb, NFNLA_HOOK_INFO_TYPE, htonl(t)); + if (ret == 0) + return nest; + + nla_nest_cancel(nlskb, nest); + return NULL; +} + +static int nfnl_hook_put_bpf_prog_info(struct sk_buff *nlskb, + const struct nfnl_dump_hook_data *ctx, + unsigned int seq, + const struct bpf_prog *prog) +{ + struct nlattr *nest, *nest2; + int ret; + + if (!IS_ENABLED(CONFIG_NETFILTER_BPF_LINK)) + return 0; + + if (WARN_ON_ONCE(!prog)) + return 0; + + nest = nfnl_start_info_type(nlskb, NFNL_HOOK_TYPE_BPF); + if (!nest) + return -EMSGSIZE; + + nest2 = nla_nest_start(nlskb, NFNLA_HOOK_INFO_DESC); + if (!nest2) + goto cancel_nest; + + ret = nla_put_be32(nlskb, NFNLA_HOOK_BPF_ID, htonl(prog->aux->id)); + if (ret) + goto cancel_nest; + + nla_nest_end(nlskb, nest2); + nla_nest_end(nlskb, nest); + return 0; + +cancel_nest: + nla_nest_cancel(nlskb, nest); + return -EMSGSIZE; +} + static int nfnl_hook_put_nft_chain_info(struct sk_buff *nlskb, const struct nfnl_dump_hook_data *ctx, unsigned int seq, - const struct nf_hook_ops *ops) + struct nft_chain *chain) { struct net *net = sock_net(nlskb->sk); struct nlattr *nest, *nest2; - struct nft_chain *chain; int ret = 0; - if (ops->hook_ops_type != NF_HOOK_OP_NF_TABLES) - return 0; - - chain = ops->priv; if (WARN_ON_ONCE(!chain)) return 0; if (!nft_is_active(net, chain)) return 0; - nest = nla_nest_start(nlskb, NFNLA_HOOK_CHAIN_INFO); + nest = nfnl_start_info_type(nlskb, NFNL_HOOK_TYPE_NFTABLES); if (!nest) return -EMSGSIZE; - ret = nla_put_be32(nlskb, NFNLA_HOOK_INFO_TYPE, - 
htonl(NFNL_HOOK_TYPE_NFTABLES)); - if (ret) - goto cancel_nest; - nest2 = nla_nest_start(nlskb, NFNLA_HOOK_INFO_DESC); if (!nest2) goto cancel_nest; @@ -171,7 +213,20 @@ static int nfnl_hook_dump_one(struct sk_buff *nlskb, if (ret) goto nla_put_failure; - ret = nfnl_hook_put_nft_chain_info(nlskb, ctx, seq, ops); + switch (ops->hook_ops_type) { + case NF_HOOK_OP_NF_TABLES: + ret = nfnl_hook_put_nft_chain_info(nlskb, ctx, seq, ops->priv); + break; + case NF_HOOK_OP_BPF: + ret = nfnl_hook_put_bpf_prog_info(nlskb, ctx, seq, ops->priv); + break; + case NF_HOOK_OP_UNDEFINED: + break; + default: + WARN_ON_ONCE(1); + break; + } + if (ret) goto nla_put_failure; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index d97eb280cb2e..e57eb168ee13 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -103,9 +103,9 @@ static inline u_int8_t instance_hashfn(u_int16_t group_num) } static struct nfulnl_instance * -__instance_lookup(struct nfnl_log_net *log, u_int16_t group_num) +__instance_lookup(const struct nfnl_log_net *log, u16 group_num) { - struct hlist_head *head; + const struct hlist_head *head; struct nfulnl_instance *inst; head = &log->instance_table[instance_hashfn(group_num)]; @@ -123,15 +123,25 @@ instance_get(struct nfulnl_instance *inst) } static struct nfulnl_instance * -instance_lookup_get(struct nfnl_log_net *log, u_int16_t group_num) +instance_lookup_get_rcu(const struct nfnl_log_net *log, u16 group_num) { struct nfulnl_instance *inst; - rcu_read_lock_bh(); inst = __instance_lookup(log, group_num); if (inst && !refcount_inc_not_zero(&inst->use)) inst = NULL; - rcu_read_unlock_bh(); + + return inst; +} + +static struct nfulnl_instance * +instance_lookup_get(const struct nfnl_log_net *log, u16 group_num) +{ + struct nfulnl_instance *inst; + + rcu_read_lock(); + inst = instance_lookup_get_rcu(log, group_num); + rcu_read_unlock(); return inst; } @@ -698,7 +708,7 @@ nfulnl_log_packet(struct net *net, else li = &default_loginfo; - inst = instance_lookup_get(log, li->u.ulog.group); + inst = instance_lookup_get_rcu(log, li->u.ulog.group); if (!inst) return; @@ -1030,7 +1040,7 @@ static struct hlist_node *get_first(struct net *net, struct iter_state *st) struct hlist_head *head = &log->instance_table[st->bucket]; if (!hlist_empty(head)) - return rcu_dereference_bh(hlist_first_rcu(head)); + return rcu_dereference(hlist_first_rcu(head)); } return NULL; } @@ -1038,7 +1048,7 @@ static struct hlist_node *get_first(struct net *net, struct iter_state *st) static struct hlist_node *get_next(struct net *net, struct iter_state *st, struct hlist_node *h) { - h = rcu_dereference_bh(hlist_next_rcu(h)); + h = rcu_dereference(hlist_next_rcu(h)); while (!h) { struct nfnl_log_net *log; struct hlist_head *head; @@ -1048,7 +1058,7 @@ static struct hlist_node *get_next(struct net *net, struct iter_state *st, log = nfnl_log_pernet(net); head = &log->instance_table[st->bucket]; - h = rcu_dereference_bh(hlist_first_rcu(head)); + h = rcu_dereference(hlist_first_rcu(head)); } return h; } @@ -1066,9 +1076,9 @@ static struct hlist_node *get_idx(struct net *net, struct iter_state *st, } static void *seq_start(struct seq_file *s, loff_t *pos) - __acquires(rcu_bh) + __acquires(rcu) { - rcu_read_lock_bh(); + rcu_read_lock(); return get_idx(seq_file_net(s), s->private, *pos); } @@ -1079,9 +1089,9 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos) } static void seq_stop(struct seq_file *s, void *v) - __releases(rcu_bh) + __releases(rcu) { - rcu_read_unlock_bh(); 
+ rcu_read_unlock(); } static int seq_show(struct seq_file *s, void *v) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 87a9009d5234..e311462f6d98 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -29,6 +29,7 @@ #include <linux/netfilter/nfnetlink_queue.h> #include <linux/netfilter/nf_conntrack_common.h> #include <linux/list.h> +#include <linux/cgroup-defs.h> #include <net/sock.h> #include <net/tcp_states.h> #include <net/netfilter/nf_queue.h> @@ -301,6 +302,19 @@ nla_put_failure: return -1; } +static int nfqnl_put_sk_classid(struct sk_buff *skb, struct sock *sk) +{ +#if IS_ENABLED(CONFIG_CGROUP_NET_CLASSID) + if (sk && sk_fullsock(sk)) { + u32 classid = sock_cgroup_classid(&sk->sk_cgrp_data); + + if (classid && nla_put_be32(skb, NFQA_CGROUP_CLASSID, htonl(classid))) + return -1; + } +#endif + return 0; +} + static u32 nfqnl_get_sk_secctx(struct sk_buff *skb, char **secdata) { u32 seclen = 0; @@ -406,6 +420,9 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, + nla_total_size(sizeof(u_int32_t)) /* priority */ + nla_total_size(sizeof(struct nfqnl_msg_packet_hw)) + nla_total_size(sizeof(u_int32_t)) /* skbinfo */ +#if IS_ENABLED(CONFIG_CGROUP_NET_CLASSID) + + nla_total_size(sizeof(u_int32_t)) /* classid */ +#endif + nla_total_size(sizeof(u_int32_t)); /* cap_len */ tstamp = skb_tstamp_cond(entskb, false); @@ -599,6 +616,9 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, nfqnl_put_sk_uidgid(skb, entskb->sk) < 0) goto nla_put_failure; + if (nfqnl_put_sk_classid(skb, entskb->sk) < 0) + goto nla_put_failure; + if (seclen && nla_put(skb, NFQA_SECCTX, seclen, secdata)) goto nla_put_failure; diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index 9544c2f16998..b115d77fbbc7 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -96,23 +96,39 @@ nla_put_failure: return -1; } -static void nft_masq_ipv4_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +static void nft_masq_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { - struct nft_masq *priv = nft_expr_priv(expr); + const struct nft_masq *priv = nft_expr_priv(expr); struct nf_nat_range2 range; memset(&range, 0, sizeof(range)); range.flags = priv->flags; if (priv->sreg_proto_min) { - range.min_proto.all = (__force __be16)nft_reg_load16( - &regs->data[priv->sreg_proto_min]); - range.max_proto.all = (__force __be16)nft_reg_load16( - &regs->data[priv->sreg_proto_max]); + range.min_proto.all = (__force __be16) + nft_reg_load16(&regs->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16) + nft_reg_load16(&regs->data[priv->sreg_proto_max]); + } + + switch (nft_pf(pkt)) { + case NFPROTO_IPV4: + regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, + nft_hook(pkt), + &range, + nft_out(pkt)); + break; +#ifdef CONFIG_NF_TABLES_IPV6 + case NFPROTO_IPV6: + regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, + nft_out(pkt)); + break; +#endif + default: + WARN_ON_ONCE(1); + break; } - regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, nft_hook(pkt), - &range, nft_out(pkt)); } static void @@ -125,7 +141,7 @@ static struct nft_expr_type nft_masq_ipv4_type; static const struct nft_expr_ops nft_masq_ipv4_ops = { .type = &nft_masq_ipv4_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)), - .eval = nft_masq_ipv4_eval, + .eval = nft_masq_eval, .init = nft_masq_init, .destroy = nft_masq_ipv4_destroy, .dump = 
nft_masq_dump, @@ -143,25 +159,6 @@ static struct nft_expr_type nft_masq_ipv4_type __read_mostly = { }; #ifdef CONFIG_NF_TABLES_IPV6 -static void nft_masq_ipv6_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - struct nft_masq *priv = nft_expr_priv(expr); - struct nf_nat_range2 range; - - memset(&range, 0, sizeof(range)); - range.flags = priv->flags; - if (priv->sreg_proto_min) { - range.min_proto.all = (__force __be16)nft_reg_load16( - &regs->data[priv->sreg_proto_min]); - range.max_proto.all = (__force __be16)nft_reg_load16( - &regs->data[priv->sreg_proto_max]); - } - regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, - nft_out(pkt)); -} - static void nft_masq_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { @@ -172,7 +169,7 @@ static struct nft_expr_type nft_masq_ipv6_type; static const struct nft_expr_ops nft_masq_ipv6_ops = { .type = &nft_masq_ipv6_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)), - .eval = nft_masq_ipv6_eval, + .eval = nft_masq_eval, .init = nft_masq_init, .destroy = nft_masq_ipv6_destroy, .dump = nft_masq_dump, @@ -204,20 +201,6 @@ static inline void nft_masq_module_exit_ipv6(void) {} #endif #ifdef CONFIG_NF_TABLES_INET -static void nft_masq_inet_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - switch (nft_pf(pkt)) { - case NFPROTO_IPV4: - return nft_masq_ipv4_eval(expr, regs, pkt); - case NFPROTO_IPV6: - return nft_masq_ipv6_eval(expr, regs, pkt); - } - - WARN_ON_ONCE(1); -} - static void nft_masq_inet_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { @@ -228,7 +211,7 @@ static struct nft_expr_type nft_masq_inet_type; static const struct nft_expr_ops nft_masq_inet_ops = { .type = &nft_masq_inet_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)), - .eval = nft_masq_inet_eval, + .eval = nft_masq_eval, .init = nft_masq_init, .destroy = nft_masq_inet_destroy, .dump = nft_masq_dump, diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index 67cec56bc84a..a70196ffcb1e 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -64,6 +64,8 @@ static int nft_redir_init(const struct nft_ctx *ctx, } else { priv->sreg_proto_max = priv->sreg_proto_min; } + + priv->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } if (tb[NFTA_REDIR_FLAGS]) { @@ -99,25 +101,37 @@ nla_put_failure: return -1; } -static void nft_redir_ipv4_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +static void nft_redir_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { - struct nft_redir *priv = nft_expr_priv(expr); - struct nf_nat_ipv4_multi_range_compat mr; + const struct nft_redir *priv = nft_expr_priv(expr); + struct nf_nat_range2 range; - memset(&mr, 0, sizeof(mr)); + memset(&range, 0, sizeof(range)); + range.flags = priv->flags; if (priv->sreg_proto_min) { - mr.range[0].min.all = (__force __be16)nft_reg_load16( - &regs->data[priv->sreg_proto_min]); - mr.range[0].max.all = (__force __be16)nft_reg_load16( - &regs->data[priv->sreg_proto_max]); - mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + range.min_proto.all = (__force __be16) + nft_reg_load16(&regs->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16) + nft_reg_load16(&regs->data[priv->sreg_proto_max]); } - mr.range[0].flags |= priv->flags; - - regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr, nft_hook(pkt)); + switch (nft_pf(pkt)) { + case NFPROTO_IPV4: + regs->verdict.code = 
nf_nat_redirect_ipv4(pkt->skb, &range, + nft_hook(pkt)); + break; +#ifdef CONFIG_NF_TABLES_IPV6 + case NFPROTO_IPV6: + regs->verdict.code = nf_nat_redirect_ipv6(pkt->skb, &range, + nft_hook(pkt)); + break; +#endif + default: + WARN_ON_ONCE(1); + break; + } } static void @@ -130,7 +144,7 @@ static struct nft_expr_type nft_redir_ipv4_type; static const struct nft_expr_ops nft_redir_ipv4_ops = { .type = &nft_redir_ipv4_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), - .eval = nft_redir_ipv4_eval, + .eval = nft_redir_eval, .init = nft_redir_init, .destroy = nft_redir_ipv4_destroy, .dump = nft_redir_dump, @@ -148,28 +162,6 @@ static struct nft_expr_type nft_redir_ipv4_type __read_mostly = { }; #ifdef CONFIG_NF_TABLES_IPV6 -static void nft_redir_ipv6_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - struct nft_redir *priv = nft_expr_priv(expr); - struct nf_nat_range2 range; - - memset(&range, 0, sizeof(range)); - if (priv->sreg_proto_min) { - range.min_proto.all = (__force __be16)nft_reg_load16( - &regs->data[priv->sreg_proto_min]); - range.max_proto.all = (__force __be16)nft_reg_load16( - &regs->data[priv->sreg_proto_max]); - range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; - } - - range.flags |= priv->flags; - - regs->verdict.code = - nf_nat_redirect_ipv6(pkt->skb, &range, nft_hook(pkt)); -} - static void nft_redir_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { @@ -180,7 +172,7 @@ static struct nft_expr_type nft_redir_ipv6_type; static const struct nft_expr_ops nft_redir_ipv6_ops = { .type = &nft_redir_ipv6_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), - .eval = nft_redir_ipv6_eval, + .eval = nft_redir_eval, .init = nft_redir_init, .destroy = nft_redir_ipv6_destroy, .dump = nft_redir_dump, @@ -199,20 +191,6 @@ static struct nft_expr_type nft_redir_ipv6_type __read_mostly = { #endif #ifdef CONFIG_NF_TABLES_INET -static void nft_redir_inet_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - switch (nft_pf(pkt)) { - case NFPROTO_IPV4: - return nft_redir_ipv4_eval(expr, regs, pkt); - case NFPROTO_IPV6: - return nft_redir_ipv6_eval(expr, regs, pkt); - } - - WARN_ON_ONCE(1); -} - static void nft_redir_inet_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { @@ -223,7 +201,7 @@ static struct nft_expr_type nft_redir_inet_type; static const struct nft_expr_ops nft_redir_inet_ops = { .type = &nft_redir_inet_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), - .eval = nft_redir_inet_eval, + .eval = nft_redir_eval, .init = nft_redir_init, .destroy = nft_redir_inet_destroy, .dump = nft_redir_dump, diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c index 2182d361e273..acef4155f0da 100644 --- a/net/netfilter/utils.c +++ b/net/netfilter/utils.c @@ -215,3 +215,55 @@ int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry) } return ret; } + +/* Only get and check the lengths, not do any hop-by-hop stuff. 
*/ +int nf_ip6_check_hbh_len(struct sk_buff *skb, u32 *plen) +{ + int len, off = sizeof(struct ipv6hdr); + unsigned char *nh; + + if (!pskb_may_pull(skb, off + 8)) + return -ENOMEM; + nh = (unsigned char *)(ipv6_hdr(skb) + 1); + len = (nh[1] + 1) << 3; + + if (!pskb_may_pull(skb, off + len)) + return -ENOMEM; + nh = skb_network_header(skb); + + off += 2; + len -= 2; + while (len > 0) { + int optlen; + + if (nh[off] == IPV6_TLV_PAD1) { + off++; + len--; + continue; + } + if (len < 2) + return -EBADMSG; + optlen = nh[off + 1] + 2; + if (optlen > len) + return -EBADMSG; + + if (nh[off] == IPV6_TLV_JUMBO) { + u32 pkt_len; + + if (nh[off + 1] != 4 || (off & 3) != 2) + return -EBADMSG; + pkt_len = ntohl(*(__be32 *)(nh + off + 2)); + if (pkt_len <= IPV6_MAXPLEN || + ipv6_hdr(skb)->payload_len) + return -EBADMSG; + if (pkt_len > skb->len - sizeof(struct ipv6hdr)) + return -EBADMSG; + *plen = pkt_len; + } + off += optlen; + len -= optlen; + } + + return len ? -EBADMSG : 0; +} +EXPORT_SYMBOL_GPL(nf_ip6_check_hbh_len); diff --git a/net/netfilter/xt_REDIRECT.c b/net/netfilter/xt_REDIRECT.c index 353ca7801251..ff66b56a3f97 100644 --- a/net/netfilter/xt_REDIRECT.c +++ b/net/netfilter/xt_REDIRECT.c @@ -46,7 +46,6 @@ static void redirect_tg_destroy(const struct xt_tgdtor_param *par) nf_ct_netns_put(par->net, par->family); } -/* FIXME: Take multiple ranges --RR */ static int redirect_tg4_check(const struct xt_tgchk_param *par) { const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; @@ -65,7 +64,14 @@ static int redirect_tg4_check(const struct xt_tgchk_param *par) static unsigned int redirect_tg4(struct sk_buff *skb, const struct xt_action_param *par) { - return nf_nat_redirect_ipv4(skb, par->targinfo, xt_hooknum(par)); + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; + struct nf_nat_range2 range = { + .flags = mr->range[0].flags, + .min_proto = mr->range[0].min, + .max_proto = mr->range[0].max, + }; + + return nf_nat_redirect_ipv4(skb, &range, xt_hooknum(par)); } static struct xt_target redirect_tg_reg[] __read_mostly = { diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c index 11ec2abf0c72..e8991130a3de 100644 --- a/net/netfilter/xt_tcpudp.c +++ b/net/netfilter/xt_tcpudp.c @@ -4,6 +4,7 @@ #include <linux/module.h> #include <net/ip.h> #include <linux/ipv6.h> +#include <linux/icmp.h> #include <net/ipv6.h> #include <net/tcp.h> #include <net/udp.h> @@ -20,6 +21,8 @@ MODULE_ALIAS("ipt_udp"); MODULE_ALIAS("ipt_tcp"); MODULE_ALIAS("ip6t_udp"); MODULE_ALIAS("ip6t_tcp"); +MODULE_ALIAS("ipt_icmp"); +MODULE_ALIAS("ip6t_icmp6"); /* Returns 1 if the port is matched by the range, 0 otherwise */ static inline bool @@ -161,6 +164,95 @@ static int udp_mt_check(const struct xt_mtchk_param *par) return (udpinfo->invflags & ~XT_UDP_INV_MASK) ? 
-EINVAL : 0; } +/* Returns 1 if the type and code is matched by the range, 0 otherwise */ +static bool type_code_in_range(u8 test_type, u8 min_code, u8 max_code, + u8 type, u8 code) +{ + return type == test_type && code >= min_code && code <= max_code; +} + +static bool icmp_type_code_match(u8 test_type, u8 min_code, u8 max_code, + u8 type, u8 code, bool invert) +{ + return (test_type == 0xFF || + type_code_in_range(test_type, min_code, max_code, type, code)) + ^ invert; +} + +static bool icmp6_type_code_match(u8 test_type, u8 min_code, u8 max_code, + u8 type, u8 code, bool invert) +{ + return type_code_in_range(test_type, min_code, max_code, type, code) ^ invert; +} + +static bool +icmp_match(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct icmphdr *ic; + struct icmphdr _icmph; + const struct ipt_icmp *icmpinfo = par->matchinfo; + + /* Must not be a fragment. */ + if (par->fragoff != 0) + return false; + + ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph); + if (!ic) { + /* We've been asked to examine this packet, and we + * can't. Hence, no choice but to drop. + */ + par->hotdrop = true; + return false; + } + + return icmp_type_code_match(icmpinfo->type, + icmpinfo->code[0], + icmpinfo->code[1], + ic->type, ic->code, + !!(icmpinfo->invflags & IPT_ICMP_INV)); +} + +static bool +icmp6_match(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct icmp6hdr *ic; + struct icmp6hdr _icmph; + const struct ip6t_icmp *icmpinfo = par->matchinfo; + + /* Must not be a fragment. */ + if (par->fragoff != 0) + return false; + + ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph); + if (!ic) { + /* We've been asked to examine this packet, and we + * can't. Hence, no choice but to drop. + */ + par->hotdrop = true; + return false; + } + + return icmp6_type_code_match(icmpinfo->type, + icmpinfo->code[0], + icmpinfo->code[1], + ic->icmp6_type, ic->icmp6_code, + !!(icmpinfo->invflags & IP6T_ICMP_INV)); +} + +static int icmp_checkentry(const struct xt_mtchk_param *par) +{ + const struct ipt_icmp *icmpinfo = par->matchinfo; + + return (icmpinfo->invflags & ~IPT_ICMP_INV) ? -EINVAL : 0; +} + +static int icmp6_checkentry(const struct xt_mtchk_param *par) +{ + const struct ip6t_icmp *icmpinfo = par->matchinfo; + + return (icmpinfo->invflags & ~IP6T_ICMP_INV) ? -EINVAL : 0; +} + static struct xt_match tcpudp_mt_reg[] __read_mostly = { { .name = "tcp", @@ -216,6 +308,24 @@ static struct xt_match tcpudp_mt_reg[] __read_mostly = { .proto = IPPROTO_UDPLITE, .me = THIS_MODULE, }, + { + .name = "icmp", + .match = icmp_match, + .matchsize = sizeof(struct ipt_icmp), + .checkentry = icmp_checkentry, + .proto = IPPROTO_ICMP, + .family = NFPROTO_IPV4, + .me = THIS_MODULE, + }, + { + .name = "icmp6", + .match = icmp6_match, + .matchsize = sizeof(struct ip6t_icmp), + .checkentry = icmp6_checkentry, + .proto = IPPROTO_ICMPV6, + .family = NFPROTO_IPV6, + .me = THIS_MODULE, + }, }; static int __init tcpudp_mt_init(void)
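Editor's note: the short standalone program below is not part of the patch; it is only a sketch illustrating the type/code matching semantics of the icmp/icmp6 matches added above (type 0xFF acting as an "any type" wildcard for IPv4 ICMP, plus the invert flag). The sample type/code values are hypothetical.

#include <stdbool.h>
#include <stdio.h>

/* Mirrors type_code_in_range() from the xt_tcpudp hunk above. */
static bool type_code_in_range(unsigned char test_type, unsigned char min_code,
			       unsigned char max_code, unsigned char type,
			       unsigned char code)
{
	return type == test_type && code >= min_code && code <= max_code;
}

/* Mirrors icmp_type_code_match(): 0xFF matches any ICMP type (IPv4 only). */
static bool icmp_type_code_match(unsigned char test_type, unsigned char min_code,
				 unsigned char max_code, unsigned char type,
				 unsigned char code, bool invert)
{
	return (test_type == 0xFF ||
		type_code_in_range(test_type, min_code, max_code, type, code)) ^ invert;
}

int main(void)
{
	/* Rule: ICMP type 3 (destination unreachable), codes 0-15, not inverted;
	 * packet: type 3, code 3 (port unreachable) -> match.
	 */
	printf("%d\n", icmp_type_code_match(3, 0, 15, 3, 3, false));      /* 1 */
	/* Same rule with the invert flag set -> no match. */
	printf("%d\n", icmp_type_code_match(3, 0, 15, 3, 3, true));       /* 0 */
	/* Wildcard type 0xFF matches an echo request (type 8, code 0). */
	printf("%d\n", icmp_type_code_match(0xFF, 0, 0xFF, 8, 0, false)); /* 1 */
	return 0;
}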