diff options
author | David S. Miller <davem@davemloft.net> | 2020-03-29 22:30:57 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2020-03-29 22:30:57 -0700 |
commit | 96376cad350873bdeb500d49817d749242f74068 (patch) | |
tree | 2a47bb1d90dfb74af8349340ab382592ff646be3 /net | |
parent | c189b5483c1bef24dc3755177b19a595c7e668c9 (diff) | |
parent | a7a29f9c361f8542604ef959ae6627f423b7a412 (diff) |
Merge branch 'ipv6-add-rpl-source-routing'
Alexander Aring says:
====================
net: ipv6: add rpl source routing
This patch series will add handling for RPL source routing handling
and insertion (implement as lwtunnel)! I did an example prototype
implementation in rpld for using this implementation in non-storing mode:
https://github.com/linux-wpan/rpld/tree/nonstoring_mode
I will also present a talk at netdev about it:
https://netdevconf.info/0x14/session.html?talk-extend-segment-routing-for-RPL
In receive handling I add handling for IPIP encapsulation as RFC6554
describes it as possible. For reasons I didn't implemented it yet for
generating such packets because I am not really sure how/when this
should happen. So far I understand there exists a draft yet which
describes the cases (inclusive a Hop-by-Hop option which we also not
support yet).
https://tools.ietf.org/html/draft-ietf-roll-useofrplinfo-35
This is just the beginning to start implementation everything for yet,
step by step. It works for my use cases yet to have it running on a
6LOWPAN _only_ network.
I have some patches for iproute2 as well.
A sidenote: I check on local addresses if they are part of segment
routes, this is just to avoid stupid settings. A use can add addresses
afterwards what I cannot control anymore but then it's users fault to
make such thing. The receive handling checks for this as well which is
required by RFC6554, so the next hops or when it comes back should drop
it anyway.
To make this possible I added functionality to pass the net structure to
the build_state of lwtunnel (I hope I caught all lwtunnels).
Another sidenote: I set the headroom value to 0 as I figured out it will
break on interfaces with IPv6 min mtu if set to non zero for tunnels on
L3.
- Alex
changes since v3:
- use parse_nested which isn't deprecated - Thanks David Ahern
- change to return -1 instead errno in exthdr handling to unify
error code
- change function name from ipv6_rpl_srh_decompress_size to
ipv6_rpl_srh_size
changes since v2:
- add additional segdata length in lwtunnel build_state
- fix build_state patch by not catching one inline noop function
if LWTUNNEL is disabled
Alexander Aring (5):
include: uapi: linux: add rpl sr header definition
addrconf: add functionality to check on rpl requirements
net: ipv6: add support for rpl sr exthdr
net: add net available in build_state
net: ipv6: add rpl sr tunnel
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/core/lwt_bpf.c | 2 | ||||
-rw-r--r-- | net/core/lwtunnel.c | 6 | ||||
-rw-r--r-- | net/ipv4/fib_lookup.h | 2 | ||||
-rw-r--r-- | net/ipv4/fib_semantics.c | 22 | ||||
-rw-r--r-- | net/ipv4/fib_trie.c | 2 | ||||
-rw-r--r-- | net/ipv4/ip_tunnel_core.c | 4 | ||||
-rw-r--r-- | net/ipv6/Kconfig | 10 | ||||
-rw-r--r-- | net/ipv6/Makefile | 3 | ||||
-rw-r--r-- | net/ipv6/addrconf.c | 63 | ||||
-rw-r--r-- | net/ipv6/af_inet6.c | 7 | ||||
-rw-r--r-- | net/ipv6/exthdrs.c | 201 | ||||
-rw-r--r-- | net/ipv6/ila/ila_lwt.c | 2 | ||||
-rw-r--r-- | net/ipv6/route.c | 2 | ||||
-rw-r--r-- | net/ipv6/rpl.c | 123 | ||||
-rw-r--r-- | net/ipv6/rpl_iptunnel.c | 380 | ||||
-rw-r--r-- | net/ipv6/seg6_iptunnel.c | 2 | ||||
-rw-r--r-- | net/ipv6/seg6_local.c | 5 | ||||
-rw-r--r-- | net/mpls/mpls_iptunnel.c | 2 |
18 files changed, 812 insertions, 26 deletions
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index 99a6de52b21d..7d3438215f32 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -367,7 +367,7 @@ static const struct nla_policy bpf_nl_policy[LWT_BPF_MAX + 1] = { [LWT_BPF_XMIT_HEADROOM] = { .type = NLA_U32 }, }; -static int bpf_build_state(struct nlattr *nla, +static int bpf_build_state(struct net *net, struct nlattr *nla, unsigned int family, const void *cfg, struct lwtunnel_state **ts, struct netlink_ext_ack *extack) diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 2f9c0de533c7..8ec7d13d2860 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -41,6 +41,8 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) return "BPF"; case LWTUNNEL_ENCAP_SEG6_LOCAL: return "SEG6LOCAL"; + case LWTUNNEL_ENCAP_RPL: + return "RPL"; case LWTUNNEL_ENCAP_IP6: case LWTUNNEL_ENCAP_IP: case LWTUNNEL_ENCAP_NONE: @@ -98,7 +100,7 @@ int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops, } EXPORT_SYMBOL_GPL(lwtunnel_encap_del_ops); -int lwtunnel_build_state(u16 encap_type, +int lwtunnel_build_state(struct net *net, u16 encap_type, struct nlattr *encap, unsigned int family, const void *cfg, struct lwtunnel_state **lws, struct netlink_ext_ack *extack) @@ -122,7 +124,7 @@ int lwtunnel_build_state(u16 encap_type, rcu_read_unlock(); if (found) { - ret = ops->build_state(encap, family, cfg, lws, extack); + ret = ops->build_state(net, encap, family, cfg, lws, extack); if (ret) module_put(ops->owner); } else { diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index c092e9a55790..818916b2a04d 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -35,7 +35,7 @@ static inline void fib_alias_accessed(struct fib_alias *fa) void fib_release_info(struct fib_info *); struct fib_info *fib_create_info(struct fib_config *cfg, struct netlink_ext_ack *extack); -int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, +int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, struct netlink_ext_ack *extack); bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi); int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index e4c62b8f57a8..6ed8c9317179 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -570,8 +570,9 @@ static int fib_detect_death(struct fib_info *fi, int order, return 1; } -int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *encap, - u16 encap_type, void *cfg, gfp_t gfp_flags, +int fib_nh_common_init(struct net *net, struct fib_nh_common *nhc, + struct nlattr *encap, u16 encap_type, + void *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack) { int err; @@ -589,8 +590,9 @@ int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *encap, err = -EINVAL; goto lwt_failure; } - err = lwtunnel_build_state(encap_type, encap, nhc->nhc_family, - cfg, &lwtstate, extack); + err = lwtunnel_build_state(net, encap_type, encap, + nhc->nhc_family, cfg, &lwtstate, + extack); if (err) goto lwt_failure; @@ -614,7 +616,7 @@ int fib_nh_init(struct net *net, struct fib_nh *nh, nh->fib_nh_family = AF_INET; - err = fib_nh_common_init(&nh->nh_common, cfg->fc_encap, + err = fib_nh_common_init(net, &nh->nh_common, cfg->fc_encap, cfg->fc_encap_type, cfg, GFP_KERNEL, extack); if (err) return err; @@ -814,7 +816,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, #endif /* CONFIG_IP_ROUTE_MULTIPATH */ -static int fib_encap_match(u16 encap_type, +static int fib_encap_match(struct net *net, u16 encap_type, struct nlattr *encap, const struct fib_nh *nh, const struct fib_config *cfg, @@ -826,7 +828,7 @@ static int fib_encap_match(u16 encap_type, if (encap_type == LWTUNNEL_ENCAP_NONE) return 0; - ret = lwtunnel_build_state(encap_type, encap, AF_INET, + ret = lwtunnel_build_state(net, encap_type, encap, AF_INET, cfg, &lwtstate, extack); if (!ret) { result = lwtunnel_cmp_encap(lwtstate, nh->fib_nh_lws); @@ -836,7 +838,7 @@ static int fib_encap_match(u16 encap_type, return result; } -int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, +int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, struct netlink_ext_ack *extack) { #ifdef CONFIG_IP_ROUTE_MULTIPATH @@ -857,8 +859,8 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, struct fib_nh *nh = fib_info_nh(fi, 0); if (cfg->fc_encap) { - if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap, - nh, cfg, extack)) + if (fib_encap_match(net, cfg->fc_encap_type, + cfg->fc_encap, nh, cfg, extack)) return 1; } #ifdef CONFIG_IP_ROUTE_CLASSID diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index f4c2ac445b3f..b01b748df9dd 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1679,7 +1679,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb, fi->fib_prefsrc == cfg->fc_prefsrc) && (!cfg->fc_protocol || fi->fib_protocol == cfg->fc_protocol) && - fib_nh_match(cfg, fi, extack) == 0 && + fib_nh_match(net, cfg, fi, extack) == 0 && fib_metrics_match(cfg, fi)) { fa_to_delete = fa; break; diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 47f8b947eef1..181b7a2a0247 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -432,7 +432,7 @@ static int ip_tun_set_opts(struct nlattr *attr, struct ip_tunnel_info *info, return ip_tun_parse_opts(attr, info, extack); } -static int ip_tun_build_state(struct nlattr *attr, +static int ip_tun_build_state(struct net *net, struct nlattr *attr, unsigned int family, const void *cfg, struct lwtunnel_state **ts, struct netlink_ext_ack *extack) @@ -719,7 +719,7 @@ static const struct nla_policy ip6_tun_policy[LWTUNNEL_IP6_MAX + 1] = { [LWTUNNEL_IP6_OPTS] = { .type = NLA_NESTED }, }; -static int ip6_tun_build_state(struct nlattr *attr, +static int ip6_tun_build_state(struct net *net, struct nlattr *attr, unsigned int family, const void *cfg, struct lwtunnel_state **ts, struct netlink_ext_ack *extack) diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index ae1344e4cec5..2ccaee98fddb 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -303,4 +303,14 @@ config IPV6_SEG6_BPF depends on IPV6_SEG6_LWTUNNEL depends on IPV6 = y +config IPV6_RPL_LWTUNNEL + bool "IPv6: RPL Source Routing Header support" + depends on IPV6 + select LWTUNNEL + ---help--- + Support for RFC6554 RPL Source Routing Header using the lightweight + tunnels mechanism. + + If unsure, say N. + endif # IPV6 diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 8ccf35514015..cf7b47bdb9b3 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -10,7 +10,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \ - udp_offload.o seg6.o fib6_notifier.o + udp_offload.o seg6.o fib6_notifier.o rpl.o ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o @@ -26,6 +26,7 @@ ipv6-$(CONFIG_SYN_COOKIES) += syncookies.o ipv6-$(CONFIG_NETLABEL) += calipso.o ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o seg6_local.o ipv6-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o +ipv6-$(CONFIG_IPV6_RPL_LWTUNNEL) += rpl_iptunnel.o ipv6-objs += $(ipv6-y) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5b9de773ce73..a11fd4d67832 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -236,6 +236,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .enhanced_dad = 1, .addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64, .disable_policy = 0, + .rpl_seg_enabled = 0, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -290,6 +291,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .enhanced_dad = 1, .addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64, .disable_policy = 0, + .rpl_seg_enabled = 0, }; /* Check if link is ready: is it up and is a valid qdisc available */ @@ -4398,6 +4400,59 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) } #endif +/* RFC6554 has some algorithm to avoid loops in segment routing by + * checking if the segments contains any of a local interface address. + * + * Quote: + * + * To detect loops in the SRH, a router MUST determine if the SRH + * includes multiple addresses assigned to any interface on that router. + * If such addresses appear more than once and are separated by at least + * one address not assigned to that router. + */ +int ipv6_chk_rpl_srh_loop(struct net *net, const struct in6_addr *segs, + unsigned char nsegs) +{ + const struct in6_addr *addr; + int i, ret = 0, found = 0; + struct inet6_ifaddr *ifp; + bool separated = false; + unsigned int hash; + bool hash_found; + + rcu_read_lock(); + for (i = 0; i < nsegs; i++) { + addr = &segs[i]; + hash = inet6_addr_hash(net, addr); + + hash_found = false; + hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { + if (!net_eq(dev_net(ifp->idev->dev), net)) + continue; + + if (ipv6_addr_equal(&ifp->addr, addr)) { + hash_found = true; + break; + } + } + + if (hash_found) { + if (found > 1 && separated) { + ret = 1; + break; + } + + separated = false; + found++; + } else { + separated = true; + } + } + rcu_read_unlock(); + + return ret; +} + /* * Periodic address status verification */ @@ -5467,6 +5522,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_ADDR_GEN_MODE] = cnf->addr_gen_mode; array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy; array[DEVCONF_NDISC_TCLASS] = cnf->ndisc_tclass; + array[DEVCONF_RPL_SEG_ENABLED] = cnf->rpl_seg_enabled; } static inline size_t inet6_ifla6_size(void) @@ -6848,6 +6904,13 @@ static const struct ctl_table addrconf_sysctl[] = { .extra2 = (void *)&two_five_five, }, { + .procname = "rpl_seg_enabled", + .data = &ipv6_devconf.rpl_seg_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { /* sentinel */ } }; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index d727c3b41495..345baa0a754f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -59,6 +59,7 @@ #endif #include <net/calipso.h> #include <net/seg6.h> +#include <net/rpl.h> #include <linux/uaccess.h> #include <linux/mroute6.h> @@ -1114,6 +1115,10 @@ static int __init inet6_init(void) if (err) goto seg6_fail; + err = rpl_init(); + if (err) + goto rpl_fail; + err = igmp6_late_init(); if (err) goto igmp6_late_err; @@ -1136,6 +1141,8 @@ sysctl_fail: igmp6_late_cleanup(); #endif igmp6_late_err: + rpl_exit(); +rpl_fail: seg6_exit(); seg6_fail: calipso_exit(); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index bcb9f5e62808..5a8bbcdcaf2b 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -48,6 +48,7 @@ #ifdef CONFIG_IPV6_SEG6_HMAC #include <net/seg6_hmac.h> #endif +#include <net/rpl.h> #include <linux/uaccess.h> @@ -468,6 +469,195 @@ looped_back: return -1; } +static int ipv6_rpl_srh_rcv(struct sk_buff *skb) +{ + struct ipv6_rpl_sr_hdr *hdr, *ohdr, *chdr; + struct inet6_skb_parm *opt = IP6CB(skb); + struct net *net = dev_net(skb->dev); + struct inet6_dev *idev; + struct ipv6hdr *oldhdr; + struct in6_addr addr; + unsigned char *buf; + int accept_rpl_seg; + int i, err; + u64 n = 0; + u32 r; + + idev = __in6_dev_get(skb->dev); + + accept_rpl_seg = net->ipv6.devconf_all->rpl_seg_enabled; + if (accept_rpl_seg > idev->cnf.rpl_seg_enabled) + accept_rpl_seg = idev->cnf.rpl_seg_enabled; + + if (!accept_rpl_seg) { + kfree_skb(skb); + return -1; + } + +looped_back: + hdr = (struct ipv6_rpl_sr_hdr *)skb_transport_header(skb); + + if (hdr->segments_left == 0) { + if (hdr->nexthdr == NEXTHDR_IPV6) { + int offset = (hdr->hdrlen + 1) << 3; + + skb_postpull_rcsum(skb, skb_network_header(skb), + skb_network_header_len(skb)); + + if (!pskb_pull(skb, offset)) { + kfree_skb(skb); + return -1; + } + skb_postpull_rcsum(skb, skb_transport_header(skb), + offset); + + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + skb->encapsulation = 0; + + __skb_tunnel_rx(skb, skb->dev, net); + + netif_rx(skb); + return -1; + } + + opt->srcrt = skb_network_header_len(skb); + opt->lastopt = opt->srcrt; + skb->transport_header += (hdr->hdrlen + 1) << 3; + opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb); + + return 1; + } + + if (!pskb_may_pull(skb, sizeof(*hdr))) { + kfree_skb(skb); + return -1; + } + + n = (hdr->hdrlen << 3) - hdr->pad - (16 - hdr->cmpre); + r = do_div(n, (16 - hdr->cmpri)); + /* checks if calculation was without remainder and n fits into + * unsigned char which is segments_left field. Should not be + * higher than that. + */ + if (r || (n + 1) > 255) { + kfree_skb(skb); + return -1; + } + + if (hdr->segments_left > n + 1) { + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, + ((&hdr->segments_left) - + skb_network_header(skb))); + return -1; + } + + if (skb_cloned(skb)) { + if (pskb_expand_head(skb, IPV6_RPL_SRH_WORST_SWAP_SIZE, 0, + GFP_ATOMIC)) { + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTDISCARDS); + kfree_skb(skb); + return -1; + } + } else { + err = skb_cow_head(skb, IPV6_RPL_SRH_WORST_SWAP_SIZE); + if (unlikely(err)) { + kfree_skb(skb); + return -1; + } + } + + hdr = (struct ipv6_rpl_sr_hdr *)skb_transport_header(skb); + + if (!pskb_may_pull(skb, ipv6_rpl_srh_size(n, hdr->cmpri, + hdr->cmpre))) { + kfree_skb(skb); + return -1; + } + + hdr->segments_left--; + i = n - hdr->segments_left; + + buf = kzalloc(ipv6_rpl_srh_alloc_size(n + 1) * 2, GFP_ATOMIC); + if (unlikely(!buf)) { + kfree_skb(skb); + return -1; + } + + ohdr = (struct ipv6_rpl_sr_hdr *)buf; + ipv6_rpl_srh_decompress(ohdr, hdr, &ipv6_hdr(skb)->daddr, n); + chdr = (struct ipv6_rpl_sr_hdr *)(buf + ((ohdr->hdrlen + 1) << 3)); + + if ((ipv6_addr_type(&ipv6_hdr(skb)->daddr) & IPV6_ADDR_MULTICAST) || + (ipv6_addr_type(&ohdr->rpl_segaddr[i]) & IPV6_ADDR_MULTICAST)) { + kfree_skb(skb); + kfree(buf); + return -1; + } + + err = ipv6_chk_rpl_srh_loop(net, ohdr->rpl_segaddr, n + 1); + if (err) { + icmpv6_send(skb, ICMPV6_PARAMPROB, 0, 0); + kfree_skb(skb); + kfree(buf); + return -1; + } + + addr = ipv6_hdr(skb)->daddr; + ipv6_hdr(skb)->daddr = ohdr->rpl_segaddr[i]; + ohdr->rpl_segaddr[i] = addr; + + ipv6_rpl_srh_compress(chdr, ohdr, &ipv6_hdr(skb)->daddr, n); + + oldhdr = ipv6_hdr(skb); + + skb_pull(skb, ((hdr->hdrlen + 1) << 3)); + skb_postpull_rcsum(skb, oldhdr, + sizeof(struct ipv6hdr) + ((hdr->hdrlen + 1) << 3)); + skb_push(skb, ((chdr->hdrlen + 1) << 3) + sizeof(struct ipv6hdr)); + skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + + memmove(ipv6_hdr(skb), oldhdr, sizeof(struct ipv6hdr)); + memcpy(skb_transport_header(skb), chdr, (chdr->hdrlen + 1) << 3); + + ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + skb_postpush_rcsum(skb, ipv6_hdr(skb), + sizeof(struct ipv6hdr) + ((chdr->hdrlen + 1) << 3)); + + kfree(buf); + + skb_dst_drop(skb); + + ip6_route_input(skb); + + if (skb_dst(skb)->error) { + dst_input(skb); + return -1; + } + + if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) { + if (ipv6_hdr(skb)->hop_limit <= 1) { + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); + icmpv6_send(skb, ICMPV6_TIME_EXCEED, + ICMPV6_EXC_HOPLIMIT, 0); + kfree_skb(skb); + return -1; + } + ipv6_hdr(skb)->hop_limit--; + + skb_pull(skb, sizeof(struct ipv6hdr)); + goto looped_back; + } + + dst_input(skb); + + return -1; +} + /******************************** Routing header. ********************************/ @@ -506,9 +696,16 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) return -1; } - /* segment routing */ - if (hdr->type == IPV6_SRCRT_TYPE_4) + switch (hdr->type) { + case IPV6_SRCRT_TYPE_4: + /* segment routing */ return ipv6_srh_rcv(skb); + case IPV6_SRCRT_TYPE_3: + /* rpl segment routing */ + return ipv6_rpl_srh_rcv(skb); + default: + break; + } looped_back: if (hdr->segments_left == 0) { diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index 422dcc691f71..8c1ce78956ba 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -125,7 +125,7 @@ static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { [ILA_ATTR_HOOK_TYPE] = { .type = NLA_U8, }, }; -static int ila_build_state(struct nlattr *nla, +static int ila_build_state(struct net *net, struct nlattr *nla, unsigned int family, const void *cfg, struct lwtunnel_state **ts, struct netlink_ext_ack *extack) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index afcde55d537c..310cbddaa533 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3471,7 +3471,7 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, !netif_carrier_ok(dev)) fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN; - err = fib_nh_common_init(&fib6_nh->nh_common, cfg->fc_encap, + err = fib_nh_common_init(net, &fib6_nh->nh_common, cfg->fc_encap, cfg->fc_encap_type, cfg, gfp_flags, extack); if (err) goto out; diff --git a/net/ipv6/rpl.c b/net/ipv6/rpl.c new file mode 100644 index 000000000000..dc4f20e23bf7 --- /dev/null +++ b/net/ipv6/rpl.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0-only +/** + * Authors: + * (C) 2020 Alexander Aring <alex.aring@gmail.com> + */ + +#include <net/ipv6.h> +#include <net/rpl.h> + +#define IPV6_PFXTAIL_LEN(x) (sizeof(struct in6_addr) - (x)) + +static void ipv6_rpl_addr_decompress(struct in6_addr *dst, + const struct in6_addr *daddr, + const void *post, unsigned char pfx) +{ + memcpy(dst, daddr, pfx); + memcpy(&dst->s6_addr[pfx], post, IPV6_PFXTAIL_LEN(pfx)); +} + +static void ipv6_rpl_addr_compress(void *dst, const struct in6_addr *addr, + unsigned char pfx) +{ + memcpy(dst, &addr->s6_addr[pfx], IPV6_PFXTAIL_LEN(pfx)); +} + +static void *ipv6_rpl_segdata_pos(const struct ipv6_rpl_sr_hdr *hdr, int i) +{ + return (void *)&hdr->rpl_segdata[i * IPV6_PFXTAIL_LEN(hdr->cmpri)]; +} + +size_t ipv6_rpl_srh_size(unsigned char n, unsigned char cmpri, + unsigned char cmpre) +{ + return (n * IPV6_PFXTAIL_LEN(cmpri)) + IPV6_PFXTAIL_LEN(cmpre); +} + +void ipv6_rpl_srh_decompress(struct ipv6_rpl_sr_hdr *outhdr, + const struct ipv6_rpl_sr_hdr *inhdr, + const struct in6_addr *daddr, unsigned char n) +{ + int i; + + outhdr->nexthdr = inhdr->nexthdr; + outhdr->hdrlen = (((n + 1) * sizeof(struct in6_addr)) >> 3); + outhdr->pad = 0; + outhdr->type = inhdr->type; + outhdr->segments_left = inhdr->segments_left; + outhdr->cmpri = 0; + outhdr->cmpre = 0; + + for (i = 0; i <= n; i++) + ipv6_rpl_addr_decompress(&outhdr->rpl_segaddr[i], daddr, + ipv6_rpl_segdata_pos(inhdr, i), + inhdr->cmpri); + + ipv6_rpl_addr_decompress(&outhdr->rpl_segaddr[n], daddr, + ipv6_rpl_segdata_pos(inhdr, n), + inhdr->cmpre); +} + +static unsigned char ipv6_rpl_srh_calc_cmpri(const struct ipv6_rpl_sr_hdr *inhdr, + const struct in6_addr *daddr, + unsigned char n) +{ + unsigned char plen; + int i; + + for (plen = 0; plen < sizeof(*daddr); plen++) { + for (i = 0; i <= n; i++) { + if (daddr->s6_addr[plen] != + inhdr->rpl_segaddr[i].s6_addr[plen]) + return plen; + } + } + + return plen; +} + +static unsigned char ipv6_rpl_srh_calc_cmpre(const struct in6_addr *daddr, + const struct in6_addr *last_segment) +{ + unsigned int plen; + + for (plen = 0; plen < sizeof(*daddr); plen++) { + if (daddr->s6_addr[plen] != last_segment->s6_addr[plen]) + break; + } + + return plen; +} + +void ipv6_rpl_srh_compress(struct ipv6_rpl_sr_hdr *outhdr, + const struct ipv6_rpl_sr_hdr *inhdr, + const struct in6_addr *daddr, unsigned char n) +{ + unsigned char cmpri, cmpre; + size_t seglen; + int i; + + cmpri = ipv6_rpl_srh_calc_cmpri(inhdr, daddr, n); + cmpre = ipv6_rpl_srh_calc_cmpre(daddr, &inhdr->rpl_segaddr[n]); + + outhdr->nexthdr = inhdr->nexthdr; + seglen = (n * IPV6_PFXTAIL_LEN(cmpri)) + IPV6_PFXTAIL_LEN(cmpre); + outhdr->hdrlen = seglen >> 3; + if (seglen & 0x7) { + outhdr->hdrlen++; + outhdr->pad = 8 - (seglen & 0x7); + } else { + outhdr->pad = 0; + } + outhdr->type = inhdr->type; + outhdr->segments_left = inhdr->segments_left; + outhdr->cmpri = cmpri; + outhdr->cmpre = cmpre; + + for (i = 0; i <= n; i++) + ipv6_rpl_addr_compress(ipv6_rpl_segdata_pos(outhdr, i), + &inhdr->rpl_segaddr[i], cmpri); + + ipv6_rpl_addr_compress(ipv6_rpl_segdata_pos(outhdr, n), + &inhdr->rpl_segaddr[n], cmpre); +} diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c new file mode 100644 index 000000000000..203037afe001 --- /dev/null +++ b/net/ipv6/rpl_iptunnel.c @@ -0,0 +1,380 @@ +// SPDX-License-Identifier: GPL-2.0-only +/** + * Authors: + * (C) 2020 Alexander Aring <alex.aring@gmail.com> + */ + +#include <linux/rpl_iptunnel.h> + +#include <net/dst_cache.h> +#include <net/ip6_route.h> +#include <net/lwtunnel.h> +#include <net/ipv6.h> +#include <net/rpl.h> + +struct rpl_iptunnel_encap { + struct ipv6_rpl_sr_hdr srh[0]; +}; + +struct rpl_lwt { + struct dst_cache cache; + struct rpl_iptunnel_encap tuninfo; +}; + +static inline struct rpl_lwt *rpl_lwt_lwtunnel(struct lwtunnel_state *lwt) +{ + return (struct rpl_lwt *)lwt->data; +} + +static inline struct rpl_iptunnel_encap * +rpl_encap_lwtunnel(struct lwtunnel_state *lwt) +{ + return &rpl_lwt_lwtunnel(lwt)->tuninfo; +} + +static const struct nla_policy rpl_iptunnel_policy[RPL_IPTUNNEL_MAX + 1] = { + [RPL_IPTUNNEL_SRH] = { .type = NLA_BINARY }, +}; + +static bool rpl_validate_srh(struct net *net, struct ipv6_rpl_sr_hdr *srh, + size_t seglen) +{ + int err; + + if ((srh->hdrlen << 3) != seglen) + return false; + + /* check at least one segment and seglen fit with segments_left */ + if (!srh->segments_left || + (srh->segments_left * sizeof(struct in6_addr)) != seglen) + return false; + + if (srh->cmpri || srh->cmpre) + return false; + + err = ipv6_chk_rpl_srh_loop(net, srh->rpl_segaddr, + srh->segments_left); + if (err) + return false; + + if (ipv6_addr_type(&srh->rpl_segaddr[srh->segments_left - 1]) & + IPV6_ADDR_MULTICAST) + return false; + + return true; +} + +static int rpl_build_state(struct net *net, struct nlattr *nla, + unsigned int family, const void *cfg, + struct lwtunnel_state **ts, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[RPL_IPTUNNEL_MAX + 1]; + struct lwtunnel_state *newts; + struct ipv6_rpl_sr_hdr *srh; + struct rpl_lwt *rlwt; + int err, srh_len; + + if (family != AF_INET6) + return -EINVAL; + + err = nla_parse_nested(tb, RPL_IPTUNNEL_MAX, nla, + rpl_iptunnel_policy, extack); + if (err < 0) + return err; + + if (!tb[RPL_IPTUNNEL_SRH]) + return -EINVAL; + + srh = nla_data(tb[RPL_IPTUNNEL_SRH]); + srh_len = nla_len(tb[RPL_IPTUNNEL_SRH]); + + if (srh_len < sizeof(*srh)) + return -EINVAL; + + /* verify that SRH is consistent */ + if (!rpl_validate_srh(net, srh, srh_len - sizeof(*srh))) + return -EINVAL; + + newts = lwtunnel_state_alloc(srh_len + sizeof(*rlwt)); + if (!newts) + return -ENOMEM; + + rlwt = rpl_lwt_lwtunnel(newts); + + err = dst_cache_init(&rlwt->cache, GFP_ATOMIC); + if (err) { + kfree(newts); + return err; + } + + memcpy(&rlwt->tuninfo.srh, srh, srh_len); + + newts->type = LWTUNNEL_ENCAP_RPL; + newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT; + newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT; + + *ts = newts; + + return 0; +} + +static void rpl_destroy_state(struct lwtunnel_state *lwt) +{ + dst_cache_destroy(&rpl_lwt_lwtunnel(lwt)->cache); +} + +static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt, + const struct ipv6_rpl_sr_hdr *srh) +{ + struct ipv6_rpl_sr_hdr *isrh, *csrh; + const struct ipv6hdr *oldhdr; + struct ipv6hdr *hdr; + unsigned char *buf; + size_t hdrlen; + int err; + + oldhdr = ipv6_hdr(skb); + + buf = kzalloc(ipv6_rpl_srh_alloc_size(srh->segments_left - 1) * 2, + GFP_ATOMIC); + if (!buf) + return -ENOMEM; + + isrh = (struct ipv6_rpl_sr_hdr *)buf; + csrh = (struct ipv6_rpl_sr_hdr *)(buf + ((srh->hdrlen + 1) << 3)); + + memcpy(isrh, srh, sizeof(*isrh)); + memcpy(isrh->rpl_segaddr, &srh->rpl_segaddr[1], + (srh->segments_left - 1) * 16); + isrh->rpl_segaddr[srh->segments_left - 1] = oldhdr->daddr; + + ipv6_rpl_srh_compress(csrh, isrh, &srh->rpl_segaddr[0], + isrh->segments_left - 1); + + hdrlen = ((csrh->hdrlen + 1) << 3); + + err = skb_cow_head(skb, hdrlen + skb->mac_len); + if (unlikely(err)) + return err; + + skb_pull(skb, sizeof(struct ipv6hdr)); + skb_postpull_rcsum(skb, skb_network_header(skb), + sizeof(struct ipv6hdr)); + + skb_push(skb, sizeof(struct ipv6hdr) + hdrlen); + skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + + hdr = ipv6_hdr(skb); + memmove(hdr, oldhdr, sizeof(*hdr)); + isrh = (void *)hdr + sizeof(*hdr); + memcpy(isrh, csrh, hdrlen); + + isrh->nexthdr = hdr->nexthdr; + hdr->nexthdr = NEXTHDR_ROUTING; + hdr->daddr = srh->rpl_segaddr[0]; + + ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + + skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen); + + kfree(buf); + + return 0; +} + +static int rpl_do_srh(struct sk_buff *skb, const struct rpl_lwt *rlwt) +{ + struct dst_entry *dst = skb_dst(skb); + struct rpl_iptunnel_encap *tinfo; + int err = 0; + + if (skb->protocol != htons(ETH_P_IPV6)) + return -EINVAL; + + tinfo = rpl_encap_lwtunnel(dst->lwtstate); + + err = rpl_do_srh_inline(skb, rlwt, tinfo->srh); + if (err) + return err; + + return 0; +} + +static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct dst_entry *orig_dst = skb_dst(skb); + struct dst_entry *dst = NULL; + struct rpl_lwt *rlwt; + int err = -EINVAL; + + rlwt = rpl_lwt_lwtunnel(orig_dst->lwtstate); + + err = rpl_do_srh(skb, rlwt); + if (unlikely(err)) + goto drop; + + preempt_disable(); + dst = dst_cache_get(&rlwt->cache); + preempt_enable(); + + if (unlikely(!dst)) { + struct ipv6hdr *hdr = ipv6_hdr(skb); + struct flowi6 fl6; + + memset(&fl6, 0, sizeof(fl6)); + fl6.daddr = hdr->daddr; + fl6.saddr = hdr->saddr; + fl6.flowlabel = ip6_flowinfo(hdr); + fl6.flowi6_mark = skb->mark; + fl6.flowi6_proto = hdr->nexthdr; + + dst = ip6_route_output(net, NULL, &fl6); + if (dst->error) { + err = dst->error; + dst_release(dst); + goto drop; + } + + preempt_disable(); + dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr); + preempt_enable(); + } + + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); + if (unlikely(err)) + goto drop; + + return dst_output(net, sk, skb); + +drop: + kfree_skb(skb); + return err; +} + +static int rpl_input(struct sk_buff *skb) +{ + struct dst_entry *orig_dst = skb_dst(skb); + struct dst_entry *dst = NULL; + struct rpl_lwt *rlwt; + int err; + + rlwt = rpl_lwt_lwtunnel(orig_dst->lwtstate); + + err = rpl_do_srh(skb, rlwt); + if (unlikely(err)) { + kfree_skb(skb); + return err; + } + + preempt_disable(); + dst = dst_cache_get(&rlwt->cache); + preempt_enable(); + + skb_dst_drop(skb); + + if (!dst) { + ip6_route_input(skb); + dst = skb_dst(skb); + if (!dst->error) { + preempt_disable(); + dst_cache_set_ip6(&rlwt->cache, dst, + &ipv6_hdr(skb)->saddr); + preempt_enable(); + } + } else { + skb_dst_set(skb, dst); + } + + err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); + if (unlikely(err)) + return err; + + return dst_input(skb); +} + +static int nla_put_rpl_srh(struct sk_buff *skb, int attrtype, + struct rpl_iptunnel_encap *tuninfo) +{ + struct rpl_iptunnel_encap *data; + struct nlattr *nla; + int len; + + len = RPL_IPTUNNEL_SRH_SIZE(tuninfo->srh); + + nla = nla_reserve(skb, attrtype, len); + if (!nla) + return -EMSGSIZE; + + data = nla_data(nla); + memcpy(data, tuninfo->srh, len); + + return 0; +} + +static int rpl_fill_encap_info(struct sk_buff *skb, + struct lwtunnel_state *lwtstate) +{ + struct rpl_iptunnel_encap *tuninfo = rpl_encap_lwtunnel(lwtstate); + + if (nla_put_rpl_srh(skb, RPL_IPTUNNEL_SRH, tuninfo)) + return -EMSGSIZE; + + return 0; +} + +static int rpl_encap_nlsize(struct lwtunnel_state *lwtstate) +{ + struct rpl_iptunnel_encap *tuninfo = rpl_encap_lwtunnel(lwtstate); + + return nla_total_size(RPL_IPTUNNEL_SRH_SIZE(tuninfo->srh)); +} + +static int rpl_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) +{ + struct rpl_iptunnel_encap *a_hdr = rpl_encap_lwtunnel(a); + struct rpl_iptunnel_encap *b_hdr = rpl_encap_lwtunnel(b); + int len = RPL_IPTUNNEL_SRH_SIZE(a_hdr->srh); + + if (len != RPL_IPTUNNEL_SRH_SIZE(b_hdr->srh)) + return 1; + + return memcmp(a_hdr, b_hdr, len); +} + +static const struct lwtunnel_encap_ops rpl_ops = { + .build_state = rpl_build_state, + .destroy_state = rpl_destroy_state, + .output = rpl_output, + .input = rpl_input, + .fill_encap = rpl_fill_encap_info, + .get_encap_size = rpl_encap_nlsize, + .cmp_encap = rpl_encap_cmp, + .owner = THIS_MODULE, +}; + +int __init rpl_init(void) +{ + int err; + + err = lwtunnel_encap_add_ops(&rpl_ops, LWTUNNEL_ENCAP_RPL); + if (err) + goto out; + + pr_info("RPL Segment Routing with IPv6\n"); + + return 0; + +out: + return err; +} + +void rpl_exit(void) +{ + lwtunnel_encap_del_ops(&rpl_ops, LWTUNNEL_ENCAP_RPL); +} diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index ac837afb9040..c7cbfeae94f5 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -376,7 +376,7 @@ drop: return err; } -static int seg6_build_state(struct nlattr *nla, +static int seg6_build_state(struct net *net, struct nlattr *nla, unsigned int family, const void *cfg, struct lwtunnel_state **ts, struct netlink_ext_ack *extack) diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 8165802d8e05..52493423f329 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -970,8 +970,9 @@ static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt) return 0; } -static int seg6_local_build_state(struct nlattr *nla, unsigned int family, - const void *cfg, struct lwtunnel_state **ts, +static int seg6_local_build_state(struct net *net, struct nlattr *nla, + unsigned int family, const void *cfg, + struct lwtunnel_state **ts, struct netlink_ext_ack *extack) { struct nlattr *tb[SEG6_LOCAL_MAX + 1]; diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index 44b675016393..2def85718d94 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -162,7 +162,7 @@ drop: return -EINVAL; } -static int mpls_build_state(struct nlattr *nla, +static int mpls_build_state(struct net *net, struct nlattr *nla, unsigned int family, const void *cfg, struct lwtunnel_state **ts, struct netlink_ext_ack *extack) |