diff options
author | David S. Miller <davem@davemloft.net> | 2022-11-18 11:42:54 +0000 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2022-11-18 11:42:54 +0000 |
commit | 22700706adac57608ca78ef527b0df56c11f85e6 (patch) | |
tree | d746e8df31eca28f07a56747232aeba330c09683 | |
parent | 0b6ffefbb018462856dd8ee22735f8c9aecb2c80 (diff) | |
parent | a61bd7b9fef34484a3fe144a62f4ec78cc42e20e (diff) |
Merge branch 'sctp-vrf'
Xin Long says:
====================
sctp: support vrf processing
This patchset adds the VRF processing in SCTP. Similar to TCP/UDP,
it includes socket bind and socket/association lookup changes.
For socket bind change, it allows sockets to bind to a VRF device
and allows multiple sockets with the same IP and PORT to bind to
different interfaces in patch 1-3.
For socket/association lookup change, it adds dif and sdif check
in both asoc and ep lookup in patch 4 and 5, and when binding to
nodev, users can decide whether to accept packets received from an
l3mdev by setting a sysctl option in patch 6.
Note with VRF support, in a netns, an association will be decided
by src ip + src port + dst ip + dst port + bound_dev_if, and it's
possible for ss to have:
State Local Address:Port Peer Address:Port
ESTAB 192.168.1.2%vrf-s1:1234
`- ESTAB 192.168.1.2%veth1:1234 192.168.1.1:1234
ESTAB 192.168.1.2%vrf-s2:1234
`- ESTAB 192.168.1.2%veth2:1234 192.168.1.1:1234
See the selftest in patch 7 for more usage.
Also, thanks to Carlo for testing this patch series in their use case.
v1->v2:
- In Patch 5, move sctp_sk_bound_dev_eq() definition to net/sctp/
input.c to avoid a build error when IP_SCTP is disabled, as Paolo
suggested.
- In Patch 7, avoid one sleep by disabling the IPv6 dad, and remove
another sleep by using ss to check if the server's ready, and also
delete two unnecessary sleeps in sctp_hello.c, as Paolo suggested.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | Documentation/networking/ip-sysctl.rst | 9 | ||||
-rw-r--r-- | include/net/netns/sctp.h | 4 | ||||
-rw-r--r-- | include/net/sctp/sctp.h | 6 | ||||
-rw-r--r-- | include/net/sctp/structs.h | 9 | ||||
-rw-r--r-- | net/sctp/diag.c | 3 | ||||
-rw-r--r-- | net/sctp/endpointola.c | 13 | ||||
-rw-r--r-- | net/sctp/input.c | 108 | ||||
-rw-r--r-- | net/sctp/ipv6.c | 22 | ||||
-rw-r--r-- | net/sctp/protocol.c | 19 | ||||
-rw-r--r-- | net/sctp/socket.c | 9 | ||||
-rw-r--r-- | net/sctp/sysctl.c | 11 | ||||
-rw-r--r-- | tools/testing/selftests/net/Makefile | 2 | ||||
-rw-r--r-- | tools/testing/selftests/net/sctp_hello.c | 137 | ||||
-rwxr-xr-x | tools/testing/selftests/net/sctp_vrf.sh | 178 |
14 files changed, 461 insertions, 69 deletions
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 727b25cc7ec4..7fbd060d6047 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -3127,6 +3127,15 @@ ecn_enable - BOOLEAN Default: 1 +l3mdev_accept - BOOLEAN + Enabling this option allows a "global" bound socket to work + across L3 master domains (e.g., VRFs) with packets capable of + being received regardless of the L3 domain in which they + originated. Only valid when the kernel was compiled with + CONFIG_NET_L3_MASTER_DEV. + + Default: 1 (enabled) + ``/proc/sys/net/core/*`` ======================== diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index a681147aecd8..7eff3d981b89 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -175,6 +175,10 @@ struct netns_sctp { /* Threshold for autoclose timeout, in seconds. */ unsigned long max_autoclose; + +#ifdef CONFIG_NET_L3_MASTER_DEV + int l3mdev_accept; +#endif }; #endif /* __NETNS_SCTP_H__ */ diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 01d904b34cf0..c335dd01a597 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -109,7 +109,7 @@ struct sctp_transport *sctp_transport_get_idx(struct net *net, struct rhashtable_iter *iter, int pos); int sctp_transport_lookup_process(sctp_callback_t cb, struct net *net, const union sctp_addr *laddr, - const union sctp_addr *paddr, void *p); + const union sctp_addr *paddr, void *p, int dif); int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done, struct net *net, int *pos, void *p); int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p); @@ -157,10 +157,12 @@ void sctp_unhash_transport(struct sctp_transport *t); struct sctp_transport *sctp_addrs_lookup_transport( struct net *net, const union sctp_addr *laddr, - const union sctp_addr *paddr); + const union sctp_addr *paddr, + int dif, int sdif); struct sctp_transport 
*sctp_epaddr_lookup_transport( const struct sctp_endpoint *ep, const union sctp_addr *paddr); +bool sctp_sk_bound_dev_eq(struct net *net, int bound_dev_if, int dif, int sdif); /* * sctp/proc.c diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 350f250b0dc7..afa3781e3ca2 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -477,6 +477,7 @@ struct sctp_af { int (*available) (union sctp_addr *, struct sctp_sock *); int (*skb_iif) (const struct sk_buff *sk); + int (*skb_sdif)(const struct sk_buff *sk); int (*is_ce) (const struct sk_buff *sk); void (*seq_dump_addr)(struct seq_file *seq, union sctp_addr *addr); @@ -1378,10 +1379,12 @@ struct sctp_association *sctp_endpoint_lookup_assoc( struct sctp_transport **); bool sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep, const union sctp_addr *paddr); -struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *, - struct net *, const union sctp_addr *); +struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep, + struct net *net, + const union sctp_addr *laddr, + int dif, int sdif); bool sctp_has_association(struct net *net, const union sctp_addr *laddr, - const union sctp_addr *paddr); + const union sctp_addr *paddr, int dif, int sdif); int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, diff --git a/net/sctp/diag.c b/net/sctp/diag.c index d9c6d8f30f09..a557009e9832 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -426,6 +426,7 @@ static int sctp_diag_dump_one(struct netlink_callback *cb, struct net *net = sock_net(skb->sk); const struct nlmsghdr *nlh = cb->nlh; union sctp_addr laddr, paddr; + int dif = req->id.idiag_if; struct sctp_comm_param commp = { .skb = skb, .r = req, @@ -454,7 +455,7 @@ static int sctp_diag_dump_one(struct netlink_callback *cb, } return sctp_transport_lookup_process(sctp_sock_dump_one, - net, &laddr, &paddr, &commp); + net, &laddr, &paddr, &commp, dif); } static void 
sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index efffde7f2328..7e77b450697c 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -246,12 +246,15 @@ void sctp_endpoint_put(struct sctp_endpoint *ep) /* Is this the endpoint we are looking for? */ struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep, struct net *net, - const union sctp_addr *laddr) + const union sctp_addr *laddr, + int dif, int sdif) { + int bound_dev_if = READ_ONCE(ep->base.sk->sk_bound_dev_if); struct sctp_endpoint *retval = NULL; - if ((htons(ep->base.bind_addr.port) == laddr->v4.sin_port) && - net_eq(ep->base.net, net)) { + if (net_eq(ep->base.net, net) && + sctp_sk_bound_dev_eq(net, bound_dev_if, dif, sdif) && + (htons(ep->base.bind_addr.port) == laddr->v4.sin_port)) { if (sctp_bind_addr_match(&ep->base.bind_addr, laddr, sctp_sk(ep->base.sk))) retval = ep; @@ -298,6 +301,7 @@ out: bool sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep, const union sctp_addr *paddr) { + int bound_dev_if = READ_ONCE(ep->base.sk->sk_bound_dev_if); struct sctp_sockaddr_entry *addr; struct net *net = ep->base.net; struct sctp_bind_addr *bp; @@ -307,7 +311,8 @@ bool sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep, * so the address_list can not change. 
*/ list_for_each_entry(addr, &bp->address_list, list) { - if (sctp_has_association(net, &addr->a, paddr)) + if (sctp_has_association(net, &addr->a, paddr, + bound_dev_if, bound_dev_if)) return true; } diff --git a/net/sctp/input.c b/net/sctp/input.c index 4f43afa8678f..bf70371301ff 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -50,16 +50,19 @@ static struct sctp_association *__sctp_rcv_lookup(struct net *net, struct sk_buff *skb, const union sctp_addr *paddr, const union sctp_addr *laddr, - struct sctp_transport **transportp); + struct sctp_transport **transportp, + int dif, int sdif); static struct sctp_endpoint *__sctp_rcv_lookup_endpoint( struct net *net, struct sk_buff *skb, const union sctp_addr *laddr, - const union sctp_addr *daddr); + const union sctp_addr *daddr, + int dif, int sdif); static struct sctp_association *__sctp_lookup_association( struct net *net, const union sctp_addr *local, const union sctp_addr *peer, - struct sctp_transport **pt); + struct sctp_transport **pt, + int dif, int sdif); static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb); @@ -92,11 +95,11 @@ int sctp_rcv(struct sk_buff *skb) struct sctp_chunk *chunk; union sctp_addr src; union sctp_addr dest; - int bound_dev_if; int family; struct sctp_af *af; struct net *net = dev_net(skb->dev); bool is_gso = skb_is_gso(skb) && skb_is_gso_sctp(skb); + int dif, sdif; if (skb->pkt_type != PACKET_HOST) goto discard_it; @@ -141,6 +144,8 @@ int sctp_rcv(struct sk_buff *skb) /* Initialize local addresses for lookups. */ af->from_skb(&src, skb, 1); af->from_skb(&dest, skb, 0); + dif = af->skb_iif(skb); + sdif = af->skb_sdif(skb); /* If the packet is to or from a non-unicast address, * silently discard the packet. 
@@ -157,36 +162,16 @@ int sctp_rcv(struct sk_buff *skb) !af->addr_valid(&dest, NULL, skb)) goto discard_it; - asoc = __sctp_rcv_lookup(net, skb, &src, &dest, &transport); + asoc = __sctp_rcv_lookup(net, skb, &src, &dest, &transport, dif, sdif); if (!asoc) - ep = __sctp_rcv_lookup_endpoint(net, skb, &dest, &src); + ep = __sctp_rcv_lookup_endpoint(net, skb, &dest, &src, dif, sdif); /* Retrieve the common input handling substructure. */ rcvr = asoc ? &asoc->base : &ep->base; sk = rcvr->sk; /* - * If a frame arrives on an interface and the receiving socket is - * bound to another interface, via SO_BINDTODEVICE, treat it as OOTB - */ - bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); - if (bound_dev_if && (bound_dev_if != af->skb_iif(skb))) { - if (transport) { - sctp_transport_put(transport); - asoc = NULL; - transport = NULL; - } else { - sctp_endpoint_put(ep); - ep = NULL; - } - sk = net->sctp.ctl_sock; - ep = sctp_sk(sk)->ep; - sctp_endpoint_hold(ep); - rcvr = &ep->base; - } - - /* * RFC 2960, 8.4 - Handle "Out of the blue" Packets. * An SCTP packet is called an "out of the blue" (OOTB) * packet if it is correctly formed, i.e., passed the @@ -485,6 +470,8 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb, struct sctp_association *asoc; struct sctp_transport *transport = NULL; __u32 vtag = ntohl(sctphdr->vtag); + int sdif = inet_sdif(skb); + int dif = inet_iif(skb); *app = NULL; *tpp = NULL; @@ -500,7 +487,7 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb, /* Look for an association that matches the incoming ICMP error * packet. 
*/ - asoc = __sctp_lookup_association(net, &saddr, &daddr, &transport); + asoc = __sctp_lookup_association(net, &saddr, &daddr, &transport, dif, sdif); if (!asoc) return NULL; @@ -850,7 +837,8 @@ static inline __u32 sctp_hashfn(const struct net *net, __be16 lport, static struct sctp_endpoint *__sctp_rcv_lookup_endpoint( struct net *net, struct sk_buff *skb, const union sctp_addr *laddr, - const union sctp_addr *paddr) + const union sctp_addr *paddr, + int dif, int sdif) { struct sctp_hashbucket *head; struct sctp_endpoint *ep; @@ -863,7 +851,7 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint( head = &sctp_ep_hashtable[hash]; read_lock(&head->lock); sctp_for_each_hentry(ep, &head->chain) { - if (sctp_endpoint_is_match(ep, net, laddr)) + if (sctp_endpoint_is_match(ep, net, laddr, dif, sdif)) goto hit; } @@ -990,14 +978,26 @@ void sctp_unhash_transport(struct sctp_transport *t) sctp_hash_params); } +bool sctp_sk_bound_dev_eq(struct net *net, int bound_dev_if, int dif, int sdif) +{ + bool l3mdev_accept = true; + +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + l3mdev_accept = !!READ_ONCE(net->sctp.l3mdev_accept); +#endif + return inet_bound_dev_eq(l3mdev_accept, bound_dev_if, dif, sdif); +} + /* return a transport with holding it */ struct sctp_transport *sctp_addrs_lookup_transport( struct net *net, const union sctp_addr *laddr, - const union sctp_addr *paddr) + const union sctp_addr *paddr, + int dif, int sdif) { struct rhlist_head *tmp, *list; struct sctp_transport *t; + int bound_dev_if; struct sctp_hash_cmp_arg arg = { .paddr = paddr, .net = net, @@ -1011,7 +1011,9 @@ struct sctp_transport *sctp_addrs_lookup_transport( if (!sctp_transport_hold(t)) continue; - if (sctp_bind_addr_match(&t->asoc->base.bind_addr, + bound_dev_if = READ_ONCE(t->asoc->base.sk->sk_bound_dev_if); + if (sctp_sk_bound_dev_eq(net, bound_dev_if, dif, sdif) && + sctp_bind_addr_match(&t->asoc->base.bind_addr, laddr, sctp_sk(t->asoc->base.sk))) return t; sctp_transport_put(t); @@ -1048,12 
+1050,13 @@ static struct sctp_association *__sctp_lookup_association( struct net *net, const union sctp_addr *local, const union sctp_addr *peer, - struct sctp_transport **pt) + struct sctp_transport **pt, + int dif, int sdif) { struct sctp_transport *t; struct sctp_association *asoc = NULL; - t = sctp_addrs_lookup_transport(net, local, peer); + t = sctp_addrs_lookup_transport(net, local, peer, dif, sdif); if (!t) goto out; @@ -1069,12 +1072,13 @@ static struct sctp_association *sctp_lookup_association(struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr, - struct sctp_transport **transportp) + struct sctp_transport **transportp, + int dif, int sdif) { struct sctp_association *asoc; rcu_read_lock(); - asoc = __sctp_lookup_association(net, laddr, paddr, transportp); + asoc = __sctp_lookup_association(net, laddr, paddr, transportp, dif, sdif); rcu_read_unlock(); return asoc; @@ -1083,11 +1087,12 @@ struct sctp_association *sctp_lookup_association(struct net *net, /* Is there an association matching the given local and peer addresses? 
*/ bool sctp_has_association(struct net *net, const union sctp_addr *laddr, - const union sctp_addr *paddr) + const union sctp_addr *paddr, + int dif, int sdif) { struct sctp_transport *transport; - if (sctp_lookup_association(net, laddr, paddr, &transport)) { + if (sctp_lookup_association(net, laddr, paddr, &transport, dif, sdif)) { sctp_transport_put(transport); return true; } @@ -1115,7 +1120,8 @@ bool sctp_has_association(struct net *net, */ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net, struct sk_buff *skb, - const union sctp_addr *laddr, struct sctp_transport **transportp) + const union sctp_addr *laddr, struct sctp_transport **transportp, + int dif, int sdif) { struct sctp_association *asoc; union sctp_addr addr; @@ -1154,7 +1160,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net, if (!af->from_addr_param(paddr, params.addr, sh->source, 0)) continue; - asoc = __sctp_lookup_association(net, laddr, paddr, transportp); + asoc = __sctp_lookup_association(net, laddr, paddr, transportp, dif, sdif); if (asoc) return asoc; } @@ -1181,7 +1187,8 @@ static struct sctp_association *__sctp_rcv_asconf_lookup( struct sctp_chunkhdr *ch, const union sctp_addr *laddr, __be16 peer_port, - struct sctp_transport **transportp) + struct sctp_transport **transportp, + int dif, int sdif) { struct sctp_addip_chunk *asconf = (struct sctp_addip_chunk *)ch; struct sctp_af *af; @@ -1201,7 +1208,7 @@ static struct sctp_association *__sctp_rcv_asconf_lookup( if (!af->from_addr_param(&paddr, param, peer_port, 0)) return NULL; - return __sctp_lookup_association(net, laddr, &paddr, transportp); + return __sctp_lookup_association(net, laddr, &paddr, transportp, dif, sdif); } @@ -1217,7 +1224,8 @@ static struct sctp_association *__sctp_rcv_asconf_lookup( static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net, struct sk_buff *skb, const union sctp_addr *laddr, - struct sctp_transport **transportp) + struct sctp_transport 
**transportp, + int dif, int sdif) { struct sctp_association *asoc = NULL; struct sctp_chunkhdr *ch; @@ -1260,7 +1268,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net, asoc = __sctp_rcv_asconf_lookup( net, ch, laddr, sctp_hdr(skb)->source, - transportp); + transportp, dif, sdif); break; default: break; @@ -1285,7 +1293,8 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net, static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net, struct sk_buff *skb, const union sctp_addr *laddr, - struct sctp_transport **transportp) + struct sctp_transport **transportp, + int dif, int sdif) { struct sctp_chunkhdr *ch; @@ -1309,9 +1318,9 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net, /* If this is INIT/INIT-ACK look inside the chunk too. */ if (ch->type == SCTP_CID_INIT || ch->type == SCTP_CID_INIT_ACK) - return __sctp_rcv_init_lookup(net, skb, laddr, transportp); + return __sctp_rcv_init_lookup(net, skb, laddr, transportp, dif, sdif); - return __sctp_rcv_walk_lookup(net, skb, laddr, transportp); + return __sctp_rcv_walk_lookup(net, skb, laddr, transportp, dif, sdif); } /* Lookup an association for an inbound skb. */ @@ -1319,11 +1328,12 @@ static struct sctp_association *__sctp_rcv_lookup(struct net *net, struct sk_buff *skb, const union sctp_addr *paddr, const union sctp_addr *laddr, - struct sctp_transport **transportp) + struct sctp_transport **transportp, + int dif, int sdif) { struct sctp_association *asoc; - asoc = __sctp_lookup_association(net, laddr, paddr, transportp); + asoc = __sctp_lookup_association(net, laddr, paddr, transportp, dif, sdif); if (asoc) goto out; @@ -1331,7 +1341,7 @@ static struct sctp_association *__sctp_rcv_lookup(struct net *net, * SCTP Implementors Guide, 2.18 Handling of address * parameters within the INIT or INIT-ACK. 
*/ - asoc = __sctp_rcv_lookup_harder(net, skb, laddr, transportp); + asoc = __sctp_rcv_lookup_harder(net, skb, laddr, transportp, dif, sdif); if (asoc) goto out; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index d081858c2d07..097bd60ce964 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -680,9 +680,11 @@ static int sctp_v6_is_any(const union sctp_addr *addr) /* Should this be available for binding? */ static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp) { - int type; - struct net *net = sock_net(&sp->inet.sk); const struct in6_addr *in6 = (const struct in6_addr *)&addr->v6.sin6_addr; + struct sock *sk = &sp->inet.sk; + struct net *net = sock_net(sk); + struct net_device *dev = NULL; + int type; type = ipv6_addr_type(in6); if (IPV6_ADDR_ANY == type) @@ -696,8 +698,14 @@ static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp) if (!(type & IPV6_ADDR_UNICAST)) return 0; + if (sk->sk_bound_dev_if) { + dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); + if (!dev) + return 0; + } + return ipv6_can_nonlocal_bind(net, &sp->inet) || - ipv6_chk_addr(net, in6, NULL, 0); + ipv6_chk_addr(net, in6, dev, 0); } /* This function checks if the address is a valid address to be used for @@ -834,7 +842,12 @@ static int sctp_v6_addr_to_user(struct sctp_sock *sp, union sctp_addr *addr) /* Where did this skb come from? */ static int sctp_v6_skb_iif(const struct sk_buff *skb) { - return IP6CB(skb)->iif; + return inet6_iif(skb); +} + +static int sctp_v6_skb_sdif(const struct sk_buff *skb) +{ + return inet6_sdif(skb); } /* Was this packet marked by Explicit Congestion Notification? 
*/ @@ -1134,6 +1147,7 @@ static struct sctp_af sctp_af_inet6 = { .is_any = sctp_v6_is_any, .available = sctp_v6_available, .skb_iif = sctp_v6_skb_iif, + .skb_sdif = sctp_v6_skb_sdif, .is_ce = sctp_v6_is_ce, .seq_dump_addr = sctp_v6_seq_dump_addr, .ecn_capable = sctp_v6_ecn_capable, diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index bcd3384ab07a..909a89a1cff4 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -351,10 +351,13 @@ static int sctp_v4_addr_valid(union sctp_addr *addr, /* Should this be available for binding? */ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp) { - struct net *net = sock_net(&sp->inet.sk); - int ret = inet_addr_type(net, addr->v4.sin_addr.s_addr); - + struct sock *sk = &sp->inet.sk; + struct net *net = sock_net(sk); + int tb_id = RT_TABLE_LOCAL; + int ret; + tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ?: tb_id; + ret = inet_addr_type_table(net, addr->v4.sin_addr.s_addr, tb_id); if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) && ret != RTN_LOCAL && !sp->inet.freebind && @@ -564,6 +567,11 @@ static int sctp_v4_skb_iif(const struct sk_buff *skb) return inet_iif(skb); } +static int sctp_v4_skb_sdif(const struct sk_buff *skb) +{ + return inet_sdif(skb); +} + /* Was this packet marked by Explicit Congestion Notification? */ static int sctp_v4_is_ce(const struct sk_buff *skb) { @@ -1182,6 +1190,7 @@ static struct sctp_af sctp_af_inet = { .available = sctp_v4_available, .scope = sctp_v4_scope, .skb_iif = sctp_v4_skb_iif, + .skb_sdif = sctp_v4_skb_sdif, .is_ce = sctp_v4_is_ce, .seq_dump_addr = sctp_v4_seq_dump_addr, .ecn_capable = sctp_v4_ecn_capable, @@ -1385,6 +1394,10 @@ static int __net_init sctp_defaults_init(struct net *net) /* Initialize maximum autoclose timeout. 
*/ net->sctp.max_autoclose = INT_MAX / HZ; +#ifdef CONFIG_NET_L3_MASTER_DEV + net->sctp.l3mdev_accept = 1; +#endif + status = sctp_sysctl_net_register(net); if (status) goto err_sysctl_register; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 3e83963d1b8a..5acbdf0d38f3 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5315,14 +5315,14 @@ EXPORT_SYMBOL_GPL(sctp_for_each_endpoint); int sctp_transport_lookup_process(sctp_callback_t cb, struct net *net, const union sctp_addr *laddr, - const union sctp_addr *paddr, void *p) + const union sctp_addr *paddr, void *p, int dif) { struct sctp_transport *transport; struct sctp_endpoint *ep; int err = -ENOENT; rcu_read_lock(); - transport = sctp_addrs_lookup_transport(net, laddr, paddr); + transport = sctp_addrs_lookup_transport(net, laddr, paddr, dif, dif); if (!transport) { rcu_read_unlock(); return err; @@ -8398,6 +8398,7 @@ pp_found: * in an endpoint. */ sk_for_each_bound(sk2, &pp->owner) { + int bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if); struct sctp_sock *sp2 = sctp_sk(sk2); struct sctp_endpoint *ep2 = sp2->ep; @@ -8408,7 +8409,9 @@ pp_found: uid_eq(uid, sock_i_uid(sk2)))) continue; - if (sctp_bind_addr_conflict(&ep2->base.bind_addr, + if ((!sk->sk_bound_dev_if || !bound_dev_if2 || + sk->sk_bound_dev_if == bound_dev_if2) && + sctp_bind_addr_conflict(&ep2->base.bind_addr, addr, sp2, sp)) { ret = 1; goto fail_unlock; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index b46a416787ec..7f40ed117fc7 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -347,6 +347,17 @@ static struct ctl_table sctp_net_table[] = { .extra1 = &max_autoclose_min, .extra2 = &max_autoclose_max, }, +#ifdef CONFIG_NET_L3_MASTER_DEV + { + .procname = "l3mdev_accept", + .data = &init_net.sctp.l3mdev_accept, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif { .procname = "pf_enable", .data = &init_net.sctp.pf_enable, diff --git 
a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index cec4800cb017..880e6ded6ed5 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -72,6 +72,8 @@ TEST_GEN_PROGS += sk_bind_sendto_listen TEST_GEN_PROGS += sk_connect_zero_addr TEST_PROGS += test_ingress_egress_chaining.sh TEST_GEN_PROGS += so_incoming_cpu +TEST_PROGS += sctp_vrf.sh +TEST_GEN_FILES += sctp_hello TEST_FILES := settings diff --git a/tools/testing/selftests/net/sctp_hello.c b/tools/testing/selftests/net/sctp_hello.c new file mode 100644 index 000000000000..f02f1f95d227 --- /dev/null +++ b/tools/testing/selftests/net/sctp_hello.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <netinet/in.h> +#include <arpa/inet.h> + +static void set_addr(struct sockaddr_storage *ss, char *ip, char *port, int *len) +{ + if (ss->ss_family == AF_INET) { + struct sockaddr_in *a = (struct sockaddr_in *)ss; + + a->sin_addr.s_addr = inet_addr(ip); + a->sin_port = htons(atoi(port)); + *len = sizeof(*a); + } else { + struct sockaddr_in6 *a = (struct sockaddr_in6 *)ss; + + a->sin6_family = AF_INET6; + inet_pton(AF_INET6, ip, &a->sin6_addr); + a->sin6_port = htons(atoi(port)); + *len = sizeof(*a); + } +} + +static int do_client(int argc, char *argv[]) +{ + struct sockaddr_storage ss; + char buf[] = "hello"; + int csk, ret, len; + + if (argc < 5) { + printf("%s client -4|6 IP PORT [IP PORT]\n", argv[0]); + return -1; + } + + bzero((void *)&ss, sizeof(ss)); + ss.ss_family = !strcmp(argv[2], "-4") ? 
AF_INET : AF_INET6; + csk = socket(ss.ss_family, SOCK_STREAM, IPPROTO_SCTP); + if (csk < 0) { + printf("failed to create socket\n"); + return -1; + } + + if (argc >= 7) { + set_addr(&ss, argv[5], argv[6], &len); + ret = bind(csk, (struct sockaddr *)&ss, len); + if (ret < 0) { + printf("failed to bind to address\n"); + return -1; + } + } + + set_addr(&ss, argv[3], argv[4], &len); + ret = connect(csk, (struct sockaddr *)&ss, len); + if (ret < 0) { + printf("failed to connect to peer\n"); + return -1; + } + + ret = send(csk, buf, strlen(buf) + 1, 0); + if (ret < 0) { + printf("failed to send msg %d\n", ret); + return -1; + } + close(csk); + + return 0; +} + +int main(int argc, char *argv[]) +{ + struct sockaddr_storage ss; + int lsk, csk, ret, len; + char buf[20]; + + if (argc < 2 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) { + printf("%s server|client ...\n", argv[0]); + return -1; + } + + if (!strcmp(argv[1], "client")) + return do_client(argc, argv); + + if (argc < 5) { + printf("%s server -4|6 IP PORT [IFACE]\n", argv[0]); + return -1; + } + + ss.ss_family = !strcmp(argv[2], "-4") ? 
AF_INET : AF_INET6; + lsk = socket(ss.ss_family, SOCK_STREAM, IPPROTO_SCTP); + if (lsk < 0) { + printf("failed to create lsk\n"); + return -1; + } + + if (argc >= 6) { + ret = setsockopt(lsk, SOL_SOCKET, SO_BINDTODEVICE, + argv[5], strlen(argv[5]) + 1); + if (ret < 0) { + printf("failed to bind to device\n"); + return -1; + } + } + + set_addr(&ss, argv[3], argv[4], &len); + ret = bind(lsk, (struct sockaddr *)&ss, len); + if (ret < 0) { + printf("failed to bind to address\n"); + return -1; + } + + ret = listen(lsk, 5); + if (ret < 0) { + printf("failed to listen on port\n"); + return -1; + } + + csk = accept(lsk, (struct sockaddr *)NULL, (socklen_t *)NULL); + if (csk < 0) { + printf("failed to accept new client\n"); + return -1; + } + + ret = recv(csk, buf, sizeof(buf), 0); + if (ret <= 0) { + printf("failed to recv msg %d\n", ret); + return -1; + } + close(csk); + close(lsk); + + return 0; +} diff --git a/tools/testing/selftests/net/sctp_vrf.sh b/tools/testing/selftests/net/sctp_vrf.sh new file mode 100755 index 000000000000..c721e952e5f3 --- /dev/null +++ b/tools/testing/selftests/net/sctp_vrf.sh @@ -0,0 +1,178 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Testing For SCTP VRF. 
+# TOPO: CLIENT_NS1 (veth1) <---> (veth1) -> vrf_s1 +# SERVER_NS +# CLIENT_NS2 (veth1) <---> (veth2) -> vrf_s2 + +CLIENT_NS1="client-ns1" +CLIENT_NS2="client-ns2" +CLIENT_IP4="10.0.0.1" +CLIENT_IP6="2000::1" +CLIENT_PORT=1234 + +SERVER_NS="server-ns" +SERVER_IP4="10.0.0.2" +SERVER_IP6="2000::2" +SERVER_PORT=1234 + +setup() { + modprobe sctp + modprobe sctp_diag + ip netns add $CLIENT_NS1 + ip netns add $CLIENT_NS2 + ip netns add $SERVER_NS + + ip net exec $CLIENT_NS1 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null + ip net exec $CLIENT_NS2 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null + ip net exec $SERVER_NS sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null + + ip -n $SERVER_NS link add veth1 type veth peer name veth1 netns $CLIENT_NS1 + ip -n $SERVER_NS link add veth2 type veth peer name veth1 netns $CLIENT_NS2 + + ip -n $CLIENT_NS1 link set veth1 up + ip -n $CLIENT_NS1 addr add $CLIENT_IP4/24 dev veth1 + ip -n $CLIENT_NS1 addr add $CLIENT_IP6/24 dev veth1 + + ip -n $CLIENT_NS2 link set veth1 up + ip -n $CLIENT_NS2 addr add $CLIENT_IP4/24 dev veth1 + ip -n $CLIENT_NS2 addr add $CLIENT_IP6/24 dev veth1 + + ip -n $SERVER_NS link add dummy1 type dummy + ip -n $SERVER_NS link set dummy1 up + ip -n $SERVER_NS link add vrf-1 type vrf table 10 + ip -n $SERVER_NS link add vrf-2 type vrf table 20 + ip -n $SERVER_NS link set vrf-1 up + ip -n $SERVER_NS link set vrf-2 up + ip -n $SERVER_NS link set veth1 master vrf-1 + ip -n $SERVER_NS link set veth2 master vrf-2 + + ip -n $SERVER_NS addr add $SERVER_IP4/24 dev dummy1 + ip -n $SERVER_NS addr add $SERVER_IP4/24 dev veth1 + ip -n $SERVER_NS addr add $SERVER_IP4/24 dev veth2 + ip -n $SERVER_NS addr add $SERVER_IP6/24 dev dummy1 + ip -n $SERVER_NS addr add $SERVER_IP6/24 dev veth1 + ip -n $SERVER_NS addr add $SERVER_IP6/24 dev veth2 + + ip -n $SERVER_NS link set veth1 up + ip -n $SERVER_NS link set veth2 up + ip -n $SERVER_NS route add table 10 $CLIENT_IP4 dev veth1 src $SERVER_IP4 + ip -n 
$SERVER_NS route add table 20 $CLIENT_IP4 dev veth2 src $SERVER_IP4 + ip -n $SERVER_NS route add $CLIENT_IP4 dev veth1 src $SERVER_IP4 + ip -n $SERVER_NS route add table 10 $CLIENT_IP6 dev veth1 src $SERVER_IP6 + ip -n $SERVER_NS route add table 20 $CLIENT_IP6 dev veth2 src $SERVER_IP6 + ip -n $SERVER_NS route add $CLIENT_IP6 dev veth1 src $SERVER_IP6 +} + +cleanup() { + ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null + ip netns del "$CLIENT_NS1" + ip netns del "$CLIENT_NS2" + ip netns del "$SERVER_NS" +} + +wait_server() { + local IFACE=$1 + local CNT=0 + + until ip netns exec $SERVER_NS ss -lS src $SERVER_IP:$SERVER_PORT | \ + grep LISTEN | grep "$IFACE" 2>&1 >/dev/null; do + [ $((CNT++)) = "20" ] && { RET=3; return $RET; } + sleep 0.1 + done +} + +do_test() { + local CLIENT_NS=$1 + local IFACE=$2 + + ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null + ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \ + $SERVER_PORT $IFACE 2>&1 >/dev/null & + disown + wait_server $IFACE || return $RET + timeout 3 ip netns exec $CLIENT_NS ./sctp_hello client $AF \ + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null + RET=$? + return $RET +} + +do_testx() { + local IFACE1=$1 + local IFACE2=$2 + + ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null + ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \ + $SERVER_PORT $IFACE1 2>&1 >/dev/null & + disown + wait_server $IFACE1 || return $RET + ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \ + $SERVER_PORT $IFACE2 2>&1 >/dev/null & + disown + wait_server $IFACE2 || return $RET + timeout 3 ip netns exec $CLIENT_NS1 ./sctp_hello client $AF \ + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null && \ + timeout 3 ip netns exec $CLIENT_NS2 ./sctp_hello client $AF \ + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null + RET=$? 
+ return $RET +} + +testup() { + ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=1 2>&1 >/dev/null + echo -n "TEST 01: nobind, connect from client 1, l3mdev_accept=1, Y " + do_test $CLIENT_NS1 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 02: nobind, connect from client 2, l3mdev_accept=1, N " + do_test $CLIENT_NS2 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=0 2>&1 >/dev/null + echo -n "TEST 03: nobind, connect from client 1, l3mdev_accept=0, N " + do_test $CLIENT_NS1 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 04: nobind, connect from client 2, l3mdev_accept=0, N " + do_test $CLIENT_NS2 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 05: bind veth2 in server, connect from client 1, N " + do_test $CLIENT_NS1 veth2 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 06: bind veth1 in server, connect from client 1, Y " + do_test $CLIENT_NS1 veth1 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 07: bind vrf-1 in server, connect from client 1, Y " + do_test $CLIENT_NS1 vrf-1 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 08: bind vrf-2 in server, connect from client 1, N " + do_test $CLIENT_NS1 vrf-2 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 09: bind vrf-2 in server, connect from client 2, Y " + do_test $CLIENT_NS2 vrf-2 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 10: bind vrf-1 in server, connect from client 2, N " + do_test $CLIENT_NS2 vrf-1 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 11: bind vrf-1 & 2 in server, connect from client 1 & 2, Y " + do_testx vrf-1 vrf-2 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 12: bind vrf-2 & 1 in server, connect from client 1 & 2, N " + do_testx vrf-2 vrf-1 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" +} + +trap cleanup 
EXIT +setup && echo "Testing For SCTP VRF:" && \ +CLIENT_IP=$CLIENT_IP4 SERVER_IP=$SERVER_IP4 AF="-4" testup && echo "***v4 Tests Done***" && +CLIENT_IP=$CLIENT_IP6 SERVER_IP=$SERVER_IP6 AF="-6" testup && echo "***v6 Tests Done***" +exit $? |