summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Fastabend <john.fastabend@gmail.com>2018-10-16 11:08:04 -0700
committerDaniel Borkmann <daniel@iogearbox.net>2018-10-17 02:30:32 +0200
commit02c558b2d5d679fbbcaa5b9689484c7e0f8abb7b (patch)
treeb89b9cb06c6adb6862759afaefae6fb7ab6d768c
parent8734a162c13b1a893e7dff8de0df81fed04c51a6 (diff)
bpf: sockmap, support for msg_peek in sk_msg with redirect ingress
This adds support for the MSG_PEEK flag when doing redirect to ingress and receiving on the sk_msg psock queue. Previously the flag was being ignored which could confuse applications if they expected the flag to work as normal. Signed-off-by: John Fastabend <john.fastabend@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-rw-r--r--include/net/tcp.h2
-rw-r--r--net/ipv4/tcp_bpf.c42
-rw-r--r--net/tls/tls_sw.c3
3 files changed, 30 insertions, 17 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 3600ae0f25c3..14fdd7ce9992 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2089,7 +2089,7 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len);
int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
- struct msghdr *msg, int len);
+ struct msghdr *msg, int len, int flags);
/* Call BPF_SOCK_OPS program that returns an int. If the return value
* is < 0, then the BPF op failed (for example if the loaded BPF
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index f9d3cf185827..b7918d4caa30 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -39,17 +39,19 @@ static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock,
}
int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
- struct msghdr *msg, int len)
+ struct msghdr *msg, int len, int flags)
{
struct iov_iter *iter = &msg->msg_iter;
+ int peek = flags & MSG_PEEK;
int i, ret, copied = 0;
+ struct sk_msg *msg_rx;
+
+ msg_rx = list_first_entry_or_null(&psock->ingress_msg,
+ struct sk_msg, list);
while (copied != len) {
struct scatterlist *sge;
- struct sk_msg *msg_rx;
- msg_rx = list_first_entry_or_null(&psock->ingress_msg,
- struct sk_msg, list);
if (unlikely(!msg_rx))
break;
@@ -70,22 +72,30 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
}
copied += copy;
- sge->offset += copy;
- sge->length -= copy;
- sk_mem_uncharge(sk, copy);
- msg_rx->sg.size -= copy;
- if (!sge->length) {
- i++;
- if (i == MAX_SKB_FRAGS)
- i = 0;
- if (!msg_rx->skb)
- put_page(page);
+ if (likely(!peek)) {
+ sge->offset += copy;
+ sge->length -= copy;
+ sk_mem_uncharge(sk, copy);
+ msg_rx->sg.size -= copy;
+
+ if (!sge->length) {
+ sk_msg_iter_var_next(i);
+ if (!msg_rx->skb)
+ put_page(page);
+ }
+ } else {
+ sk_msg_iter_var_next(i);
}
if (copied == len)
break;
} while (i != msg_rx->sg.end);
+ if (unlikely(peek)) {
+ msg_rx = list_next_entry(msg_rx, list);
+ continue;
+ }
+
msg_rx->sg.start = i;
if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) {
list_del(&msg_rx->list);
@@ -93,6 +103,8 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
consume_skb(msg_rx->skb);
kfree(msg_rx);
}
+ msg_rx = list_first_entry_or_null(&psock->ingress_msg,
+ struct sk_msg, list);
}
return copied;
@@ -115,7 +127,7 @@ int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
lock_sock(sk);
msg_bytes_ready:
- copied = __tcp_bpf_recvmsg(sk, psock, msg, len);
+ copied = __tcp_bpf_recvmsg(sk, psock, msg, len, flags);
if (!copied) {
int data, err = 0;
long timeo;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index a525fc4c2a4b..5cd88ba8acd1 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1478,7 +1478,8 @@ int tls_sw_recvmsg(struct sock *sk,
skb = tls_wait_data(sk, psock, flags, timeo, &err);
if (!skb) {
if (psock) {
- int ret = __tcp_bpf_recvmsg(sk, psock, msg, len);
+ int ret = __tcp_bpf_recvmsg(sk, psock,
+ msg, len, flags);
if (ret > 0) {
copied += ret;