summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2020-05-15 10:43:52 -0700
committerDavid S. Miller <davem@davemloft.net>2020-05-15 10:43:52 -0700
commit3430223d393dd23734cc87177d704449cfc294a8 (patch)
tree8a82f0640e498d463d8b059aa0dabb27e5214b13 /include
parent0141792f8b7300006b874dda1c35acd0abd90d9d (diff)
parented24a7a852b542911479383d5c80b9a2b4bb8caa (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says: ==================== pull-request: bpf-next 2020-05-15 The following pull-request contains BPF updates for your *net-next* tree. We've added 37 non-merge commits during the last 1 day(s) which contain a total of 67 files changed, 741 insertions(+), 252 deletions(-). The main changes are: 1) bpf_xdp_adjust_tail() now allows to grow the tail as well, from Jesper. 2) bpftool can probe CONFIG_HZ, from Daniel. 3) CAP_BPF is introduced to isolate user processes that use BPF infra and to secure BPF networking services by dropping CAP_SYS_ADMIN requirement in certain cases, from Alexei. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r--include/linux/bpf.h18
-rw-r--r--include/linux/bpf_verifier.h3
-rw-r--r--include/linux/capability.h5
-rw-r--r--include/net/xdp.h27
-rw-r--r--include/net/xdp_sock.h11
-rw-r--r--include/uapi/linux/bpf.h4
-rw-r--r--include/uapi/linux/capability.h34
7 files changed, 97 insertions, 5 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c45d198ac38c..efe8836b5c48 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -19,6 +19,7 @@
#include <linux/mutex.h>
#include <linux/module.h>
#include <linux/kallsyms.h>
+#include <linux/capability.h>
struct bpf_verifier_env;
struct bpf_verifier_log;
@@ -119,7 +120,7 @@ struct bpf_map {
struct bpf_map_memory memory;
char name[BPF_OBJ_NAME_LEN];
u32 btf_vmlinux_value_type_id;
- bool unpriv_array;
+ bool bypass_spec_v1;
bool frozen; /* write-once; write-protected by freeze_mutex */
/* 22 bytes hole */
@@ -1095,6 +1096,21 @@ struct bpf_map *bpf_map_get_curr_or_next(u32 *id);
extern int sysctl_unprivileged_bpf_disabled;
+static inline bool bpf_allow_ptr_leaks(void)
+{
+ return perfmon_capable();
+}
+
+static inline bool bpf_bypass_spec_v1(void)
+{
+ return perfmon_capable();
+}
+
+static inline bool bpf_bypass_spec_v4(void)
+{
+ return perfmon_capable();
+}
+
int bpf_map_new_fd(struct bpf_map *map, int flags);
int bpf_prog_new_fd(struct bpf_prog *prog);
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 6abd5a778fcd..ea833087e853 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -375,6 +375,9 @@ struct bpf_verifier_env {
u32 used_map_cnt; /* number of used maps */
u32 id_gen; /* used to generate unique reg IDs */
bool allow_ptr_leaks;
+ bool bpf_capable;
+ bool bypass_spec_v1;
+ bool bypass_spec_v4;
bool seen_direct_write;
struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
const struct bpf_line_info *prev_linfo;
diff --git a/include/linux/capability.h b/include/linux/capability.h
index 027d7e4a853b..b4345b38a6be 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -256,6 +256,11 @@ static inline bool perfmon_capable(void)
return capable(CAP_PERFMON) || capable(CAP_SYS_ADMIN);
}
+static inline bool bpf_capable(void)
+{
+ return capable(CAP_BPF) || capable(CAP_SYS_ADMIN);
+}
+
/* audit system wants to get cap info from files as well */
extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps);
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 3cc6d5d84aa4..3094fccf5a88 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -6,6 +6,8 @@
#ifndef __LINUX_NET_XDP_H__
#define __LINUX_NET_XDP_H__
+#include <linux/skbuff.h> /* skb_shared_info */
+
/**
* DOC: XDP RX-queue information
*
@@ -70,13 +72,25 @@ struct xdp_buff {
void *data_hard_start;
unsigned long handle;
struct xdp_rxq_info *rxq;
+ u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/
};
+/* Reserve memory area at end-of data area.
+ *
+ * This macro reserves tailroom in the XDP buffer by limiting the
+ * XDP/BPF data access to data_hard_end. Notice same area (and size)
+ * is used for XDP_PASS, when constructing the SKB via build_skb().
+ */
+#define xdp_data_hard_end(xdp) \
+ ((xdp)->data_hard_start + (xdp)->frame_sz - \
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+
struct xdp_frame {
void *data;
u16 len;
u16 headroom;
- u16 metasize;
+ u32 metasize:8;
+ u32 frame_sz:24;
/* Lifetime of xdp_rxq_info is limited to NAPI/enqueue time,
* while mem info is valid on remote CPU.
*/
@@ -91,6 +105,10 @@ static inline void xdp_scrub_frame(struct xdp_frame *frame)
frame->dev_rx = NULL;
}
+/* Avoids inlining WARN macro in fast-path */
+void xdp_warn(const char *msg, const char *func, const int line);
+#define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__)
+
struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp);
/* Convert xdp_buff to xdp_frame */
@@ -111,6 +129,12 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
if (unlikely((headroom - metasize) < sizeof(*xdp_frame)))
return NULL;
+ /* Catch if driver didn't reserve tailroom for skb_shared_info */
+ if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) {
+ XDP_WARN("Driver BUG: missing reserved tailroom");
+ return NULL;
+ }
+
/* Store info in top of packet */
xdp_frame = xdp->data_hard_start;
@@ -118,6 +142,7 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
xdp_frame->len = xdp->data_end - xdp->data;
xdp_frame->headroom = headroom - sizeof(*xdp_frame);
xdp_frame->metasize = metasize;
+ xdp_frame->frame_sz = xdp->frame_sz;
/* rxq only valid until napi_schedule ends, convert to xdp_mem_info */
xdp_frame->mem = xdp->rxq->mem;
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 67191ccaab85..abd72de25fa4 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -236,6 +236,12 @@ static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 address,
else
return address + offset;
}
+
+static inline u32 xsk_umem_xdp_frame_sz(struct xdp_umem *umem)
+{
+ return umem->chunk_size_nohr + umem->headroom;
+}
+
#else
static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
@@ -366,6 +372,11 @@ static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 handle,
return 0;
}
+static inline u32 xsk_umem_xdp_frame_sz(struct xdp_umem *umem)
+{
+ return 0;
+}
+
static inline int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
{
return -EOPNOTSUPP;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 32cbf36c7729..b9b8a0f63b91 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2015,8 +2015,8 @@ union bpf_attr {
* int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
* Description
* Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
- * only possible to shrink the packet as of this writing,
- * therefore *delta* must be a negative integer.
+ * possible to both shrink and grow the packet tail.
+ * Shrink done via *delta* being a negative integer.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h
index e58c9636741b..c7372180a0a9 100644
--- a/include/uapi/linux/capability.h
+++ b/include/uapi/linux/capability.h
@@ -274,6 +274,7 @@ struct vfs_ns_cap_data {
arbitrary SCSI commands */
/* Allow setting encryption key on loopback filesystem */
/* Allow setting zone reclaim policy */
+/* Allow everything under CAP_BPF and CAP_PERFMON for backward compatibility */
#define CAP_SYS_ADMIN 21
@@ -374,7 +375,38 @@ struct vfs_ns_cap_data {
#define CAP_PERFMON 38
-#define CAP_LAST_CAP CAP_PERFMON
+/*
+ * CAP_BPF allows the following BPF operations:
+ * - Creating all types of BPF maps
+ * - Advanced verifier features
+ * - Indirect variable access
+ * - Bounded loops
+ * - BPF to BPF function calls
+ * - Scalar precision tracking
+ * - Larger complexity limits
+ * - Dead code elimination
+ * - And potentially other features
+ * - Loading BPF Type Format (BTF) data
+ * - Retrieve xlated and JITed code of BPF programs
+ * - Use bpf_spin_lock() helper
+ *
+ * CAP_PERFMON relaxes the verifier checks further:
+ * - BPF progs can use of pointer-to-integer conversions
+ * - speculation attack hardening measures are bypassed
+ * - bpf_probe_read to read arbitrary kernel memory is allowed
+ * - bpf_trace_printk to print kernel memory is allowed
+ *
+ * CAP_SYS_ADMIN is required to use bpf_probe_write_user.
+ *
+ * CAP_SYS_ADMIN is required to iterate system wide loaded
+ * programs, maps, links, BTFs and convert their IDs to file descriptors.
+ *
+ * CAP_PERFMON and CAP_BPF are required to load tracing programs.
+ * CAP_NET_ADMIN and CAP_BPF are required to load networking programs.
+ */
+#define CAP_BPF 39
+
+#define CAP_LAST_CAP CAP_BPF
#define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP)