author     Linus Torvalds <torvalds@linux-foundation.org>   2024-09-21 09:27:50 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>   2024-09-21 09:27:50 -0700
commit     440b65232829fad69947b8de983c13a525cc8871 (patch)
tree       3cab57fca48b43ba0e11804683b33b71743494c6 /lib
parent     1ec6d097897a35dfb55c4c31fc8633cf5be46497 (diff)
parent     5277d130947ba8c0d54c16eed89eb97f0b6d2e5a (diff)
Merge tag 'bpf-next-6.12' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Pull bpf updates from Alexei Starovoitov:
- Introduce '__attribute__((bpf_fastcall))' for helpers and kfuncs, with
  corresponding support in LLVM.
  It is similar to the existing 'no_caller_saved_registers' attribute in
  GCC/LLVM, with a provision for backward compatibility. It allows
  compilers to generate more efficient BPF code, assuming the verifier or
  JITs will inline or partially inline a helper/kfunc carrying such an
  attribute. bpf_cast_to_kern_ctx, bpf_rdonly_cast, and
  bpf_get_smp_processor_id are the first set of such helpers (a usage
  sketch follows this list).
- Harden and extend ELF build ID parsing logic.
  When called from a sleepable context, the relevant parts of the ELF
  file will be read to find and fetch the .note.gnu.build-id information.
  Also harden the logic to avoid TOCTOU, overflow, and out-of-bounds
  problems.
- Improvements and fixes for sched-ext:
- Allow passing BPF iterators as kfunc arguments
- Make the pointer returned from iter_next method trusted
- Fix x86 JIT convergence issue due to growing/shrinking conditional
jumps in variable length encoding
- BPF_LSM related:
  - Introduce a few VFS kfuncs and consolidate them in
    fs/bpf_fs_kfuncs.c
- Enforce correct range of return values from certain LSM hooks
- Disallow attaching to other LSM hooks
- Prerequisite work for upcoming Qdisc in BPF:
- Allow kptrs in program provided structs
- Support for gen_epilogue in verifier_ops
- Important fixes:
- Fix uprobe multi pid filter check
- Fix bpf_strtol and bpf_strtoul helpers
- Track equal scalars history on per-instruction level
- Fix tailcall hierarchy on x86 and arm64
- Fix signed division overflow to prevent INT_MIN/-1 trap on x86
  - Fix getting the kernel stack in BPF progs attached to tracepoint:syscall
- Selftests:
- Add uprobe bench/stress tool
- Generate file dependencies to drastically improve re-build time
- Match JIT-ed and BPF asm with __xlated/__jited keywords
- Convert older tests to test_progs framework
- Add support for RISC-V
  - A few fixes when BPF programs are compiled with the GCC-BPF backend
    (support for GCC-BPF in BPF CI is ongoing in parallel)
- Add traffic monitor
- Enable cross compile and musl libc
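
As a quick usage sketch of the bpf_fastcall item above (not part of this merge,
and assuming an LLVM build that understands the attribute): a BPF C program can
mark a kfunc declaration so clang avoids spilling live values in R1-R5 around
the call, relying on the verifier/JIT to inline it. The __bpf_fastcall fallback
macro, the section name, and the program body below are illustrative
assumptions, not code from this tree.

// SPDX-License-Identifier: GPL-2.0
/* illustrative sketch only -- not code from this merge */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

/* assumed guard: compiles to nothing on compilers without the attribute */
#if __has_attribute(bpf_fastcall)
#define __bpf_fastcall __attribute__((bpf_fastcall))
#else
#define __bpf_fastcall
#endif

/* the verifier/JIT is expected to inline this kfunc, so the attribute lets
 * clang keep live values in caller-saved registers across the call
 */
extern void *bpf_cast_to_kern_ctx(void *obj) __bpf_fastcall __ksym;

SEC("tc")
int peek_kern_skb(struct __sk_buff *ctx)
{
	struct sk_buff *skb = bpf_cast_to_kern_ctx(ctx);

	/* read a field through the trusted kernel-side view of the context */
	return skb->len > 0 ? 0 : 1;
}

char LICENSE[] SEC("license") = "GPL";

Without the attribute, clang has to assume the call clobbers R1-R5 and spill
any live state around it; with it, those spill/fill pairs go away once the
verifier inlines the kfunc.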
* tag 'bpf-next-6.12' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (260 commits)
btf: require pahole 1.21+ for DEBUG_INFO_BTF with default DWARF version
btf: move pahole check in scripts/link-vmlinux.sh to lib/Kconfig.debug
btf: remove redundant CONFIG_BPF test in scripts/link-vmlinux.sh
bpf: Call the missed kfree() when there is no special field in btf
bpf: Call the missed btf_record_free() when map creation fails
selftests/bpf: Add a test case to write mtu result into .rodata
selftests/bpf: Add a test case to write strtol result into .rodata
selftests/bpf: Rename ARG_PTR_TO_LONG test description
selftests/bpf: Fix ARG_PTR_TO_LONG {half-,}uninitialized test
bpf: Zero former ARG_PTR_TO_{LONG,INT} args in case of error
bpf: Improve check_raw_mode_ok test for MEM_UNINIT-tagged types
bpf: Fix helper writes to read-only maps
bpf: Remove truncation test in bpf_strtol and bpf_strtoul helpers
bpf: Fix bpf_strtol and bpf_strtoul helpers for 32bit
selftests/bpf: Add tests for sdiv/smod overflow cases
bpf: Fix a sdiv overflow issue
libbpf: Add bpf_object__token_fd accessor
docs/bpf: Add missing BPF program types to docs
docs/bpf: Add constant values for linkages
bpf: Use fake pt_regs when doing bpf syscall tracepoint tracing
...
Diffstat (limited to 'lib')
-rw-r--r--   lib/Kconfig.debug |   8
-rw-r--r--   lib/buildid.c     | 397
2 files changed, 297 insertions, 108 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 62fce4ce0b16..bc8faa4509e1 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -379,13 +379,15 @@ config DEBUG_INFO_BTF
 	depends on !DEBUG_INFO_SPLIT && !DEBUG_INFO_REDUCED
 	depends on !GCC_PLUGIN_RANDSTRUCT || COMPILE_TEST
 	depends on BPF_SYSCALL
-	depends on !DEBUG_INFO_DWARF5 || PAHOLE_VERSION >= 121
+	depends on PAHOLE_VERSION >= 116
+	depends on DEBUG_INFO_DWARF4 || PAHOLE_VERSION >= 121
 	# pahole uses elfutils, which does not have support for Hexagon relocations
 	depends on !HEXAGON
 	help
 	  Generate deduplicated BTF type information from DWARF debug info.
-	  Turning this on expects presence of pahole tool, which will convert
-	  DWARF type info into equivalent deduplicated BTF type info.
+	  Turning this on requires pahole v1.16 or later (v1.21 or later to
+	  support DWARF 5), which will convert DWARF type info into equivalent
+	  deduplicated BTF type info.
 
 config PAHOLE_HAS_SPLIT_BTF
 	def_bool PAHOLE_VERSION >= 119
diff --git a/lib/buildid.c b/lib/buildid.c
index e02b5507418b..290641d92ac1 100644
--- a/lib/buildid.c
+++ b/lib/buildid.c
@@ -8,154 +8,302 @@
 #define BUILD_ID 3
 
+#define MAX_PHDR_CNT 256
+
+struct freader {
+	void *buf;
+	u32 buf_sz;
+	int err;
+	union {
+		struct {
+			struct file *file;
+			struct folio *folio;
+			void *addr;
+			loff_t folio_off;
+			bool may_fault;
+		};
+		struct {
+			const char *data;
+			u64 data_sz;
+		};
+	};
+};
+
+static void freader_init_from_file(struct freader *r, void *buf, u32 buf_sz,
+				   struct file *file, bool may_fault)
+{
+	memset(r, 0, sizeof(*r));
+	r->buf = buf;
+	r->buf_sz = buf_sz;
+	r->file = file;
+	r->may_fault = may_fault;
+}
+
+static void freader_init_from_mem(struct freader *r, const char *data, u64 data_sz)
+{
+	memset(r, 0, sizeof(*r));
+	r->data = data;
+	r->data_sz = data_sz;
+}
+
+static void freader_put_folio(struct freader *r)
+{
+	if (!r->folio)
+		return;
+	kunmap_local(r->addr);
+	folio_put(r->folio);
+	r->folio = NULL;
+}
+
+static int freader_get_folio(struct freader *r, loff_t file_off)
+{
+	/* check if we can just reuse current folio */
+	if (r->folio && file_off >= r->folio_off &&
+	    file_off < r->folio_off + folio_size(r->folio))
+		return 0;
+
+	freader_put_folio(r);
+
+	r->folio = filemap_get_folio(r->file->f_mapping, file_off >> PAGE_SHIFT);
+
+	/* if sleeping is allowed, wait for the page, if necessary */
+	if (r->may_fault && (IS_ERR(r->folio) || !folio_test_uptodate(r->folio))) {
+		filemap_invalidate_lock_shared(r->file->f_mapping);
+		r->folio = read_cache_folio(r->file->f_mapping, file_off >> PAGE_SHIFT,
+					    NULL, r->file);
+		filemap_invalidate_unlock_shared(r->file->f_mapping);
+	}
+
+	if (IS_ERR(r->folio) || !folio_test_uptodate(r->folio)) {
+		if (!IS_ERR(r->folio))
+			folio_put(r->folio);
+		r->folio = NULL;
+		return -EFAULT;
+	}
+
+	r->folio_off = folio_pos(r->folio);
+	r->addr = kmap_local_folio(r->folio, 0);
+
+	return 0;
+}
+
+static const void *freader_fetch(struct freader *r, loff_t file_off, size_t sz)
+{
+	size_t folio_sz;
+
+	/* provided internal temporary buffer should be sized correctly */
+	if (WARN_ON(r->buf && sz > r->buf_sz)) {
+		r->err = -E2BIG;
+		return NULL;
+	}
+
+	if (unlikely(file_off + sz < file_off)) {
+		r->err = -EOVERFLOW;
+		return NULL;
+	}
+
+	/* working with memory buffer is much more straightforward */
+	if (!r->buf) {
+		if (file_off + sz > r->data_sz) {
+			r->err = -ERANGE;
+			return NULL;
+		}
+		return r->data + file_off;
+	}
+
+	/* fetch or reuse folio for given file offset */
+	r->err = freader_get_folio(r, file_off);
+	if (r->err)
+		return NULL;
+
+	/* if requested data is crossing folio boundaries, we have to copy
+	 * everything into our local buffer to keep a simple linear memory
+	 * access interface
+	 */
+	folio_sz = folio_size(r->folio);
+	if (file_off + sz > r->folio_off + folio_sz) {
+		int part_sz = r->folio_off + folio_sz - file_off;
+
+		/* copy the part that resides in the current folio */
+		memcpy(r->buf, r->addr + (file_off - r->folio_off), part_sz);
+
+		/* fetch next folio */
+		r->err = freader_get_folio(r, r->folio_off + folio_sz);
+		if (r->err)
+			return NULL;
+
+		/* copy the rest of requested data */
+		memcpy(r->buf + part_sz, r->addr, sz - part_sz);
+
+		return r->buf;
+	}
+
+	/* if data fits in a single folio, just return direct pointer */
+	return r->addr + (file_off - r->folio_off);
+}
+
+static void freader_cleanup(struct freader *r)
+{
+	if (!r->buf)
+		return; /* non-file-backed mode */
+
+	freader_put_folio(r);
+}
+
 /*
  * Parse build id from the note segment. This logic can be shared between
  * 32-bit and 64-bit system, because Elf32_Nhdr and Elf64_Nhdr are
  * identical.
  */
-static int parse_build_id_buf(unsigned char *build_id,
-			      __u32 *size,
-			      const void *note_start,
-			      Elf32_Word note_size)
+static int parse_build_id(struct freader *r, unsigned char *build_id, __u32 *size,
+			  loff_t note_off, Elf32_Word note_size)
 {
-	Elf32_Word note_offs = 0, new_offs;
+	const char note_name[] = "GNU";
+	const size_t note_name_sz = sizeof(note_name);
+	u32 build_id_off, new_off, note_end, name_sz, desc_sz;
+	const Elf32_Nhdr *nhdr;
+	const char *data;
+
+	if (check_add_overflow(note_off, note_size, &note_end))
+		return -EINVAL;
 
-	while (note_offs + sizeof(Elf32_Nhdr) < note_size) {
-		Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs);
+	while (note_end - note_off > sizeof(Elf32_Nhdr) + note_name_sz) {
+		nhdr = freader_fetch(r, note_off, sizeof(Elf32_Nhdr) + note_name_sz);
+		if (!nhdr)
+			return r->err;
+
+		name_sz = READ_ONCE(nhdr->n_namesz);
+		desc_sz = READ_ONCE(nhdr->n_descsz);
+
+		new_off = note_off + sizeof(Elf32_Nhdr);
+		if (check_add_overflow(new_off, ALIGN(name_sz, 4), &new_off) ||
+		    check_add_overflow(new_off, ALIGN(desc_sz, 4), &new_off) ||
+		    new_off > note_end)
+			break;
 
 		if (nhdr->n_type == BUILD_ID &&
-		    nhdr->n_namesz == sizeof("GNU") &&
-		    !strcmp((char *)(nhdr + 1), "GNU") &&
-		    nhdr->n_descsz > 0 &&
-		    nhdr->n_descsz <= BUILD_ID_SIZE_MAX) {
-			memcpy(build_id,
-			       note_start + note_offs +
-			       ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
-			       nhdr->n_descsz);
-			memset(build_id + nhdr->n_descsz, 0,
-			       BUILD_ID_SIZE_MAX - nhdr->n_descsz);
+		    name_sz == note_name_sz &&
+		    memcmp(nhdr + 1, note_name, note_name_sz) == 0 &&
+		    desc_sz > 0 && desc_sz <= BUILD_ID_SIZE_MAX) {
+			build_id_off = note_off + sizeof(Elf32_Nhdr) + ALIGN(note_name_sz, 4);
+
+			/* freader_fetch() will invalidate nhdr pointer */
+			data = freader_fetch(r, build_id_off, desc_sz);
+			if (!data)
+				return r->err;
+
+			memcpy(build_id, data, desc_sz);
+			memset(build_id + desc_sz, 0, BUILD_ID_SIZE_MAX - desc_sz);
 			if (size)
-				*size = nhdr->n_descsz;
+				*size = desc_sz;
 			return 0;
 		}
-		new_offs = note_offs + sizeof(Elf32_Nhdr) +
-			ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4);
-		if (new_offs <= note_offs)	/* overflow */
-			break;
-		note_offs = new_offs;
+
+		note_off = new_off;
 	}
 	return -EINVAL;
 }
 
-static inline int parse_build_id(const void *page_addr,
-				 unsigned char *build_id,
-				 __u32 *size,
-				 const void *note_start,
-				 Elf32_Word note_size)
+/* Parse build ID from 32-bit ELF */
+static int get_build_id_32(struct freader *r, unsigned char *build_id, __u32 *size)
 {
-	/* check for overflow */
-	if (note_start < page_addr || note_start + note_size < note_start)
-		return -EINVAL;
+	const Elf32_Ehdr *ehdr;
+	const Elf32_Phdr *phdr;
+	__u32 phnum, phoff, i;
 
-	/* only supports note that fits in the first page */
-	if (note_start + note_size > page_addr + PAGE_SIZE)
-		return -EINVAL;
+	ehdr = freader_fetch(r, 0, sizeof(Elf32_Ehdr));
+	if (!ehdr)
+		return r->err;
 
-	return parse_build_id_buf(build_id, size, note_start, note_size);
-}
+	/* subsequent freader_fetch() calls invalidate pointers, so remember locally */
+	phnum = READ_ONCE(ehdr->e_phnum);
+	phoff = READ_ONCE(ehdr->e_phoff);
 
-/* Parse build ID from 32-bit ELF */
-static int get_build_id_32(const void *page_addr, unsigned char *build_id,
-			   __u32 *size)
-{
-	Elf32_Ehdr *ehdr = (Elf32_Ehdr *)page_addr;
-	Elf32_Phdr *phdr;
-	int i;
-
-	/*
-	 * FIXME
-	 * Neither ELF spec nor ELF loader require that program headers
-	 * start immediately after ELF header.
-	 */
-	if (ehdr->e_phoff != sizeof(Elf32_Ehdr))
-		return -EINVAL;
-	/* only supports phdr that fits in one page */
-	if (ehdr->e_phnum >
-	    (PAGE_SIZE - sizeof(Elf32_Ehdr)) / sizeof(Elf32_Phdr))
+	/* set upper bound on amount of segments (phdrs) we iterate */
+	if (phnum > MAX_PHDR_CNT)
+		phnum = MAX_PHDR_CNT;
+
+	/* check that phoff is not large enough to cause an overflow */
+	if (phoff + phnum * sizeof(Elf32_Phdr) < phoff)
 		return -EINVAL;
-	phdr = (Elf32_Phdr *)(page_addr + sizeof(Elf32_Ehdr));
+	for (i = 0; i < phnum; ++i) {
+		phdr = freader_fetch(r, phoff + i * sizeof(Elf32_Phdr), sizeof(Elf32_Phdr));
+		if (!phdr)
+			return r->err;
 
-	for (i = 0; i < ehdr->e_phnum; ++i) {
-		if (phdr[i].p_type == PT_NOTE &&
-		    !parse_build_id(page_addr, build_id, size,
-				    page_addr + phdr[i].p_offset,
-				    phdr[i].p_filesz))
+		if (phdr->p_type == PT_NOTE &&
+		    !parse_build_id(r, build_id, size, READ_ONCE(phdr->p_offset),
+				    READ_ONCE(phdr->p_filesz)))
 			return 0;
 	}
 	return -EINVAL;
 }
 
 /* Parse build ID from 64-bit ELF */
-static int get_build_id_64(const void *page_addr, unsigned char *build_id,
-			   __u32 *size)
+static int get_build_id_64(struct freader *r, unsigned char *build_id, __u32 *size)
 {
-	Elf64_Ehdr *ehdr = (Elf64_Ehdr *)page_addr;
-	Elf64_Phdr *phdr;
-	int i;
-
-	/*
-	 * FIXME
-	 * Neither ELF spec nor ELF loader require that program headers
-	 * start immediately after ELF header.
-	 */
-	if (ehdr->e_phoff != sizeof(Elf64_Ehdr))
-		return -EINVAL;
-	/* only supports phdr that fits in one page */
-	if (ehdr->e_phnum >
-	    (PAGE_SIZE - sizeof(Elf64_Ehdr)) / sizeof(Elf64_Phdr))
+	const Elf64_Ehdr *ehdr;
+	const Elf64_Phdr *phdr;
+	__u32 phnum, i;
+	__u64 phoff;
+
+	ehdr = freader_fetch(r, 0, sizeof(Elf64_Ehdr));
+	if (!ehdr)
+		return r->err;
+
+	/* subsequent freader_fetch() calls invalidate pointers, so remember locally */
+	phnum = READ_ONCE(ehdr->e_phnum);
+	phoff = READ_ONCE(ehdr->e_phoff);
+
+	/* set upper bound on amount of segments (phdrs) we iterate */
+	if (phnum > MAX_PHDR_CNT)
+		phnum = MAX_PHDR_CNT;
+
+	/* check that phoff is not large enough to cause an overflow */
+	if (phoff + phnum * sizeof(Elf64_Phdr) < phoff)
 		return -EINVAL;
-	phdr = (Elf64_Phdr *)(page_addr + sizeof(Elf64_Ehdr));
+	for (i = 0; i < phnum; ++i) {
+		phdr = freader_fetch(r, phoff + i * sizeof(Elf64_Phdr), sizeof(Elf64_Phdr));
+		if (!phdr)
+			return r->err;
 
-	for (i = 0; i < ehdr->e_phnum; ++i) {
-		if (phdr[i].p_type == PT_NOTE &&
-		    !parse_build_id(page_addr, build_id, size,
-				    page_addr + phdr[i].p_offset,
-				    phdr[i].p_filesz))
+		if (phdr->p_type == PT_NOTE &&
+		    !parse_build_id(r, build_id, size, READ_ONCE(phdr->p_offset),
+				    READ_ONCE(phdr->p_filesz)))
 			return 0;
 	}
+
 	return -EINVAL;
 }
 
-/*
- * Parse build ID of ELF file mapped to vma
- * @vma: vma object
- * @build_id: buffer to store build id, at least BUILD_ID_SIZE long
- * @size: returns actual build id size in case of success
- *
- * Return: 0 on success, -EINVAL otherwise
- */
-int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id,
-		   __u32 *size)
+/* enough for Elf64_Ehdr, Elf64_Phdr, and all the smaller requests */
+#define MAX_FREADER_BUF_SZ 64
+
+static int __build_id_parse(struct vm_area_struct *vma, unsigned char *build_id,
+			    __u32 *size, bool may_fault)
 {
-	Elf32_Ehdr *ehdr;
-	struct page *page;
-	void *page_addr;
+	const Elf32_Ehdr *ehdr;
+	struct freader r;
+	char buf[MAX_FREADER_BUF_SZ];
 	int ret;
 
 	/* only works for page backed storage */
 	if (!vma->vm_file)
 		return -EINVAL;
 
-	page = find_get_page(vma->vm_file->f_mapping, 0);
-	if (!page)
-		return -EFAULT;	/* page not mapped */
+	freader_init_from_file(&r, buf, sizeof(buf), vma->vm_file, may_fault);
+
+	/* fetch first 18 bytes of ELF header for checks */
+	ehdr = freader_fetch(&r, 0, offsetofend(Elf32_Ehdr, e_type));
+	if (!ehdr) {
+		ret = r.err;
+		goto out;
+	}
 
 	ret = -EINVAL;
-	page_addr = kmap_local_page(page);
-	ehdr = (Elf32_Ehdr *)page_addr;
 
 	/* compare magic x7f "ELF" */
 	if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0)
@@ -166,15 +314,46 @@ int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id,
 		goto out;
 
 	if (ehdr->e_ident[EI_CLASS] == ELFCLASS32)
-		ret = get_build_id_32(page_addr, build_id, size);
+		ret = get_build_id_32(&r, build_id, size);
 	else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64)
-		ret = get_build_id_64(page_addr, build_id, size);
+		ret = get_build_id_64(&r, build_id, size);
 out:
-	kunmap_local(page_addr);
-	put_page(page);
+	freader_cleanup(&r);
 	return ret;
 }
 
+/*
+ * Parse build ID of ELF file mapped to vma
+ * @vma: vma object
+ * @build_id: buffer to store build id, at least BUILD_ID_SIZE long
+ * @size: returns actual build id size in case of success
+ *
+ * Assumes no page fault can be taken, so if relevant portions of ELF file are
+ * not already paged in, fetching of build ID fails.
+ *
+ * Return: 0 on success; negative error, otherwise
+ */
+int build_id_parse_nofault(struct vm_area_struct *vma, unsigned char *build_id, __u32 *size)
+{
+	return __build_id_parse(vma, build_id, size, false /* !may_fault */);
+}
+
+/*
+ * Parse build ID of ELF file mapped to VMA
+ * @vma: vma object
+ * @build_id: buffer to store build id, at least BUILD_ID_SIZE long
+ * @size: returns actual build id size in case of success
+ *
+ * Assumes faultable context and can cause page faults to bring in file data
+ * into page cache.
+ *
+ * Return: 0 on success; negative error, otherwise
+ */
+int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id, __u32 *size)
+{
+	return __build_id_parse(vma, build_id, size, true /* may_fault */);
+}
+
 /**
  * build_id_parse_buf - Get build ID from a buffer
  * @buf: ELF note section(s) to parse
@@ -185,7 +364,15 @@ out:
  */
 int build_id_parse_buf(const void *buf, unsigned char *build_id, u32 buf_size)
 {
-	return parse_build_id_buf(build_id, NULL, buf, buf_size);
+	struct freader r;
+	int err;
+
+	freader_init_from_mem(&r, buf, buf_size);
+
+	err = parse_build_id(&r, build_id, NULL, 0, buf_size);
+
+	freader_cleanup(&r);
+	return err;
 }
 
 #if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) || IS_ENABLED(CONFIG_VMCORE_INFO)
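
As a caller-side sketch of the two entry points added above: only
build_id_parse(), build_id_parse_nofault() and BUILD_ID_SIZE_MAX come from the
kernel sources; the wrapper name and the sleepable-context flag below are made
up for illustration.

/* illustrative sketch only -- not part of this merge */
#include <linux/buildid.h>
#include <linux/mm_types.h>

static int record_vma_build_id(struct vm_area_struct *vma, bool sleepable_ctx)
{
	unsigned char build_id[BUILD_ID_SIZE_MAX];
	__u32 sz;
	int err;

	if (sleepable_ctx)
		/* may fault: missing parts of the ELF file are read from disk */
		err = build_id_parse(vma, build_id, &sz);
	else
		/* must not fault: fails unless the needed pages are resident */
		err = build_id_parse_nofault(vma, build_id, &sz);

	if (err)
		return err;

	/* build_id[0..sz-1] now holds the .note.gnu.build-id payload */
	return 0;
}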