diff options
Diffstat (limited to 'kernel/bpf/core.c')
| -rw-r--r-- | kernel/bpf/core.c | 198 |
1 files changed, 158 insertions, 40 deletions
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 08626b519ce2..733fca2634b7 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -26,6 +26,7 @@ #include <linux/bpf.h> #include <linux/btf.h> #include <linux/objtool.h> +#include <linux/overflow.h> #include <linux/rbtree_latch.h> #include <linux/kallsyms.h> #include <linux/rcupdate.h> @@ -64,8 +65,8 @@ #define OFF insn->off #define IMM insn->imm -struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma; -bool bpf_global_ma_set, bpf_global_percpu_ma_set; +struct bpf_mem_alloc bpf_global_ma; +bool bpf_global_ma_set; /* No hurry in this branch * @@ -88,13 +89,18 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns return NULL; } +/* tell bpf programs that include vmlinux.h kernel's PAGE_SIZE */ +enum page_size_enum { + __PAGE_SIZE = PAGE_SIZE +}; + struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags) { gfp_t gfp_flags = bpf_memcg_flags(GFP_KERNEL | __GFP_ZERO | gfp_extra_flags); struct bpf_prog_aux *aux; struct bpf_prog *fp; - size = round_up(size, PAGE_SIZE); + size = round_up(size, __PAGE_SIZE); fp = __vmalloc(size, gfp_flags); if (fp == NULL) return NULL; @@ -121,6 +127,9 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag #endif INIT_LIST_HEAD_RCU(&fp->aux->ksym.lnode); +#ifdef CONFIG_FINEIBT + INIT_LIST_HEAD_RCU(&fp->aux->ksym_prefix.lnode); +#endif mutex_init(&fp->aux->used_maps_mutex); mutex_init(&fp->aux->dst_mutex); @@ -371,14 +380,18 @@ static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, s32 end_old, static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, s32 end_old, s32 end_new, s32 curr, const bool probe_pass) { - const s32 off_min = S16_MIN, off_max = S16_MAX; + s64 off_min, off_max, off; s32 delta = end_new - end_old; - s32 off; - if (insn->code == (BPF_JMP32 | BPF_JA)) + if (insn->code == (BPF_JMP32 | BPF_JA)) { off = insn->imm; - else + off_min = S32_MIN; + off_max = S32_MAX; + } else { off = insn->off; + off_min = S16_MIN; + off_max = S16_MAX; + } if (curr < pos && curr + off + 1 >= end_old) off += delta; @@ -675,7 +688,7 @@ static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp) void bpf_prog_kallsyms_add(struct bpf_prog *fp) { if (!bpf_prog_kallsyms_candidate(fp) || - !bpf_capable()) + !bpf_token_capable(fp->aux->token, CAP_BPF)) return; bpf_prog_ksym_set_addr(fp); @@ -683,6 +696,23 @@ void bpf_prog_kallsyms_add(struct bpf_prog *fp) fp->aux->ksym.prog = true; bpf_ksym_add(&fp->aux->ksym); + +#ifdef CONFIG_FINEIBT + /* + * When FineIBT, code in the __cfi_foo() symbols can get executed + * and hence unwinder needs help. + */ + if (cfi_mode != CFI_FINEIBT) + return; + + snprintf(fp->aux->ksym_prefix.name, KSYM_NAME_LEN, + "__cfi_%s", fp->aux->ksym.name); + + fp->aux->ksym_prefix.start = (unsigned long) fp->bpf_func - 16; + fp->aux->ksym_prefix.end = (unsigned long) fp->bpf_func; + + bpf_ksym_add(&fp->aux->ksym_prefix); +#endif } void bpf_prog_kallsyms_del(struct bpf_prog *fp) @@ -691,6 +721,11 @@ void bpf_prog_kallsyms_del(struct bpf_prog *fp) return; bpf_ksym_del(&fp->aux->ksym); +#ifdef CONFIG_FINEIBT + if (cfi_mode != CFI_FINEIBT) + return; + bpf_ksym_del(&fp->aux->ksym_prefix); +#endif } static struct bpf_ksym *bpf_ksym_find(unsigned long addr) @@ -713,7 +748,7 @@ const char *__bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long symbol_start = ksym->start; unsigned long symbol_end = ksym->end; - strncpy(sym, ksym->name, KSYM_NAME_LEN); + strscpy(sym, ksym->name, KSYM_NAME_LEN); ret = sym; if (size) @@ -779,7 +814,7 @@ int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, if (it++ != symnum) continue; - strncpy(sym, ksym->name, KSYM_NAME_LEN); + strscpy(sym, ksym->name, KSYM_NAME_LEN); *value = ksym->start; *type = BPF_SYM_ELF_TYPE; @@ -815,7 +850,7 @@ int bpf_jit_add_poke_descriptor(struct bpf_prog *prog, return -EINVAL; } - tab = krealloc(tab, size * sizeof(*poke), GFP_KERNEL); + tab = krealloc_array(tab, size, sizeof(*poke), GFP_KERNEL); if (!tab) return -ENOMEM; @@ -859,7 +894,12 @@ static LIST_HEAD(pack_list); * CONFIG_MMU=n. Use PAGE_SIZE in these cases. */ #ifdef PMD_SIZE -#define BPF_PROG_PACK_SIZE (PMD_SIZE * num_possible_nodes()) +/* PMD_SIZE is really big for some archs. It doesn't make sense to + * reserve too much memory in one allocation. Hardcode BPF_PROG_PACK_SIZE to + * 2MiB * num_possible_nodes(). On most architectures PMD_SIZE will be + * greater than or equal to 2MB. + */ +#define BPF_PROG_PACK_SIZE (SZ_2M * num_possible_nodes()) #else #define BPF_PROG_PACK_SIZE PAGE_SIZE #endif @@ -869,23 +909,30 @@ static LIST_HEAD(pack_list); static struct bpf_prog_pack *alloc_new_pack(bpf_jit_fill_hole_t bpf_fill_ill_insns) { struct bpf_prog_pack *pack; + int err; pack = kzalloc(struct_size(pack, bitmap, BITS_TO_LONGS(BPF_PROG_CHUNK_COUNT)), GFP_KERNEL); if (!pack) return NULL; pack->ptr = bpf_jit_alloc_exec(BPF_PROG_PACK_SIZE); - if (!pack->ptr) { - kfree(pack); - return NULL; - } + if (!pack->ptr) + goto out; bpf_fill_ill_insns(pack->ptr, BPF_PROG_PACK_SIZE); bitmap_zero(pack->bitmap, BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE); - list_add_tail(&pack->list, &pack_list); set_vm_flush_reset_perms(pack->ptr); - set_memory_rox((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE); + err = set_memory_rox((unsigned long)pack->ptr, + BPF_PROG_PACK_SIZE / PAGE_SIZE); + if (err) + goto out; + list_add_tail(&pack->list, &pack_list); return pack; + +out: + bpf_jit_free_exec(pack->ptr); + kfree(pack); + return NULL; } void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns) @@ -900,9 +947,16 @@ void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns) size = round_up(size, PAGE_SIZE); ptr = bpf_jit_alloc_exec(size); if (ptr) { + int err; + bpf_fill_ill_insns(ptr, size); set_vm_flush_reset_perms(ptr); - set_memory_rox((unsigned long)ptr, size / PAGE_SIZE); + err = set_memory_rox((unsigned long)ptr, + size / PAGE_SIZE); + if (err) { + bpf_jit_free_exec(ptr); + ptr = NULL; + } } goto out; } @@ -928,20 +982,20 @@ out: return ptr; } -void bpf_prog_pack_free(struct bpf_binary_header *hdr) +void bpf_prog_pack_free(void *ptr, u32 size) { struct bpf_prog_pack *pack = NULL, *tmp; unsigned int nbits; unsigned long pos; mutex_lock(&pack_mutex); - if (hdr->size > BPF_PROG_PACK_SIZE) { - bpf_jit_free_exec(hdr); + if (size > BPF_PROG_PACK_SIZE) { + bpf_jit_free_exec(ptr); goto out; } list_for_each_entry(tmp, &pack_list, list) { - if ((void *)hdr >= tmp->ptr && (tmp->ptr + BPF_PROG_PACK_SIZE) > (void *)hdr) { + if (ptr >= tmp->ptr && (tmp->ptr + BPF_PROG_PACK_SIZE) > ptr) { pack = tmp; break; } @@ -950,10 +1004,10 @@ void bpf_prog_pack_free(struct bpf_binary_header *hdr) if (WARN_ONCE(!pack, "bpf_prog_pack bug\n")) goto out; - nbits = BPF_PROG_SIZE_TO_NBITS(hdr->size); - pos = ((unsigned long)hdr - (unsigned long)pack->ptr) >> BPF_PROG_CHUNK_SHIFT; + nbits = BPF_PROG_SIZE_TO_NBITS(size); + pos = ((unsigned long)ptr - (unsigned long)pack->ptr) >> BPF_PROG_CHUNK_SHIFT; - WARN_ONCE(bpf_arch_text_invalidate(hdr, hdr->size), + WARN_ONCE(bpf_arch_text_invalidate(ptr, size), "bpf_prog_pack bug: missing bpf_arch_text_invalidate?\n"); bitmap_clear(pack->bitmap, pos, nbits); @@ -1100,8 +1154,7 @@ bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **image_ptr, *rw_header = kvmalloc(size, GFP_KERNEL); if (!*rw_header) { - bpf_arch_text_copy(&ro_header->size, &size, sizeof(size)); - bpf_prog_pack_free(ro_header); + bpf_prog_pack_free(ro_header, size); bpf_jit_uncharge_modmem(size); return NULL; } @@ -1132,7 +1185,7 @@ int bpf_jit_binary_pack_finalize(struct bpf_prog *prog, kvfree(rw_header); if (IS_ERR(ptr)) { - bpf_prog_pack_free(ro_header); + bpf_prog_pack_free(ro_header, ro_header->size); return PTR_ERR(ptr); } return 0; @@ -1153,7 +1206,7 @@ void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header, { u32 size = ro_header->size; - bpf_prog_pack_free(ro_header); + bpf_prog_pack_free(ro_header, size); kvfree(rw_header); bpf_jit_uncharge_modmem(size); } @@ -1647,6 +1700,7 @@ bool bpf_opcode_in_insntable(u8 code) [BPF_LD | BPF_IND | BPF_B] = true, [BPF_LD | BPF_IND | BPF_H] = true, [BPF_LD | BPF_IND | BPF_W] = true, + [BPF_JMP | BPF_JCOND] = true, }; #undef BPF_INSN_3_TBL #undef BPF_INSN_2_TBL @@ -2165,6 +2219,7 @@ static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn u64 stack[stack_size / sizeof(u64)]; \ u64 regs[MAX_BPF_EXT_REG] = {}; \ \ + kmsan_unpoison_memory(stack, sizeof(stack)); \ FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \ ARG1 = (u64) (unsigned long) ctx; \ return ___bpf_prog_run(regs, insn); \ @@ -2178,6 +2233,7 @@ static u64 PROG_NAME_ARGS(stack_size)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, \ u64 stack[stack_size / sizeof(u64)]; \ u64 regs[MAX_BPF_EXT_REG]; \ \ + kmsan_unpoison_memory(stack, sizeof(stack)); \ FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \ BPF_R1 = r1; \ BPF_R2 = r2; \ @@ -2364,7 +2420,9 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) } finalize: - bpf_prog_lock_ro(fp); + *err = bpf_prog_lock_ro(fp); + if (*err) + return fp; /* The tail call compatibility check can only be done at * this late stage as we need to determine, if we deal @@ -2398,13 +2456,14 @@ EXPORT_SYMBOL(bpf_empty_prog_array); struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags) { + struct bpf_prog_array *p; + if (prog_cnt) - return kzalloc(sizeof(struct bpf_prog_array) + - sizeof(struct bpf_prog_array_item) * - (prog_cnt + 1), - flags); + p = kzalloc(struct_size(p, items, prog_cnt + 1), flags); + else + p = &bpf_empty_prog_array.hdr; - return &bpf_empty_prog_array.hdr; + return p; } void bpf_prog_array_free(struct bpf_prog_array *progs) @@ -2664,12 +2723,16 @@ void __bpf_free_used_maps(struct bpf_prog_aux *aux, struct bpf_map **used_maps, u32 len) { struct bpf_map *map; + bool sleepable; u32 i; + sleepable = aux->prog->sleepable; for (i = 0; i < len; i++) { map = used_maps[i]; if (map->ops->map_poke_untrack) map->ops->map_poke_untrack(map, aux); + if (sleepable) + atomic64_dec(&map->sleepable_refcnt); bpf_map_put(map); } } @@ -2747,12 +2810,13 @@ void bpf_prog_free(struct bpf_prog *fp) if (aux->dst_prog) bpf_prog_put(aux->dst_prog); + bpf_token_put(aux->token); INIT_WORK(&aux->work, bpf_prog_free_deferred); schedule_work(&aux->work); } EXPORT_SYMBOL_GPL(bpf_prog_free); -/* RNG for unpriviledged user space with separated state from prandom_u32(). */ +/* RNG for unprivileged user space with separated state from prandom_u32(). */ static DEFINE_PER_CPU(struct rnd_state, bpf_user_rnd_state); void bpf_user_rnd_init_once(void) @@ -2877,12 +2941,28 @@ bool __weak bpf_jit_needs_zext(void) return false; } +/* Return true if the JIT inlines the call to the helper corresponding to + * the imm. + * + * The verifier will not patch the insn->imm for the call to the helper if + * this returns true. + */ +bool __weak bpf_jit_inlines_helper_call(s32 imm) +{ + return false; +} + /* Return TRUE if the JIT backend supports mixing bpf2bpf and tailcalls. */ bool __weak bpf_jit_supports_subprog_tailcalls(void) { return false; } +bool __weak bpf_jit_supports_percpu_insn(void) +{ + return false; +} + bool __weak bpf_jit_supports_kfunc_call(void) { return false; @@ -2893,6 +2973,35 @@ bool __weak bpf_jit_supports_far_kfunc_call(void) return false; } +bool __weak bpf_jit_supports_arena(void) +{ + return false; +} + +bool __weak bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena) +{ + return false; +} + +u64 __weak bpf_arch_uaddress_limit(void) +{ +#if defined(CONFIG_64BIT) && defined(CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE) + return TASK_SIZE; +#else + return 0; +#endif +} + +/* Return TRUE if the JIT backend satisfies the following two conditions: + * 1) JIT backend supports atomic_xchg() on pointer-sized words. + * 2) Under the specific arch, the implementation of xchg() is the same + * as atomic_xchg() on pointer-sized words. + */ +bool __weak bpf_jit_supports_ptr_xchg(void) +{ + return false; +} + /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call * skb_copy_bits(), so provide a weak definition of it for NET-less config. */ @@ -2927,6 +3036,17 @@ void __weak arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, { } +/* for configs without MMU or 32-bit */ +__weak const struct bpf_map_ops arena_map_ops; +__weak u64 bpf_arena_get_user_vm_start(struct bpf_arena *arena) +{ + return 0; +} +__weak u64 bpf_arena_get_kern_vm_start(struct bpf_arena *arena) +{ + return 0; +} + #ifdef CONFIG_BPF_SYSCALL static int __init bpf_global_ma_init(void) { @@ -2934,9 +3054,7 @@ static int __init bpf_global_ma_init(void) ret = bpf_mem_alloc_init(&bpf_global_ma, 0, false); bpf_global_ma_set = !ret; - ret = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true); - bpf_global_percpu_ma_set = !ret; - return !bpf_global_ma_set || !bpf_global_percpu_ma_set; + return ret; } late_initcall(bpf_global_ma_init); #endif |