diff options
Diffstat (limited to 'arch/x86')
| -rw-r--r-- | arch/x86/Kconfig | 1 | ||||
| -rw-r--r-- | arch/x86/entry/vdso/vma.c | 3 | ||||
| -rw-r--r-- | arch/x86/include/asm/alternative.h | 14 | ||||
| -rw-r--r-- | arch/x86/include/asm/page.h | 2 | ||||
| -rw-r--r-- | arch/x86/include/asm/page_64.h | 2 | ||||
| -rw-r--r-- | arch/x86/include/asm/percpu.h | 7 | ||||
| -rw-r--r-- | arch/x86/include/asm/pgtable.h | 37 | ||||
| -rw-r--r-- | arch/x86/include/asm/pgtable_64_types.h | 2 | ||||
| -rw-r--r-- | arch/x86/include/asm/set_memory.h | 1 | ||||
| -rw-r--r-- | arch/x86/include/asm/text-patching.h | 1 | ||||
| -rw-r--r-- | arch/x86/kernel/alternative.c | 181 | ||||
| -rw-r--r-- | arch/x86/kernel/ftrace.c | 30 | ||||
| -rw-r--r-- | arch/x86/kernel/module.c | 45 | ||||
| -rw-r--r-- | arch/x86/kernel/sys_x86_64.c | 24 | ||||
| -rw-r--r-- | arch/x86/mm/hugetlbpage.c | 101 | ||||
| -rw-r--r-- | arch/x86/mm/init.c | 37 | ||||
| -rw-r--r-- | arch/x86/mm/init_64.c | 30 | ||||
| -rw-r--r-- | arch/x86/mm/kaslr.c | 14 | ||||
| -rw-r--r-- | arch/x86/mm/pat/set_memory.c | 8 |
19 files changed, 297 insertions, 243 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 948707a3615e..6c633d93c639 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -83,6 +83,7 @@ config X86 select ARCH_HAS_DMA_OPS if GART_IOMMU || XEN select ARCH_HAS_EARLY_DEBUG if KGDB select ARCH_HAS_ELF_RANDOMIZE + select ARCH_HAS_EXECMEM_ROX if X86_64 select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index bfc7cabf4017..39e6efc1a9ca 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -48,7 +48,8 @@ int __init init_vdso_image(const struct vdso_image *image) apply_alternatives((struct alt_instr *)(image->data + image->alt), (struct alt_instr *)(image->data + image->alt + - image->alt_len)); + image->alt_len), + NULL); return 0; } diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index ca9ae606aab9..dc03a647776d 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -96,16 +96,16 @@ extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; * instructions were patched in already: */ extern int alternatives_patched; +struct module; extern void alternative_instructions(void); -extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); -extern void apply_retpolines(s32 *start, s32 *end); -extern void apply_returns(s32 *start, s32 *end); -extern void apply_seal_endbr(s32 *start, s32 *end); +extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end, + struct module *mod); +extern void apply_retpolines(s32 *start, s32 *end, struct module *mod); +extern void apply_returns(s32 *start, s32 *end, struct module *mod); +extern void apply_seal_endbr(s32 *start, s32 *end, struct module *mod); extern void apply_fineibt(s32 *start_retpoline, s32 *end_retpoine, - s32 *start_cfi, s32 *end_cfi); - -struct module; + s32 *start_cfi, s32 *end_cfi, struct module *mod); struct callthunk_sites { s32 *call_start, *call_end; diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index 1b93ff80b43b..c9fe207916f4 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -35,7 +35,7 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr, } #define vma_alloc_zeroed_movable_folio(vma, vaddr) \ - vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr, false) + vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr) #ifndef __pa #define __pa(x) __phys_addr((unsigned long)(x)) diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index f3d257c45225..d63576608ce7 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -17,7 +17,7 @@ extern unsigned long phys_base; extern unsigned long page_offset_base; extern unsigned long vmalloc_base; extern unsigned long vmemmap_base; -extern unsigned long physmem_end; +extern unsigned long direct_map_physmem_end; static __always_inline unsigned long __phys_addr_nodebug(unsigned long x) { diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index c55a79d5feae..e525cd85f999 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -234,9 +234,10 @@ do { \ */ #define percpu_add_op(size, qual, var, val) \ do { \ - const int pao_ID__ = (__builtin_constant_p(val) && \ - ((val) == 1 || (val) == -1)) ? \ - (int)(val) : 0; \ + const int pao_ID__ = \ + (__builtin_constant_p(val) && \ + ((val) == 1 || \ + (val) == (typeof(val))-1)) ? (int)(val) : 0; \ \ if (0) { \ typeof(var) pao_tmp__; \ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 4c2d080d26b4..593f10aabd45 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1775,6 +1775,43 @@ bool arch_is_platform_page(u64 paddr); #define arch_is_platform_page arch_is_platform_page #endif +/* + * Use set_p*_safe(), and elide TLB flushing, when confident that *no* + * TLB flush will be required as a result of the "set". For example, use + * in scenarios where it is known ahead of time that the routine is + * setting non-present entries, or re-setting an existing entry to the + * same value. Otherwise, use the typical "set" helpers and flush the + * TLB. + */ +#define set_pte_safe(ptep, pte) \ +({ \ + WARN_ON_ONCE(pte_present(*ptep) && !pte_same(*ptep, pte)); \ + set_pte(ptep, pte); \ +}) + +#define set_pmd_safe(pmdp, pmd) \ +({ \ + WARN_ON_ONCE(pmd_present(*pmdp) && !pmd_same(*pmdp, pmd)); \ + set_pmd(pmdp, pmd); \ +}) + +#define set_pud_safe(pudp, pud) \ +({ \ + WARN_ON_ONCE(pud_present(*pudp) && !pud_same(*pudp, pud)); \ + set_pud(pudp, pud); \ +}) + +#define set_p4d_safe(p4dp, p4d) \ +({ \ + WARN_ON_ONCE(p4d_present(*p4dp) && !p4d_same(*p4dp, p4d)); \ + set_p4d(p4dp, p4d); \ +}) + +#define set_pgd_safe(pgdp, pgd) \ +({ \ + WARN_ON_ONCE(pgd_present(*pgdp) && !pgd_same(*pgdp, pgd)); \ + set_pgd(pgdp, pgd); \ +}) #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_H */ diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index a98e53491a4e..ec68f8369bdc 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -141,7 +141,7 @@ extern unsigned int ptrs_per_p4d; #endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */ #ifdef CONFIG_RANDOMIZE_MEMORY -# define PHYSMEM_END physmem_end +# define DIRECT_MAP_PHYSMEM_END direct_map_physmem_end #endif /* diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index 4b2abce2e3e7..cc62ef70ccc0 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -89,6 +89,7 @@ int set_pages_rw(struct page *page, int numpages); int set_direct_map_invalid_noflush(struct page *page); int set_direct_map_default_noflush(struct page *page); +int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid); bool kernel_page_present(struct page *page); extern int kernel_set_to_readonly; diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 6259f1937fe7..ab9e143ec9fe 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -35,6 +35,7 @@ extern void *text_poke(void *addr, const void *opcode, size_t len); extern void text_poke_sync(void); extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len); extern void *text_poke_copy(void *addr, const void *opcode, size_t len); +#define text_poke_copy text_poke_copy extern void *text_poke_copy_locked(void *addr, const void *opcode, size_t len, bool core_ok); extern void *text_poke_set(void *addr, int c, size_t len); extern int poke_int3_handler(struct pt_regs *regs); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index d17518ca19b8..243843e44e89 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -392,8 +392,10 @@ EXPORT_SYMBOL(BUG_func); * Rewrite the "call BUG_func" replacement to point to the target of the * indirect pv_ops call "call *disp(%ip)". */ -static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a) +static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a, + struct module *mod) { + u8 *wr_instr = module_writable_address(mod, instr); void *target, *bug = &BUG_func; s32 disp; @@ -403,14 +405,14 @@ static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a) } if (a->instrlen != 6 || - instr[0] != CALL_RIP_REL_OPCODE || - instr[1] != CALL_RIP_REL_MODRM) { + wr_instr[0] != CALL_RIP_REL_OPCODE || + wr_instr[1] != CALL_RIP_REL_MODRM) { pr_err("ALT_FLAG_DIRECT_CALL set for unrecognized indirect call\n"); BUG(); } /* Skip CALL_RIP_REL_OPCODE and CALL_RIP_REL_MODRM */ - disp = *(s32 *)(instr + 2); + disp = *(s32 *)(wr_instr + 2); #ifdef CONFIG_X86_64 /* ff 15 00 00 00 00 call *0x0(%rip) */ /* target address is stored at "next instruction + disp". */ @@ -448,7 +450,8 @@ static inline u8 * instr_va(struct alt_instr *i) * to refetch changed I$ lines. */ void __init_or_module noinline apply_alternatives(struct alt_instr *start, - struct alt_instr *end) + struct alt_instr *end, + struct module *mod) { u8 insn_buff[MAX_PATCH_LEN]; u8 *instr, *replacement; @@ -477,6 +480,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, */ for (a = start; a < end; a++) { int insn_buff_sz = 0; + u8 *wr_instr, *wr_replacement; /* * In case of nested ALTERNATIVE()s the outer alternative might @@ -490,7 +494,11 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, } instr = instr_va(a); + wr_instr = module_writable_address(mod, instr); + replacement = (u8 *)&a->repl_offset + a->repl_offset; + wr_replacement = module_writable_address(mod, replacement); + BUG_ON(a->instrlen > sizeof(insn_buff)); BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32); @@ -501,9 +509,9 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, * patch if feature is *NOT* present. */ if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT)) { - memcpy(insn_buff, instr, a->instrlen); + memcpy(insn_buff, wr_instr, a->instrlen); optimize_nops(instr, insn_buff, a->instrlen); - text_poke_early(instr, insn_buff, a->instrlen); + text_poke_early(wr_instr, insn_buff, a->instrlen); continue; } @@ -513,11 +521,12 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, instr, instr, a->instrlen, replacement, a->replacementlen, a->flags); - memcpy(insn_buff, replacement, a->replacementlen); + memcpy(insn_buff, wr_replacement, a->replacementlen); insn_buff_sz = a->replacementlen; if (a->flags & ALT_FLAG_DIRECT_CALL) { - insn_buff_sz = alt_replace_call(instr, insn_buff, a); + insn_buff_sz = alt_replace_call(instr, insn_buff, a, + mod); if (insn_buff_sz < 0) continue; } @@ -527,11 +536,11 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, apply_relocation(insn_buff, instr, a->instrlen, replacement, a->replacementlen); - DUMP_BYTES(ALT, instr, a->instrlen, "%px: old_insn: ", instr); + DUMP_BYTES(ALT, wr_instr, a->instrlen, "%px: old_insn: ", instr); DUMP_BYTES(ALT, replacement, a->replacementlen, "%px: rpl_insn: ", replacement); DUMP_BYTES(ALT, insn_buff, insn_buff_sz, "%px: final_insn: ", instr); - text_poke_early(instr, insn_buff, insn_buff_sz); + text_poke_early(wr_instr, insn_buff, insn_buff_sz); } kasan_enable_current(); @@ -722,18 +731,20 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) /* * Generated by 'objtool --retpoline'. */ -void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) +void __init_or_module noinline apply_retpolines(s32 *start, s32 *end, + struct module *mod) { s32 *s; for (s = start; s < end; s++) { void *addr = (void *)s + *s; + void *wr_addr = module_writable_address(mod, addr); struct insn insn; int len, ret; u8 bytes[16]; u8 op1, op2; - ret = insn_decode_kernel(&insn, addr); + ret = insn_decode_kernel(&insn, wr_addr); if (WARN_ON_ONCE(ret < 0)) continue; @@ -761,9 +772,9 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) len = patch_retpoline(addr, &insn, bytes); if (len == insn.length) { optimize_nops(addr, bytes, len); - DUMP_BYTES(RETPOLINE, ((u8*)addr), len, "%px: orig: ", addr); + DUMP_BYTES(RETPOLINE, ((u8*)wr_addr), len, "%px: orig: ", addr); DUMP_BYTES(RETPOLINE, ((u8*)bytes), len, "%px: repl: ", addr); - text_poke_early(addr, bytes, len); + text_poke_early(wr_addr, bytes, len); } } } @@ -799,7 +810,8 @@ static int patch_return(void *addr, struct insn *insn, u8 *bytes) return i; } -void __init_or_module noinline apply_returns(s32 *start, s32 *end) +void __init_or_module noinline apply_returns(s32 *start, s32 *end, + struct module *mod) { s32 *s; @@ -808,12 +820,13 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) for (s = start; s < end; s++) { void *dest = NULL, *addr = (void *)s + *s; + void *wr_addr = module_writable_address(mod, addr); struct insn insn; int len, ret; u8 bytes[16]; u8 op; - ret = insn_decode_kernel(&insn, addr); + ret = insn_decode_kernel(&insn, wr_addr); if (WARN_ON_ONCE(ret < 0)) continue; @@ -833,32 +846,35 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) len = patch_return(addr, &insn, bytes); if (len == insn.length) { - DUMP_BYTES(RET, ((u8*)addr), len, "%px: orig: ", addr); + DUMP_BYTES(RET, ((u8*)wr_addr), len, "%px: orig: ", addr); DUMP_BYTES(RET, ((u8*)bytes), len, "%px: repl: ", addr); - text_poke_early(addr, bytes, len); + text_poke_early(wr_addr, bytes, len); } } } #else -void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } +void __init_or_module noinline apply_returns(s32 *start, s32 *end, + struct module *mod) { } #endif /* CONFIG_MITIGATION_RETHUNK */ #else /* !CONFIG_MITIGATION_RETPOLINE || !CONFIG_OBJTOOL */ -void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { } -void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } +void __init_or_module noinline apply_retpolines(s32 *start, s32 *end, + struct module *mod) { } +void __init_or_module noinline apply_returns(s32 *start, s32 *end, + struct module *mod) { } #endif /* CONFIG_MITIGATION_RETPOLINE && CONFIG_OBJTOOL */ #ifdef CONFIG_X86_KERNEL_IBT -static void poison_cfi(void *addr); +static void poison_cfi(void *addr, void *wr_addr); -static void __init_or_module poison_endbr(void *addr, bool warn) +static void __init_or_module poison_endbr(void *addr, void *wr_addr, bool warn) { u32 endbr, poison = gen_endbr_poison(); - if (WARN_ON_ONCE(get_kernel_nofault(endbr, addr))) + if (WARN_ON_ONCE(get_kernel_nofault(endbr, wr_addr))) return; if (!is_endbr(endbr)) { @@ -873,7 +889,7 @@ static void __init_or_module poison_endbr(void *addr, bool warn) */ DUMP_BYTES(ENDBR, ((u8*)addr), 4, "%px: orig: ", addr); DUMP_BYTES(ENDBR, ((u8*)&poison), 4, "%px: repl: ", addr); - text_poke_early(addr, &poison, 4); + text_poke_early(wr_addr, &poison, 4); } /* @@ -882,22 +898,23 @@ static void __init_or_module poison_endbr(void *addr, bool warn) * Seal the functions for indirect calls by clobbering the ENDBR instructions * and the kCFI hash value. */ -void __init_or_module noinline apply_seal_endbr(s32 *start, s32 *end) +void __init_or_module noinline apply_seal_endbr(s32 *start, s32 *end, struct module *mod) { s32 *s; for (s = start; s < end; s++) { void *addr = (void *)s + *s; + void *wr_addr = module_writable_address(mod, addr); - poison_endbr(addr, true); + poison_endbr(addr, wr_addr, true); if (IS_ENABLED(CONFIG_FINEIBT)) - poison_cfi(addr - 16); + poison_cfi(addr - 16, wr_addr - 16); } } #else -void __init_or_module apply_seal_endbr(s32 *start, s32 *end) { } +void __init_or_module apply_seal_endbr(s32 *start, s32 *end, struct module *mod) { } #endif /* CONFIG_X86_KERNEL_IBT */ @@ -1119,7 +1136,7 @@ static u32 decode_caller_hash(void *addr) } /* .retpoline_sites */ -static int cfi_disable_callers(s32 *start, s32 *end) +static int cfi_disable_callers(s32 *start, s32 *end, struct module *mod) { /* * Disable kCFI by patching in a JMP.d8, this leaves the hash immediate @@ -1131,20 +1148,23 @@ static int cfi_disable_callers(s32 *start, s32 *end) for (s = start; s < end; s++) { void *addr = (void *)s + *s; + void *wr_addr; u32 hash; addr -= fineibt_caller_size; - hash = decode_caller_hash(addr); + wr_addr = module_writable_address(mod, addr); + hash = decode_caller_hash(wr_addr); + if (!hash) /* nocfi callers */ continue; - text_poke_early(addr, jmp, 2); + text_poke_early(wr_addr, jmp, 2); } return 0; } -static int cfi_enable_callers(s32 *start, s32 *end) +static int cfi_enable_callers(s32 *start, s32 *end, struct module *mod) { /* * Re-enable kCFI, undo what cfi_disable_callers() did. @@ -1154,106 +1174,115 @@ static int cfi_enable_callers(s32 *start, s32 *end) for (s = start; s < end; s++) { void *addr = (void *)s + *s; + void *wr_addr; u32 hash; addr -= fineibt_caller_size; - hash = decode_caller_hash(addr); + wr_addr = module_writable_address(mod, addr); + hash = decode_caller_hash(wr_addr); if (!hash) /* nocfi callers */ continue; - text_poke_early(addr, mov, 2); + text_poke_early(wr_addr, mov, 2); } return 0; } /* .cfi_sites */ -static int cfi_rand_preamble(s32 *start, s32 *end) +static int cfi_rand_preamble(s32 *start, s32 *end, struct module *mod) { s32 *s; for (s = start; s < end; s++) { void *addr = (void *)s + *s; + void *wr_addr = module_writable_address(mod, addr); u32 hash; - hash = decode_preamble_hash(addr); + hash = decode_preamble_hash(wr_addr); if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n", addr, addr, 5, addr)) return -EINVAL; hash = cfi_rehash(hash); - text_poke_early(addr + 1, &hash, 4); + text_poke_early(wr_addr + 1, &hash, 4); } return 0; } -static int cfi_rewrite_preamble(s32 *start, s32 *end) +static int cfi_rewrite_preamble(s32 *start, s32 *end, struct module *mod) { s32 *s; for (s = start; s < end; s++) { void *addr = (void *)s + *s; + void *wr_addr = module_writable_address(mod, addr); u32 hash; - hash = decode_preamble_hash(addr); + hash = decode_preamble_hash(wr_addr); if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n", addr, addr, 5, addr)) return -EINVAL; - text_poke_early(addr, fineibt_preamble_start, fineibt_preamble_size); - WARN_ON(*(u32 *)(addr + fineibt_preamble_hash) != 0x12345678); - text_poke_early(addr + fineibt_preamble_hash, &hash, 4); + text_poke_early(wr_addr, fineibt_preamble_start, fineibt_preamble_size); + WARN_ON(*(u32 *)(wr_addr + fineibt_preamble_hash) != 0x12345678); + text_poke_early(wr_addr + fineibt_preamble_hash, &hash, 4); } return 0; } -static void cfi_rewrite_endbr(s32 *start, s32 *end) +static void cfi_rewrite_endbr(s32 *start, s32 *end, struct module *mod) { s32 *s; for (s = start; s < end; s++) { void *addr = (void *)s + *s; + void *wr_addr = module_writable_address(mod, addr); - poison_endbr(addr+16, false); + poison_endbr(addr + 16, wr_addr + 16, false); } } /* .retpoline_sites */ -static int cfi_rand_callers(s32 *start, s32 *end) +static int cfi_rand_callers(s32 *start, s32 *end, struct module *mod) { s32 *s; for (s = start; s < end; s++) { void *addr = (void *)s + *s; + void *wr_addr; u32 hash; addr -= fineibt_caller_size; - hash = decode_caller_hash(addr); + wr_addr = module_writable_address(mod, addr); + hash = decode_caller_hash(wr_addr); if (hash) { hash = -cfi_rehash(hash); - text_poke_early(addr + 2, &hash, 4); + text_poke_early(wr_addr + 2, &hash, 4); } } return 0; } -static int cfi_rewrite_callers(s32 *start, s32 *end) +static int cfi_rewrite_callers(s32 *start, s32 *end, struct module *mod) { s32 *s; for (s = start; s < end; s++) { void *addr = (void *)s + *s; + void *wr_addr; u32 hash; addr -= fineibt_caller_size; - hash = decode_caller_hash(addr); + wr_addr = module_writable_address(mod, addr); + hash = decode_caller_hash(wr_addr); if (hash) { - text_poke_early(addr, fineibt_caller_start, fineibt_caller_size); - WARN_ON(*(u32 *)(addr + fineibt_caller_hash) != 0x12345678); - text_poke_early(addr + fineibt_caller_hash, &hash, 4); + text_poke_early(wr_addr, fineibt_caller_start, fineibt_caller_size); + WARN_ON(*(u32 *)(wr_addr + fineibt_caller_hash) != 0x12345678); + text_poke_early(wr_addr + fineibt_caller_hash, &hash, 4); } /* rely on apply_retpolines() */ } @@ -1262,8 +1291,9 @@ static int cfi_rewrite_callers(s32 *start, s32 *end) } static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, - s32 *start_cfi, s32 *end_cfi, bool builtin) + s32 *start_cfi, s32 *end_cfi, struct module *mod) { + bool builtin = mod ? false : true; int ret; if (WARN_ONCE(fineibt_preamble_size != 16, @@ -1281,7 +1311,7 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, * rewrite them. This disables all CFI. If this succeeds but any of the * later stages fails, we're without CFI. */ - ret = cfi_disable_callers(start_retpoline, end_retpoline); + ret = cfi_disable_callers(start_retpoline, end_retpoline, mod); if (ret) goto err; @@ -1292,11 +1322,11 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, cfi_bpf_subprog_hash = cfi_rehash(cfi_bpf_subprog_hash); } - ret = cfi_rand_preamble(start_cfi, end_cfi); + ret = cfi_rand_preamble(start_cfi, end_cfi, mod); if (ret) goto err; - ret = cfi_rand_callers(start_retpoline, end_retpoline); + ret = cfi_rand_callers(start_retpoline, end_retpoline, mod); if (ret) goto err; } @@ -1308,7 +1338,7 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, return; case CFI_KCFI: - ret = cfi_enable_callers(start_retpoline, end_retpoline); + ret = cfi_enable_callers(start_retpoline, end_retpoline, mod); if (ret) goto err; @@ -1318,17 +1348,17 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, case CFI_FINEIBT: /* place the FineIBT preamble at func()-16 */ - ret = cfi_rewrite_preamble(start_cfi, end_cfi); + ret = cfi_rewrite_preamble(start_cfi, end_cfi, mod); if (ret) goto err; /* rewrite the callers to target func()-16 */ - ret = cfi_rewrite_callers(start_retpoline, end_retpoline); + ret = cfi_rewrite_callers(start_retpoline, end_retpoline, mod); if (ret) goto err; /* now that nobody targets func()+0, remove ENDBR there */ - cfi_rewrite_endbr(start_cfi, end_cfi); + cfi_rewrite_endbr(start_cfi, end_cfi, mod); if (builtin) pr_info("Using FineIBT CFI\n"); @@ -1347,7 +1377,7 @@ static inline void poison_hash(void *addr) *(u32 *)addr = 0; } -static void poison_cfi(void *addr) +static void poison_cfi(void *addr, void *wr_addr) { switch (cfi_mode) { case CFI_FINEIBT: @@ -1359,8 +1389,8 @@ static void poison_cfi(void *addr) * ud2 * 1: nop */ - poison_endbr(addr, false); - poison_hash(addr + fineibt_preamble_hash); + poison_endbr(addr, wr_addr, false); + poison_hash(wr_addr + fineibt_preamble_hash); break; case CFI_KCFI: @@ -1369,7 +1399,7 @@ static void poison_cfi(void *addr) * movl $0, %eax * .skip 11, 0x90 */ - poison_hash(addr + 1); + poison_hash(wr_addr + 1); break; default: @@ -1380,22 +1410,21 @@ static void poison_cfi(void *addr) #else static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, - s32 *start_cfi, s32 *end_cfi, bool builtin) + s32 *start_cfi, s32 *end_cfi, struct module *mod) { } #ifdef CONFIG_X86_KERNEL_IBT -static void poison_cfi(void *addr) { } +static void poison_cfi(void *addr, void *wr_addr) { } #endif #endif void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, - s32 *start_cfi, s32 *end_cfi) + s32 *start_cfi, s32 *end_cfi, struct module *mod) { return __apply_fineibt(start_retpoline, end_retpoline, - start_cfi, end_cfi, - /* .builtin = */ false); + start_cfi, end_cfi, mod); } #ifdef CONFIG_SMP @@ -1692,16 +1721,16 @@ void __init alternative_instructions(void) paravirt_set_cap(); __apply_fineibt(__retpoline_sites, __retpoline_sites_end, - __cfi_sites, __cfi_sites_end, true); + __cfi_sites, __cfi_sites_end, NULL); /* * Rewrite the retpolines, must be done before alternatives since * those can rewrite the retpoline thunks. */ - apply_retpolines(__retpoline_sites, __retpoline_sites_end); - apply_returns(__return_sites, __return_sites_end); + apply_retpolines(__retpoline_sites, __retpoline_sites_end, NULL); + apply_returns(__return_sites, __return_sites_end, NULL); - apply_alternatives(__alt_instructions, __alt_instructions_end); + apply_alternatives(__alt_instructions, __alt_instructions_end, NULL); /* * Now all calls are established. Apply the call thunks if @@ -1712,7 +1741,7 @@ void __init alternative_instructions(void) /* * Seal all functions that do not have their address taken. */ - apply_seal_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end); + apply_seal_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end, NULL); #ifdef CONFIG_SMP /* Patch to UP if other cpus not imminent. */ diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index adb09f78edb2..4dd0ad6c94d6 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -118,10 +118,13 @@ ftrace_modify_code_direct(unsigned long ip, const char *old_code, return ret; /* replace the text with the new text */ - if (ftrace_poke_late) + if (ftrace_poke_late) { text_poke_queue((void *)ip, new_code, MCOUNT_INSN_SIZE, NULL); - else - text_poke_early((void *)ip, new_code, MCOUNT_INSN_SIZE); + } else { + mutex_lock(&text_mutex); + text_poke((void *)ip, new_code, MCOUNT_INSN_SIZE); + mutex_unlock(&text_mutex); + } return 0; } @@ -318,7 +321,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 }; unsigned const char retq[] = { RET_INSN_OPCODE, INT3_INSN_OPCODE }; union ftrace_op_code_union op_ptr; - int ret; + void *ret; if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { start_offset = (unsigned long)ftrace_regs_caller; @@ -349,15 +352,15 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) npages = DIV_ROUND_UP(*tramp_size, PAGE_SIZE); /* Copy ftrace_caller onto the trampoline memory */ - ret = copy_from_kernel_nofault(trampoline, (void *)start_offset, size); - if (WARN_ON(ret < 0)) + ret = text_poke_copy(trampoline, (void *)start_offset, size); + if (WARN_ON(!ret)) goto fail; ip = trampoline + size; if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, x86_return_thunk, JMP32_INSN_SIZE); else - memcpy(ip, retq, sizeof(retq)); + text_poke_copy(ip, retq, sizeof(retq)); /* No need to test direct calls on created trampolines */ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { @@ -365,8 +368,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) ip = trampoline + (jmp_offset - start_offset); if (WARN_ON(*(char *)ip != 0x75)) goto fail; - ret = copy_from_kernel_nofault(ip, x86_nops[2], 2); - if (ret < 0) + if (!text_poke_copy(ip, x86_nops[2], 2)) goto fail; } @@ -379,7 +381,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) */ ptr = (unsigned long *)(trampoline + size + RET_SIZE); - *ptr = (unsigned long)ops; + text_poke_copy(ptr, &ops, sizeof(unsigned long)); op_offset -= start_offset; memcpy(&op_ptr, trampoline + op_offset, OP_REF_SIZE); @@ -395,7 +397,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) op_ptr.offset = offset; /* put in the new offset to the ftrace_ops */ - memcpy(trampoline + op_offset, &op_ptr, OP_REF_SIZE); + text_poke_copy(trampoline + op_offset, &op_ptr, OP_REF_SIZE); /* put in the call to the function */ mutex_lock(&text_mutex); @@ -405,9 +407,9 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) * the depth accounting before the call already. */ dest = ftrace_ops_get_func(ops); - memcpy(trampoline + call_offset, - text_gen_insn(CALL_INSN_OPCODE, trampoline + call_offset, dest), - CALL_INSN_SIZE); + text_poke_copy_locked(trampoline + call_offset, + text_gen_insn(CALL_INSN_OPCODE, trampoline + call_offset, dest), + CALL_INSN_SIZE, false); mutex_unlock(&text_mutex); /* ALLOC_TRAMP flags lets us know we created it */ diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 837450b6e882..8984abd91c00 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -146,18 +146,21 @@ static int __write_relocate_add(Elf64_Shdr *sechdrs, } if (apply) { - if (memcmp(loc, &zero, size)) { + void *wr_loc = module_writable_address(me, loc); + + if (memcmp(wr_loc, &zero, size)) { pr_err("x86/modules: Invalid relocation target, existing value is nonzero for type %d, loc %p, val %Lx\n", (int)ELF64_R_TYPE(rel[i].r_info), loc, val); return -ENOEXEC; } - write(loc, &val, size); + write(wr_loc, &val, size); } else { if (memcmp(loc, &val, size)) { pr_warn("x86/modules: Invalid relocation target, existing value does not match expected value for type %d, loc %p, val %Lx\n", (int)ELF64_R_TYPE(rel[i].r_info), loc, val); return -ENOEXEC; } + /* FIXME: needs care for ROX module allocations */ write(loc, &zero, size); } } @@ -224,7 +227,7 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { - const Elf_Shdr *s, *alt = NULL, *locks = NULL, + const Elf_Shdr *s, *alt = NULL, *orc = NULL, *orc_ip = NULL, *retpolines = NULL, *returns = NULL, *ibt_endbr = NULL, *calls = NULL, *cfi = NULL; @@ -233,8 +236,6 @@ int module_finalize(const Elf_Ehdr *hdr, for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { if (!strcmp(".altinstructions", secstrings + s->sh_name)) alt = s; - if (!strcmp(".smp_locks", secstrings + s->sh_name)) - locks = s; if (!strcmp(".orc_unwind", secstrings + s->sh_name)) orc = s; if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name)) @@ -265,20 +266,20 @@ int module_finalize(const Elf_Ehdr *hdr, csize = cfi->sh_size; } - apply_fineibt(rseg, rseg + rsize, cseg, cseg + csize); + apply_fineibt(rseg, rseg + rsize, cseg, cseg + csize, me); } if (retpolines) { void *rseg = (void *)retpolines->sh_addr; - apply_retpolines(rseg, rseg + retpolines->sh_size); + apply_retpolines(rseg, rseg + retpolines->sh_size, me); } if (returns) { void *rseg = (void *)returns->sh_addr; - apply_returns(rseg, rseg + returns->sh_size); + apply_returns(rseg, rseg + returns->sh_size, me); } if (alt) { /* patch .altinstructions */ void *aseg = (void *)alt->sh_addr; - apply_alternatives(aseg, aseg + alt->sh_size); + apply_alternatives(aseg, aseg + alt->sh_size, me); } if (calls || alt) { struct callthunk_sites cs = {}; @@ -297,8 +298,28 @@ int module_finalize(const Elf_Ehdr *hdr, } if (ibt_endbr) { void *iseg = (void *)ibt_endbr->sh_addr; - apply_seal_endbr(iseg, iseg + ibt_endbr->sh_size); + apply_seal_endbr(iseg, iseg + ibt_endbr->sh_size, me); } + + if (orc && orc_ip) + unwind_module_init(me, (void *)orc_ip->sh_addr, orc_ip->sh_size, + (void *)orc->sh_addr, orc->sh_size); + + return 0; +} + +int module_post_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + struct module *me) +{ + const Elf_Shdr *s, *locks = NULL; + char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + + for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { + if (!strcmp(".smp_locks", secstrings + s->sh_name)) + locks = s; + } + if (locks) { void *lseg = (void *)locks->sh_addr; void *text = me->mem[MOD_TEXT].base; @@ -308,10 +329,6 @@ int module_finalize(const Elf_Ehdr *hdr, text, text_end); } - if (orc && orc_ip) - unwind_module_init(me, (void *)orc_ip->sh_addr, orc_ip->sh_size, - (void *)orc->sh_addr, orc->sh_size); - return 0; } diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 87f8c9a71c49..776ae6fa7f2d 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -18,6 +18,7 @@ #include <linux/random.h> #include <linux/uaccess.h> #include <linux/elf.h> +#include <linux/hugetlb.h> #include <asm/elf.h> #include <asm/ia32.h> @@ -25,8 +26,10 @@ /* * Align a virtual address to avoid aliasing in the I$ on AMD F15h. */ -static unsigned long get_align_mask(void) +static unsigned long get_align_mask(struct file *filp) { + if (filp && is_file_hugepages(filp)) + return huge_page_mask_align(filp); /* handle 32- and 64-bit case with a single conditional */ if (va_align.flags < 0 || !(va_align.flags & (2 - mmap_is_ia32()))) return 0; @@ -49,7 +52,7 @@ static unsigned long get_align_mask(void) */ static unsigned long get_align_bits(void) { - return va_align.bits & get_align_mask(); + return va_align.bits & get_align_mask(NULL); } static int __init control_va_addr_alignment(char *str) @@ -148,12 +151,15 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, info.length = len; info.low_limit = begin; info.high_limit = end; - info.align_offset = pgoff << PAGE_SHIFT; - info.start_gap = stack_guard_placement(vm_flags); + if (!(filp && is_file_hugepages(filp))) { + info.align_offset = pgoff << PAGE_SHIFT; + info.start_gap = stack_guard_placement(vm_flags); + } if (filp) { - info.align_mask = get_align_mask(); + info.align_mask = get_align_mask(filp); info.align_offset += get_align_bits(); } + return vm_unmapped_area(&info); } @@ -199,7 +205,10 @@ get_unmapped_area: info.low_limit = PAGE_SIZE; info.high_limit = get_mmap_base(0); - info.start_gap = stack_guard_placement(vm_flags); + if (!(filp && is_file_hugepages(filp))) { + info.start_gap = stack_guard_placement(vm_flags); + info.align_offset = pgoff << PAGE_SHIFT; + } /* * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area @@ -211,9 +220,8 @@ get_unmapped_area: if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall()) info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW; - info.align_offset = pgoff << PAGE_SHIFT; if (filp) { - info.align_mask = get_align_mask(); + info.align_mask = get_align_mask(filp); info.align_offset += get_align_bits(); } addr = vm_unmapped_area(&info); diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 807a5859a3c4..58f7f2bd535d 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -19,107 +19,6 @@ #include <asm/tlbflush.h> #include <asm/elf.h> -#ifdef CONFIG_HUGETLB_PAGE -static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, - unsigned long addr, unsigned long len, - unsigned long pgoff, unsigned long flags) -{ - struct hstate *h = hstate_file(file); - struct vm_unmapped_area_info info = {}; - - info.length = len; - info.low_limit = get_mmap_base(1); - - /* - * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area - * in the full address space. - */ - info.high_limit = in_32bit_syscall() ? - task_size_32bit() : task_size_64bit(addr > DEFAULT_MAP_WINDOW); - - info.align_mask = PAGE_MASK & ~huge_page_mask(h); - return vm_unmapped_area(&info); -} - -static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, - unsigned long addr, unsigned long len, - unsigned long pgoff, unsigned long flags) -{ - struct hstate *h = hstate_file(file); - struct vm_unmapped_area_info info = {}; - - info.flags = VM_UNMAPPED_AREA_TOPDOWN; - info.length = len; - info.low_limit = PAGE_SIZE; - info.high_limit = get_mmap_base(0); - - /* - * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area - * in the full address space. - */ - if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall()) - info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW; - - info.align_mask = PAGE_MASK & ~huge_page_mask(h); - addr = vm_unmapped_area(&info); - - /* - * A failed mmap() very likely causes application failure, - * so fall back to the bottom-up function here. This scenario - * can happen with large stack limits and large mmap() - * allocations. - */ - if (addr & ~PAGE_MASK) { - VM_BUG_ON(addr != -ENOMEM); - info.flags = 0; - info.low_limit = TASK_UNMAPPED_BASE; - info.high_limit = TASK_SIZE_LOW; - addr = vm_unmapped_area(&info); - } - - return addr; -} - -unsigned long -hugetlb_get_unmapped_area(struct file *file, unsigned long addr, - unsigned long len, unsigned long pgoff, unsigned long flags) -{ - struct hstate *h = hstate_file(file); - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - - if (len & ~huge_page_mask(h)) - return -EINVAL; - - if (len > TASK_SIZE) - return -ENOMEM; - - /* No address checking. See comment at mmap_address_hint_valid() */ - if (flags & MAP_FIXED) { - if (prepare_hugepage_range(file, addr, len)) - return -EINVAL; - return addr; - } - - if (addr) { - addr &= huge_page_mask(h); - if (!mmap_address_hint_valid(addr, len)) - goto get_unmapped_area; - - vma = find_vma(mm, addr); - if (!vma || addr + len <= vm_start_gap(vma)) - return addr; - } - -get_unmapped_area: - if (!test_bit(MMF_TOPDOWN, &mm->flags)) - return hugetlb_get_unmapped_area_bottomup(file, addr, len, - pgoff, flags); - else - return hugetlb_get_unmapped_area_topdown(file, addr, len, - pgoff, flags); -} -#endif /* CONFIG_HUGETLB_PAGE */ #ifdef CONFIG_X86_64 bool __init arch_hugetlb_valid_size(unsigned long size) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 101725c149c4..c6d29f283001 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -1058,18 +1058,53 @@ unsigned long arch_max_swapfile_size(void) #ifdef CONFIG_EXECMEM static struct execmem_info execmem_info __ro_after_init; +#ifdef CONFIG_ARCH_HAS_EXECMEM_ROX +void execmem_fill_trapping_insns(void *ptr, size_t size, bool writeable) +{ + /* fill memory with INT3 instructions */ + if (writeable) + memset(ptr, INT3_INSN_OPCODE, size); + else + text_poke_set(ptr, INT3_INSN_OPCODE, size); +} +#endif + struct execmem_info __init *execmem_arch_setup(void) { unsigned long start, offset = 0; + enum execmem_range_flags flags; + pgprot_t pgprot; if (kaslr_enabled()) offset = get_random_u32_inclusive(1, 1024) * PAGE_SIZE; start = MODULES_VADDR + offset; + if (IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX)) { + pgprot = PAGE_KERNEL_ROX; + flags = EXECMEM_KASAN_SHADOW | EXECMEM_ROX_CACHE; + } else { + pgprot = PAGE_KERNEL; + flags = EXECMEM_KASAN_SHADOW; + } + execmem_info = (struct execmem_info){ .ranges = { - [EXECMEM_DEFAULT] = { + [EXECMEM_MODULE_TEXT] = { + .flags = flags, + .start = start, + .end = MODULES_END, + .pgprot = pgprot, + .alignment = MODULE_ALIGN, + }, + [EXECMEM_KPROBES ... EXECMEM_BPF] = { + .flags = EXECMEM_KASAN_SHADOW, + .start = start, + .end = MODULES_END, + .pgprot = PAGE_KERNEL, + .alignment = MODULE_ALIGN, + }, + [EXECMEM_MODULE_DATA] = { .flags = EXECMEM_KASAN_SHADOW, .start = start, .end = MODULES_END, diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index ff253648706f..01ea7c6df303 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -961,7 +961,7 @@ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, unsigned long end = ((start_pfn + nr_pages) << PAGE_SHIFT) - 1; int ret; - if (WARN_ON_ONCE(end > PHYSMEM_END)) + if (WARN_ON_ONCE(end > DIRECT_MAP_PHYSMEM_END)) return -ERANGE; ret = __add_pages(nid, start_pfn, nr_pages, params); @@ -985,22 +985,32 @@ int arch_add_memory(int nid, u64 start, u64 size, return add_pages(nid, start_pfn, nr_pages, params); } -static void __meminit free_pagetable(struct page *page, int order) +static void free_reserved_pages(struct page *page, unsigned long nr_pages) { - unsigned long magic; - unsigned int nr_pages = 1 << order; + while (nr_pages--) + free_reserved_page(page++); +} +static void __meminit free_pagetable(struct page *page, int order) +{ /* bootmem page has reserved flag */ if (PageReserved(page)) { - magic = page->index; - if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) { + unsigned long nr_pages = 1 << order; +#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE + enum bootmem_type type = bootmem_type(page); + + if (type == SECTION_INFO || type == MIX_SECTION_INFO) { while (nr_pages--) put_page_bootmem(page++); - } else - while (nr_pages--) - free_reserved_page(page++); - } else + } else { + free_reserved_pages(page, nr_pages); + } +#else + free_reserved_pages(page, nr_pages); +#endif + } else { free_pages((unsigned long)page_address(page), order); + } } static void __meminit free_hugepage_table(struct page *page, diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index e17e6e27b7ec..11a93542d198 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -52,7 +52,7 @@ static __initdata struct kaslr_memory_region { } kaslr_regions[] = { { .base = &page_offset_base, - .end = &physmem_end, + .end = &direct_map_physmem_end, }, { .base = &vmalloc_base, @@ -62,8 +62,12 @@ static __initdata struct kaslr_memory_region { }, }; -/* The end of the possible address space for physical memory */ -unsigned long physmem_end __ro_after_init; +/* + * The end of the physical address space that can be mapped directly by the + * kernel. This starts out at (1<<MAX_PHYSMEM_BITS) - 1), but KASLR may reduce + * that in order to increase the available entropy for mapping other regions. + */ +unsigned long direct_map_physmem_end __ro_after_init; /* Get size in bytes used by the memory region */ static inline unsigned long get_padding(struct kaslr_memory_region *region) @@ -94,7 +98,7 @@ void __init kernel_randomize_memory(void) BUILD_BUG_ON(vaddr_end > __START_KERNEL_map); /* Preset the end of the possible address space for physical memory */ - physmem_end = ((1ULL << MAX_PHYSMEM_BITS) - 1); + direct_map_physmem_end = ((1ULL << MAX_PHYSMEM_BITS) - 1); if (!kaslr_memory_enabled()) return; @@ -145,7 +149,7 @@ void __init kernel_randomize_memory(void) vaddr += get_padding(&kaslr_regions[i]); /* * KASLR trims the maximum possible size of the - * direct-map. Update the physmem_end boundary. + * direct-map. Update the direct_map_physmem_end boundary. * No rounding required as the region starts * PUD aligned and size is in units of TB. */ diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 44f7b2ea6a07..069e421c2247 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -2444,6 +2444,14 @@ int set_direct_map_default_noflush(struct page *page) return __set_pages_p(page, 1); } +int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid) +{ + if (valid) + return __set_pages_p(page, nr); + + return __set_pages_np(page, nr); +} + #ifdef CONFIG_DEBUG_PAGEALLOC void __kernel_map_pages(struct page *page, int numpages, int enable) { |