summary | refs | log | tree | commit | diff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r-- arch/x86/Kconfig | 1
-rw-r--r-- arch/x86/entry/vdso/vma.c | 3
-rw-r--r-- arch/x86/include/asm/alternative.h | 14
-rw-r--r-- arch/x86/include/asm/page.h | 2
-rw-r--r-- arch/x86/include/asm/page_64.h | 2
-rw-r--r-- arch/x86/include/asm/percpu.h | 7
-rw-r--r-- arch/x86/include/asm/pgtable.h | 37
-rw-r--r-- arch/x86/include/asm/pgtable_64_types.h | 2
-rw-r--r-- arch/x86/include/asm/set_memory.h | 1
-rw-r--r-- arch/x86/include/asm/text-patching.h | 1
-rw-r--r-- arch/x86/kernel/alternative.c | 181
-rw-r--r-- arch/x86/kernel/ftrace.c | 30
-rw-r--r-- arch/x86/kernel/module.c | 45
-rw-r--r-- arch/x86/kernel/sys_x86_64.c | 24
-rw-r--r-- arch/x86/mm/hugetlbpage.c | 101
-rw-r--r-- arch/x86/mm/init.c | 37
-rw-r--r-- arch/x86/mm/init_64.c | 30
-rw-r--r-- arch/x86/mm/kaslr.c | 14
-rw-r--r-- arch/x86/mm/pat/set_memory.c | 8
19 files changed, 297 insertions, 243 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 948707a3615e..6c633d93c639 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -83,6 +83,7 @@ config X86
select ARCH_HAS_DMA_OPS if GART_IOMMU || XEN
select ARCH_HAS_EARLY_DEBUG if KGDB
select ARCH_HAS_ELF_RANDOMIZE
+ select ARCH_HAS_EXECMEM_ROX if X86_64
select ARCH_HAS_FAST_MULTIPLIER
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index bfc7cabf4017..39e6efc1a9ca 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -48,7 +48,8 @@ int __init init_vdso_image(const struct vdso_image *image)
apply_alternatives((struct alt_instr *)(image->data + image->alt),
(struct alt_instr *)(image->data + image->alt +
- image->alt_len));
+ image->alt_len),
+ NULL);
return 0;
}
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index ca9ae606aab9..dc03a647776d 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -96,16 +96,16 @@ extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
* instructions were patched in already:
*/
extern int alternatives_patched;
+struct module;
extern void alternative_instructions(void);
-extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
-extern void apply_retpolines(s32 *start, s32 *end);
-extern void apply_returns(s32 *start, s32 *end);
-extern void apply_seal_endbr(s32 *start, s32 *end);
+extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end,
+ struct module *mod);
+extern void apply_retpolines(s32 *start, s32 *end, struct module *mod);
+extern void apply_returns(s32 *start, s32 *end, struct module *mod);
+extern void apply_seal_endbr(s32 *start, s32 *end, struct module *mod);
extern void apply_fineibt(s32 *start_retpoline, s32 *end_retpoine,
- s32 *start_cfi, s32 *end_cfi);
-
-struct module;
+ s32 *start_cfi, s32 *end_cfi, struct module *mod);
struct callthunk_sites {
s32 *call_start, *call_end;
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 1b93ff80b43b..c9fe207916f4 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -35,7 +35,7 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
}
#define vma_alloc_zeroed_movable_folio(vma, vaddr) \
- vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr, false)
+ vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr)
#ifndef __pa
#define __pa(x) __phys_addr((unsigned long)(x))
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index f3d257c45225..d63576608ce7 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -17,7 +17,7 @@ extern unsigned long phys_base;
extern unsigned long page_offset_base;
extern unsigned long vmalloc_base;
extern unsigned long vmemmap_base;
-extern unsigned long physmem_end;
+extern unsigned long direct_map_physmem_end;
static __always_inline unsigned long __phys_addr_nodebug(unsigned long x)
{
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index c55a79d5feae..e525cd85f999 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -234,9 +234,10 @@ do { \
*/
#define percpu_add_op(size, qual, var, val) \
do { \
- const int pao_ID__ = (__builtin_constant_p(val) && \
- ((val) == 1 || (val) == -1)) ? \
- (int)(val) : 0; \
+ const int pao_ID__ = \
+ (__builtin_constant_p(val) && \
+ ((val) == 1 || \
+ (val) == (typeof(val))-1)) ? (int)(val) : 0; \
\
if (0) { \
typeof(var) pao_tmp__; \
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 4c2d080d26b4..593f10aabd45 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1775,6 +1775,43 @@ bool arch_is_platform_page(u64 paddr);
#define arch_is_platform_page arch_is_platform_page
#endif
+/*
+ * Use set_p*_safe(), and elide TLB flushing, when confident that *no*
+ * TLB flush will be required as a result of the "set". For example, use
+ * in scenarios where it is known ahead of time that the routine is
+ * setting non-present entries, or re-setting an existing entry to the
+ * same value. Otherwise, use the typical "set" helpers and flush the
+ * TLB.
+ */
+#define set_pte_safe(ptep, pte) \
+({ \
+ WARN_ON_ONCE(pte_present(*ptep) && !pte_same(*ptep, pte)); \
+ set_pte(ptep, pte); \
+})
+
+#define set_pmd_safe(pmdp, pmd) \
+({ \
+ WARN_ON_ONCE(pmd_present(*pmdp) && !pmd_same(*pmdp, pmd)); \
+ set_pmd(pmdp, pmd); \
+})
+
+#define set_pud_safe(pudp, pud) \
+({ \
+ WARN_ON_ONCE(pud_present(*pudp) && !pud_same(*pudp, pud)); \
+ set_pud(pudp, pud); \
+})
+
+#define set_p4d_safe(p4dp, p4d) \
+({ \
+ WARN_ON_ONCE(p4d_present(*p4dp) && !p4d_same(*p4dp, p4d)); \
+ set_p4d(p4dp, p4d); \
+})
+
+#define set_pgd_safe(pgdp, pgd) \
+({ \
+ WARN_ON_ONCE(pgd_present(*pgdp) && !pgd_same(*pgdp, pgd)); \
+ set_pgd(pgdp, pgd); \
+})
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_PGTABLE_H */
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index a98e53491a4e..ec68f8369bdc 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -141,7 +141,7 @@ extern unsigned int ptrs_per_p4d;
#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */
#ifdef CONFIG_RANDOMIZE_MEMORY
-# define PHYSMEM_END physmem_end
+# define DIRECT_MAP_PHYSMEM_END direct_map_physmem_end
#endif
/*
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index 4b2abce2e3e7..cc62ef70ccc0 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -89,6 +89,7 @@ int set_pages_rw(struct page *page, int numpages);
int set_direct_map_invalid_noflush(struct page *page);
int set_direct_map_default_noflush(struct page *page);
+int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid);
bool kernel_page_present(struct page *page);
extern int kernel_set_to_readonly;
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index 6259f1937fe7..ab9e143ec9fe 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -35,6 +35,7 @@ extern void *text_poke(void *addr, const void *opcode, size_t len);
extern void text_poke_sync(void);
extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
extern void *text_poke_copy(void *addr, const void *opcode, size_t len);
+#define text_poke_copy text_poke_copy
extern void *text_poke_copy_locked(void *addr, const void *opcode, size_t len, bool core_ok);
extern void *text_poke_set(void *addr, int c, size_t len);
extern int poke_int3_handler(struct pt_regs *regs);
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index d17518ca19b8..243843e44e89 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -392,8 +392,10 @@ EXPORT_SYMBOL(BUG_func);
* Rewrite the "call BUG_func" replacement to point to the target of the
* indirect pv_ops call "call *disp(%ip)".
*/
-static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a)
+static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a,
+ struct module *mod)
{
+ u8 *wr_instr = module_writable_address(mod, instr);
void *target, *bug = &BUG_func;
s32 disp;
@@ -403,14 +405,14 @@ static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a)
}
if (a->instrlen != 6 ||
- instr[0] != CALL_RIP_REL_OPCODE ||
- instr[1] != CALL_RIP_REL_MODRM) {
+ wr_instr[0] != CALL_RIP_REL_OPCODE ||
+ wr_instr[1] != CALL_RIP_REL_MODRM) {
pr_err("ALT_FLAG_DIRECT_CALL set for unrecognized indirect call\n");
BUG();
}
/* Skip CALL_RIP_REL_OPCODE and CALL_RIP_REL_MODRM */
- disp = *(s32 *)(instr + 2);
+ disp = *(s32 *)(wr_instr + 2);
#ifdef CONFIG_X86_64
/* ff 15 00 00 00 00 call *0x0(%rip) */
/* target address is stored at "next instruction + disp". */
@@ -448,7 +450,8 @@ static inline u8 * instr_va(struct alt_instr *i)
* to refetch changed I$ lines.
*/
void __init_or_module noinline apply_alternatives(struct alt_instr *start,
- struct alt_instr *end)
+ struct alt_instr *end,
+ struct module *mod)
{
u8 insn_buff[MAX_PATCH_LEN];
u8 *instr, *replacement;
@@ -477,6 +480,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
*/
for (a = start; a < end; a++) {
int insn_buff_sz = 0;
+ u8 *wr_instr, *wr_replacement;
/*
* In case of nested ALTERNATIVE()s the outer alternative might
@@ -490,7 +494,11 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
}
instr = instr_va(a);
+ wr_instr = module_writable_address(mod, instr);
+
replacement = (u8 *)&a->repl_offset + a->repl_offset;
+ wr_replacement = module_writable_address(mod, replacement);
+
BUG_ON(a->instrlen > sizeof(insn_buff));
BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
@@ -501,9 +509,9 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
* patch if feature is *NOT* present.
*/
if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT)) {
- memcpy(insn_buff, instr, a->instrlen);
+ memcpy(insn_buff, wr_instr, a->instrlen);
optimize_nops(instr, insn_buff, a->instrlen);
- text_poke_early(instr, insn_buff, a->instrlen);
+ text_poke_early(wr_instr, insn_buff, a->instrlen);
continue;
}
@@ -513,11 +521,12 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
instr, instr, a->instrlen,
replacement, a->replacementlen, a->flags);
- memcpy(insn_buff, replacement, a->replacementlen);
+ memcpy(insn_buff, wr_replacement, a->replacementlen);
insn_buff_sz = a->replacementlen;
if (a->flags & ALT_FLAG_DIRECT_CALL) {
- insn_buff_sz = alt_replace_call(instr, insn_buff, a);
+ insn_buff_sz = alt_replace_call(instr, insn_buff, a,
+ mod);
if (insn_buff_sz < 0)
continue;
}
@@ -527,11 +536,11 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
apply_relocation(insn_buff, instr, a->instrlen, replacement, a->replacementlen);
- DUMP_BYTES(ALT, instr, a->instrlen, "%px: old_insn: ", instr);
+ DUMP_BYTES(ALT, wr_instr, a->instrlen, "%px: old_insn: ", instr);
DUMP_BYTES(ALT, replacement, a->replacementlen, "%px: rpl_insn: ", replacement);
DUMP_BYTES(ALT, insn_buff, insn_buff_sz, "%px: final_insn: ", instr);
- text_poke_early(instr, insn_buff, insn_buff_sz);
+ text_poke_early(wr_instr, insn_buff, insn_buff_sz);
}
kasan_enable_current();
@@ -722,18 +731,20 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
/*
* Generated by 'objtool --retpoline'.
*/
-void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
+void __init_or_module noinline apply_retpolines(s32 *start, s32 *end,
+ struct module *mod)
{
s32 *s;
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
+ void *wr_addr = module_writable_address(mod, addr);
struct insn insn;
int len, ret;
u8 bytes[16];
u8 op1, op2;
- ret = insn_decode_kernel(&insn, addr);
+ ret = insn_decode_kernel(&insn, wr_addr);
if (WARN_ON_ONCE(ret < 0))
continue;
@@ -761,9 +772,9 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
len = patch_retpoline(addr, &insn, bytes);
if (len == insn.length) {
optimize_nops(addr, bytes, len);
- DUMP_BYTES(RETPOLINE, ((u8*)addr), len, "%px: orig: ", addr);
+ DUMP_BYTES(RETPOLINE, ((u8*)wr_addr), len, "%px: orig: ", addr);
DUMP_BYTES(RETPOLINE, ((u8*)bytes), len, "%px: repl: ", addr);
- text_poke_early(addr, bytes, len);
+ text_poke_early(wr_addr, bytes, len);
}
}
}
@@ -799,7 +810,8 @@ static int patch_return(void *addr, struct insn *insn, u8 *bytes)
return i;
}
-void __init_or_module noinline apply_returns(s32 *start, s32 *end)
+void __init_or_module noinline apply_returns(s32 *start, s32 *end,
+ struct module *mod)
{
s32 *s;
@@ -808,12 +820,13 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end)
for (s = start; s < end; s++) {
void *dest = NULL, *addr = (void *)s + *s;
+ void *wr_addr = module_writable_address(mod, addr);
struct insn insn;
int len, ret;
u8 bytes[16];
u8 op;
- ret = insn_decode_kernel(&insn, addr);
+ ret = insn_decode_kernel(&insn, wr_addr);
if (WARN_ON_ONCE(ret < 0))
continue;
@@ -833,32 +846,35 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end)
len = patch_return(addr, &insn, bytes);
if (len == insn.length) {
- DUMP_BYTES(RET, ((u8*)addr), len, "%px: orig: ", addr);
+ DUMP_BYTES(RET, ((u8*)wr_addr), len, "%px: orig: ", addr);
DUMP_BYTES(RET, ((u8*)bytes), len, "%px: repl: ", addr);
- text_poke_early(addr, bytes, len);
+ text_poke_early(wr_addr, bytes, len);
}
}
}
#else
-void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
+void __init_or_module noinline apply_returns(s32 *start, s32 *end,
+ struct module *mod) { }
#endif /* CONFIG_MITIGATION_RETHUNK */
#else /* !CONFIG_MITIGATION_RETPOLINE || !CONFIG_OBJTOOL */
-void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { }
-void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
+void __init_or_module noinline apply_retpolines(s32 *start, s32 *end,
+ struct module *mod) { }
+void __init_or_module noinline apply_returns(s32 *start, s32 *end,
+ struct module *mod) { }
#endif /* CONFIG_MITIGATION_RETPOLINE && CONFIG_OBJTOOL */
#ifdef CONFIG_X86_KERNEL_IBT
-static void poison_cfi(void *addr);
+static void poison_cfi(void *addr, void *wr_addr);
-static void __init_or_module poison_endbr(void *addr, bool warn)
+static void __init_or_module poison_endbr(void *addr, void *wr_addr, bool warn)
{
u32 endbr, poison = gen_endbr_poison();
- if (WARN_ON_ONCE(get_kernel_nofault(endbr, addr)))
+ if (WARN_ON_ONCE(get_kernel_nofault(endbr, wr_addr)))
return;
if (!is_endbr(endbr)) {
@@ -873,7 +889,7 @@ static void __init_or_module poison_endbr(void *addr, bool warn)
*/
DUMP_BYTES(ENDBR, ((u8*)addr), 4, "%px: orig: ", addr);
DUMP_BYTES(ENDBR, ((u8*)&poison), 4, "%px: repl: ", addr);
- text_poke_early(addr, &poison, 4);
+ text_poke_early(wr_addr, &poison, 4);
}
/*
@@ -882,22 +898,23 @@ static void __init_or_module poison_endbr(void *addr, bool warn)
* Seal the functions for indirect calls by clobbering the ENDBR instructions
* and the kCFI hash value.
*/
-void __init_or_module noinline apply_seal_endbr(s32 *start, s32 *end)
+void __init_or_module noinline apply_seal_endbr(s32 *start, s32 *end, struct module *mod)
{
s32 *s;
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
+ void *wr_addr = module_writable_address(mod, addr);
- poison_endbr(addr, true);
+ poison_endbr(addr, wr_addr, true);
if (IS_ENABLED(CONFIG_FINEIBT))
- poison_cfi(addr - 16);
+ poison_cfi(addr - 16, wr_addr - 16);
}
}
#else
-void __init_or_module apply_seal_endbr(s32 *start, s32 *end) { }
+void __init_or_module apply_seal_endbr(s32 *start, s32 *end, struct module *mod) { }
#endif /* CONFIG_X86_KERNEL_IBT */
@@ -1119,7 +1136,7 @@ static u32 decode_caller_hash(void *addr)
}
/* .retpoline_sites */
-static int cfi_disable_callers(s32 *start, s32 *end)
+static int cfi_disable_callers(s32 *start, s32 *end, struct module *mod)
{
/*
* Disable kCFI by patching in a JMP.d8, this leaves the hash immediate
@@ -1131,20 +1148,23 @@ static int cfi_disable_callers(s32 *start, s32 *end)
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
+ void *wr_addr;
u32 hash;
addr -= fineibt_caller_size;
- hash = decode_caller_hash(addr);
+ wr_addr = module_writable_address(mod, addr);
+ hash = decode_caller_hash(wr_addr);
+
if (!hash) /* nocfi callers */
continue;
- text_poke_early(addr, jmp, 2);
+ text_poke_early(wr_addr, jmp, 2);
}
return 0;
}
-static int cfi_enable_callers(s32 *start, s32 *end)
+static int cfi_enable_callers(s32 *start, s32 *end, struct module *mod)
{
/*
* Re-enable kCFI, undo what cfi_disable_callers() did.
@@ -1154,106 +1174,115 @@ static int cfi_enable_callers(s32 *start, s32 *end)
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
+ void *wr_addr;
u32 hash;
addr -= fineibt_caller_size;
- hash = decode_caller_hash(addr);
+ wr_addr = module_writable_address(mod, addr);
+ hash = decode_caller_hash(wr_addr);
if (!hash) /* nocfi callers */
continue;
- text_poke_early(addr, mov, 2);
+ text_poke_early(wr_addr, mov, 2);
}
return 0;
}
/* .cfi_sites */
-static int cfi_rand_preamble(s32 *start, s32 *end)
+static int cfi_rand_preamble(s32 *start, s32 *end, struct module *mod)
{
s32 *s;
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
+ void *wr_addr = module_writable_address(mod, addr);
u32 hash;
- hash = decode_preamble_hash(addr);
+ hash = decode_preamble_hash(wr_addr);
if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n",
addr, addr, 5, addr))
return -EINVAL;
hash = cfi_rehash(hash);
- text_poke_early(addr + 1, &hash, 4);
+ text_poke_early(wr_addr + 1, &hash, 4);
}
return 0;
}
-static int cfi_rewrite_preamble(s32 *start, s32 *end)
+static int cfi_rewrite_preamble(s32 *start, s32 *end, struct module *mod)
{
s32 *s;
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
+ void *wr_addr = module_writable_address(mod, addr);
u32 hash;
- hash = decode_preamble_hash(addr);
+ hash = decode_preamble_hash(wr_addr);
if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n",
addr, addr, 5, addr))
return -EINVAL;
- text_poke_early(addr, fineibt_preamble_start, fineibt_preamble_size);
- WARN_ON(*(u32 *)(addr + fineibt_preamble_hash) != 0x12345678);
- text_poke_early(addr + fineibt_preamble_hash, &hash, 4);
+ text_poke_early(wr_addr, fineibt_preamble_start, fineibt_preamble_size);
+ WARN_ON(*(u32 *)(wr_addr + fineibt_preamble_hash) != 0x12345678);
+ text_poke_early(wr_addr + fineibt_preamble_hash, &hash, 4);
}
return 0;
}
-static void cfi_rewrite_endbr(s32 *start, s32 *end)
+static void cfi_rewrite_endbr(s32 *start, s32 *end, struct module *mod)
{
s32 *s;
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
+ void *wr_addr = module_writable_address(mod, addr);
- poison_endbr(addr+16, false);
+ poison_endbr(addr + 16, wr_addr + 16, false);
}
}
/* .retpoline_sites */
-static int cfi_rand_callers(s32 *start, s32 *end)
+static int cfi_rand_callers(s32 *start, s32 *end, struct module *mod)
{
s32 *s;
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
+ void *wr_addr;
u32 hash;
addr -= fineibt_caller_size;
- hash = decode_caller_hash(addr);
+ wr_addr = module_writable_address(mod, addr);
+ hash = decode_caller_hash(wr_addr);
if (hash) {
hash = -cfi_rehash(hash);
- text_poke_early(addr + 2, &hash, 4);
+ text_poke_early(wr_addr + 2, &hash, 4);
}
}
return 0;
}
-static int cfi_rewrite_callers(s32 *start, s32 *end)
+static int cfi_rewrite_callers(s32 *start, s32 *end, struct module *mod)
{
s32 *s;
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
+ void *wr_addr;
u32 hash;
addr -= fineibt_caller_size;
- hash = decode_caller_hash(addr);
+ wr_addr = module_writable_address(mod, addr);
+ hash = decode_caller_hash(wr_addr);
if (hash) {
- text_poke_early(addr, fineibt_caller_start, fineibt_caller_size);
- WARN_ON(*(u32 *)(addr + fineibt_caller_hash) != 0x12345678);
- text_poke_early(addr + fineibt_caller_hash, &hash, 4);
+ text_poke_early(wr_addr, fineibt_caller_start, fineibt_caller_size);
+ WARN_ON(*(u32 *)(wr_addr + fineibt_caller_hash) != 0x12345678);
+ text_poke_early(wr_addr + fineibt_caller_hash, &hash, 4);
}
/* rely on apply_retpolines() */
}
@@ -1262,8 +1291,9 @@ static int cfi_rewrite_callers(s32 *start, s32 *end)
}
static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
- s32 *start_cfi, s32 *end_cfi, bool builtin)
+ s32 *start_cfi, s32 *end_cfi, struct module *mod)
{
+ bool builtin = mod ? false : true;
int ret;
if (WARN_ONCE(fineibt_preamble_size != 16,
@@ -1281,7 +1311,7 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
* rewrite them. This disables all CFI. If this succeeds but any of the
* later stages fails, we're without CFI.
*/
- ret = cfi_disable_callers(start_retpoline, end_retpoline);
+ ret = cfi_disable_callers(start_retpoline, end_retpoline, mod);
if (ret)
goto err;
@@ -1292,11 +1322,11 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
cfi_bpf_subprog_hash = cfi_rehash(cfi_bpf_subprog_hash);
}
- ret = cfi_rand_preamble(start_cfi, end_cfi);
+ ret = cfi_rand_preamble(start_cfi, end_cfi, mod);
if (ret)
goto err;
- ret = cfi_rand_callers(start_retpoline, end_retpoline);
+ ret = cfi_rand_callers(start_retpoline, end_retpoline, mod);
if (ret)
goto err;
}
@@ -1308,7 +1338,7 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
return;
case CFI_KCFI:
- ret = cfi_enable_callers(start_retpoline, end_retpoline);
+ ret = cfi_enable_callers(start_retpoline, end_retpoline, mod);
if (ret)
goto err;
@@ -1318,17 +1348,17 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
case CFI_FINEIBT:
/* place the FineIBT preamble at func()-16 */
- ret = cfi_rewrite_preamble(start_cfi, end_cfi);
+ ret = cfi_rewrite_preamble(start_cfi, end_cfi, mod);
if (ret)
goto err;
/* rewrite the callers to target func()-16 */
- ret = cfi_rewrite_callers(start_retpoline, end_retpoline);
+ ret = cfi_rewrite_callers(start_retpoline, end_retpoline, mod);
if (ret)
goto err;
/* now that nobody targets func()+0, remove ENDBR there */
- cfi_rewrite_endbr(start_cfi, end_cfi);
+ cfi_rewrite_endbr(start_cfi, end_cfi, mod);
if (builtin)
pr_info("Using FineIBT CFI\n");
@@ -1347,7 +1377,7 @@ static inline void poison_hash(void *addr)
*(u32 *)addr = 0;
}
-static void poison_cfi(void *addr)
+static void poison_cfi(void *addr, void *wr_addr)
{
switch (cfi_mode) {
case CFI_FINEIBT:
@@ -1359,8 +1389,8 @@ static void poison_cfi(void *addr)
* ud2
* 1: nop
*/
- poison_endbr(addr, false);
- poison_hash(addr + fineibt_preamble_hash);
+ poison_endbr(addr, wr_addr, false);
+ poison_hash(wr_addr + fineibt_preamble_hash);
break;
case CFI_KCFI:
@@ -1369,7 +1399,7 @@ static void poison_cfi(void *addr)
* movl $0, %eax
* .skip 11, 0x90
*/
- poison_hash(addr + 1);
+ poison_hash(wr_addr + 1);
break;
default:
@@ -1380,22 +1410,21 @@ static void poison_cfi(void *addr)
#else
static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
- s32 *start_cfi, s32 *end_cfi, bool builtin)
+ s32 *start_cfi, s32 *end_cfi, struct module *mod)
{
}
#ifdef CONFIG_X86_KERNEL_IBT
-static void poison_cfi(void *addr) { }
+static void poison_cfi(void *addr, void *wr_addr) { }
#endif
#endif
void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
- s32 *start_cfi, s32 *end_cfi)
+ s32 *start_cfi, s32 *end_cfi, struct module *mod)
{
return __apply_fineibt(start_retpoline, end_retpoline,
- start_cfi, end_cfi,
- /* .builtin = */ false);
+ start_cfi, end_cfi, mod);
}
#ifdef CONFIG_SMP
@@ -1692,16 +1721,16 @@ void __init alternative_instructions(void)
paravirt_set_cap();
__apply_fineibt(__retpoline_sites, __retpoline_sites_end,
- __cfi_sites, __cfi_sites_end, true);
+ __cfi_sites, __cfi_sites_end, NULL);
/*
* Rewrite the retpolines, must be done before alternatives since
* those can rewrite the retpoline thunks.
*/
- apply_retpolines(__retpoline_sites, __retpoline_sites_end);
- apply_returns(__return_sites, __return_sites_end);
+ apply_retpolines(__retpoline_sites, __retpoline_sites_end, NULL);
+ apply_returns(__return_sites, __return_sites_end, NULL);
- apply_alternatives(__alt_instructions, __alt_instructions_end);
+ apply_alternatives(__alt_instructions, __alt_instructions_end, NULL);
/*
* Now all calls are established. Apply the call thunks if
@@ -1712,7 +1741,7 @@ void __init alternative_instructions(void)
/*
* Seal all functions that do not have their address taken.
*/
- apply_seal_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end);
+ apply_seal_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end, NULL);
#ifdef CONFIG_SMP
/* Patch to UP if other cpus not imminent. */
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index adb09f78edb2..4dd0ad6c94d6 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -118,10 +118,13 @@ ftrace_modify_code_direct(unsigned long ip, const char *old_code,
return ret;
/* replace the text with the new text */
- if (ftrace_poke_late)
+ if (ftrace_poke_late) {
text_poke_queue((void *)ip, new_code, MCOUNT_INSN_SIZE, NULL);
- else
- text_poke_early((void *)ip, new_code, MCOUNT_INSN_SIZE);
+ } else {
+ mutex_lock(&text_mutex);
+ text_poke((void *)ip, new_code, MCOUNT_INSN_SIZE);
+ mutex_unlock(&text_mutex);
+ }
return 0;
}
@@ -318,7 +321,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 };
unsigned const char retq[] = { RET_INSN_OPCODE, INT3_INSN_OPCODE };
union ftrace_op_code_union op_ptr;
- int ret;
+ void *ret;
if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
start_offset = (unsigned long)ftrace_regs_caller;
@@ -349,15 +352,15 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
npages = DIV_ROUND_UP(*tramp_size, PAGE_SIZE);
/* Copy ftrace_caller onto the trampoline memory */
- ret = copy_from_kernel_nofault(trampoline, (void *)start_offset, size);
- if (WARN_ON(ret < 0))
+ ret = text_poke_copy(trampoline, (void *)start_offset, size);
+ if (WARN_ON(!ret))
goto fail;
ip = trampoline + size;
if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
__text_gen_insn(ip, JMP32_INSN_OPCODE, ip, x86_return_thunk, JMP32_INSN_SIZE);
else
- memcpy(ip, retq, sizeof(retq));
+ text_poke_copy(ip, retq, sizeof(retq));
/* No need to test direct calls on created trampolines */
if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
@@ -365,8 +368,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
ip = trampoline + (jmp_offset - start_offset);
if (WARN_ON(*(char *)ip != 0x75))
goto fail;
- ret = copy_from_kernel_nofault(ip, x86_nops[2], 2);
- if (ret < 0)
+ if (!text_poke_copy(ip, x86_nops[2], 2))
goto fail;
}
@@ -379,7 +381,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
*/
ptr = (unsigned long *)(trampoline + size + RET_SIZE);
- *ptr = (unsigned long)ops;
+ text_poke_copy(ptr, &ops, sizeof(unsigned long));
op_offset -= start_offset;
memcpy(&op_ptr, trampoline + op_offset, OP_REF_SIZE);
@@ -395,7 +397,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
op_ptr.offset = offset;
/* put in the new offset to the ftrace_ops */
- memcpy(trampoline + op_offset, &op_ptr, OP_REF_SIZE);
+ text_poke_copy(trampoline + op_offset, &op_ptr, OP_REF_SIZE);
/* put in the call to the function */
mutex_lock(&text_mutex);
@@ -405,9 +407,9 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
* the depth accounting before the call already.
*/
dest = ftrace_ops_get_func(ops);
- memcpy(trampoline + call_offset,
- text_gen_insn(CALL_INSN_OPCODE, trampoline + call_offset, dest),
- CALL_INSN_SIZE);
+ text_poke_copy_locked(trampoline + call_offset,
+ text_gen_insn(CALL_INSN_OPCODE, trampoline + call_offset, dest),
+ CALL_INSN_SIZE, false);
mutex_unlock(&text_mutex);
/* ALLOC_TRAMP flags lets us know we created it */
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 837450b6e882..8984abd91c00 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -146,18 +146,21 @@ static int __write_relocate_add(Elf64_Shdr *sechdrs,
}
if (apply) {
- if (memcmp(loc, &zero, size)) {
+ void *wr_loc = module_writable_address(me, loc);
+
+ if (memcmp(wr_loc, &zero, size)) {
pr_err("x86/modules: Invalid relocation target, existing value is nonzero for type %d, loc %p, val %Lx\n",
(int)ELF64_R_TYPE(rel[i].r_info), loc, val);
return -ENOEXEC;
}
- write(loc, &val, size);
+ write(wr_loc, &val, size);
} else {
if (memcmp(loc, &val, size)) {
pr_warn("x86/modules: Invalid relocation target, existing value does not match expected value for type %d, loc %p, val %Lx\n",
(int)ELF64_R_TYPE(rel[i].r_info), loc, val);
return -ENOEXEC;
}
+ /* FIXME: needs care for ROX module allocations */
write(loc, &zero, size);
}
}
@@ -224,7 +227,7 @@ int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *me)
{
- const Elf_Shdr *s, *alt = NULL, *locks = NULL,
+ const Elf_Shdr *s, *alt = NULL,
*orc = NULL, *orc_ip = NULL,
*retpolines = NULL, *returns = NULL, *ibt_endbr = NULL,
*calls = NULL, *cfi = NULL;
@@ -233,8 +236,6 @@ int module_finalize(const Elf_Ehdr *hdr,
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
if (!strcmp(".altinstructions", secstrings + s->sh_name))
alt = s;
- if (!strcmp(".smp_locks", secstrings + s->sh_name))
- locks = s;
if (!strcmp(".orc_unwind", secstrings + s->sh_name))
orc = s;
if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name))
@@ -265,20 +266,20 @@ int module_finalize(const Elf_Ehdr *hdr,
csize = cfi->sh_size;
}
- apply_fineibt(rseg, rseg + rsize, cseg, cseg + csize);
+ apply_fineibt(rseg, rseg + rsize, cseg, cseg + csize, me);
}
if (retpolines) {
void *rseg = (void *)retpolines->sh_addr;
- apply_retpolines(rseg, rseg + retpolines->sh_size);
+ apply_retpolines(rseg, rseg + retpolines->sh_size, me);
}
if (returns) {
void *rseg = (void *)returns->sh_addr;
- apply_returns(rseg, rseg + returns->sh_size);
+ apply_returns(rseg, rseg + returns->sh_size, me);
}
if (alt) {
/* patch .altinstructions */
void *aseg = (void *)alt->sh_addr;
- apply_alternatives(aseg, aseg + alt->sh_size);
+ apply_alternatives(aseg, aseg + alt->sh_size, me);
}
if (calls || alt) {
struct callthunk_sites cs = {};
@@ -297,8 +298,28 @@ int module_finalize(const Elf_Ehdr *hdr,
}
if (ibt_endbr) {
void *iseg = (void *)ibt_endbr->sh_addr;
- apply_seal_endbr(iseg, iseg + ibt_endbr->sh_size);
+ apply_seal_endbr(iseg, iseg + ibt_endbr->sh_size, me);
}
+
+ if (orc && orc_ip)
+ unwind_module_init(me, (void *)orc_ip->sh_addr, orc_ip->sh_size,
+ (void *)orc->sh_addr, orc->sh_size);
+
+ return 0;
+}
+
+int module_post_finalize(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs,
+ struct module *me)
+{
+ const Elf_Shdr *s, *locks = NULL;
+ char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+
+ for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
+ if (!strcmp(".smp_locks", secstrings + s->sh_name))
+ locks = s;
+ }
+
if (locks) {
void *lseg = (void *)locks->sh_addr;
void *text = me->mem[MOD_TEXT].base;
@@ -308,10 +329,6 @@ int module_finalize(const Elf_Ehdr *hdr,
text, text_end);
}
- if (orc && orc_ip)
- unwind_module_init(me, (void *)orc_ip->sh_addr, orc_ip->sh_size,
- (void *)orc->sh_addr, orc->sh_size);
-
return 0;
}
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 87f8c9a71c49..776ae6fa7f2d 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -18,6 +18,7 @@
#include <linux/random.h>
#include <linux/uaccess.h>
#include <linux/elf.h>
+#include <linux/hugetlb.h>
#include <asm/elf.h>
#include <asm/ia32.h>
@@ -25,8 +26,10 @@
/*
* Align a virtual address to avoid aliasing in the I$ on AMD F15h.
*/
-static unsigned long get_align_mask(void)
+static unsigned long get_align_mask(struct file *filp)
{
+ if (filp && is_file_hugepages(filp))
+ return huge_page_mask_align(filp);
/* handle 32- and 64-bit case with a single conditional */
if (va_align.flags < 0 || !(va_align.flags & (2 - mmap_is_ia32())))
return 0;
@@ -49,7 +52,7 @@ static unsigned long get_align_mask(void)
*/
static unsigned long get_align_bits(void)
{
- return va_align.bits & get_align_mask();
+ return va_align.bits & get_align_mask(NULL);
}
static int __init control_va_addr_alignment(char *str)
@@ -148,12 +151,15 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len,
info.length = len;
info.low_limit = begin;
info.high_limit = end;
- info.align_offset = pgoff << PAGE_SHIFT;
- info.start_gap = stack_guard_placement(vm_flags);
+ if (!(filp && is_file_hugepages(filp))) {
+ info.align_offset = pgoff << PAGE_SHIFT;
+ info.start_gap = stack_guard_placement(vm_flags);
+ }
if (filp) {
- info.align_mask = get_align_mask();
+ info.align_mask = get_align_mask(filp);
info.align_offset += get_align_bits();
}
+
return vm_unmapped_area(&info);
}
@@ -199,7 +205,10 @@ get_unmapped_area:
info.low_limit = PAGE_SIZE;
info.high_limit = get_mmap_base(0);
- info.start_gap = stack_guard_placement(vm_flags);
+ if (!(filp && is_file_hugepages(filp))) {
+ info.start_gap = stack_guard_placement(vm_flags);
+ info.align_offset = pgoff << PAGE_SHIFT;
+ }
/*
* If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
@@ -211,9 +220,8 @@ get_unmapped_area:
if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall())
info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
- info.align_offset = pgoff << PAGE_SHIFT;
if (filp) {
- info.align_mask = get_align_mask();
+ info.align_mask = get_align_mask(filp);
info.align_offset += get_align_bits();
}
addr = vm_unmapped_area(&info);
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 807a5859a3c4..58f7f2bd535d 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -19,107 +19,6 @@
#include <asm/tlbflush.h>
#include <asm/elf.h>
-#ifdef CONFIG_HUGETLB_PAGE
-static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
- unsigned long addr, unsigned long len,
- unsigned long pgoff, unsigned long flags)
-{
- struct hstate *h = hstate_file(file);
- struct vm_unmapped_area_info info = {};
-
- info.length = len;
- info.low_limit = get_mmap_base(1);
-
- /*
- * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
- * in the full address space.
- */
- info.high_limit = in_32bit_syscall() ?
- task_size_32bit() : task_size_64bit(addr > DEFAULT_MAP_WINDOW);
-
- info.align_mask = PAGE_MASK & ~huge_page_mask(h);
- return vm_unmapped_area(&info);
-}
-
-static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
- unsigned long addr, unsigned long len,
- unsigned long pgoff, unsigned long flags)
-{
- struct hstate *h = hstate_file(file);
- struct vm_unmapped_area_info info = {};
-
- info.flags = VM_UNMAPPED_AREA_TOPDOWN;
- info.length = len;
- info.low_limit = PAGE_SIZE;
- info.high_limit = get_mmap_base(0);
-
- /*
- * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
- * in the full address space.
- */
- if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall())
- info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
-
- info.align_mask = PAGE_MASK & ~huge_page_mask(h);
- addr = vm_unmapped_area(&info);
-
- /*
- * A failed mmap() very likely causes application failure,
- * so fall back to the bottom-up function here. This scenario
- * can happen with large stack limits and large mmap()
- * allocations.
- */
- if (addr & ~PAGE_MASK) {
- VM_BUG_ON(addr != -ENOMEM);
- info.flags = 0;
- info.low_limit = TASK_UNMAPPED_BASE;
- info.high_limit = TASK_SIZE_LOW;
- addr = vm_unmapped_area(&info);
- }
-
- return addr;
-}
-
-unsigned long
-hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
- unsigned long len, unsigned long pgoff, unsigned long flags)
-{
- struct hstate *h = hstate_file(file);
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
-
- if (len & ~huge_page_mask(h))
- return -EINVAL;
-
- if (len > TASK_SIZE)
- return -ENOMEM;
-
- /* No address checking. See comment at mmap_address_hint_valid() */
- if (flags & MAP_FIXED) {
- if (prepare_hugepage_range(file, addr, len))
- return -EINVAL;
- return addr;
- }
-
- if (addr) {
- addr &= huge_page_mask(h);
- if (!mmap_address_hint_valid(addr, len))
- goto get_unmapped_area;
-
- vma = find_vma(mm, addr);
- if (!vma || addr + len <= vm_start_gap(vma))
- return addr;
- }
-
-get_unmapped_area:
- if (!test_bit(MMF_TOPDOWN, &mm->flags))
- return hugetlb_get_unmapped_area_bottomup(file, addr, len,
- pgoff, flags);
- else
- return hugetlb_get_unmapped_area_topdown(file, addr, len,
- pgoff, flags);
-}
-#endif /* CONFIG_HUGETLB_PAGE */
#ifdef CONFIG_X86_64
bool __init arch_hugetlb_valid_size(unsigned long size)
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 101725c149c4..c6d29f283001 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -1058,18 +1058,53 @@ unsigned long arch_max_swapfile_size(void)
#ifdef CONFIG_EXECMEM
static struct execmem_info execmem_info __ro_after_init;
+#ifdef CONFIG_ARCH_HAS_EXECMEM_ROX
+void execmem_fill_trapping_insns(void *ptr, size_t size, bool writeable)
+{
+ /* fill memory with INT3 instructions */
+ if (writeable)
+ memset(ptr, INT3_INSN_OPCODE, size);
+ else
+ text_poke_set(ptr, INT3_INSN_OPCODE, size);
+}
+#endif
+
struct execmem_info __init *execmem_arch_setup(void)
{
unsigned long start, offset = 0;
+ enum execmem_range_flags flags;
+ pgprot_t pgprot;
if (kaslr_enabled())
offset = get_random_u32_inclusive(1, 1024) * PAGE_SIZE;
start = MODULES_VADDR + offset;
+ if (IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX)) {
+ pgprot = PAGE_KERNEL_ROX;
+ flags = EXECMEM_KASAN_SHADOW | EXECMEM_ROX_CACHE;
+ } else {
+ pgprot = PAGE_KERNEL;
+ flags = EXECMEM_KASAN_SHADOW;
+ }
+
execmem_info = (struct execmem_info){
.ranges = {
- [EXECMEM_DEFAULT] = {
+ [EXECMEM_MODULE_TEXT] = {
+ .flags = flags,
+ .start = start,
+ .end = MODULES_END,
+ .pgprot = pgprot,
+ .alignment = MODULE_ALIGN,
+ },
+ [EXECMEM_KPROBES ... EXECMEM_BPF] = {
+ .flags = EXECMEM_KASAN_SHADOW,
+ .start = start,
+ .end = MODULES_END,
+ .pgprot = PAGE_KERNEL,
+ .alignment = MODULE_ALIGN,
+ },
+ [EXECMEM_MODULE_DATA] = {
.flags = EXECMEM_KASAN_SHADOW,
.start = start,
.end = MODULES_END,
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index ff253648706f..01ea7c6df303 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -961,7 +961,7 @@ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
unsigned long end = ((start_pfn + nr_pages) << PAGE_SHIFT) - 1;
int ret;
- if (WARN_ON_ONCE(end > PHYSMEM_END))
+ if (WARN_ON_ONCE(end > DIRECT_MAP_PHYSMEM_END))
return -ERANGE;
ret = __add_pages(nid, start_pfn, nr_pages, params);
@@ -985,22 +985,32 @@ int arch_add_memory(int nid, u64 start, u64 size,
return add_pages(nid, start_pfn, nr_pages, params);
}
-static void __meminit free_pagetable(struct page *page, int order)
+static void free_reserved_pages(struct page *page, unsigned long nr_pages)
{
- unsigned long magic;
- unsigned int nr_pages = 1 << order;
+ while (nr_pages--)
+ free_reserved_page(page++);
+}
+static void __meminit free_pagetable(struct page *page, int order)
+{
/* bootmem page has reserved flag */
if (PageReserved(page)) {
- magic = page->index;
- if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
+ unsigned long nr_pages = 1 << order;
+#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE
+ enum bootmem_type type = bootmem_type(page);
+
+ if (type == SECTION_INFO || type == MIX_SECTION_INFO) {
while (nr_pages--)
put_page_bootmem(page++);
- } else
- while (nr_pages--)
- free_reserved_page(page++);
- } else
+ } else {
+ free_reserved_pages(page, nr_pages);
+ }
+#else
+ free_reserved_pages(page, nr_pages);
+#endif
+ } else {
free_pages((unsigned long)page_address(page), order);
+ }
}
static void __meminit free_hugepage_table(struct page *page,
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index e17e6e27b7ec..11a93542d198 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -52,7 +52,7 @@ static __initdata struct kaslr_memory_region {
} kaslr_regions[] = {
{
.base = &page_offset_base,
- .end = &physmem_end,
+ .end = &direct_map_physmem_end,
},
{
.base = &vmalloc_base,
@@ -62,8 +62,12 @@ static __initdata struct kaslr_memory_region {
},
};
-/* The end of the possible address space for physical memory */
-unsigned long physmem_end __ro_after_init;
+/*
+ * The end of the physical address space that can be mapped directly by the
+ * kernel. This starts out at ((1<<MAX_PHYSMEM_BITS) - 1), but KASLR may reduce
+ * that in order to increase the available entropy for mapping other regions.
+ */
+unsigned long direct_map_physmem_end __ro_after_init;
/* Get size in bytes used by the memory region */
static inline unsigned long get_padding(struct kaslr_memory_region *region)
@@ -94,7 +98,7 @@ void __init kernel_randomize_memory(void)
BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);
/* Preset the end of the possible address space for physical memory */
- physmem_end = ((1ULL << MAX_PHYSMEM_BITS) - 1);
+ direct_map_physmem_end = ((1ULL << MAX_PHYSMEM_BITS) - 1);
if (!kaslr_memory_enabled())
return;
@@ -145,7 +149,7 @@ void __init kernel_randomize_memory(void)
vaddr += get_padding(&kaslr_regions[i]);
/*
* KASLR trims the maximum possible size of the
- * direct-map. Update the physmem_end boundary.
+ * direct-map. Update the direct_map_physmem_end boundary.
* No rounding required as the region starts
* PUD aligned and size is in units of TB.
*/
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 44f7b2ea6a07..069e421c2247 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -2444,6 +2444,14 @@ int set_direct_map_default_noflush(struct page *page)
return __set_pages_p(page, 1);
}
+int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid)
+{
+ if (valid)
+ return __set_pages_p(page, nr);
+
+ return __set_pages_np(page, nr);
+}
+
#ifdef CONFIG_DEBUG_PAGEALLOC
void __kernel_map_pages(struct page *page, int numpages, int enable)
{