author    Oliver Upton <oupton@kernel.org>  2025-12-01 00:47:41 -0800
committer Oliver Upton <oupton@kernel.org>  2025-12-01 00:47:41 -0800
commit    3eef0c83c3f3e58933e98e678ddf4e95457d4d14 (patch)
tree      42687a76bd29bd078a4bcc35a9ed1b38f87222cb /arch
parent    938309b028baa877909782bd4c0befee8d46e059 (diff)
parent    d52aca1635654805a682afef176473793a63b588 (diff)
Merge branch 'kvm-arm64/nv-xnx-haf' into kvmarm/next
* kvm-arm64/nv-xnx-haf: (22 commits)
  : Support for FEAT_XNX and FEAT_HAF in nested
  :
  : Add support for a couple of MMU-related features that weren't
  : implemented by KVM's software page table walk:
  :
  : - FEAT_XNX: Allows the hypervisor to describe execute permissions
  :   separately for EL0 and EL1
  :
  : - FEAT_HAF: Hardware update of the Access Flag, which in the context of
  :   nested means software walkers must also set the Access Flag.
  :
  : The series also adds some basic support for testing KVM's emulation of
  : the AT instruction, including the implementation detail that AT sets the
  : Access Flag in KVM.
  KVM: arm64: at: Update AF on software walk only if VM has FEAT_HAFDBS
  KVM: arm64: at: Use correct HA bit in TCR_EL2 when regime is EL2
  KVM: arm64: Document KVM_PGTABLE_PROT_{UX,PX}
  KVM: arm64: Fix spelling mistake "Unexpeced" -> "Unexpected"
  KVM: arm64: Add break to default case in kvm_pgtable_stage2_pte_prot()
  KVM: arm64: Add endian casting to kvm_swap_s[12]_desc()
  KVM: arm64: Fix compilation when CONFIG_ARM64_USE_LSE_ATOMICS=n
  KVM: arm64: selftests: Add test for AT emulation
  KVM: arm64: nv: Expose hardware access flag management to NV guests
  KVM: arm64: nv: Implement HW access flag management in stage-2 SW PTW
  KVM: arm64: Implement HW access flag management in stage-1 SW PTW
  KVM: arm64: Propagate PTW errors up to AT emulation
  KVM: arm64: Add helper for swapping guest descriptor
  KVM: arm64: nv: Use pgtable definitions in stage-2 walk
  KVM: arm64: Handle endianness in read helper for emulated PTW
  KVM: arm64: nv: Stop passing vCPU through void ptr in S2 PTW
  KVM: arm64: Call helper for reading descriptors directly
  KVM: arm64: nv: Advertise support for FEAT_XNX
  KVM: arm64: Teach ptdump about FEAT_XNX permissions
  KVM: arm64: nv: Forward FEAT_XNX permissions to the shadow stage-2
  ...

Signed-off-by: Oliver Upton <oupton@kernel.org>
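As background for the diff below: FEAT_XNX widens the stage-2 XN attribute from a single bit (bit 54) to a two-bit field XN[1:0] in descriptor bits [54:53]. Here is a minimal standalone sketch of the decode, using the same encodings as the kvm_s2_trans_exec_el0/el1 helpers added by this series (the s2_exec_el0/s2_exec_el1 names and the has_xnx flag are illustrative only, not kernel API):

#include <stdbool.h>
#include <stdint.h>

#define S2_XN_SHIFT	53	/* stage-2 XN[1:0] lives in descriptor bits [54:53] */

static bool s2_exec_el0(uint64_t desc, bool has_xnx)
{
	uint8_t xn = (desc >> S2_XN_SHIFT) & 0b11;

	if (!has_xnx)
		xn &= 0b10;	/* without FEAT_XNX only XN[1] (bit 54) is defined */

	/* 0b00: executable at EL1 and EL0, 0b01: executable at EL0 only */
	return xn == 0b00 || xn == 0b01;
}

static bool s2_exec_el1(uint64_t desc, bool has_xnx)
{
	uint8_t xn = (desc >> S2_XN_SHIFT) & 0b11;

	if (!has_xnx)
		xn &= 0b10;

	/* 0b00: executable at EL1 and EL0, 0b11: executable at EL1 only */
	return xn == 0b00 || xn == 0b11;
}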
Diffstat (limited to 'arch')
-rw-r--r--  arch/arm64/include/asm/kvm_arm.h     |   1
-rw-r--r--  arch/arm64/include/asm/kvm_asm.h     |   6
-rw-r--r--  arch/arm64/include/asm/kvm_nested.h  |  40
-rw-r--r--  arch/arm64/include/asm/kvm_pgtable.h |  21
-rw-r--r--  arch/arm64/kernel/cpufeature.c       |   7
-rw-r--r--  arch/arm64/kvm/at.c                  | 196
-rw-r--r--  arch/arm64/kvm/hyp/pgtable.c         |  59
-rw-r--r--  arch/arm64/kvm/mmu.c                 |  28
-rw-r--r--  arch/arm64/kvm/nested.c              | 123
-rw-r--r--  arch/arm64/kvm/ptdump.c              |  35
-rw-r--r--  arch/arm64/kvm/sys_regs.c            |   9
-rw-r--r--  arch/arm64/tools/cpucaps             |   1
12 files changed, 430 insertions(+), 96 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 1da290aeedce..e500600e4b9b 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -111,6 +111,7 @@
#define TCR_EL2_DS (1UL << 32)
#define TCR_EL2_RES1 ((1U << 31) | (1 << 23))
#define TCR_EL2_HPD (1 << 24)
+#define TCR_EL2_HA (1 << 21)
#define TCR_EL2_TBI (1 << 20)
#define TCR_EL2_PS_SHIFT 16
#define TCR_EL2_PS_MASK (7 << TCR_EL2_PS_SHIFT)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index f8adbd535b4a..4b17e822afec 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -246,9 +246,9 @@ extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
extern int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding);
extern void __kvm_timer_set_cntvoff(u64 cntvoff);
-extern void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
-extern void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
-extern void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
+extern int __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
+extern int __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
+extern int __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index f7c06a840963..905c658057a4 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -120,9 +120,42 @@ static inline bool kvm_s2_trans_writable(struct kvm_s2_trans *trans)
return trans->writable;
}
-static inline bool kvm_s2_trans_executable(struct kvm_s2_trans *trans)
+static inline bool kvm_has_xnx(struct kvm *kvm)
{
- return !(trans->desc & BIT(54));
+ return cpus_have_final_cap(ARM64_HAS_XNX) &&
+ kvm_has_feat(kvm, ID_AA64MMFR1_EL1, XNX, IMP);
+}
+
+static inline bool kvm_s2_trans_exec_el0(struct kvm *kvm, struct kvm_s2_trans *trans)
+{
+ u8 xn = FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, trans->desc);
+
+ if (!kvm_has_xnx(kvm))
+ xn &= 0b10;
+
+ switch (xn) {
+ case 0b00:
+ case 0b01:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool kvm_s2_trans_exec_el1(struct kvm *kvm, struct kvm_s2_trans *trans)
+{
+ u8 xn = FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, trans->desc);
+
+ if (!kvm_has_xnx(kvm))
+ xn &= 0b10;
+
+ switch (xn) {
+ case 0b00:
+ case 0b11:
+ return true;
+ default:
+ return false;
+ }
}
extern int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
@@ -320,6 +353,7 @@ struct s1_walk_info {
bool be;
bool s2;
bool pa52bit;
+ bool ha;
};
struct s1_walk_result {
@@ -370,4 +404,6 @@ void kvm_handle_s1e2_tlbi(struct kvm_vcpu *vcpu, u32 inst, u64 val);
(FIX_VNCR - __c); \
})
+int __kvm_at_swap_desc(struct kvm *kvm, gpa_t ipa, u64 old, u64 new);
+
#endif /* __ARM64_KVM_NESTED_H */
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 1246216616b5..fc02de43c68d 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -89,7 +89,7 @@ typedef u64 kvm_pte_t;
#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54)
-#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54)
+#define KVM_PTE_LEAF_ATTR_HI_S2_XN GENMASK(54, 53)
#define KVM_PTE_LEAF_ATTR_HI_S1_GP BIT(50)
@@ -240,7 +240,9 @@ enum kvm_pgtable_stage2_flags {
/**
* enum kvm_pgtable_prot - Page-table permissions and attributes.
- * @KVM_PGTABLE_PROT_X: Execute permission.
+ * @KVM_PGTABLE_PROT_UX: Unprivileged execute permission.
+ * @KVM_PGTABLE_PROT_PX: Privileged execute permission.
+ * @KVM_PGTABLE_PROT_X: Privileged and unprivileged execute permission.
* @KVM_PGTABLE_PROT_W: Write permission.
* @KVM_PGTABLE_PROT_R: Read permission.
* @KVM_PGTABLE_PROT_DEVICE: Device attributes.
@@ -251,12 +253,15 @@ enum kvm_pgtable_stage2_flags {
* @KVM_PGTABLE_PROT_SW3: Software bit 3.
*/
enum kvm_pgtable_prot {
- KVM_PGTABLE_PROT_X = BIT(0),
- KVM_PGTABLE_PROT_W = BIT(1),
- KVM_PGTABLE_PROT_R = BIT(2),
-
- KVM_PGTABLE_PROT_DEVICE = BIT(3),
- KVM_PGTABLE_PROT_NORMAL_NC = BIT(4),
+ KVM_PGTABLE_PROT_PX = BIT(0),
+ KVM_PGTABLE_PROT_UX = BIT(1),
+ KVM_PGTABLE_PROT_X = KVM_PGTABLE_PROT_PX |
+ KVM_PGTABLE_PROT_UX,
+ KVM_PGTABLE_PROT_W = BIT(2),
+ KVM_PGTABLE_PROT_R = BIT(3),
+
+ KVM_PGTABLE_PROT_DEVICE = BIT(4),
+ KVM_PGTABLE_PROT_NORMAL_NC = BIT(5),
KVM_PGTABLE_PROT_SW0 = BIT(55),
KVM_PGTABLE_PROT_SW1 = BIT(56),
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 75fb9a0efcc8..d34dcc563086 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -3140,6 +3140,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.capability = ARM64_HAS_GICV5_LEGACY,
.matches = test_has_gicv5_legacy,
},
+ {
+ .desc = "XNX",
+ .capability = ARM64_HAS_XNX,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, XNX, IMP)
+ },
{},
};
diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
index be26d5aa668c..53bf70126f81 100644
--- a/arch/arm64/kvm/at.c
+++ b/arch/arm64/kvm/at.c
@@ -346,6 +346,11 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x);
+ wi->ha = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HAFDBS, AF);
+ wi->ha &= (wi->regime == TR_EL2 ?
+ FIELD_GET(TCR_EL2_HA, tcr) :
+ FIELD_GET(TCR_HA, tcr));
+
return 0;
addrsz:
@@ -362,10 +367,42 @@ transfault:
return -EFAULT;
}
+static int kvm_read_s1_desc(struct kvm_vcpu *vcpu, u64 pa, u64 *desc,
+ struct s1_walk_info *wi)
+{
+ u64 val;
+ int r;
+
+ r = kvm_read_guest(vcpu->kvm, pa, &val, sizeof(val));
+ if (r)
+ return r;
+
+ if (wi->be)
+ *desc = be64_to_cpu((__force __be64)val);
+ else
+ *desc = le64_to_cpu((__force __le64)val);
+
+ return 0;
+}
+
+static int kvm_swap_s1_desc(struct kvm_vcpu *vcpu, u64 pa, u64 old, u64 new,
+ struct s1_walk_info *wi)
+{
+ if (wi->be) {
+ old = (__force u64)cpu_to_be64(old);
+ new = (__force u64)cpu_to_be64(new);
+ } else {
+ old = (__force u64)cpu_to_le64(old);
+ new = (__force u64)cpu_to_le64(new);
+ }
+
+ return __kvm_at_swap_desc(vcpu->kvm, pa, old, new);
+}
+
static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
struct s1_walk_result *wr, u64 va)
{
- u64 va_top, va_bottom, baddr, desc;
+ u64 va_top, va_bottom, baddr, desc, new_desc, ipa;
int level, stride, ret;
level = wi->sl;
@@ -375,7 +412,7 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
va_top = get_ia_size(wi) - 1;
while (1) {
- u64 index, ipa;
+ u64 index;
va_bottom = (3 - level) * stride + wi->pgshift;
index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3);
@@ -414,16 +451,13 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
return ret;
}
- ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc));
+ ret = kvm_read_s1_desc(vcpu, ipa, &desc, wi);
if (ret) {
fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false);
return ret;
}
- if (wi->be)
- desc = be64_to_cpu((__force __be64)desc);
- else
- desc = le64_to_cpu((__force __le64)desc);
+ new_desc = desc;
/* Invalid descriptor */
if (!(desc & BIT(0)))
@@ -477,6 +511,17 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
if (check_output_size(baddr & GENMASK(52, va_bottom), wi))
goto addrsz;
+ if (wi->ha)
+ new_desc |= PTE_AF;
+
+ if (new_desc != desc) {
+ ret = kvm_swap_s1_desc(vcpu, ipa, desc, new_desc, wi);
+ if (ret)
+ return ret;
+
+ desc = new_desc;
+ }
+
if (!(desc & PTE_AF)) {
fail_s1_walk(wr, ESR_ELx_FSC_ACCESS_L(level), false);
return -EACCES;
@@ -1221,7 +1266,7 @@ static void compute_s1_permissions(struct kvm_vcpu *vcpu,
wr->pr &= !pan;
}
-static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
+static int handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr, u64 *par)
{
struct s1_walk_result wr = {};
struct s1_walk_info wi = {};
@@ -1246,6 +1291,11 @@ static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ /*
+ * Race to update a descriptor -- restart the walk.
+ */
+ if (ret == -EAGAIN)
+ return ret;
if (ret)
goto compute_par;
@@ -1279,7 +1329,8 @@ static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false);
compute_par:
- return compute_par_s1(vcpu, &wi, &wr);
+ *par = compute_par_s1(vcpu, &wi, &wr);
+ return 0;
}
/*
@@ -1407,9 +1458,10 @@ static bool par_check_s1_access_fault(u64 par)
!(par & SYS_PAR_EL1_S));
}
-void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
+int __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr);
+ int ret;
/*
* If PAR_EL1 reports that AT failed on a S1 permission or access
@@ -1421,15 +1473,20 @@ void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
*/
if ((par & SYS_PAR_EL1_F) &&
!par_check_s1_perm_fault(par) &&
- !par_check_s1_access_fault(par))
- par = handle_at_slow(vcpu, op, vaddr);
+ !par_check_s1_access_fault(par)) {
+ ret = handle_at_slow(vcpu, op, vaddr, &par);
+ if (ret)
+ return ret;
+ }
vcpu_write_sys_reg(vcpu, par, PAR_EL1);
+ return 0;
}
-void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
+int __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
u64 par;
+ int ret;
/*
* We've trapped, so everything is live on the CPU. As we will be
@@ -1476,13 +1533,17 @@ void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
}
/* We failed the translation, let's replay it in slow motion */
- if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
- par = handle_at_slow(vcpu, op, vaddr);
+ if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par)) {
+ ret = handle_at_slow(vcpu, op, vaddr, &par);
+ if (ret)
+ return ret;
+ }
vcpu_write_sys_reg(vcpu, par, PAR_EL1);
+ return 0;
}
-void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
+int __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
struct kvm_s2_trans out = {};
u64 ipa, par;
@@ -1509,13 +1570,13 @@ void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
break;
default:
WARN_ON_ONCE(1);
- return;
+ return 0;
}
__kvm_at_s1e01(vcpu, op, vaddr);
par = vcpu_read_sys_reg(vcpu, PAR_EL1);
if (par & SYS_PAR_EL1_F)
- return;
+ return 0;
/*
* If we only have a single stage of translation (EL2&0), exit
@@ -1523,14 +1584,14 @@ void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
*/
if (compute_translation_regime(vcpu, op) == TR_EL20 ||
!(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC)))
- return;
+ return 0;
/* Do the stage-2 translation */
ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));
out.esr = 0;
ret = kvm_walk_nested_s2(vcpu, ipa, &out);
if (ret < 0)
- return;
+ return ret;
/* Check the access permission */
if (!out.esr &&
@@ -1539,6 +1600,7 @@ void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
par = compute_par_s12(vcpu, par, &out);
vcpu_write_sys_reg(vcpu, par, PAR_EL1);
+ return 0;
}
/*
@@ -1637,3 +1699,97 @@ int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level)
return ret;
}
}
+
+#ifdef CONFIG_ARM64_LSE_ATOMICS
+static int __lse_swap_desc(u64 __user *ptep, u64 old, u64 new)
+{
+ u64 tmp = old;
+ int ret = 0;
+
+ uaccess_enable_privileged();
+
+ asm volatile(__LSE_PREAMBLE
+ "1: cas %[old], %[new], %[addr]\n"
+ "2:\n"
+ _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w[ret])
+ : [old] "+r" (old), [addr] "+Q" (*ptep), [ret] "+r" (ret)
+ : [new] "r" (new)
+ : "memory");
+
+ uaccess_disable_privileged();
+
+ if (ret)
+ return ret;
+ if (tmp != old)
+ return -EAGAIN;
+
+ return ret;
+}
+#else
+static int __lse_swap_desc(u64 __user *ptep, u64 old, u64 new)
+{
+ return -EINVAL;
+}
+#endif
+
+static int __llsc_swap_desc(u64 __user *ptep, u64 old, u64 new)
+{
+ int ret = 1;
+ u64 tmp;
+
+ uaccess_enable_privileged();
+
+ asm volatile("prfm pstl1strm, %[addr]\n"
+ "1: ldxr %[tmp], %[addr]\n"
+ "sub %[tmp], %[tmp], %[old]\n"
+ "cbnz %[tmp], 3f\n"
+ "2: stlxr %w[ret], %[new], %[addr]\n"
+ "3:\n"
+ _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %w[ret])
+ _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %w[ret])
+ : [ret] "+r" (ret), [addr] "+Q" (*ptep), [tmp] "=&r" (tmp)
+ : [old] "r" (old), [new] "r" (new)
+ : "memory");
+
+ uaccess_disable_privileged();
+
+ /* STLXR didn't update the descriptor, or the compare failed */
+ if (ret == 1)
+ return -EAGAIN;
+
+ return ret;
+}
+
+int __kvm_at_swap_desc(struct kvm *kvm, gpa_t ipa, u64 old, u64 new)
+{
+ struct kvm_memory_slot *slot;
+ unsigned long hva;
+ u64 __user *ptep;
+ bool writable;
+ int offset;
+ gfn_t gfn;
+ int r;
+
+ lockdep_assert(srcu_read_lock_held(&kvm->srcu));
+
+ gfn = ipa >> PAGE_SHIFT;
+ offset = offset_in_page(ipa);
+ slot = gfn_to_memslot(kvm, gfn);
+ hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);
+ if (kvm_is_error_hva(hva))
+ return -EINVAL;
+ if (!writable)
+ return -EPERM;
+
+ ptep = (u64 __user *)(hva + offset);
+ if (cpus_have_final_cap(ARM64_HAS_LSE_ATOMICS))
+ r = __lse_swap_desc(ptep, old, new);
+ else
+ r = __llsc_swap_desc(ptep, old, new);
+
+ if (r < 0)
+ return r;
+
+ mark_page_dirty_in_slot(kvm, slot, gfn);
+ return 0;
+}
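Stripped of the uaccess plumbing, __lse_swap_desc() and __llsc_swap_desc() both implement a single compare-and-swap on a guest-owned descriptor. The same contract, sketched with a compiler builtin (swap_desc() is a hypothetical stand-in; the real helpers additionally do uaccess enable/disable and exception-table fault fixup, and their exact memory ordering differs between the LSE and LL/SC variants):

#include <errno.h>
#include <stdint.h>

/* Publish 'new' only if the descriptor still reads as 'old'. */
static int swap_desc(uint64_t *ptep, uint64_t old, uint64_t new)
{
	if (!__atomic_compare_exchange_n(ptep, &old, new, false,
					 __ATOMIC_RELAXED, __ATOMIC_RELAXED))
		return -EAGAIN;	/* lost the race; the caller restarts the walk */

	return 0;
}

This is why walkers that set the Access Flag treat -EAGAIN as "replay the translation": the descriptor changed underfoot and the cached copy is stale.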
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 0882896dbf8f..947ac1a951a5 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -661,11 +661,37 @@ void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
#define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt))
+static int stage2_set_xn_attr(enum kvm_pgtable_prot prot, kvm_pte_t *attr)
+{
+ bool px, ux;
+ u8 xn;
+
+ px = prot & KVM_PGTABLE_PROT_PX;
+ ux = prot & KVM_PGTABLE_PROT_UX;
+
+ if (!cpus_have_final_cap(ARM64_HAS_XNX) && px != ux)
+ return -EINVAL;
+
+ if (px && ux)
+ xn = 0b00;
+ else if (!px && ux)
+ xn = 0b01;
+ else if (!px && !ux)
+ xn = 0b10;
+ else
+ xn = 0b11;
+
+ *attr &= ~KVM_PTE_LEAF_ATTR_HI_S2_XN;
+ *attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_HI_S2_XN, xn);
+ return 0;
+}
+
static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot,
kvm_pte_t *ptep)
{
kvm_pte_t attr;
u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;
+ int r;
switch (prot & (KVM_PGTABLE_PROT_DEVICE |
KVM_PGTABLE_PROT_NORMAL_NC)) {
@@ -685,8 +711,9 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p
attr = KVM_S2_MEMATTR(pgt, NORMAL);
}
- if (!(prot & KVM_PGTABLE_PROT_X))
- attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
+ r = stage2_set_xn_attr(prot, &attr);
+ if (r)
+ return r;
if (prot & KVM_PGTABLE_PROT_R)
attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;
@@ -715,8 +742,20 @@ enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte)
prot |= KVM_PGTABLE_PROT_R;
if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W)
prot |= KVM_PGTABLE_PROT_W;
- if (!(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN))
- prot |= KVM_PGTABLE_PROT_X;
+
+ switch (FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, pte)) {
+ case 0b00:
+ prot |= KVM_PGTABLE_PROT_PX | KVM_PGTABLE_PROT_UX;
+ break;
+ case 0b01:
+ prot |= KVM_PGTABLE_PROT_UX;
+ break;
+ case 0b11:
+ prot |= KVM_PGTABLE_PROT_PX;
+ break;
+ default:
+ break;
+ }
return prot;
}
@@ -1290,9 +1329,9 @@ bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr,
int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
enum kvm_pgtable_prot prot, enum kvm_pgtable_walk_flags flags)
{
- int ret;
+ kvm_pte_t xn = 0, set = 0, clr = 0;
s8 level;
- kvm_pte_t set = 0, clr = 0;
+ int ret;
if (prot & KVM_PTE_LEAF_ATTR_HI_SW)
return -EINVAL;
@@ -1303,8 +1342,12 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
if (prot & KVM_PGTABLE_PROT_W)
set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;
- if (prot & KVM_PGTABLE_PROT_X)
- clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
+ ret = stage2_set_xn_attr(prot, &xn);
+ if (ret)
+ return ret;
+
+ set |= xn & KVM_PTE_LEAF_ATTR_HI_S2_XN;
+ clr |= ~xn & KVM_PTE_LEAF_ATTR_HI_S2_XN;
ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level, flags);
if (!ret || ret == -EAGAIN)
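Because XN is now a two-bit field, kvm_pgtable_stage2_relax_perms() can no longer simply clear the XN bit; it derives the target field value and splits it into a set mask (bits that must become 1) and a clear mask (bits that must become 0). Assuming stage2_update_leaf_attrs() applies the masks roughly as below (a sketch of the intent, not the actual walker), the field is rewritten exactly:

#include <stdint.h>

/* Rewrite leaf attribute bits from set/clear masks (clear first, then set). */
static uint64_t apply_leaf_attrs(uint64_t pte, uint64_t set, uint64_t clr)
{
	return (pte & ~clr) | set;
}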
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index fe3341d19b92..5ab0cfa08343 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1553,6 +1553,16 @@ static void adjust_nested_fault_perms(struct kvm_s2_trans *nested,
*prot |= kvm_encode_nested_level(nested);
}
+static void adjust_nested_exec_perms(struct kvm *kvm,
+ struct kvm_s2_trans *nested,
+ enum kvm_pgtable_prot *prot)
+{
+ if (!kvm_s2_trans_exec_el0(kvm, nested))
+ *prot &= ~KVM_PGTABLE_PROT_UX;
+ if (!kvm_s2_trans_exec_el1(kvm, nested))
+ *prot &= ~KVM_PGTABLE_PROT_PX;
+}
+
#define KVM_PGTABLE_WALK_MEMABORT_FLAGS (KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED)
static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
@@ -1604,11 +1614,12 @@ static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (writable)
prot |= KVM_PGTABLE_PROT_W;
- if (exec_fault ||
- (cpus_have_final_cap(ARM64_HAS_CACHE_DIC) &&
- (!nested || kvm_s2_trans_executable(nested))))
+ if (exec_fault || cpus_have_final_cap(ARM64_HAS_CACHE_DIC))
prot |= KVM_PGTABLE_PROT_X;
+ if (nested)
+ adjust_nested_exec_perms(kvm, nested, &prot);
+
kvm_fault_lock(kvm);
if (mmu_invalidate_retry(kvm, mmu_seq)) {
ret = -EAGAIN;
@@ -1883,11 +1894,13 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
prot |= KVM_PGTABLE_PROT_NORMAL_NC;
else
prot |= KVM_PGTABLE_PROT_DEVICE;
- } else if (cpus_have_final_cap(ARM64_HAS_CACHE_DIC) &&
- (!nested || kvm_s2_trans_executable(nested))) {
+ } else if (cpus_have_final_cap(ARM64_HAS_CACHE_DIC)) {
prot |= KVM_PGTABLE_PROT_X;
}
+ if (nested)
+ adjust_nested_exec_perms(kvm, nested, &prot);
+
/*
* Under the premise of getting a FSC_PERM fault, we just need to relax
* permissions only if vma_pagesize equals fault_granule. Otherwise,
@@ -2097,6 +2110,11 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
u32 esr;
ret = kvm_walk_nested_s2(vcpu, fault_ipa, &nested_trans);
+ if (ret == -EAGAIN) {
+ ret = 1;
+ goto out_unlock;
+ }
+
if (ret) {
esr = kvm_s2_trans_esr(&nested_trans);
kvm_inject_s2_fault(vcpu, esr);
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index f04cda40545b..911fc99ed99d 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -124,14 +124,13 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu)
}
struct s2_walk_info {
- int (*read_desc)(phys_addr_t pa, u64 *desc, void *data);
- void *data;
- u64 baddr;
- unsigned int max_oa_bits;
- unsigned int pgshift;
- unsigned int sl;
- unsigned int t0sz;
- bool be;
+ u64 baddr;
+ unsigned int max_oa_bits;
+ unsigned int pgshift;
+ unsigned int sl;
+ unsigned int t0sz;
+ bool be;
+ bool ha;
};
static u32 compute_fsc(int level, u32 fsc)
@@ -199,6 +198,42 @@ static int check_output_size(struct s2_walk_info *wi, phys_addr_t output)
return 0;
}
+static int read_guest_s2_desc(struct kvm_vcpu *vcpu, phys_addr_t pa, u64 *desc,
+ struct s2_walk_info *wi)
+{
+ u64 val;
+ int r;
+
+ r = kvm_read_guest(vcpu->kvm, pa, &val, sizeof(val));
+ if (r)
+ return r;
+
+ /*
+ * Handle reversed descriptors if endianness differs between the
+ * host and the guest hypervisor.
+ */
+ if (wi->be)
+ *desc = be64_to_cpu((__force __be64)val);
+ else
+ *desc = le64_to_cpu((__force __le64)val);
+
+ return 0;
+}
+
+static int swap_guest_s2_desc(struct kvm_vcpu *vcpu, phys_addr_t pa, u64 old, u64 new,
+ struct s2_walk_info *wi)
+{
+ if (wi->be) {
+ old = (__force u64)cpu_to_be64(old);
+ new = (__force u64)cpu_to_be64(new);
+ } else {
+ old = (__force u64)cpu_to_le64(old);
+ new = (__force u64)cpu_to_le64(new);
+ }
+
+ return __kvm_at_swap_desc(vcpu->kvm, pa, old, new);
+}
+
/*
* This is essentially a C-version of the pseudo code from the ARM ARM
* AArch64.TranslationTableWalk function. I strongly recommend looking at
@@ -206,13 +241,13 @@ static int check_output_size(struct s2_walk_info *wi, phys_addr_t output)
*
* Must be called with the kvm->srcu read lock held
*/
-static int walk_nested_s2_pgd(phys_addr_t ipa,
+static int walk_nested_s2_pgd(struct kvm_vcpu *vcpu, phys_addr_t ipa,
struct s2_walk_info *wi, struct kvm_s2_trans *out)
{
int first_block_level, level, stride, input_size, base_lower_bound;
phys_addr_t base_addr;
unsigned int addr_top, addr_bottom;
- u64 desc; /* page table entry */
+ u64 desc, new_desc; /* page table entry */
int ret;
phys_addr_t paddr;
@@ -257,28 +292,30 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
>> (addr_bottom - 3);
paddr = base_addr | index;
- ret = wi->read_desc(paddr, &desc, wi->data);
+ ret = read_guest_s2_desc(vcpu, paddr, &desc, wi);
if (ret < 0)
return ret;
- /*
- * Handle reversedescriptors if endianness differs between the
- * host and the guest hypervisor.
- */
- if (wi->be)
- desc = be64_to_cpu((__force __be64)desc);
- else
- desc = le64_to_cpu((__force __le64)desc);
+ new_desc = desc;
/* Check for valid descriptor at this point */
- if (!(desc & 1) || ((desc & 3) == 1 && level == 3)) {
+ if (!(desc & KVM_PTE_VALID)) {
out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT);
out->desc = desc;
return 1;
}
- /* We're at the final level or block translation level */
- if ((desc & 3) == 1 || level == 3)
+ if (FIELD_GET(KVM_PTE_TYPE, desc) == KVM_PTE_TYPE_BLOCK) {
+ if (level < 3)
+ break;
+
+ out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT);
+ out->desc = desc;
+ return 1;
+ }
+
+ /* We're at the final level */
+ if (level == 3)
break;
if (check_output_size(wi, desc)) {
@@ -305,7 +342,18 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
return 1;
}
- if (!(desc & BIT(10))) {
+ if (wi->ha)
+ new_desc |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
+
+ if (new_desc != desc) {
+ ret = swap_guest_s2_desc(vcpu, paddr, desc, new_desc, wi);
+ if (ret)
+ return ret;
+
+ desc = new_desc;
+ }
+
+ if (!(desc & KVM_PTE_LEAF_ATTR_LO_S2_AF)) {
out->esr = compute_fsc(level, ESR_ELx_FSC_ACCESS);
out->desc = desc;
return 1;
@@ -318,20 +366,13 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
(ipa & GENMASK_ULL(addr_bottom - 1, 0));
out->output = paddr;
out->block_size = 1UL << ((3 - level) * stride + wi->pgshift);
- out->readable = desc & (0b01 << 6);
- out->writable = desc & (0b10 << 6);
+ out->readable = desc & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;
+ out->writable = desc & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;
out->level = level;
out->desc = desc;
return 0;
}
-static int read_guest_s2_desc(phys_addr_t pa, u64 *desc, void *data)
-{
- struct kvm_vcpu *vcpu = data;
-
- return kvm_read_guest(vcpu->kvm, pa, desc, sizeof(*desc));
-}
-
static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi)
{
wi->t0sz = vtcr & TCR_EL2_T0SZ_MASK;
@@ -350,6 +391,8 @@ static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi)
/* Global limit for now, should eventually be per-VM */
wi->max_oa_bits = min(get_kvm_ipa_limit(),
ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr), false));
+
+ wi->ha = vtcr & VTCR_EL2_HA;
}
int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
@@ -364,15 +407,13 @@ int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
if (!vcpu_has_nv(vcpu))
return 0;
- wi.read_desc = read_guest_s2_desc;
- wi.data = vcpu;
wi.baddr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
vtcr_to_walk_info(vtcr, &wi);
wi.be = vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_EE;
- ret = walk_nested_s2_pgd(gipa, &wi, result);
+ ret = walk_nested_s2_pgd(vcpu, gipa, &wi, result);
if (ret)
result->esr |= (kvm_vcpu_get_esr(vcpu) & ~ESR_ELx_FSC);
@@ -788,7 +829,10 @@ int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu, struct kvm_s2_trans *trans)
return 0;
if (kvm_vcpu_trap_is_iabt(vcpu)) {
- forward_fault = !kvm_s2_trans_executable(trans);
+ if (vcpu_mode_priv(vcpu))
+ forward_fault = !kvm_s2_trans_exec_el1(vcpu->kvm, trans);
+ else
+ forward_fault = !kvm_s2_trans_exec_el0(vcpu->kvm, trans);
} else {
bool write_fault = kvm_is_write_fault(vcpu);
@@ -1555,12 +1599,13 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val)
case SYS_ID_AA64MMFR1_EL1:
val &= ~(ID_AA64MMFR1_EL1_CMOW |
ID_AA64MMFR1_EL1_nTLBPA |
- ID_AA64MMFR1_EL1_ETS |
- ID_AA64MMFR1_EL1_XNX |
- ID_AA64MMFR1_EL1_HAFDBS);
+ ID_AA64MMFR1_EL1_ETS);
+
/* FEAT_E2H0 implies no VHE */
if (test_bit(KVM_ARM_VCPU_HAS_EL2_E2H0, kvm->arch.vcpu_features))
val &= ~ID_AA64MMFR1_EL1_VH;
+
+ val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64MMFR1_EL1, HAFDBS, AF);
break;
case SYS_ID_AA64MMFR2_EL1:
diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c
index dc5acfb00af9..6cbe018fd6fd 100644
--- a/arch/arm64/kvm/ptdump.c
+++ b/arch/arm64/kvm/ptdump.c
@@ -31,27 +31,46 @@ static const struct ptdump_prot_bits stage2_pte_bits[] = {
.val = PTE_VALID,
.set = " ",
.clear = "F",
- }, {
+ },
+ {
.mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R,
.val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R,
.set = "R",
.clear = " ",
- }, {
+ },
+ {
.mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
.val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
.set = "W",
.clear = " ",
- }, {
+ },
+ {
.mask = KVM_PTE_LEAF_ATTR_HI_S2_XN,
- .val = KVM_PTE_LEAF_ATTR_HI_S2_XN,
- .set = "NX",
- .clear = "x ",
- }, {
+ .val = 0b00UL << __bf_shf(KVM_PTE_LEAF_ATTR_HI_S2_XN),
+ .set = "px ux ",
+ },
+ {
+ .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN,
+ .val = 0b01UL << __bf_shf(KVM_PTE_LEAF_ATTR_HI_S2_XN),
+ .set = "PXNux ",
+ },
+ {
+ .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN,
+ .val = 0b10UL << __bf_shf(KVM_PTE_LEAF_ATTR_HI_S2_XN),
+ .set = "PXNUXN",
+ },
+ {
+ .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN,
+ .val = 0b11UL << __bf_shf(KVM_PTE_LEAF_ATTR_HI_S2_XN),
+ .set = "px UXN",
+ },
+ {
.mask = KVM_PTE_LEAF_ATTR_LO_S2_AF,
.val = KVM_PTE_LEAF_ATTR_LO_S2_AF,
.set = "AF",
.clear = " ",
- }, {
+ },
+ {
.mask = PMD_TYPE_MASK,
.val = PMD_TYPE_SECT,
.set = "BLK",
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 1b69d6e2d720..7d6869c07fb4 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -3782,7 +3782,8 @@ static bool handle_at_s1e01(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
{
u32 op = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
- __kvm_at_s1e01(vcpu, op, p->regval);
+ if (__kvm_at_s1e01(vcpu, op, p->regval))
+ return false;
return true;
}
@@ -3799,7 +3800,8 @@ static bool handle_at_s1e2(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
return false;
}
- __kvm_at_s1e2(vcpu, op, p->regval);
+ if (__kvm_at_s1e2(vcpu, op, p->regval))
+ return false;
return true;
}
@@ -3809,7 +3811,8 @@ static bool handle_at_s12(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
{
u32 op = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
- __kvm_at_s12(vcpu, op, p->regval);
+ if (__kvm_at_s12(vcpu, op, p->regval))
+ return false;
return true;
}
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 116d1a7b688c..0fac75f01534 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -65,6 +65,7 @@ HAS_TLB_RANGE
HAS_VA52
HAS_VIRT_HOST_EXTN
HAS_WFXT
+HAS_XNX
HAFT
HW_DBM
KVM_HVHE