 Documentation/virt/kvm/x86/errata.rst |  9
 arch/x86/include/asm/cpufeatures.h    |  2
 arch/x86/include/asm/kvm_host.h       |  9
 arch/x86/include/asm/svm.h            |  5
 arch/x86/include/uapi/asm/kvm.h       |  1
 arch/x86/kernel/cpu/scattered.c       |  1
 arch/x86/kvm/cpuid.c                  |  1
 arch/x86/kvm/svm/avic.c               | 86
 arch/x86/kvm/svm/nested.c             | 12
 arch/x86/kvm/svm/sev.c                | 47
 arch/x86/kvm/svm/svm.c                | 76
 arch/x86/kvm/svm/svm.h                |  4
 arch/x86/kvm/svm/vmenter.S            | 47
 arch/x86/kvm/x86.c                    | 21
 drivers/crypto/ccp/sev-dev.c          | 37
 include/linux/psp-sev.h               | 37
 16 files changed, 310 insertions(+), 85 deletions(-)
diff --git a/Documentation/virt/kvm/x86/errata.rst b/Documentation/virt/kvm/x86/errata.rst
index 37c79362a48f..a9cf0e004651 100644
--- a/Documentation/virt/kvm/x86/errata.rst
+++ b/Documentation/virt/kvm/x86/errata.rst
@@ -48,7 +48,14 @@ versus "has_error_code", i.e. KVM's ABI follows AMD behavior.
Nested virtualization features
------------------------------
-TBD
+On AMD CPUs, when GIF is cleared, #DB exceptions or traps due to a breakpoint
+register match are ignored and discarded by the CPU. The CPU relies on the VMM
+to fully virtualize this behavior, even when vGIF is enabled for the guest
+(i.e. vGIF=0 does not cause the CPU to drop #DBs when the guest is running).
+KVM does not virtualize this behavior as the complexity is unjustified given
+the rarity of the use case. One way to handle this would be for KVM to
+intercept the #DB, temporarily disable the breakpoint, single-step over the
+instruction, then re-enable the breakpoint.
x2APIC
------
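
A rough sketch of the approach the new errata text describes, in case KVM ever
decides the complexity is worth it. The helper, the saved_dr7 field and the
restore flag below are hypothetical and not part of this series; the idea is to
swallow the #DB while GIF=0, stash and clear the DR7 enable bits, single-step
with RFLAGS.TF, and restore DR7 once the offending instruction has retired.

static int svm_handle_db_while_gif_clear(struct kvm_vcpu *vcpu)	/* hypothetical */
{
        struct vcpu_svm *svm = to_svm(vcpu);

        if (gif_set(svm))
                return 0;                       /* normal #DB handling */

        /* Stash and clear the breakpoint enables, DR7[7:0] = L0..G3. */
        svm->saved_dr7 = svm->vmcb->save.dr7;   /* hypothetical field */
        svm->vmcb->save.dr7 &= ~0xffULL;

        /* Single-step over the instruction that matched the breakpoint. */
        svm->vmcb->save.rflags |= X86_EFLAGS_TF;
        svm->restore_dr7_on_next_db = true;     /* hypothetical flag */
        return 1;                               /* resume the guest, #DB dropped */
}

On the next #DB (the single-step trap), the handler would restore the saved
DR7, clear the injected TF, and resume without forwarding that #DB to the
guest.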
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index fc5698844a0b..c2f1281b32d8 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -338,6 +338,7 @@
#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */
#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* Single Thread Indirect Branch Predictors always-on preferred */
#define X86_FEATURE_AMD_IBRS_SAME_MODE (13*32+19) /* Indirect Branch Restricted Speculation same mode protection*/
+#define X86_FEATURE_EFER_LMSLE_MBZ (13*32+20) /* EFER.LMSLE must be zero */
#define X86_FEATURE_AMD_PPIN (13*32+23) /* "amd_ppin" Protected Processor Inventory Number */
#define X86_FEATURE_AMD_SSBD (13*32+24) /* Speculative Store Bypass Disable */
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* "virt_ssbd" Virtualized Speculative Store Bypass Disable */
@@ -504,6 +505,7 @@
* can access host MMIO (ignored for all intents
* and purposes if CLEAR_CPU_BUF_VM is set).
*/
+#define X86_FEATURE_X2AVIC_EXT (21*32+18) /* AMD SVM x2AVIC support for 4k vCPUs */
/*
* BUG word(s)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6d41249e86af..5a3bfa293e8b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -2139,6 +2139,11 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
* the gfn, i.e. retrying the instruction will hit a
* !PRESENT fault, which results in a new shadow page
* and sends KVM back to square one.
+ *
+ * EMULTYPE_SKIP_SOFT_INT - Set in combination with EMULTYPE_SKIP to only skip
+ * an instruction if it could generate a given software
+ * interrupt, which must be encoded via
+ * EMULTYPE_SET_SOFT_INT_VECTOR().
*/
#define EMULTYPE_NO_DECODE (1 << 0)
#define EMULTYPE_TRAP_UD (1 << 1)
@@ -2149,6 +2154,10 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
#define EMULTYPE_PF (1 << 6)
#define EMULTYPE_COMPLETE_USER_EXIT (1 << 7)
#define EMULTYPE_WRITE_PF_TO_SP (1 << 8)
+#define EMULTYPE_SKIP_SOFT_INT (1 << 9)
+
+#define EMULTYPE_SET_SOFT_INT_VECTOR(v) ((u32)((v) & 0xff) << 16)
+#define EMULTYPE_GET_SOFT_INT_VECTOR(e) (((e) >> 16) & 0xff)
static inline bool kvm_can_emulate_event_vectoring(int emul_type)
{
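
A minimal illustration of how the new flag and vector macros are meant to be
combined by a caller and decoded by the consumer (the real SVM caller appears
in svm_update_soft_interrupt_rip() later in this diff; the round-trip check
below is illustrative only, not code from the series):

static void __maybe_unused emultype_soft_int_example(void)
{
        int emul_type = EMULTYPE_SKIP | EMULTYPE_SKIP_SOFT_INT |
                        EMULTYPE_SET_SOFT_INT_VECTOR(BP_VECTOR);

        /* The vector lands in bits 23:16 of the emulation type... */
        BUILD_BUG_ON(EMULTYPE_SET_SOFT_INT_VECTOR(BP_VECTOR) != (3u << 16));
        /* ...and is recovered unchanged on the emulator side. */
        WARN_ON_ONCE(EMULTYPE_GET_SOFT_INT_VECTOR(emul_type) != BP_VECTOR);
}

With this combination, x86_emulate_instruction() skips the instruction only if
it decodes to INT3, INTn or INTO and its vector matches the encoded one;
anything else causes the skip to be refused (see the x86.c hunk below).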
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 17f6c3fedeee..e69b6d0dedcf 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -279,7 +279,7 @@ enum avic_ipi_failure_cause {
AVIC_IPI_FAILURE_INVALID_IPI_VECTOR,
};
-#define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(8, 0)
+#define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(11, 0)
/*
* For AVIC, the max index allowed for physical APIC ID table is 0xfe (254), as
@@ -289,11 +289,14 @@ enum avic_ipi_failure_cause {
/*
* For x2AVIC, the max index allowed for physical APIC ID table is 0x1ff (511).
+ * With X86_FEATURE_X2AVIC_EXT, the max index is increased to 0xfff (4095).
*/
#define X2AVIC_MAX_PHYSICAL_ID 0x1FFUL
+#define X2AVIC_4K_MAX_PHYSICAL_ID 0xFFFUL
static_assert((AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == AVIC_MAX_PHYSICAL_ID);
static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_MAX_PHYSICAL_ID);
+static_assert((X2AVIC_4K_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_4K_MAX_PHYSICAL_ID);
#define SVM_SEV_FEAT_SNP_ACTIVE BIT(0)
#define SVM_SEV_FEAT_RESTRICTED_INJECTION BIT(3)
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index d420c9c066d4..7ceff6583652 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -502,6 +502,7 @@ struct kvm_sync_regs {
/* vendor-specific groups and attributes for system fd */
#define KVM_X86_GRP_SEV 1
# define KVM_X86_SEV_VMSA_FEATURES 0
+# define KVM_X86_SNP_POLICY_BITS 1
struct kvm_vmx_nested_state_data {
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index caa4dc885c21..aa7f21f5f46b 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -49,6 +49,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
{ X86_FEATURE_AMD_FAST_CPPC, CPUID_EDX, 15, 0x80000007, 0 },
{ X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 },
+ { X86_FEATURE_X2AVIC_EXT, CPUID_ECX, 6, 0x8000000a, 0 },
{ X86_FEATURE_COHERENCY_SFW_NO, CPUID_EBX, 31, 0x8000001f, 0 },
{ X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 },
{ X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 },
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 52524e0ca97f..d563a948318b 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -1135,6 +1135,7 @@ void kvm_set_cpu_caps(void)
F(AMD_STIBP),
F(AMD_STIBP_ALWAYS_ON),
F(AMD_IBRS_SAME_MODE),
+ PASSTHROUGH_F(EFER_LMSLE_MBZ),
F(AMD_PSFD),
F(AMD_IBPB_RET),
);
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index fef00546c885..6b77b2033208 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -106,7 +106,7 @@ static u32 next_vm_id = 0;
static bool next_vm_id_wrapped = 0;
static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
static bool x2avic_enabled;
-
+static u32 x2avic_max_physical_id;
static void avic_set_x2apic_msr_interception(struct vcpu_svm *svm,
bool intercept)
@@ -158,12 +158,40 @@ static void avic_set_x2apic_msr_interception(struct vcpu_svm *svm,
svm->x2avic_msrs_intercepted = intercept;
}
+static u32 __avic_get_max_physical_id(struct kvm *kvm, struct kvm_vcpu *vcpu)
+{
+ u32 arch_max;
+
+ /*
+ * Return the largest size (x2APIC) when querying without a vCPU, e.g.
+ * to allocate the per-VM table.
+ */
+ if (x2avic_enabled && (!vcpu || apic_x2apic_mode(vcpu->arch.apic)))
+ arch_max = x2avic_max_physical_id;
+ else
+ arch_max = AVIC_MAX_PHYSICAL_ID;
+
+ /*
+ * Despite its name, KVM_CAP_MAX_VCPU_ID represents the maximum APIC ID
+ * plus one, so the max possible APIC ID is one less than that.
+ */
+ return min(kvm->arch.max_vcpu_ids - 1, arch_max);
+}
+
+static u32 avic_get_max_physical_id(struct kvm_vcpu *vcpu)
+{
+ return __avic_get_max_physical_id(vcpu->kvm, vcpu);
+}
+
static void avic_activate_vmcb(struct vcpu_svm *svm)
{
struct vmcb *vmcb = svm->vmcb01.ptr;
+ struct kvm_vcpu *vcpu = &svm->vcpu;
vmcb->control.int_ctl &= ~(AVIC_ENABLE_MASK | X2APIC_MODE_MASK);
+
vmcb->control.avic_physical_id &= ~AVIC_PHYSICAL_MAX_INDEX_MASK;
+ vmcb->control.avic_physical_id |= avic_get_max_physical_id(vcpu);
vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
@@ -176,7 +204,7 @@ static void avic_activate_vmcb(struct vcpu_svm *svm)
*/
if (x2avic_enabled && apic_x2apic_mode(svm->vcpu.arch.apic)) {
vmcb->control.int_ctl |= X2APIC_MODE_MASK;
- vmcb->control.avic_physical_id |= X2AVIC_MAX_PHYSICAL_ID;
+
/* Disabling MSR intercept for x2APIC registers */
avic_set_x2apic_msr_interception(svm, false);
} else {
@@ -186,8 +214,6 @@ static void avic_activate_vmcb(struct vcpu_svm *svm)
*/
kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, &svm->vcpu);
- /* For xAVIC and hybrid-xAVIC modes */
- vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID;
/* Enabling MSR intercept for x2APIC registers */
avic_set_x2apic_msr_interception(svm, true);
}
@@ -247,6 +273,30 @@ static int avic_ga_log_notifier(u32 ga_tag)
return 0;
}
+static int avic_get_physical_id_table_order(struct kvm *kvm)
+{
+ /* Provision for the maximum physical ID supported in x2avic mode */
+ return get_order((__avic_get_max_physical_id(kvm, NULL) + 1) * sizeof(u64));
+}
+
+int avic_alloc_physical_id_table(struct kvm *kvm)
+{
+ struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
+
+ if (!irqchip_in_kernel(kvm) || !enable_apicv)
+ return 0;
+
+ if (kvm_svm->avic_physical_id_table)
+ return 0;
+
+ kvm_svm->avic_physical_id_table = (void *)__get_free_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO,
+ avic_get_physical_id_table_order(kvm));
+ if (!kvm_svm->avic_physical_id_table)
+ return -ENOMEM;
+
+ return 0;
+}
+
void avic_vm_destroy(struct kvm *kvm)
{
unsigned long flags;
@@ -256,7 +306,8 @@ void avic_vm_destroy(struct kvm *kvm)
return;
free_page((unsigned long)kvm_svm->avic_logical_id_table);
- free_page((unsigned long)kvm_svm->avic_physical_id_table);
+ free_pages((unsigned long)kvm_svm->avic_physical_id_table,
+ avic_get_physical_id_table_order(kvm));
spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
hash_del(&kvm_svm->hnode);
@@ -274,10 +325,6 @@ int avic_vm_init(struct kvm *kvm)
if (!enable_apicv)
return 0;
- kvm_svm->avic_physical_id_table = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
- if (!kvm_svm->avic_physical_id_table)
- goto free_avic;
-
kvm_svm->avic_logical_id_table = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
if (!kvm_svm->avic_logical_id_table)
goto free_avic;
@@ -342,7 +389,7 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
* fully initialized AVIC.
*/
if ((!x2avic_enabled && id > AVIC_MAX_PHYSICAL_ID) ||
- (id > X2AVIC_MAX_PHYSICAL_ID)) {
+ (id > x2avic_max_physical_id)) {
kvm_set_apicv_inhibit(vcpu->kvm, APICV_INHIBIT_REASON_PHYSICAL_ID_TOO_BIG);
vcpu->arch.apic->apicv_active = false;
return 0;
@@ -562,7 +609,7 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
u32 icrl = svm->vmcb->control.exit_info_1;
u32 id = svm->vmcb->control.exit_info_2 >> 32;
- u32 index = svm->vmcb->control.exit_info_2 & 0x1FF;
+ u32 index = svm->vmcb->control.exit_info_2 & AVIC_PHYSICAL_MAX_INDEX_MASK;
struct kvm_lapic *apic = vcpu->arch.apic;
trace_kvm_avic_incomplete_ipi(vcpu->vcpu_id, icrh, icrl, id, index);
@@ -962,7 +1009,8 @@ static void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu,
if (WARN_ON(h_physical_id & ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
return;
- if (WARN_ON_ONCE(vcpu->vcpu_id * sizeof(entry) >= PAGE_SIZE))
+ if (WARN_ON_ONCE(vcpu->vcpu_id * sizeof(entry) >=
+ PAGE_SIZE << avic_get_physical_id_table_order(vcpu->kvm)))
return;
/*
@@ -1024,7 +1072,8 @@ static void __avic_vcpu_put(struct kvm_vcpu *vcpu, enum avic_vcpu_action action)
lockdep_assert_preemption_disabled();
- if (WARN_ON_ONCE(vcpu->vcpu_id * sizeof(entry) >= PAGE_SIZE))
+ if (WARN_ON_ONCE(vcpu->vcpu_id * sizeof(entry) >=
+ PAGE_SIZE << avic_get_physical_id_table_order(vcpu->kvm)))
return;
/*
@@ -1226,10 +1275,15 @@ bool __init avic_hardware_setup(void)
/* AVIC is a prerequisite for x2AVIC. */
x2avic_enabled = boot_cpu_has(X86_FEATURE_X2AVIC);
- if (x2avic_enabled)
- pr_info("x2AVIC enabled\n");
- else
+ if (x2avic_enabled) {
+ if (cpu_feature_enabled(X86_FEATURE_X2AVIC_EXT))
+ x2avic_max_physical_id = X2AVIC_4K_MAX_PHYSICAL_ID;
+ else
+ x2avic_max_physical_id = X2AVIC_MAX_PHYSICAL_ID;
+ pr_info("x2AVIC enabled (max %u vCPUs)\n", x2avic_max_physical_id + 1);
+ } else {
svm_x86_ops.allow_apicv_in_x2apic_without_x2apic_virtualization = true;
+ }
/*
* Disable IPI virtualization for AMD Family 17h CPUs (Zen1 and Zen2)
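
Worked sizing for the order-based allocation above, assuming 4 KiB pages and
one u64 entry per physical APIC ID (the numbers follow directly from the
limits in svm.h and the min() against kvm->arch.max_vcpu_ids):

        X2AVIC_EXT host, KVM_CAP_MAX_VCPU_ID raised to 4096:
                max ID = min(4096 - 1, X2AVIC_4K_MAX_PHYSICAL_ID) = 0xfff
                table  = (0xfff + 1) * sizeof(u64) = 32 KiB -> order 3 (8 pages)

        Plain x2AVIC host, or KVM_CAP_MAX_VCPU_ID capped at 512 or below:
                max ID <= X2AVIC_MAX_PHYSICAL_ID = 0x1ff
                table  <= (0x1ff + 1) * sizeof(u64) = 4 KiB -> order 0, i.e.
                the same single page the old get_zeroed_page() call provided.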
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index da6e80b3ac35..c81005b24522 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -613,6 +613,7 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
struct kvm_vcpu *vcpu = &svm->vcpu;
nested_vmcb02_compute_g_pat(svm);
+ vmcb_mark_dirty(vmcb02, VMCB_NPT);
/* Load the nested guest state */
if (svm->nested.vmcb12_gpa != svm->nested.last_vmcb12_gpa) {
@@ -751,6 +752,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
vmcb02->control.nested_ctl = vmcb01->control.nested_ctl;
vmcb02->control.iopm_base_pa = vmcb01->control.iopm_base_pa;
vmcb02->control.msrpm_base_pa = vmcb01->control.msrpm_base_pa;
+ vmcb_mark_dirty(vmcb02, VMCB_PERM_MAP);
/*
* Stash vmcb02's counter if the guest hasn't moved past the guilty
@@ -1430,16 +1432,6 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
case SVM_EXIT_IOIO:
vmexit = nested_svm_intercept_ioio(svm);
break;
- case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
- if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
- vmexit = NESTED_EXIT_DONE;
- break;
- }
- case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
- if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
- vmexit = NESTED_EXIT_DONE;
- break;
- }
case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
/*
* Host-intercepted exceptions have been checked already in
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 0835c664fbfd..f59c65abe3cf 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -65,20 +65,24 @@ module_param_named(ciphertext_hiding_asids, nr_ciphertext_hiding_asids, uint, 04
#define AP_RESET_HOLD_NAE_EVENT 1
#define AP_RESET_HOLD_MSR_PROTO 2
-/* As defined by SEV-SNP Firmware ABI, under "Guest Policy". */
-#define SNP_POLICY_MASK_API_MINOR GENMASK_ULL(7, 0)
-#define SNP_POLICY_MASK_API_MAJOR GENMASK_ULL(15, 8)
-#define SNP_POLICY_MASK_SMT BIT_ULL(16)
-#define SNP_POLICY_MASK_RSVD_MBO BIT_ULL(17)
-#define SNP_POLICY_MASK_DEBUG BIT_ULL(19)
-#define SNP_POLICY_MASK_SINGLE_SOCKET BIT_ULL(20)
-
-#define SNP_POLICY_MASK_VALID (SNP_POLICY_MASK_API_MINOR | \
- SNP_POLICY_MASK_API_MAJOR | \
- SNP_POLICY_MASK_SMT | \
- SNP_POLICY_MASK_RSVD_MBO | \
- SNP_POLICY_MASK_DEBUG | \
- SNP_POLICY_MASK_SINGLE_SOCKET)
+/*
+ * SEV-SNP policy bits that can be supported by KVM. These include policy bits
+ * that have implementation support within KVM or policy bits that do not
+ * require implementation support within KVM to enforce the policy.
+ */
+#define KVM_SNP_POLICY_MASK_VALID (SNP_POLICY_MASK_API_MINOR | \
+ SNP_POLICY_MASK_API_MAJOR | \
+ SNP_POLICY_MASK_SMT | \
+ SNP_POLICY_MASK_RSVD_MBO | \
+ SNP_POLICY_MASK_DEBUG | \
+ SNP_POLICY_MASK_SINGLE_SOCKET | \
+ SNP_POLICY_MASK_CXL_ALLOW | \
+ SNP_POLICY_MASK_MEM_AES_256_XTS | \
+ SNP_POLICY_MASK_RAPL_DIS | \
+ SNP_POLICY_MASK_CIPHERTEXT_HIDING_DRAM | \
+ SNP_POLICY_MASK_PAGE_SWAP_DISABLE)
+
+static u64 snp_supported_policy_bits __ro_after_init;
#define INITIAL_VMSA_GPA 0xFFFFFFFFF000
@@ -2143,6 +2147,10 @@ int sev_dev_get_attr(u32 group, u64 attr, u64 *val)
*val = sev_supported_vmsa_features;
return 0;
+ case KVM_X86_SNP_POLICY_BITS:
+ *val = snp_supported_policy_bits;
+ return 0;
+
default:
return -ENXIO;
}
@@ -2207,7 +2215,7 @@ static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (params.flags)
return -EINVAL;
- if (params.policy & ~SNP_POLICY_MASK_VALID)
+ if (params.policy & ~snp_supported_policy_bits)
return -EINVAL;
/* Check for policy bits that must be set */
@@ -3100,8 +3108,11 @@ out:
else if (sev_snp_supported)
sev_snp_supported = is_sev_snp_initialized();
- if (sev_snp_supported)
+ if (sev_snp_supported) {
+ snp_supported_policy_bits = sev_get_snp_policy_bits() &
+ KVM_SNP_POLICY_MASK_VALID;
nr_ciphertext_hiding_asids = init_args.max_snp_asid;
+ }
/*
* If ciphertext hiding is enabled, the joint SEV-ES/SEV-SNP
@@ -5085,10 +5096,10 @@ struct vmcb_save_area *sev_decrypt_vmsa(struct kvm_vcpu *vcpu)
/* Check if the SEV policy allows debugging */
if (sev_snp_guest(vcpu->kvm)) {
- if (!(sev->policy & SNP_POLICY_DEBUG))
+ if (!(sev->policy & SNP_POLICY_MASK_DEBUG))
return NULL;
} else {
- if (sev->policy & SEV_POLICY_NODBG)
+ if (sev->policy & SEV_POLICY_MASK_NODBG)
return NULL;
}
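
A sketch of how userspace might consume the new KVM_X86_SNP_POLICY_BITS
attribute before building the policy for KVM_SEV_SNP_LAUNCH_START. It assumes
the same KVM_GET_DEVICE_ATTR plumbing on the /dev/kvm fd that already serves
KVM_X86_SEV_VMSA_FEATURES; the helper name is made up and error handling is
reduced to "report nothing supported":

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* kvm_fd is an open /dev/kvm file descriptor. */
static uint64_t query_snp_policy_bits(int kvm_fd)
{
        uint64_t bits = 0;
        struct kvm_device_attr attr = {
                .group = KVM_X86_GRP_SEV,
                .attr  = KVM_X86_SNP_POLICY_BITS,
                .addr  = (uint64_t)(uintptr_t)&bits,
        };

        /* KVM_HAS_DEVICE_ATTR returns 0 when the attribute is supported. */
        if (ioctl(kvm_fd, KVM_HAS_DEVICE_ATTR, &attr) ||
            ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr))
                return 0;

        return bits;
}

Any bit set in params.policy that is not in the returned mask now makes
snp_launch_start() fail with -EINVAL, so VMMs should mask or reject
unsupported policy requests up front rather than discovering them at launch
time.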
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 6ddeeb0b1604..f56c2d895011 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -272,6 +272,7 @@ static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
}
static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu,
+ int emul_type,
bool commit_side_effects)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -293,7 +294,7 @@ static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu,
if (unlikely(!commit_side_effects))
old_rflags = svm->vmcb->save.rflags;
- if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
+ if (!kvm_emulate_instruction(vcpu, emul_type))
return 0;
if (unlikely(!commit_side_effects))
@@ -311,11 +312,13 @@ done:
static int svm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
{
- return __svm_skip_emulated_instruction(vcpu, true);
+ return __svm_skip_emulated_instruction(vcpu, EMULTYPE_SKIP, true);
}
-static int svm_update_soft_interrupt_rip(struct kvm_vcpu *vcpu)
+static int svm_update_soft_interrupt_rip(struct kvm_vcpu *vcpu, u8 vector)
{
+ const int emul_type = EMULTYPE_SKIP | EMULTYPE_SKIP_SOFT_INT |
+ EMULTYPE_SET_SOFT_INT_VECTOR(vector);
unsigned long rip, old_rip = kvm_rip_read(vcpu);
struct vcpu_svm *svm = to_svm(vcpu);
@@ -331,7 +334,7 @@ static int svm_update_soft_interrupt_rip(struct kvm_vcpu *vcpu)
* in use, the skip must not commit any side effects such as clearing
* the interrupt shadow or RFLAGS.RF.
*/
- if (!__svm_skip_emulated_instruction(vcpu, !nrips))
+ if (!__svm_skip_emulated_instruction(vcpu, emul_type, !nrips))
return -EIO;
rip = kvm_rip_read(vcpu);
@@ -367,7 +370,7 @@ static void svm_inject_exception(struct kvm_vcpu *vcpu)
kvm_deliver_exception_payload(vcpu, ex);
if (kvm_exception_is_soft(ex->vector) &&
- svm_update_soft_interrupt_rip(vcpu))
+ svm_update_soft_interrupt_rip(vcpu, ex->vector))
return;
svm->vmcb->control.event_inj = ex->vector
@@ -1198,6 +1201,11 @@ void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb)
svm->vmcb = target_vmcb->ptr;
}
+static int svm_vcpu_precreate(struct kvm *kvm)
+{
+ return avic_alloc_physical_id_table(kvm);
+}
+
static int svm_vcpu_create(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm;
@@ -3628,11 +3636,12 @@ static bool svm_set_vnmi_pending(struct kvm_vcpu *vcpu)
static void svm_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
{
+ struct kvm_queued_interrupt *intr = &vcpu->arch.interrupt;
struct vcpu_svm *svm = to_svm(vcpu);
u32 type;
- if (vcpu->arch.interrupt.soft) {
- if (svm_update_soft_interrupt_rip(vcpu))
+ if (intr->soft) {
+ if (svm_update_soft_interrupt_rip(vcpu, intr->nr))
return;
type = SVM_EVTINJ_TYPE_SOFT;
@@ -3640,12 +3649,10 @@ static void svm_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
type = SVM_EVTINJ_TYPE_INTR;
}
- trace_kvm_inj_virq(vcpu->arch.interrupt.nr,
- vcpu->arch.interrupt.soft, reinjected);
+ trace_kvm_inj_virq(intr->nr, intr->soft, reinjected);
++vcpu->stat.irq_injections;
- svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr |
- SVM_EVTINJ_VALID | type;
+ svm->vmcb->control.event_inj = intr->nr | SVM_EVTINJ_VALID | type;
}
void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode,
@@ -4511,31 +4518,45 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
case SVM_EXIT_WRITE_CR0: {
unsigned long cr0, val;
- if (info->intercept == x86_intercept_cr_write)
+ /*
+ * Adjust the exit code accordingly if a CR other than CR0 is
+ * being written, and skip straight to the common handling as
+ * only CR0 has an additional selective intercept.
+ */
+ if (info->intercept == x86_intercept_cr_write && info->modrm_reg) {
icpt_info.exit_code += info->modrm_reg;
-
- if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0 ||
- info->intercept == x86_intercept_clts)
break;
+ }
- if (!(vmcb12_is_intercept(&svm->nested.ctl,
- INTERCEPT_SELECTIVE_CR0)))
+ /*
+ * Convert the exit_code to SVM_EXIT_CR0_SEL_WRITE if a
+ * selective CR0 intercept is triggered (the common logic will
+ * treat the selective intercept as being enabled). Note, the
+ * unconditional intercept has higher priority, i.e. this is
+ * only relevant if *only* the selective intercept is enabled.
+ */
+ if (vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_CR0_WRITE) ||
+ !(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SELECTIVE_CR0)))
break;
- cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK;
- val = info->src_val & ~SVM_CR0_SELECTIVE_MASK;
+ /* CLTS never triggers INTERCEPT_SELECTIVE_CR0 */
+ if (info->intercept == x86_intercept_clts)
+ break;
+ /* LMSW always triggers INTERCEPT_SELECTIVE_CR0 */
if (info->intercept == x86_intercept_lmsw) {
- cr0 &= 0xfUL;
- val &= 0xfUL;
- /* lmsw can't clear PE - catch this here */
- if (cr0 & X86_CR0_PE)
- val |= X86_CR0_PE;
+ icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE;
+ break;
}
+ /*
+ * MOV-to-CR0 only triggers INTERCEPT_SELECTIVE_CR0 if any bit
+ * other than SVM_CR0_SELECTIVE_MASK is changed.
+ */
+ cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK;
+ val = info->src_val & ~SVM_CR0_SELECTIVE_MASK;
if (cr0 ^ val)
icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE;
-
break;
}
case SVM_EXIT_READ_DR0:
@@ -5005,6 +5026,7 @@ struct kvm_x86_ops svm_x86_ops __initdata = {
.emergency_disable_virtualization_cpu = svm_emergency_disable_virtualization_cpu,
.has_emulated_msr = svm_has_emulated_msr,
+ .vcpu_precreate = svm_vcpu_precreate,
.vcpu_create = svm_vcpu_create,
.vcpu_free = svm_vcpu_free,
.vcpu_reset = svm_vcpu_reset,
@@ -5309,7 +5331,9 @@ static __init int svm_hardware_setup(void)
if (nested) {
pr_info("Nested Virtualization enabled\n");
- kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
+ kvm_enable_efer_bits(EFER_SVME);
+ if (!boot_cpu_has(X86_FEATURE_EFER_LMSLE_MBZ))
+ kvm_enable_efer_bits(EFER_LMSLE);
r = nested_svm_init_msrpm_merge_offsets();
if (r)
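
The guest-visible effect of making the EFER_LMSLE enable conditional is easiest
to see against KVM's reserved-bit handling in x86.c, which the hunk above feeds
into. A simplified model (illustrative only; the real mask in x86.c starts from
the architectural baseline rather than all-ones):

        static u64 efer_reserved_bits = ~0ULL;  /* illustrative starting point */

        void kvm_enable_efer_bits(u64 mask)
        {
                efer_reserved_bits &= ~mask;    /* bit becomes settable by the guest */
        }

        /* Guest WRMSR(MSR_EFER) with a still-reserved bit set is refused (#GP). */
        static bool efer_write_allowed(u64 efer)
        {
                return !(efer & efer_reserved_bits);
        }

On parts that enumerate X86_FEATURE_EFER_LMSLE_MBZ, EFER_LMSLE is never removed
from the reserved mask, so a guest attempt to set it takes #GP, matching the
"must be zero" semantics now advertised to the guest via PASSTHROUGH_F() in the
cpuid.c hunk.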
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index dd78e6402345..9e151dbdef25 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -117,9 +117,6 @@ struct kvm_sev_info {
cpumask_var_t have_run_cpus; /* CPUs that have done VMRUN for this VM. */
};
-#define SEV_POLICY_NODBG BIT_ULL(0)
-#define SNP_POLICY_DEBUG BIT_ULL(19)
-
struct kvm_svm {
struct kvm kvm;
@@ -807,6 +804,7 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
bool __init avic_hardware_setup(void);
void avic_hardware_unsetup(void);
+int avic_alloc_physical_id_table(struct kvm *kvm);
void avic_vm_destroy(struct kvm *kvm);
int avic_vm_init(struct kvm *kvm);
void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb);
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
index da5f481cb17e..3392bcadfb89 100644
--- a/arch/x86/kvm/svm/vmenter.S
+++ b/arch/x86/kvm/svm/vmenter.S
@@ -52,11 +52,23 @@
* there must not be any returns or indirect branches between this code
* and vmentry.
*/
- movl SVM_spec_ctrl(%_ASM_DI), %eax
- cmp PER_CPU_VAR(x86_spec_ctrl_current), %eax
+#ifdef CONFIG_X86_64
+ mov SVM_spec_ctrl(%rdi), %rdx
+ cmp PER_CPU_VAR(x86_spec_ctrl_current), %rdx
+ je 801b
+ movl %edx, %eax
+ shr $32, %rdx
+#else
+ mov SVM_spec_ctrl(%edi), %eax
+ mov PER_CPU_VAR(x86_spec_ctrl_current), %ecx
+ xor %eax, %ecx
+ mov SVM_spec_ctrl + 4(%edi), %edx
+ mov PER_CPU_VAR(x86_spec_ctrl_current + 4), %esi
+ xor %edx, %esi
+ or %esi, %ecx
je 801b
+#endif
mov $MSR_IA32_SPEC_CTRL, %ecx
- xor %edx, %edx
wrmsr
jmp 801b
.endm
@@ -81,13 +93,25 @@
jnz 998f
rdmsr
movl %eax, SVM_spec_ctrl(%_ASM_DI)
+ movl %edx, SVM_spec_ctrl + 4(%_ASM_DI)
998:
-
/* Now restore the host value of the MSR if different from the guest's. */
- movl PER_CPU_VAR(x86_spec_ctrl_current), %eax
- cmp SVM_spec_ctrl(%_ASM_DI), %eax
+#ifdef CONFIG_X86_64
+ mov PER_CPU_VAR(x86_spec_ctrl_current), %rdx
+ cmp SVM_spec_ctrl(%rdi), %rdx
je 901b
- xor %edx, %edx
+ movl %edx, %eax
+ shr $32, %rdx
+#else
+ mov PER_CPU_VAR(x86_spec_ctrl_current), %eax
+ mov SVM_spec_ctrl(%edi), %esi
+ xor %eax, %esi
+ mov PER_CPU_VAR(x86_spec_ctrl_current + 4), %edx
+ mov SVM_spec_ctrl + 4(%edi), %edi
+ xor %edx, %edi
+ or %edi, %esi
+ je 901b
+#endif
wrmsr
jmp 901b
.endm
@@ -136,7 +160,7 @@ SYM_FUNC_START(__svm_vcpu_run)
mov %_ASM_ARG1, %_ASM_DI
.endif
- /* Clobbers RAX, RCX, RDX. */
+ /* Clobbers RAX, RCX, RDX (and ESI on 32-bit), consumes RDI (@svm). */
RESTORE_GUEST_SPEC_CTRL
/*
@@ -213,7 +237,10 @@ SYM_FUNC_START(__svm_vcpu_run)
/* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT
- /* Clobbers RAX, RCX, RDX. */
+ /*
+ * Clobbers RAX, RCX, RDX (and ESI, EDI on 32-bit), consumes RDI (@svm)
+ * and RSP (pointer to @spec_ctrl_intercepted).
+ */
RESTORE_HOST_SPEC_CTRL
/*
@@ -333,7 +360,7 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
mov %rdi, SEV_ES_RDI (%rdx)
mov %rsi, SEV_ES_RSI (%rdx)
- /* Clobbers RAX, RCX, RDX (@hostsa). */
+ /* Clobbers RAX, RCX, and RDX (@hostsa), consumes RDI (@svm). */
RESTORE_GUEST_SPEC_CTRL
/* Get svm->current_vmcb->pa into RAX. */
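
In C terms, the widened assembly above now implements roughly the following
around VMRUN (sketch only; the real logic has to stay in assembly because no
RET or indirect branch may occur between the speculation-control setup and
VMRUN, or between #VMEXIT and the RSB fill):

        /* RESTORE_GUEST_SPEC_CTRL: switch to the guest value, all 64 bits. */
        if (svm->spec_ctrl != this_cpu_read(x86_spec_ctrl_current))
                wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);

        /* ... VMRUN / #VMEXIT ... */

        /* RESTORE_HOST_SPEC_CTRL: save the guest value if not intercepted, ... */
        if (!spec_ctrl_intercepted)
                rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);

        /* ... then put the host value back if it differs. */
        if (this_cpu_read(x86_spec_ctrl_current) != svm->spec_ctrl)
                wrmsrl(MSR_IA32_SPEC_CTRL, this_cpu_read(x86_spec_ctrl_current));

The only difference from the old code is the width: both compares previously
looked at the low 32 bits and WRMSR was issued with EDX forced to zero, whereas
the full 64-bit MSR image is now compared and written (on 32-bit builds by
XOR-ing the two halves separately and OR-ing the results).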
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ea66d0047c9b..0c6d899d53dd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9332,6 +9332,23 @@ static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt)
return false;
}
+static bool is_soft_int_instruction(struct x86_emulate_ctxt *ctxt,
+ int emulation_type)
+{
+ u8 vector = EMULTYPE_GET_SOFT_INT_VECTOR(emulation_type);
+
+ switch (ctxt->b) {
+ case 0xcc:
+ return vector == BP_VECTOR;
+ case 0xcd:
+ return vector == ctxt->src.val;
+ case 0xce:
+ return vector == OF_VECTOR;
+ default:
+ return false;
+ }
+}
+
/*
* Decode an instruction for emulation. The caller is responsible for handling
* code breakpoints. Note, manually detecting code breakpoints is unnecessary
@@ -9442,6 +9459,10 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
* injecting single-step #DBs.
*/
if (emulation_type & EMULTYPE_SKIP) {
+ if (emulation_type & EMULTYPE_SKIP_SOFT_INT &&
+ !is_soft_int_instruction(ctxt, emulation_type))
+ return 0;
+
if (ctxt->mode != X86EMUL_MODE_PROT64)
ctxt->eip = (u32)ctxt->_eip;
else
diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
index 0d13d47c164b..db7c7c50cebc 100644
--- a/drivers/crypto/ccp/sev-dev.c
+++ b/drivers/crypto/ccp/sev-dev.c
@@ -2777,6 +2777,43 @@ void sev_platform_shutdown(void)
}
EXPORT_SYMBOL_GPL(sev_platform_shutdown);
+u64 sev_get_snp_policy_bits(void)
+{
+ struct psp_device *psp = psp_master;
+ struct sev_device *sev;
+ u64 policy_bits;
+
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
+ return 0;
+
+ if (!psp || !psp->sev_data)
+ return 0;
+
+ sev = psp->sev_data;
+
+ policy_bits = SNP_POLICY_MASK_BASE;
+
+ if (sev->snp_plat_status.feature_info) {
+ if (sev->snp_feat_info_0.ecx & SNP_RAPL_DISABLE_SUPPORTED)
+ policy_bits |= SNP_POLICY_MASK_RAPL_DIS;
+
+ if (sev->snp_feat_info_0.ecx & SNP_CIPHER_TEXT_HIDING_SUPPORTED)
+ policy_bits |= SNP_POLICY_MASK_CIPHERTEXT_HIDING_DRAM;
+
+ if (sev->snp_feat_info_0.ecx & SNP_AES_256_XTS_POLICY_SUPPORTED)
+ policy_bits |= SNP_POLICY_MASK_MEM_AES_256_XTS;
+
+ if (sev->snp_feat_info_0.ecx & SNP_CXL_ALLOW_POLICY_SUPPORTED)
+ policy_bits |= SNP_POLICY_MASK_CXL_ALLOW;
+
+ if (sev_version_greater_or_equal(1, 58))
+ policy_bits |= SNP_POLICY_MASK_PAGE_SWAP_DISABLE;
+ }
+
+ return policy_bits;
+}
+EXPORT_SYMBOL_GPL(sev_get_snp_policy_bits);
+
void sev_dev_destroy(struct psp_device *psp)
{
struct sev_device *sev = psp->sev_data;
diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h
index e0dbcb4b4fd9..abcdee256c65 100644
--- a/include/linux/psp-sev.h
+++ b/include/linux/psp-sev.h
@@ -14,6 +14,39 @@
#include <uapi/linux/psp-sev.h>
+/* As defined by SEV API, under "Guest Policy". */
+#define SEV_POLICY_MASK_NODBG BIT(0)
+#define SEV_POLICY_MASK_NOKS BIT(1)
+#define SEV_POLICY_MASK_ES BIT(2)
+#define SEV_POLICY_MASK_NOSEND BIT(3)
+#define SEV_POLICY_MASK_DOMAIN BIT(4)
+#define SEV_POLICY_MASK_SEV BIT(5)
+#define SEV_POLICY_MASK_API_MAJOR GENMASK(23, 16)
+#define SEV_POLICY_MASK_API_MINOR GENMASK(31, 24)
+
+/* As defined by SEV-SNP Firmware ABI, under "Guest Policy". */
+#define SNP_POLICY_MASK_API_MINOR GENMASK_ULL(7, 0)
+#define SNP_POLICY_MASK_API_MAJOR GENMASK_ULL(15, 8)
+#define SNP_POLICY_MASK_SMT BIT_ULL(16)
+#define SNP_POLICY_MASK_RSVD_MBO BIT_ULL(17)
+#define SNP_POLICY_MASK_MIGRATE_MA BIT_ULL(18)
+#define SNP_POLICY_MASK_DEBUG BIT_ULL(19)
+#define SNP_POLICY_MASK_SINGLE_SOCKET BIT_ULL(20)
+#define SNP_POLICY_MASK_CXL_ALLOW BIT_ULL(21)
+#define SNP_POLICY_MASK_MEM_AES_256_XTS BIT_ULL(22)
+#define SNP_POLICY_MASK_RAPL_DIS BIT_ULL(23)
+#define SNP_POLICY_MASK_CIPHERTEXT_HIDING_DRAM BIT_ULL(24)
+#define SNP_POLICY_MASK_PAGE_SWAP_DISABLE BIT_ULL(25)
+
+/* Base SEV-SNP policy bitmask for minimum supported SEV firmware version */
+#define SNP_POLICY_MASK_BASE (SNP_POLICY_MASK_API_MINOR | \
+ SNP_POLICY_MASK_API_MAJOR | \
+ SNP_POLICY_MASK_SMT | \
+ SNP_POLICY_MASK_RSVD_MBO | \
+ SNP_POLICY_MASK_MIGRATE_MA | \
+ SNP_POLICY_MASK_DEBUG | \
+ SNP_POLICY_MASK_SINGLE_SOCKET)
+
#define SEV_FW_BLOB_MAX_SIZE 0x4000 /* 16KB */
/**
@@ -849,7 +882,10 @@ struct snp_feature_info {
u32 edx;
} __packed;
+#define SNP_RAPL_DISABLE_SUPPORTED BIT(2)
#define SNP_CIPHER_TEXT_HIDING_SUPPORTED BIT(3)
+#define SNP_AES_256_XTS_POLICY_SUPPORTED BIT(4)
+#define SNP_CXL_ALLOW_POLICY_SUPPORTED BIT(5)
#ifdef CONFIG_CRYPTO_DEV_SP_PSP
@@ -995,6 +1031,7 @@ void *snp_alloc_firmware_page(gfp_t mask);
void snp_free_firmware_page(void *addr);
void sev_platform_shutdown(void);
bool sev_is_snp_ciphertext_hiding_supported(void);
+u64 sev_get_snp_policy_bits(void);
#else /* !CONFIG_CRYPTO_DEV_SP_PSP */