diff options
| -rw-r--r-- | Documentation/virt/kvm/x86/errata.rst | 9 | ||||
| -rw-r--r-- | arch/x86/include/asm/cpufeatures.h | 2 | ||||
| -rw-r--r-- | arch/x86/include/asm/kvm_host.h | 9 | ||||
| -rw-r--r-- | arch/x86/include/asm/svm.h | 5 | ||||
| -rw-r--r-- | arch/x86/include/uapi/asm/kvm.h | 1 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/scattered.c | 1 | ||||
| -rw-r--r-- | arch/x86/kvm/cpuid.c | 1 | ||||
| -rw-r--r-- | arch/x86/kvm/svm/avic.c | 86 | ||||
| -rw-r--r-- | arch/x86/kvm/svm/nested.c | 12 | ||||
| -rw-r--r-- | arch/x86/kvm/svm/sev.c | 47 | ||||
| -rw-r--r-- | arch/x86/kvm/svm/svm.c | 76 | ||||
| -rw-r--r-- | arch/x86/kvm/svm/svm.h | 4 | ||||
| -rw-r--r-- | arch/x86/kvm/svm/vmenter.S | 47 | ||||
| -rw-r--r-- | arch/x86/kvm/x86.c | 21 | ||||
| -rw-r--r-- | drivers/crypto/ccp/sev-dev.c | 37 | ||||
| -rw-r--r-- | include/linux/psp-sev.h | 37 |
16 files changed, 310 insertions, 85 deletions
diff --git a/Documentation/virt/kvm/x86/errata.rst b/Documentation/virt/kvm/x86/errata.rst index 37c79362a48f..a9cf0e004651 100644 --- a/Documentation/virt/kvm/x86/errata.rst +++ b/Documentation/virt/kvm/x86/errata.rst @@ -48,7 +48,14 @@ versus "has_error_code", i.e. KVM's ABI follows AMD behavior. Nested virtualization features ------------------------------ -TBD +On AMD CPUs, when GIF is cleared, #DB exceptions or traps due to a breakpoint +register match are ignored and discarded by the CPU. The CPU relies on the VMM +to fully virtualize this behavior, even when vGIF is enabled for the guest +(i.e. vGIF=0 does not cause the CPU to drop #DBs when the guest is running). +KVM does not virtualize this behavior as the complexity is unjustified given +the rarity of the use case. One way to handle this would be for KVM to +intercept the #DB, temporarily disable the breakpoint, single-step over the +instruction, then re-enable the breakpoint. x2APIC ------ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index fc5698844a0b..c2f1281b32d8 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -338,6 +338,7 @@ #define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ #define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* Single Thread Indirect Branch Predictors always-on preferred */ #define X86_FEATURE_AMD_IBRS_SAME_MODE (13*32+19) /* Indirect Branch Restricted Speculation same mode protection*/ +#define X86_FEATURE_EFER_LMSLE_MBZ (13*32+20) /* EFER.LMSLE must be zero */ #define X86_FEATURE_AMD_PPIN (13*32+23) /* "amd_ppin" Protected Processor Inventory Number */ #define X86_FEATURE_AMD_SSBD (13*32+24) /* Speculative Store Bypass Disable */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* "virt_ssbd" Virtualized Speculative Store Bypass Disable */ @@ -504,6 +505,7 @@ * can access host MMIO (ignored for all intents * and purposes if CLEAR_CPU_BUF_VM is set). */ +#define X86_FEATURE_X2AVIC_EXT (21*32+18) /* AMD SVM x2AVIC support for 4k vCPUs */ /* * BUG word(s) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 6d41249e86af..5a3bfa293e8b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -2139,6 +2139,11 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); * the gfn, i.e. retrying the instruction will hit a * !PRESENT fault, which results in a new shadow page * and sends KVM back to square one. + * + * EMULTYPE_SKIP_SOFT_INT - Set in combination with EMULTYPE_SKIP to only skip + * an instruction if it could generate a given software + * interrupt, which must be encoded via + * EMULTYPE_SET_SOFT_INT_VECTOR(). */ #define EMULTYPE_NO_DECODE (1 << 0) #define EMULTYPE_TRAP_UD (1 << 1) @@ -2149,6 +2154,10 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); #define EMULTYPE_PF (1 << 6) #define EMULTYPE_COMPLETE_USER_EXIT (1 << 7) #define EMULTYPE_WRITE_PF_TO_SP (1 << 8) +#define EMULTYPE_SKIP_SOFT_INT (1 << 9) + +#define EMULTYPE_SET_SOFT_INT_VECTOR(v) ((u32)((v) & 0xff) << 16) +#define EMULTYPE_GET_SOFT_INT_VECTOR(e) (((e) >> 16) & 0xff) static inline bool kvm_can_emulate_event_vectoring(int emul_type) { diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 17f6c3fedeee..e69b6d0dedcf 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -279,7 +279,7 @@ enum avic_ipi_failure_cause { AVIC_IPI_FAILURE_INVALID_IPI_VECTOR, }; -#define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(8, 0) +#define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(11, 0) /* * For AVIC, the max index allowed for physical APIC ID table is 0xfe (254), as @@ -289,11 +289,14 @@ enum avic_ipi_failure_cause { /* * For x2AVIC, the max index allowed for physical APIC ID table is 0x1ff (511). + * With X86_FEATURE_X2AVIC_EXT, the max index is increased to 0xfff (4095). */ #define X2AVIC_MAX_PHYSICAL_ID 0x1FFUL +#define X2AVIC_4K_MAX_PHYSICAL_ID 0xFFFUL static_assert((AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == AVIC_MAX_PHYSICAL_ID); static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_MAX_PHYSICAL_ID); +static_assert((X2AVIC_4K_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_4K_MAX_PHYSICAL_ID); #define SVM_SEV_FEAT_SNP_ACTIVE BIT(0) #define SVM_SEV_FEAT_RESTRICTED_INJECTION BIT(3) diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index d420c9c066d4..7ceff6583652 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -502,6 +502,7 @@ struct kvm_sync_regs { /* vendor-specific groups and attributes for system fd */ #define KVM_X86_GRP_SEV 1 # define KVM_X86_SEV_VMSA_FEATURES 0 +# define KVM_X86_SNP_POLICY_BITS 1 struct kvm_vmx_nested_state_data { __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index caa4dc885c21..aa7f21f5f46b 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -49,6 +49,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, { X86_FEATURE_AMD_FAST_CPPC, CPUID_EDX, 15, 0x80000007, 0 }, { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, + { X86_FEATURE_X2AVIC_EXT, CPUID_ECX, 6, 0x8000000a, 0 }, { X86_FEATURE_COHERENCY_SFW_NO, CPUID_EBX, 31, 0x8000001f, 0 }, { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 }, { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 52524e0ca97f..d563a948318b 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -1135,6 +1135,7 @@ void kvm_set_cpu_caps(void) F(AMD_STIBP), F(AMD_STIBP_ALWAYS_ON), F(AMD_IBRS_SAME_MODE), + PASSTHROUGH_F(EFER_LMSLE_MBZ), F(AMD_PSFD), F(AMD_IBPB_RET), ); diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index fef00546c885..6b77b2033208 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -106,7 +106,7 @@ static u32 next_vm_id = 0; static bool next_vm_id_wrapped = 0; static DEFINE_SPINLOCK(svm_vm_data_hash_lock); static bool x2avic_enabled; - +static u32 x2avic_max_physical_id; static void avic_set_x2apic_msr_interception(struct vcpu_svm *svm, bool intercept) @@ -158,12 +158,40 @@ static void avic_set_x2apic_msr_interception(struct vcpu_svm *svm, svm->x2avic_msrs_intercepted = intercept; } +static u32 __avic_get_max_physical_id(struct kvm *kvm, struct kvm_vcpu *vcpu) +{ + u32 arch_max; + + /* + * Return the largest size (x2APIC) when querying without a vCPU, e.g. + * to allocate the per-VM table.. + */ + if (x2avic_enabled && (!vcpu || apic_x2apic_mode(vcpu->arch.apic))) + arch_max = x2avic_max_physical_id; + else + arch_max = AVIC_MAX_PHYSICAL_ID; + + /* + * Despite its name, KVM_CAP_MAX_VCPU_ID represents the maximum APIC ID + * plus one, so the max possible APIC ID is one less than that. + */ + return min(kvm->arch.max_vcpu_ids - 1, arch_max); +} + +static u32 avic_get_max_physical_id(struct kvm_vcpu *vcpu) +{ + return __avic_get_max_physical_id(vcpu->kvm, vcpu); +} + static void avic_activate_vmcb(struct vcpu_svm *svm) { struct vmcb *vmcb = svm->vmcb01.ptr; + struct kvm_vcpu *vcpu = &svm->vcpu; vmcb->control.int_ctl &= ~(AVIC_ENABLE_MASK | X2APIC_MODE_MASK); + vmcb->control.avic_physical_id &= ~AVIC_PHYSICAL_MAX_INDEX_MASK; + vmcb->control.avic_physical_id |= avic_get_max_physical_id(vcpu); vmcb->control.int_ctl |= AVIC_ENABLE_MASK; @@ -176,7 +204,7 @@ static void avic_activate_vmcb(struct vcpu_svm *svm) */ if (x2avic_enabled && apic_x2apic_mode(svm->vcpu.arch.apic)) { vmcb->control.int_ctl |= X2APIC_MODE_MASK; - vmcb->control.avic_physical_id |= X2AVIC_MAX_PHYSICAL_ID; + /* Disabling MSR intercept for x2APIC registers */ avic_set_x2apic_msr_interception(svm, false); } else { @@ -186,8 +214,6 @@ static void avic_activate_vmcb(struct vcpu_svm *svm) */ kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, &svm->vcpu); - /* For xAVIC and hybrid-xAVIC modes */ - vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID; /* Enabling MSR intercept for x2APIC registers */ avic_set_x2apic_msr_interception(svm, true); } @@ -247,6 +273,30 @@ static int avic_ga_log_notifier(u32 ga_tag) return 0; } +static int avic_get_physical_id_table_order(struct kvm *kvm) +{ + /* Provision for the maximum physical ID supported in x2avic mode */ + return get_order((__avic_get_max_physical_id(kvm, NULL) + 1) * sizeof(u64)); +} + +int avic_alloc_physical_id_table(struct kvm *kvm) +{ + struct kvm_svm *kvm_svm = to_kvm_svm(kvm); + + if (!irqchip_in_kernel(kvm) || !enable_apicv) + return 0; + + if (kvm_svm->avic_physical_id_table) + return 0; + + kvm_svm->avic_physical_id_table = (void *)__get_free_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO, + avic_get_physical_id_table_order(kvm)); + if (!kvm_svm->avic_physical_id_table) + return -ENOMEM; + + return 0; +} + void avic_vm_destroy(struct kvm *kvm) { unsigned long flags; @@ -256,7 +306,8 @@ void avic_vm_destroy(struct kvm *kvm) return; free_page((unsigned long)kvm_svm->avic_logical_id_table); - free_page((unsigned long)kvm_svm->avic_physical_id_table); + free_pages((unsigned long)kvm_svm->avic_physical_id_table, + avic_get_physical_id_table_order(kvm)); spin_lock_irqsave(&svm_vm_data_hash_lock, flags); hash_del(&kvm_svm->hnode); @@ -274,10 +325,6 @@ int avic_vm_init(struct kvm *kvm) if (!enable_apicv) return 0; - kvm_svm->avic_physical_id_table = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); - if (!kvm_svm->avic_physical_id_table) - goto free_avic; - kvm_svm->avic_logical_id_table = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); if (!kvm_svm->avic_logical_id_table) goto free_avic; @@ -342,7 +389,7 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu) * fully initialized AVIC. */ if ((!x2avic_enabled && id > AVIC_MAX_PHYSICAL_ID) || - (id > X2AVIC_MAX_PHYSICAL_ID)) { + (id > x2avic_max_physical_id)) { kvm_set_apicv_inhibit(vcpu->kvm, APICV_INHIBIT_REASON_PHYSICAL_ID_TOO_BIG); vcpu->arch.apic->apicv_active = false; return 0; @@ -562,7 +609,7 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu) u32 icrh = svm->vmcb->control.exit_info_1 >> 32; u32 icrl = svm->vmcb->control.exit_info_1; u32 id = svm->vmcb->control.exit_info_2 >> 32; - u32 index = svm->vmcb->control.exit_info_2 & 0x1FF; + u32 index = svm->vmcb->control.exit_info_2 & AVIC_PHYSICAL_MAX_INDEX_MASK; struct kvm_lapic *apic = vcpu->arch.apic; trace_kvm_avic_incomplete_ipi(vcpu->vcpu_id, icrh, icrl, id, index); @@ -962,7 +1009,8 @@ static void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu, if (WARN_ON(h_physical_id & ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK)) return; - if (WARN_ON_ONCE(vcpu->vcpu_id * sizeof(entry) >= PAGE_SIZE)) + if (WARN_ON_ONCE(vcpu->vcpu_id * sizeof(entry) >= + PAGE_SIZE << avic_get_physical_id_table_order(vcpu->kvm))) return; /* @@ -1024,7 +1072,8 @@ static void __avic_vcpu_put(struct kvm_vcpu *vcpu, enum avic_vcpu_action action) lockdep_assert_preemption_disabled(); - if (WARN_ON_ONCE(vcpu->vcpu_id * sizeof(entry) >= PAGE_SIZE)) + if (WARN_ON_ONCE(vcpu->vcpu_id * sizeof(entry) >= + PAGE_SIZE << avic_get_physical_id_table_order(vcpu->kvm))) return; /* @@ -1226,10 +1275,15 @@ bool __init avic_hardware_setup(void) /* AVIC is a prerequisite for x2AVIC. */ x2avic_enabled = boot_cpu_has(X86_FEATURE_X2AVIC); - if (x2avic_enabled) - pr_info("x2AVIC enabled\n"); - else + if (x2avic_enabled) { + if (cpu_feature_enabled(X86_FEATURE_X2AVIC_EXT)) + x2avic_max_physical_id = X2AVIC_4K_MAX_PHYSICAL_ID; + else + x2avic_max_physical_id = X2AVIC_MAX_PHYSICAL_ID; + pr_info("x2AVIC enabled (max %u vCPUs)\n", x2avic_max_physical_id + 1); + } else { svm_x86_ops.allow_apicv_in_x2apic_without_x2apic_virtualization = true; + } /* * Disable IPI virtualization for AMD Family 17h CPUs (Zen1 and Zen2) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index da6e80b3ac35..c81005b24522 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -613,6 +613,7 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12 struct kvm_vcpu *vcpu = &svm->vcpu; nested_vmcb02_compute_g_pat(svm); + vmcb_mark_dirty(vmcb02, VMCB_NPT); /* Load the nested guest state */ if (svm->nested.vmcb12_gpa != svm->nested.last_vmcb12_gpa) { @@ -751,6 +752,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, vmcb02->control.nested_ctl = vmcb01->control.nested_ctl; vmcb02->control.iopm_base_pa = vmcb01->control.iopm_base_pa; vmcb02->control.msrpm_base_pa = vmcb01->control.msrpm_base_pa; + vmcb_mark_dirty(vmcb02, VMCB_PERM_MAP); /* * Stash vmcb02's counter if the guest hasn't moved past the guilty @@ -1430,16 +1432,6 @@ static int nested_svm_intercept(struct vcpu_svm *svm) case SVM_EXIT_IOIO: vmexit = nested_svm_intercept_ioio(svm); break; - case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: { - if (vmcb12_is_intercept(&svm->nested.ctl, exit_code)) - vmexit = NESTED_EXIT_DONE; - break; - } - case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: { - if (vmcb12_is_intercept(&svm->nested.ctl, exit_code)) - vmexit = NESTED_EXIT_DONE; - break; - } case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: { /* * Host-intercepted exceptions have been checked already in diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 0835c664fbfd..f59c65abe3cf 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -65,20 +65,24 @@ module_param_named(ciphertext_hiding_asids, nr_ciphertext_hiding_asids, uint, 04 #define AP_RESET_HOLD_NAE_EVENT 1 #define AP_RESET_HOLD_MSR_PROTO 2 -/* As defined by SEV-SNP Firmware ABI, under "Guest Policy". */ -#define SNP_POLICY_MASK_API_MINOR GENMASK_ULL(7, 0) -#define SNP_POLICY_MASK_API_MAJOR GENMASK_ULL(15, 8) -#define SNP_POLICY_MASK_SMT BIT_ULL(16) -#define SNP_POLICY_MASK_RSVD_MBO BIT_ULL(17) -#define SNP_POLICY_MASK_DEBUG BIT_ULL(19) -#define SNP_POLICY_MASK_SINGLE_SOCKET BIT_ULL(20) - -#define SNP_POLICY_MASK_VALID (SNP_POLICY_MASK_API_MINOR | \ - SNP_POLICY_MASK_API_MAJOR | \ - SNP_POLICY_MASK_SMT | \ - SNP_POLICY_MASK_RSVD_MBO | \ - SNP_POLICY_MASK_DEBUG | \ - SNP_POLICY_MASK_SINGLE_SOCKET) +/* + * SEV-SNP policy bits that can be supported by KVM. These include policy bits + * that have implementation support within KVM or policy bits that do not + * require implementation support within KVM to enforce the policy. + */ +#define KVM_SNP_POLICY_MASK_VALID (SNP_POLICY_MASK_API_MINOR | \ + SNP_POLICY_MASK_API_MAJOR | \ + SNP_POLICY_MASK_SMT | \ + SNP_POLICY_MASK_RSVD_MBO | \ + SNP_POLICY_MASK_DEBUG | \ + SNP_POLICY_MASK_SINGLE_SOCKET | \ + SNP_POLICY_MASK_CXL_ALLOW | \ + SNP_POLICY_MASK_MEM_AES_256_XTS | \ + SNP_POLICY_MASK_RAPL_DIS | \ + SNP_POLICY_MASK_CIPHERTEXT_HIDING_DRAM | \ + SNP_POLICY_MASK_PAGE_SWAP_DISABLE) + +static u64 snp_supported_policy_bits __ro_after_init; #define INITIAL_VMSA_GPA 0xFFFFFFFFF000 @@ -2143,6 +2147,10 @@ int sev_dev_get_attr(u32 group, u64 attr, u64 *val) *val = sev_supported_vmsa_features; return 0; + case KVM_X86_SNP_POLICY_BITS: + *val = snp_supported_policy_bits; + return 0; + default: return -ENXIO; } @@ -2207,7 +2215,7 @@ static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) if (params.flags) return -EINVAL; - if (params.policy & ~SNP_POLICY_MASK_VALID) + if (params.policy & ~snp_supported_policy_bits) return -EINVAL; /* Check for policy bits that must be set */ @@ -3100,8 +3108,11 @@ out: else if (sev_snp_supported) sev_snp_supported = is_sev_snp_initialized(); - if (sev_snp_supported) + if (sev_snp_supported) { + snp_supported_policy_bits = sev_get_snp_policy_bits() & + KVM_SNP_POLICY_MASK_VALID; nr_ciphertext_hiding_asids = init_args.max_snp_asid; + } /* * If ciphertext hiding is enabled, the joint SEV-ES/SEV-SNP @@ -5085,10 +5096,10 @@ struct vmcb_save_area *sev_decrypt_vmsa(struct kvm_vcpu *vcpu) /* Check if the SEV policy allows debugging */ if (sev_snp_guest(vcpu->kvm)) { - if (!(sev->policy & SNP_POLICY_DEBUG)) + if (!(sev->policy & SNP_POLICY_MASK_DEBUG)) return NULL; } else { - if (sev->policy & SEV_POLICY_NODBG) + if (sev->policy & SEV_POLICY_MASK_NODBG) return NULL; } diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 6ddeeb0b1604..f56c2d895011 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -272,6 +272,7 @@ static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) } static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu, + int emul_type, bool commit_side_effects) { struct vcpu_svm *svm = to_svm(vcpu); @@ -293,7 +294,7 @@ static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu, if (unlikely(!commit_side_effects)) old_rflags = svm->vmcb->save.rflags; - if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP)) + if (!kvm_emulate_instruction(vcpu, emul_type)) return 0; if (unlikely(!commit_side_effects)) @@ -311,11 +312,13 @@ done: static int svm_skip_emulated_instruction(struct kvm_vcpu *vcpu) { - return __svm_skip_emulated_instruction(vcpu, true); + return __svm_skip_emulated_instruction(vcpu, EMULTYPE_SKIP, true); } -static int svm_update_soft_interrupt_rip(struct kvm_vcpu *vcpu) +static int svm_update_soft_interrupt_rip(struct kvm_vcpu *vcpu, u8 vector) { + const int emul_type = EMULTYPE_SKIP | EMULTYPE_SKIP_SOFT_INT | + EMULTYPE_SET_SOFT_INT_VECTOR(vector); unsigned long rip, old_rip = kvm_rip_read(vcpu); struct vcpu_svm *svm = to_svm(vcpu); @@ -331,7 +334,7 @@ static int svm_update_soft_interrupt_rip(struct kvm_vcpu *vcpu) * in use, the skip must not commit any side effects such as clearing * the interrupt shadow or RFLAGS.RF. */ - if (!__svm_skip_emulated_instruction(vcpu, !nrips)) + if (!__svm_skip_emulated_instruction(vcpu, emul_type, !nrips)) return -EIO; rip = kvm_rip_read(vcpu); @@ -367,7 +370,7 @@ static void svm_inject_exception(struct kvm_vcpu *vcpu) kvm_deliver_exception_payload(vcpu, ex); if (kvm_exception_is_soft(ex->vector) && - svm_update_soft_interrupt_rip(vcpu)) + svm_update_soft_interrupt_rip(vcpu, ex->vector)) return; svm->vmcb->control.event_inj = ex->vector @@ -1198,6 +1201,11 @@ void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb) svm->vmcb = target_vmcb->ptr; } +static int svm_vcpu_precreate(struct kvm *kvm) +{ + return avic_alloc_physical_id_table(kvm); +} + static int svm_vcpu_create(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm; @@ -3628,11 +3636,12 @@ static bool svm_set_vnmi_pending(struct kvm_vcpu *vcpu) static void svm_inject_irq(struct kvm_vcpu *vcpu, bool reinjected) { + struct kvm_queued_interrupt *intr = &vcpu->arch.interrupt; struct vcpu_svm *svm = to_svm(vcpu); u32 type; - if (vcpu->arch.interrupt.soft) { - if (svm_update_soft_interrupt_rip(vcpu)) + if (intr->soft) { + if (svm_update_soft_interrupt_rip(vcpu, intr->nr)) return; type = SVM_EVTINJ_TYPE_SOFT; @@ -3640,12 +3649,10 @@ static void svm_inject_irq(struct kvm_vcpu *vcpu, bool reinjected) type = SVM_EVTINJ_TYPE_INTR; } - trace_kvm_inj_virq(vcpu->arch.interrupt.nr, - vcpu->arch.interrupt.soft, reinjected); + trace_kvm_inj_virq(intr->nr, intr->soft, reinjected); ++vcpu->stat.irq_injections; - svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr | - SVM_EVTINJ_VALID | type; + svm->vmcb->control.event_inj = intr->nr | SVM_EVTINJ_VALID | type; } void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode, @@ -4511,31 +4518,45 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, case SVM_EXIT_WRITE_CR0: { unsigned long cr0, val; - if (info->intercept == x86_intercept_cr_write) + /* + * Adjust the exit code accordingly if a CR other than CR0 is + * being written, and skip straight to the common handling as + * only CR0 has an additional selective intercept. + */ + if (info->intercept == x86_intercept_cr_write && info->modrm_reg) { icpt_info.exit_code += info->modrm_reg; - - if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0 || - info->intercept == x86_intercept_clts) break; + } - if (!(vmcb12_is_intercept(&svm->nested.ctl, - INTERCEPT_SELECTIVE_CR0))) + /* + * Convert the exit_code to SVM_EXIT_CR0_SEL_WRITE if a + * selective CR0 intercept is triggered (the common logic will + * treat the selective intercept as being enabled). Note, the + * unconditional intercept has higher priority, i.e. this is + * only relevant if *only* the selective intercept is enabled. + */ + if (vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_CR0_WRITE) || + !(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SELECTIVE_CR0))) break; - cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK; - val = info->src_val & ~SVM_CR0_SELECTIVE_MASK; + /* CLTS never triggers INTERCEPT_SELECTIVE_CR0 */ + if (info->intercept == x86_intercept_clts) + break; + /* LMSW always triggers INTERCEPT_SELECTIVE_CR0 */ if (info->intercept == x86_intercept_lmsw) { - cr0 &= 0xfUL; - val &= 0xfUL; - /* lmsw can't clear PE - catch this here */ - if (cr0 & X86_CR0_PE) - val |= X86_CR0_PE; + icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE; + break; } + /* + * MOV-to-CR0 only triggers INTERCEPT_SELECTIVE_CR0 if any bit + * other than SVM_CR0_SELECTIVE_MASK is changed. + */ + cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK; + val = info->src_val & ~SVM_CR0_SELECTIVE_MASK; if (cr0 ^ val) icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE; - break; } case SVM_EXIT_READ_DR0: @@ -5005,6 +5026,7 @@ struct kvm_x86_ops svm_x86_ops __initdata = { .emergency_disable_virtualization_cpu = svm_emergency_disable_virtualization_cpu, .has_emulated_msr = svm_has_emulated_msr, + .vcpu_precreate = svm_vcpu_precreate, .vcpu_create = svm_vcpu_create, .vcpu_free = svm_vcpu_free, .vcpu_reset = svm_vcpu_reset, @@ -5309,7 +5331,9 @@ static __init int svm_hardware_setup(void) if (nested) { pr_info("Nested Virtualization enabled\n"); - kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); + kvm_enable_efer_bits(EFER_SVME); + if (!boot_cpu_has(X86_FEATURE_EFER_LMSLE_MBZ)) + kvm_enable_efer_bits(EFER_LMSLE); r = nested_svm_init_msrpm_merge_offsets(); if (r) diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index dd78e6402345..9e151dbdef25 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -117,9 +117,6 @@ struct kvm_sev_info { cpumask_var_t have_run_cpus; /* CPUs that have done VMRUN for this VM. */ }; -#define SEV_POLICY_NODBG BIT_ULL(0) -#define SNP_POLICY_DEBUG BIT_ULL(19) - struct kvm_svm { struct kvm kvm; @@ -807,6 +804,7 @@ extern struct kvm_x86_nested_ops svm_nested_ops; bool __init avic_hardware_setup(void); void avic_hardware_unsetup(void); +int avic_alloc_physical_id_table(struct kvm *kvm); void avic_vm_destroy(struct kvm *kvm); int avic_vm_init(struct kvm *kvm); void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb); diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index da5f481cb17e..3392bcadfb89 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -52,11 +52,23 @@ * there must not be any returns or indirect branches between this code * and vmentry. */ - movl SVM_spec_ctrl(%_ASM_DI), %eax - cmp PER_CPU_VAR(x86_spec_ctrl_current), %eax +#ifdef CONFIG_X86_64 + mov SVM_spec_ctrl(%rdi), %rdx + cmp PER_CPU_VAR(x86_spec_ctrl_current), %rdx + je 801b + movl %edx, %eax + shr $32, %rdx +#else + mov SVM_spec_ctrl(%edi), %eax + mov PER_CPU_VAR(x86_spec_ctrl_current), %ecx + xor %eax, %ecx + mov SVM_spec_ctrl + 4(%edi), %edx + mov PER_CPU_VAR(x86_spec_ctrl_current + 4), %esi + xor %edx, %esi + or %esi, %ecx je 801b +#endif mov $MSR_IA32_SPEC_CTRL, %ecx - xor %edx, %edx wrmsr jmp 801b .endm @@ -81,13 +93,25 @@ jnz 998f rdmsr movl %eax, SVM_spec_ctrl(%_ASM_DI) + movl %edx, SVM_spec_ctrl + 4(%_ASM_DI) 998: - /* Now restore the host value of the MSR if different from the guest's. */ - movl PER_CPU_VAR(x86_spec_ctrl_current), %eax - cmp SVM_spec_ctrl(%_ASM_DI), %eax +#ifdef CONFIG_X86_64 + mov PER_CPU_VAR(x86_spec_ctrl_current), %rdx + cmp SVM_spec_ctrl(%rdi), %rdx je 901b - xor %edx, %edx + movl %edx, %eax + shr $32, %rdx +#else + mov PER_CPU_VAR(x86_spec_ctrl_current), %eax + mov SVM_spec_ctrl(%edi), %esi + xor %eax, %esi + mov PER_CPU_VAR(x86_spec_ctrl_current + 4), %edx + mov SVM_spec_ctrl + 4(%edi), %edi + xor %edx, %edi + or %edi, %esi + je 901b +#endif wrmsr jmp 901b .endm @@ -136,7 +160,7 @@ SYM_FUNC_START(__svm_vcpu_run) mov %_ASM_ARG1, %_ASM_DI .endif - /* Clobbers RAX, RCX, RDX. */ + /* Clobbers RAX, RCX, RDX (and ESI on 32-bit), consumes RDI (@svm). */ RESTORE_GUEST_SPEC_CTRL /* @@ -213,7 +237,10 @@ SYM_FUNC_START(__svm_vcpu_run) /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT - /* Clobbers RAX, RCX, RDX. */ + /* + * Clobbers RAX, RCX, RDX (and ESI, EDI on 32-bit), consumes RDI (@svm) + * and RSP (pointer to @spec_ctrl_intercepted). + */ RESTORE_HOST_SPEC_CTRL /* @@ -333,7 +360,7 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) mov %rdi, SEV_ES_RDI (%rdx) mov %rsi, SEV_ES_RSI (%rdx) - /* Clobbers RAX, RCX, RDX (@hostsa). */ + /* Clobbers RAX, RCX, and RDX (@hostsa), consumes RDI (@svm). */ RESTORE_GUEST_SPEC_CTRL /* Get svm->current_vmcb->pa into RAX. */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ea66d0047c9b..0c6d899d53dd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9332,6 +9332,23 @@ static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt) return false; } +static bool is_soft_int_instruction(struct x86_emulate_ctxt *ctxt, + int emulation_type) +{ + u8 vector = EMULTYPE_GET_SOFT_INT_VECTOR(emulation_type); + + switch (ctxt->b) { + case 0xcc: + return vector == BP_VECTOR; + case 0xcd: + return vector == ctxt->src.val; + case 0xce: + return vector == OF_VECTOR; + default: + return false; + } +} + /* * Decode an instruction for emulation. The caller is responsible for handling * code breakpoints. Note, manually detecting code breakpoints is unnecessary @@ -9442,6 +9459,10 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, * injecting single-step #DBs. */ if (emulation_type & EMULTYPE_SKIP) { + if (emulation_type & EMULTYPE_SKIP_SOFT_INT && + !is_soft_int_instruction(ctxt, emulation_type)) + return 0; + if (ctxt->mode != X86EMUL_MODE_PROT64) ctxt->eip = (u32)ctxt->_eip; else diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index 0d13d47c164b..db7c7c50cebc 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -2777,6 +2777,43 @@ void sev_platform_shutdown(void) } EXPORT_SYMBOL_GPL(sev_platform_shutdown); +u64 sev_get_snp_policy_bits(void) +{ + struct psp_device *psp = psp_master; + struct sev_device *sev; + u64 policy_bits; + + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) + return 0; + + if (!psp || !psp->sev_data) + return 0; + + sev = psp->sev_data; + + policy_bits = SNP_POLICY_MASK_BASE; + + if (sev->snp_plat_status.feature_info) { + if (sev->snp_feat_info_0.ecx & SNP_RAPL_DISABLE_SUPPORTED) + policy_bits |= SNP_POLICY_MASK_RAPL_DIS; + + if (sev->snp_feat_info_0.ecx & SNP_CIPHER_TEXT_HIDING_SUPPORTED) + policy_bits |= SNP_POLICY_MASK_CIPHERTEXT_HIDING_DRAM; + + if (sev->snp_feat_info_0.ecx & SNP_AES_256_XTS_POLICY_SUPPORTED) + policy_bits |= SNP_POLICY_MASK_MEM_AES_256_XTS; + + if (sev->snp_feat_info_0.ecx & SNP_CXL_ALLOW_POLICY_SUPPORTED) + policy_bits |= SNP_POLICY_MASK_CXL_ALLOW; + + if (sev_version_greater_or_equal(1, 58)) + policy_bits |= SNP_POLICY_MASK_PAGE_SWAP_DISABLE; + } + + return policy_bits; +} +EXPORT_SYMBOL_GPL(sev_get_snp_policy_bits); + void sev_dev_destroy(struct psp_device *psp) { struct sev_device *sev = psp->sev_data; diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index e0dbcb4b4fd9..abcdee256c65 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -14,6 +14,39 @@ #include <uapi/linux/psp-sev.h> +/* As defined by SEV API, under "Guest Policy". */ +#define SEV_POLICY_MASK_NODBG BIT(0) +#define SEV_POLICY_MASK_NOKS BIT(1) +#define SEV_POLICY_MASK_ES BIT(2) +#define SEV_POLICY_MASK_NOSEND BIT(3) +#define SEV_POLICY_MASK_DOMAIN BIT(4) +#define SEV_POLICY_MASK_SEV BIT(5) +#define SEV_POLICY_MASK_API_MAJOR GENMASK(23, 16) +#define SEV_POLICY_MASK_API_MINOR GENMASK(31, 24) + +/* As defined by SEV-SNP Firmware ABI, under "Guest Policy". */ +#define SNP_POLICY_MASK_API_MINOR GENMASK_ULL(7, 0) +#define SNP_POLICY_MASK_API_MAJOR GENMASK_ULL(15, 8) +#define SNP_POLICY_MASK_SMT BIT_ULL(16) +#define SNP_POLICY_MASK_RSVD_MBO BIT_ULL(17) +#define SNP_POLICY_MASK_MIGRATE_MA BIT_ULL(18) +#define SNP_POLICY_MASK_DEBUG BIT_ULL(19) +#define SNP_POLICY_MASK_SINGLE_SOCKET BIT_ULL(20) +#define SNP_POLICY_MASK_CXL_ALLOW BIT_ULL(21) +#define SNP_POLICY_MASK_MEM_AES_256_XTS BIT_ULL(22) +#define SNP_POLICY_MASK_RAPL_DIS BIT_ULL(23) +#define SNP_POLICY_MASK_CIPHERTEXT_HIDING_DRAM BIT_ULL(24) +#define SNP_POLICY_MASK_PAGE_SWAP_DISABLE BIT_ULL(25) + +/* Base SEV-SNP policy bitmask for minimum supported SEV firmware version */ +#define SNP_POLICY_MASK_BASE (SNP_POLICY_MASK_API_MINOR | \ + SNP_POLICY_MASK_API_MAJOR | \ + SNP_POLICY_MASK_SMT | \ + SNP_POLICY_MASK_RSVD_MBO | \ + SNP_POLICY_MASK_MIGRATE_MA | \ + SNP_POLICY_MASK_DEBUG | \ + SNP_POLICY_MASK_SINGLE_SOCKET) + #define SEV_FW_BLOB_MAX_SIZE 0x4000 /* 16KB */ /** @@ -849,7 +882,10 @@ struct snp_feature_info { u32 edx; } __packed; +#define SNP_RAPL_DISABLE_SUPPORTED BIT(2) #define SNP_CIPHER_TEXT_HIDING_SUPPORTED BIT(3) +#define SNP_AES_256_XTS_POLICY_SUPPORTED BIT(4) +#define SNP_CXL_ALLOW_POLICY_SUPPORTED BIT(5) #ifdef CONFIG_CRYPTO_DEV_SP_PSP @@ -995,6 +1031,7 @@ void *snp_alloc_firmware_page(gfp_t mask); void snp_free_firmware_page(void *addr); void sev_platform_shutdown(void); bool sev_is_snp_ciphertext_hiding_supported(void); +u64 sev_get_snp_policy_bits(void); #else /* !CONFIG_CRYPTO_DEV_SP_PSP */ |