path: root/arch/x86/kvm/lapic.c
author		Paolo Bonzini <pbonzini@redhat.com>	2025-11-26 09:34:21 +0100
committer	Paolo Bonzini <pbonzini@redhat.com>	2025-11-26 09:34:21 +0100
commit		e64dcfab57ac41b19e7433511ba0fa5be4f99e83 (patch)
tree		4d69afef26d929b535430f4e8c3dceda42e36c91 /arch/x86/kvm/lapic.c
parent		236831743ced9162a8953aa281b73c65bd68b822 (diff)
parent		c09816f2afce0f89f176c4bc58dc57ec9f204998 (diff)
Merge tag 'kvm-x86-misc-6.19' of https://github.com/kvm-x86/linux into HEAD
KVM x86 misc changes for 6.19:

 - Fix an async #PF bug where KVM would clear the completion queue when the
   guest transitioned in and out of paging mode, e.g. when handling an SMI
   and then returning to paged mode via RSM.

 - Fix a bug where TDX would effectively corrupt user-return MSR values if
   the TDX Module rejects VP.ENTER and thus doesn't clobber host MSRs as
   expected.

 - Leave the user-return notifier used to restore MSRs registered when
   disabling virtualization, and instead pin kvm.ko. Restoring host MSRs via
   IPI callback is either pointless (clean reboot) or dangerous (forced
   reboot) since KVM has no idea what code it's interrupting.

 - Use the checked versions of {get,put}_user(), as Linus wants to kill off
   the unchecked versions, and the checked versions are measurably faster on
   modern CPUs because the unchecked versions contain an LFENCE.

 - Fix a long-lurking bug where KVM's lack of catch-up logic for periodic
   APIC timers can result in a hard lockup in the host (see the sketch below).

 - Revert the periodic kvmclock sync logic now that KVM doesn't use a
   clocksource that's subject to NTP corrections.

 - Clean up KVM's handling of MMIO Stale Data and L1TF, and bury the latter
   behind CONFIG_CPU_MITIGATIONS.

 - Context switch XCR0, XSS, and PKRU outside of the entry/exit fastpath, as
   the only reason they were handled in the fastpath was to paper over a bug
   in the core #MC code that has long since been fixed.

 - Add emulator support for AVX MOV instructions to play nice with emulated
   devices whose PCI BARs guest drivers like to access with large multi-byte
   instructions.
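The periodic APIC timer fix is the one that lands in lapic.c below. To see why leaving the expiration in the past is dangerous, consider a guest that programs a 100 us period and is then paused by userspace for 10 seconds: on resume, the hrtimer would otherwise fire and be re-armed roughly 100,000 times in a row, each time with a deadline still in the past, keeping the CPU stuck in the timer callback. Below is a minimal, self-contained sketch of the clamping idea in plain C; toy_timer and advance_expiration are hypothetical stand-ins, not KVM's kvm_timer/ktime helpers.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the periodic timer state; not KVM's struct. */
struct toy_timer {
	int64_t target_expiration_ns;
	int64_t period_ns;
};

/* Advance to the next period, but never leave the deadline in the past. */
static void advance_expiration(struct toy_timer *t, int64_t now_ns)
{
	t->target_expiration_ns += t->period_ns;

	/*
	 * Without this clamp, a long pause leaves the deadline far in the
	 * past and the timer re-fires immediately on every restart.
	 */
	if (t->target_expiration_ns < now_ns)
		t->target_expiration_ns = now_ns;
}

int main(void)
{
	struct toy_timer t = { .target_expiration_ns = 0, .period_ns = 100000 };
	int64_t now_ns = 10LL * 1000 * 1000 * 1000;	/* resumed 10 s later */

	advance_expiration(&t, now_ns);
	printf("next expiration: %lld ns (clamped to now)\n",
	       (long long)t.target_expiration_ns);
	return 0;
}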
Diffstat (limited to 'arch/x86/kvm/lapic.c')
-rw-r--r--	arch/x86/kvm/lapic.c	44
1 file changed, 31 insertions, 13 deletions
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0ae7f913d782..1597dd0b0cc6 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2126,23 +2126,41 @@ static bool set_target_expiration(struct kvm_lapic *apic, u32 count_reg)
static void advance_periodic_target_expiration(struct kvm_lapic *apic)
{
+	struct kvm_timer *ktimer = &apic->lapic_timer;
	ktime_t now = ktime_get();
	u64 tscl = rdtsc();
	ktime_t delta;

	/*
-	 * Synchronize both deadlines to the same time source or
-	 * differences in the periods (caused by differences in the
-	 * underlying clocks or numerical approximation errors) will
-	 * cause the two to drift apart over time as the errors
-	 * accumulate.
+	 * Use kernel time as the time source for both the hrtimer deadline and
+	 * TSC-based deadline so that they stay synchronized. Computing each
+	 * deadline independently will cause the two deadlines to drift apart
+	 * over time as differences in the periods accumulate, e.g. due to
+	 * differences in the underlying clocks or numerical approximation errors.
	 */
-	apic->lapic_timer.target_expiration =
-		ktime_add_ns(apic->lapic_timer.target_expiration,
-			     apic->lapic_timer.period);
-	delta = ktime_sub(apic->lapic_timer.target_expiration, now);
-	apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
-		nsec_to_cycles(apic->vcpu, delta);
+	ktimer->target_expiration = ktime_add_ns(ktimer->target_expiration,
+						 ktimer->period);
+
+	/*
+	 * If the new expiration is in the past, e.g. because userspace stopped
+	 * running the VM for an extended duration, then force the expiration
+	 * to "now" and don't try to play catch-up with the missed events. KVM
+	 * will only deliver a single interrupt regardless of how many events
+	 * are pending, i.e. restarting the timer with an expiration in the
+	 * past will do nothing more than waste host cycles, and can even lead
+	 * to a hard lockup in extreme cases.
+	 */
+	if (ktime_before(ktimer->target_expiration, now))
+		ktimer->target_expiration = now;
+
+	/*
+	 * Note, ensuring the expiration isn't in the past also prevents delta
+	 * from going negative, which could cause the TSC deadline to become
+	 * excessively large due to it being an unsigned value.
+	 */
+	delta = ktime_sub(ktimer->target_expiration, now);
+	ktimer->tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
+			      nsec_to_cycles(apic->vcpu, delta);
 }

static void start_sw_period(struct kvm_lapic *apic)
@@ -2970,9 +2988,9 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
	apic_timer_expired(apic, true);

-	if (lapic_is_periodic(apic)) {
+	if (lapic_is_periodic(apic) && !WARN_ON_ONCE(!apic->lapic_timer.period)) {
		advance_periodic_target_expiration(apic);
-		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
+		hrtimer_set_expires(&ktimer->timer, ktimer->target_expiration);
		return HRTIMER_RESTART;
	} else
		return HRTIMER_NORESTART;
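The new comment in advance_periodic_target_expiration() about delta going negative deserves a concrete illustration: the cycle conversion works on unsigned 64-bit values, so without the clamp a deadline even slightly in the past would turn into an absurdly distant TSC deadline. A tiny standalone sketch in plain C (not KVM code; the 1 GHz TSC rate is only an illustrative assumption):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Expiration 5 ms in the past, as a signed nanosecond delta. */
	int64_t delta_ns = -5 * 1000 * 1000;

	/*
	 * Converting the negative delta to an unsigned cycle count wraps
	 * around: at 1 GHz the resulting "deadline" is ~584 years away.
	 */
	uint64_t cycles = (uint64_t)delta_ns;

	printf("delta_ns = %lld, cycles = %llu\n",
	       (long long)delta_ns, (unsigned long long)cycles);
	return 0;
}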