diff options
| -rw-r--r-- | arch/x86/include/asm/mce.h | 6 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/mce/amd.c | 111 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/mce/core.c | 31 |
3 files changed, 49 insertions, 99 deletions
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 31e3cb550fb3..7d6588195d56 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -166,6 +166,12 @@ #define MCE_IN_KERNEL_COPYIN BIT_ULL(7) /* + * Indicates that handler should check and clear Deferred error registers + * rather than common ones. + */ +#define MCE_CHECK_DFR_REGS BIT_ULL(8) + +/* * This structure contains all data related to the MCE log. Also * carries a signature to make it easier to find from external * debugging tools. Each entry is only valid when its finished flag diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index ac6a98aa7bc2..d9f9ee7db5c8 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -56,6 +56,7 @@ static bool thresholding_irq_en; struct mce_amd_cpu_data { mce_banks_t thr_intr_banks; + mce_banks_t dfr_intr_banks; }; static DEFINE_PER_CPU_READ_MOSTLY(struct mce_amd_cpu_data, mce_amd_data); @@ -300,8 +301,10 @@ static void smca_configure(unsigned int bank, unsigned int cpu) * APIC based interrupt. First, check that no interrupt has been * set. */ - if ((low & BIT(5)) && !((high >> 5) & 0x3)) + if ((low & BIT(5)) && !((high >> 5) & 0x3)) { + __set_bit(bank, this_cpu_ptr(&mce_amd_data)->dfr_intr_banks); high |= BIT(5); + } this_cpu_ptr(mce_banks_array)[bank].lsb_in_status = !!(low & BIT(8)); @@ -792,37 +795,6 @@ bool amd_mce_usable_address(struct mce *m) return false; } -static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc) -{ - struct mce_hw_err err; - struct mce *m = &err.m; - - mce_prep_record(&err); - - m->status = status; - m->misc = misc; - m->bank = bank; - m->tsc = rdtsc(); - - if (m->status & MCI_STATUS_ADDRV) { - m->addr = addr; - - smca_extract_err_addr(m); - } - - if (mce_flags.smca) { - rdmsrq(MSR_AMD64_SMCA_MCx_IPID(bank), m->ipid); - - if (m->status & MCI_STATUS_SYNDV) { - rdmsrq(MSR_AMD64_SMCA_MCx_SYND(bank), m->synd); - rdmsrq(MSR_AMD64_SMCA_MCx_SYND1(bank), err.vendor.amd.synd1); - rdmsrq(MSR_AMD64_SMCA_MCx_SYND2(bank), err.vendor.amd.synd2); - } - } - - mce_log(&err); -} - DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error) { trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR); @@ -832,75 +804,10 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error) apic_eoi(); } -/* - * Returns true if the logged error is deferred. False, otherwise. - */ -static inline bool -_log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc) -{ - u64 status, addr = 0; - - rdmsrq(msr_stat, status); - if (!(status & MCI_STATUS_VAL)) - return false; - - if (status & MCI_STATUS_ADDRV) - rdmsrq(msr_addr, addr); - - __log_error(bank, status, addr, misc); - - wrmsrq(msr_stat, 0); - - return status & MCI_STATUS_DEFERRED; -} - -static bool _log_error_deferred(unsigned int bank, u32 misc) -{ - if (!_log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS), - mca_msr_reg(bank, MCA_ADDR), misc)) - return false; - - /* - * Non-SMCA systems don't have MCA_DESTAT/MCA_DEADDR registers. - * Return true here to avoid accessing these registers. - */ - if (!mce_flags.smca) - return true; - - /* Clear MCA_DESTAT if the deferred error was logged from MCA_STATUS. */ - wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0); - return true; -} - -/* - * We have three scenarios for checking for Deferred errors: - * - * 1) Non-SMCA systems check MCA_STATUS and log error if found. - * 2) SMCA systems check MCA_STATUS. If error is found then log it and also - * clear MCA_DESTAT. - * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and - * log it. - */ -static void log_error_deferred(unsigned int bank) -{ - if (_log_error_deferred(bank, 0)) - return; - - /* - * Only deferred errors are logged in MCA_DE{STAT,ADDR} so just check - * for a valid error. - */ - _log_error_bank(bank, MSR_AMD64_SMCA_MCx_DESTAT(bank), - MSR_AMD64_SMCA_MCx_DEADDR(bank), 0); -} - /* APIC interrupt handler for deferred errors */ static void amd_deferred_error_interrupt(void) { - unsigned int bank; - - for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) - log_error_deferred(bank); + machine_check_poll(MCP_TIMESTAMP, &this_cpu_ptr(&mce_amd_data)->dfr_intr_banks); } static void reset_block(struct threshold_block *block) @@ -952,6 +859,14 @@ void amd_clear_bank(struct mce *m) { amd_reset_thr_limit(m->bank); + /* Clear MCA_DESTAT for all deferred errors even those logged in MCA_STATUS. */ + if (m->status & MCI_STATUS_DEFERRED) + mce_wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank), 0); + + /* Don't clear MCA_STATUS if MCA_DESTAT was used exclusively. */ + if (m->kflags & MCE_CHECK_DFR_REGS) + return; + mce_wrmsrq(mca_msr_reg(m->bank, MCA_STATUS), 0); } diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 460e90a1a0b1..4aff14e04287 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -687,7 +687,10 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i) m->misc = mce_rdmsrq(mca_msr_reg(i, MCA_MISC)); if (m->status & MCI_STATUS_ADDRV) { - m->addr = mce_rdmsrq(mca_msr_reg(i, MCA_ADDR)); + if (m->kflags & MCE_CHECK_DFR_REGS) + m->addr = mce_rdmsrq(MSR_AMD64_SMCA_MCx_DEADDR(i)); + else + m->addr = mce_rdmsrq(mca_msr_reg(i, MCA_ADDR)); /* * Mask the reported address by the reported granularity. @@ -715,6 +718,29 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i) DEFINE_PER_CPU(unsigned, mce_poll_count); /* + * We have three scenarios for checking for Deferred errors: + * + * 1) Non-SMCA systems check MCA_STATUS and log error if found. + * 2) SMCA systems check MCA_STATUS. If error is found then log it and also + * clear MCA_DESTAT. + * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and + * log it. + */ +static bool smca_should_log_poll_error(struct mce *m) +{ + if (m->status & MCI_STATUS_VAL) + return true; + + m->status = mce_rdmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank)); + if ((m->status & MCI_STATUS_VAL) && (m->status & MCI_STATUS_DEFERRED)) { + m->kflags |= MCE_CHECK_DFR_REGS; + return true; + } + + return false; +} + +/* * Newer Intel systems that support software error * recovery need to make additional checks. Other * CPUs should skip over uncorrected errors, but log @@ -740,6 +766,9 @@ static bool should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err) { struct mce *m = &err->m; + if (mce_flags.smca) + return smca_should_log_poll_error(m); + /* If this entry is not valid, ignore it. */ if (!(m->status & MCI_STATUS_VAL)) return false; |