diff options
author | Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> | 2019-04-29 23:45:48 +0530 |
---|---|---|
committer | Michael Ellerman <mpe@ellerman.id.au> | 2019-05-01 22:22:24 +1000 |
commit | d6e8a150850601277039a548ffcdddd1bfe3e365 (patch) | |
tree | d245f4bdb0c78b2b9184b0171f5933daf8034420 /arch/powerpc | |
parent | 5b2a15296210d3b70e06d0f09a8e701ff74ccbe8 (diff) |
powerpc/powernv/mce: Reduce MCE console logs to fewer lines.
Also add the CPU number when displaying the MCE log. This makes the logs
cleaner when MCEs hit multiple CPUs simultaneously.
Before the changes, the MCE output was:
Severe Machine check interrupt [Recovered]
NIP [d00000000ba80280]: insert_slb_entry.constprop.0+0x278/0x2c0 [mcetest_slb]
Initiator: CPU
Error type: SLB [Multihit]
Effective address: d00000000ba80280
After this patch series, the MCE output will be:
MCE: CPU80: machine check (Warning) Host SLB Multihit [Recovered]
MCE: CPU80: NIP: [d00000000b550280] insert_slb_entry.constprop.0+0x278/0x2c0 [mcetest_slb]
MCE: CPU80: Probable software error (some chance of hardware cause)
UE in host application:
MCE: CPU48: machine check (Severe) Host UE Load/Store DAR: 00007fffc6079a80 paddr: 0000000f8e260000 [Not recovered]
MCE: CPU48: PID: 4584 Comm: find NIP: [0000000010023368]
MCE: CPU48: Hardware error
and for an MCE in a guest:
MCE: CPU80: machine check (Warning) Guest SLB Multihit DAR: 000001001b6e0320 [Recovered]
MCE: CPU80: PID: 24765 Comm: qemu-system-ppc Guest NIP: [00007fffa309dc60]
MCE: CPU80: Probable software error (some chance of hardware cause)
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'arch/powerpc')
-rw-r--r-- | arch/powerpc/include/asm/mce.h | 2 | ||||
-rw-r--r-- | arch/powerpc/kernel/mce.c | 89 |
2 files changed, 49 insertions, 42 deletions
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index ad47fa865324..c888ef9a3eaf 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -116,7 +116,7 @@ struct machine_check_event { enum MCE_Initiator initiator:8; /* 0x03 */ enum MCE_ErrorType error_type:8; /* 0x04 */ enum MCE_Disposition disposition:8; /* 0x05 */ - uint8_t reserved_1[2]; /* 0x06 */ + uint16_t cpu; /* 0x06 */ uint64_t gpr3; /* 0x08 */ uint64_t srr0; /* 0x10 */ uint64_t srr1; /* 0x18 */ diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index b5fec1f9751a..25a8b20cbbdc 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -112,6 +112,7 @@ void save_mce_event(struct pt_regs *regs, long handled, mce->srr1 = regs->msr; mce->gpr3 = regs->gpr[3]; mce->in_use = 1; + mce->cpu = get_paca()->paca_index; /* Mark it recovered if we have handled it and MSR(RI=1). */ if (handled && (regs->msr & MSR_RI)) @@ -310,7 +311,11 @@ static void machine_check_process_queued_event(struct irq_work *work) void machine_check_print_event_info(struct machine_check_event *evt, bool user_mode, bool in_guest) { - const char *level, *sevstr, *subtype; + const char *level, *sevstr, *subtype, *err_type; + uint64_t ea = 0, pa = 0; + int n = 0; + char dar_str[50]; + char pa_str[50]; static const char *mc_ue_types[] = { "Indeterminate", "Instruction fetch", @@ -384,101 +389,103 @@ void machine_check_print_event_info(struct machine_check_event *evt, break; } - printk("%s%s Machine check interrupt [%s]\n", level, sevstr, - evt->disposition == MCE_DISPOSITION_RECOVERED ? 
- "Recovered" : "Not recovered"); - - if (in_guest) { - printk("%s Guest NIP: %016llx\n", level, evt->srr0); - } else if (user_mode) { - printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level, - evt->srr0, current->pid, current->comm); - } else { - printk("%s NIP [%016llx]: %pS\n", level, evt->srr0, - (void *)evt->srr0); - } - - printk("%s Initiator: %s\n", level, - evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown"); switch (evt->error_type) { case MCE_ERROR_TYPE_UE: + err_type = "UE"; subtype = evt->u.ue_error.ue_error_type < ARRAY_SIZE(mc_ue_types) ? mc_ue_types[evt->u.ue_error.ue_error_type] : "Unknown"; - printk("%s Error type: UE [%s]\n", level, subtype); if (evt->u.ue_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt->u.ue_error.effective_address); + ea = evt->u.ue_error.effective_address; if (evt->u.ue_error.physical_address_provided) - printk("%s Physical address: %016llx\n", - level, evt->u.ue_error.physical_address); + pa = evt->u.ue_error.physical_address; break; case MCE_ERROR_TYPE_SLB: + err_type = "SLB"; subtype = evt->u.slb_error.slb_error_type < ARRAY_SIZE(mc_slb_types) ? mc_slb_types[evt->u.slb_error.slb_error_type] : "Unknown"; - printk("%s Error type: SLB [%s]\n", level, subtype); if (evt->u.slb_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt->u.slb_error.effective_address); + ea = evt->u.slb_error.effective_address; break; case MCE_ERROR_TYPE_ERAT: + err_type = "ERAT"; subtype = evt->u.erat_error.erat_error_type < ARRAY_SIZE(mc_erat_types) ? mc_erat_types[evt->u.erat_error.erat_error_type] : "Unknown"; - printk("%s Error type: ERAT [%s]\n", level, subtype); if (evt->u.erat_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt->u.erat_error.effective_address); + ea = evt->u.erat_error.effective_address; break; case MCE_ERROR_TYPE_TLB: + err_type = "TLB"; subtype = evt->u.tlb_error.tlb_error_type < ARRAY_SIZE(mc_tlb_types) ? 
mc_tlb_types[evt->u.tlb_error.tlb_error_type] : "Unknown"; - printk("%s Error type: TLB [%s]\n", level, subtype); if (evt->u.tlb_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt->u.tlb_error.effective_address); + ea = evt->u.tlb_error.effective_address; break; case MCE_ERROR_TYPE_USER: + err_type = "User"; subtype = evt->u.user_error.user_error_type < ARRAY_SIZE(mc_user_types) ? mc_user_types[evt->u.user_error.user_error_type] : "Unknown"; - printk("%s Error type: User [%s]\n", level, subtype); if (evt->u.user_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt->u.user_error.effective_address); + ea = evt->u.user_error.effective_address; break; case MCE_ERROR_TYPE_RA: + err_type = "Real address"; subtype = evt->u.ra_error.ra_error_type < ARRAY_SIZE(mc_ra_types) ? mc_ra_types[evt->u.ra_error.ra_error_type] : "Unknown"; - printk("%s Error type: Real address [%s]\n", level, subtype); if (evt->u.ra_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt->u.ra_error.effective_address); + ea = evt->u.ra_error.effective_address; break; case MCE_ERROR_TYPE_LINK: + err_type = "Link"; subtype = evt->u.link_error.link_error_type < ARRAY_SIZE(mc_link_types) ? 
mc_link_types[evt->u.link_error.link_error_type] : "Unknown"; - printk("%s Error type: Link [%s]\n", level, subtype); if (evt->u.link_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt->u.link_error.effective_address); + ea = evt->u.link_error.effective_address; break; default: case MCE_ERROR_TYPE_UNKNOWN: - printk("%s Error type: Unknown\n", level); + err_type = "Unknown"; + subtype = ""; break; } + + dar_str[0] = pa_str[0] = '\0'; + if (ea && evt->srr0 != ea) { + /* Load/Store address */ + n = sprintf(dar_str, "DAR: %016llx ", ea); + if (pa) + sprintf(dar_str + n, "paddr: %016llx ", pa); + } else if (pa) { + sprintf(pa_str, " paddr: %016llx", pa); + } + + printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n", + level, evt->cpu, sevstr, in_guest ? "Guest" : "Host", + err_type, subtype, dar_str, + evt->disposition == MCE_DISPOSITION_RECOVERED ? + "Recovered" : "Not recovered"); + + if (in_guest || user_mode) { + printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n", + level, evt->cpu, current->pid, current->comm, + in_guest ? "Guest " : "", evt->srr0, pa_str); + } else { + printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n", + level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str); + } } EXPORT_SYMBOL_GPL(machine_check_print_event_info); |