Skip to content

Commit

Permalink
MIPS: Cavium: Add EDAC support.
Browse files Browse the repository at this point in the history
Drivers for EDAC on Cavium.  Supported subsystems are:

 o CPU primary caches.  These are parity protected only, so only error
   reporting is provided.
 o Second level cache - ECC protected, provides SECDED.
 o Memory: ECC / SECDED if used with suitable DRAM modules.  The driver
   will only initialize if ECC is enabled on a system, so it is safe to run
   on non-ECC memory.
 o PCI: Parity error reporting

Since it is very hard to test this sort of code the implementation is very
conservative and uses polling where possible for now.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Reviewed-by: Borislav Petkov <borislav.petkov@amd.com>
  • Loading branch information
Ralf Baechle committed Dec 12, 2012
1 parent aa1762f commit f65aad4
Show file tree
Hide file tree
Showing 12 changed files with 725 additions and 24 deletions.
9 changes: 9 additions & 0 deletions MAINTAINERS
Original file line number Diff line number Diff line change
Expand Up @@ -2722,6 +2722,15 @@ W: bluesmoke.sourceforge.net
S: Maintained
F: drivers/edac/amd64_edac*

EDAC-CAVIUM
M: Ralf Baechle <ralf@linux-mips.org>
M: David Daney <david.daney@cavium.com>
L: linux-edac@vger.kernel.org
L: linux-mips@linux-mips.org
W: bluesmoke.sourceforge.net
S: Supported
F: drivers/edac/octeon_edac*

EDAC-E752X
M: Mark Gross <mark.gross@intel.com>
M: Doug Thompson <dougthompson@xmission.com>
Expand Down
1 change: 1 addition & 0 deletions arch/mips/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -774,6 +774,7 @@ config CAVIUM_OCTEON_REFERENCE_BOARD
select DMA_COHERENT
select SYS_SUPPORTS_64BIT_KERNEL
select SYS_SUPPORTS_BIG_ENDIAN
select EDAC_SUPPORT
select SYS_SUPPORTS_HOTPLUG_CPU
select SYS_HAS_EARLY_PRINTK
select SYS_HAS_CPU_CAVIUM_OCTEON
Expand Down
30 changes: 29 additions & 1 deletion arch/mips/cavium-octeon/setup.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
* for more details.
*
* Copyright (C) 2004-2007 Cavium Networks
* Copyright (C) 2008 Wind River Systems
* Copyright (C) 2008, 2009 Wind River Systems
* written by Ralf Baechle <ralf@linux-mips.org>
*/
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/console.h>
#include <linux/delay.h>
#include <linux/export.h>
Expand Down Expand Up @@ -821,3 +823,29 @@ void __init device_tree_init(void)
}
unflatten_device_tree();
}

/*
 * Names of the platform devices that edac_devinit() registers, one
 * platform device per entry.
 */
static char *edac_device_names[] = {
"co_l2c_edac",
"co_lmc_edac",
"co_pc_edac",
};

/*
 * Register one platform device (id -1, no resources) for every entry of
 * edac_device_names[].
 *
 * A failed registration is logged and does not abort the loop, so the
 * remaining devices are still registered.
 *
 * Returns 0 if every registration succeeded, otherwise the negative errno
 * of the last registration that failed.
 */
static int __init edac_devinit(void)
{
	struct platform_device *dev;
	const char *name;
	int i, err = 0;

	for (i = 0; i < ARRAY_SIZE(edac_device_names); i++) {
		name = edac_device_names[i];
		dev = platform_device_register_simple(name, -1, NULL, 0);
		if (IS_ERR(dev)) {
			pr_err("Registration of %s failed!\n", name);
			err = PTR_ERR(dev);
		}
	}

	return err;
}

/* Have edac_devinit() invoked as a device-level initcall. */
device_initcall(edac_devinit);
46 changes: 24 additions & 22 deletions arch/mips/mm/c-octeon.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
*
* Copyright (C) 2005-2007 Cavium Networks
*/
#include <linux/export.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/sched.h>
Expand All @@ -28,6 +29,7 @@
#include <asm/octeon/octeon.h>

unsigned long long cache_err_dcache[NR_CPUS];
EXPORT_SYMBOL_GPL(cache_err_dcache);

/**
* Octeon automatically flushes the dcache on tlb changes, so
Expand Down Expand Up @@ -288,42 +290,42 @@ void __cpuinit octeon_cache_init(void)
* Handle a cache error exception
*/

static void cache_parity_error_octeon(int non_recoverable)
static RAW_NOTIFIER_HEAD(co_cache_error_chain);

int register_co_cache_error_notifier(struct notifier_block *nb)
{
unsigned long coreid = cvmx_get_core_num();
uint64_t icache_err = read_octeon_c0_icacheerr();

pr_err("Cache error exception:\n");
pr_err("cp0_errorepc == %lx\n", read_c0_errorepc());
if (icache_err & 1) {
pr_err("CacheErr (Icache) == %llx\n",
(unsigned long long)icache_err);
write_octeon_c0_icacheerr(0);
}
if (cache_err_dcache[coreid] & 1) {
pr_err("CacheErr (Dcache) == %llx\n",
(unsigned long long)cache_err_dcache[coreid]);
cache_err_dcache[coreid] = 0;
}
return raw_notifier_chain_register(&co_cache_error_chain, nb);
}
EXPORT_SYMBOL_GPL(register_co_cache_error_notifier);

if (non_recoverable)
panic("Can't handle cache error: nested exception");
/*
 * Remove @nb from co_cache_error_chain.
 *
 * Returns whatever raw_notifier_chain_unregister() returns.
 */
int unregister_co_cache_error_notifier(struct notifier_block *nb)
{
return raw_notifier_chain_unregister(&co_cache_error_chain, nb);
}
EXPORT_SYMBOL_GPL(unregister_co_cache_error_notifier);

/*
 * Call every notifier on co_cache_error_chain, passing @val as the event
 * value and NULL as the data pointer.
 *
 * The callers in this file pass 0 for a recoverable cache error and 1 for
 * a non-recoverable one.  Returns raw_notifier_call_chain()'s result.
 */
static inline int co_cache_error_call_notifiers(unsigned long val)
{
return raw_notifier_call_chain(&co_cache_error_chain, val, NULL);
}

/**
* Called when the exception is recoverable
*/

asmlinkage void cache_parity_error_octeon_recoverable(void)
{
cache_parity_error_octeon(0);
co_cache_error_call_notifiers(0);
}

/**
* Called when the exception is not recoverable
*
* The issue is not that the cache error exception itself was non-recoverable
* but that, due to nesting of exceptions, we may have lost some state and so
* can't continue.
*/

asmlinkage void cache_parity_error_octeon_non_recoverable(void)
{
cache_parity_error_octeon(1);
co_cache_error_call_notifiers(1);
panic("Can't handle cache error: nested exception");
}
4 changes: 4 additions & 0 deletions arch/mips/pci/pci-octeon.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <linux/interrupt.h>
#include <linux/time.h>
#include <linux/delay.h>
#include <linux/platform_device.h>
#include <linux/swiotlb.h>

#include <asm/time.h>
Expand Down Expand Up @@ -704,6 +705,9 @@ static int __init octeon_pci_setup(void)
*/
cvmx_write_csr(CVMX_NPI_PCI_INT_SUM2, -1);

if (IS_ERR(platform_device_register_simple("co_pci_edac", 0, NULL, 0)))
pr_err("Registation of co_pci_edac failed!\n");

octeon_pci_dma_init();

return 0;
Expand Down
33 changes: 32 additions & 1 deletion drivers/edac/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
menuconfig EDAC
bool "EDAC (Error Detection And Correction) reporting"
depends on HAS_IOMEM
depends on X86 || PPC || TILE || ARM
depends on X86 || PPC || TILE || ARM || EDAC_SUPPORT
help
EDAC is designed to report errors in the core system.
These are low-level errors that are reported in the CPU or
Expand All @@ -27,6 +27,9 @@ menuconfig EDAC
There is also a mailing list for the EDAC project, which can
be found via the sourceforge page.

config EDAC_SUPPORT
bool

if EDAC

comment "Reporting subsystems"
Expand Down Expand Up @@ -316,4 +319,32 @@ config EDAC_HIGHBANK_L2
Support for error detection and correction on the
Calxeda Highbank memory controller.

config EDAC_OCTEON_PC
tristate "Cavium Octeon Primary Caches"
depends on EDAC_MM_EDAC && CPU_CAVIUM_OCTEON
help
Support for error detection and correction on the primary caches of
the cnMIPS cores of Cavium Octeon family SOCs.

config EDAC_OCTEON_L2C
tristate "Cavium Octeon Secondary Caches (L2C)"
depends on EDAC_MM_EDAC && CPU_CAVIUM_OCTEON
help
Support for error detection and correction on the second level
cache (L2C) of the Cavium Octeon family of SOCs.

config EDAC_OCTEON_LMC
tristate "Cavium Octeon DRAM Memory Controller (LMC)"
depends on EDAC_MM_EDAC && CPU_CAVIUM_OCTEON
help
Support for error detection and correction on the DRAM memory
controller (LMC) of the Cavium Octeon family of SOCs.

config EDAC_OCTEON_PCI
tristate "Cavium Octeon PCI Controller"
depends on EDAC_MM_EDAC && PCI && CPU_CAVIUM_OCTEON
help
Support for parity error reporting on the PCI controller of the
Cavium Octeon family of SOCs.

endif # EDAC
5 changes: 5 additions & 0 deletions drivers/edac/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,8 @@ obj-$(CONFIG_EDAC_TILE) += tile_edac.o

obj-$(CONFIG_EDAC_HIGHBANK_MC) += highbank_mc_edac.o
obj-$(CONFIG_EDAC_HIGHBANK_L2) += highbank_l2_edac.o

obj-$(CONFIG_EDAC_OCTEON_PC) += octeon_edac-pc.o
obj-$(CONFIG_EDAC_OCTEON_L2C) += octeon_edac-l2c.o
obj-$(CONFIG_EDAC_OCTEON_LMC) += octeon_edac-lmc.o
obj-$(CONFIG_EDAC_OCTEON_PCI) += octeon_edac-pci.o
118 changes: 118 additions & 0 deletions drivers/edac/octeon_edac-l2c.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (C) 2009 Wind River Systems,
* written by Ralf Baechle <ralf@linux-mips.org>
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/io.h>
#include <linux/edac.h>

#include <asm/octeon/cvmx.h>

#include "edac_core.h"
#include "edac_module.h"

#define EDAC_MOD_STR "octeon-l2c"

/*
 * Polling callback for the L2 cache EDAC device; co_l2c_probe() installs it
 * as the control structure's ->edac_check hook.
 *
 * Reads CVMX_L2T_ERR once.  A pending single-bit error is reported as a
 * corrected error (CE), a pending double-bit error as an uncorrected error
 * (UE).  For each flag found set, the register is written back with that
 * flag set to 1 to reset it.
 *
 * @l2c: EDAC device control structure the errors are reported against.
 */
static void co_l2c_poll(struct edac_device_ctl_info *l2c)
{
	union cvmx_l2t_err l2t_err;

	l2t_err.u64 = cvmx_read_csr(CVMX_L2T_ERR);
	if (l2t_err.s.sec_err) {
		edac_device_handle_ce(l2c, 0, 0,
				      "Single bit error (corrected)");
		l2t_err.s.sec_err = 1;		/* Reset */
		cvmx_write_csr(CVMX_L2T_ERR, l2t_err.u64);
	}
	if (l2t_err.s.ded_err) {
		/*
		 * SECDED detects double-bit errors but cannot correct them,
		 * so this is reported as a UE and must not claim correction.
		 */
		edac_device_handle_ue(l2c, 0, 0,
				      "Double bit error (detected)");
		l2t_err.s.ded_err = 1;		/* Reset */
		cvmx_write_csr(CVMX_L2T_ERR, l2t_err.u64);
	}
}

/*
 * Probe the "co_l2c_edac" platform device.
 *
 * Allocates an EDAC device control structure, hooks up co_l2c_poll() as
 * its polling callback and registers it with the EDAC core.  On success
 * the L2T error interrupts are disabled (errors are polled) and any stale
 * single/double-bit error flags in CVMX_L2T_ERR are reset.
 *
 * Returns 0 on success, -ENOMEM if the control structure cannot be
 * allocated, or -ENXIO if the EDAC core refuses the device.
 */
static int __devinit co_l2c_probe(struct platform_device *pdev)
{
	struct edac_device_ctl_info *l2c;
	union cvmx_l2t_err l2t_err;

	l2c = edac_device_alloc_ctl_info(0, "l2c", 1, NULL, 0, 0,
					 NULL, 0, edac_device_alloc_index());
	if (!l2c)
		return -ENOMEM;

	l2c->dev = &pdev->dev;
	platform_set_drvdata(pdev, l2c);
	l2c->dev_name = dev_name(&pdev->dev);

	l2c->mod_name = "octeon-l2c";
	l2c->ctl_name = "octeon_l2c_err";
	l2c->edac_check = co_l2c_poll;

	if (edac_device_add_device(l2c) > 0) {
		pr_err("%s: edac_device_add_device() failed\n", __func__);
		goto err;
	}

	l2t_err.u64 = cvmx_read_csr(CVMX_L2T_ERR);
	l2t_err.s.sec_intena = 0;	/* We poll */
	l2t_err.s.ded_intena = 0;
	l2t_err.s.sec_err = 1;		/* Clear, just in case */
	l2t_err.s.ded_err = 1;
	cvmx_write_csr(CVMX_L2T_ERR, l2t_err.u64);

	return 0;

err:
	/* Do not leave a pointer to the freed control structure behind. */
	platform_set_drvdata(pdev, NULL);
	edac_device_free_ctl_info(l2c);

	/*
	 * Report the failure.  Returning 0 here would make the driver core
	 * treat the probe as successful, and a later co_l2c_remove() would
	 * then free the control structure a second time.
	 */
	return -ENXIO;
}

/*
 * Undo co_l2c_probe(): delete the EDAC device that was added for @pdev,
 * then free the control structure stored in the device's driver data.
 *
 * Always returns 0.
 */
static int co_l2c_remove(struct platform_device *pdev)
{
	struct edac_device_ctl_info *ctl_info;

	ctl_info = platform_get_drvdata(pdev);

	edac_device_del_device(&pdev->dev);
	edac_device_free_ctl_info(ctl_info);

	return 0;
}

/*
 * Platform driver for the Octeon L2 cache EDAC device.  The driver name is
 * the same string as the "co_l2c_edac" entry in edac_device_names[].
 */
static struct platform_driver co_l2c_driver = {
	.driver = {
		.name = "co_l2c_edac",
	},
	.probe = co_l2c_probe,
	.remove = co_l2c_remove,
};

/*
 * Module entry point: register the L2C platform driver.
 *
 * Logs a warning if registration fails and returns the result of
 * platform_driver_register() unchanged.
 */
static int __init co_edac_init(void)
{
	int err = platform_driver_register(&co_l2c_driver);

	if (!err)
		return 0;

	pr_warning(EDAC_MOD_STR " EDAC failed to register\n");
	return err;
}

/* Module exit point: unregister the L2C platform driver. */
static void __exit co_edac_exit(void)
{
platform_driver_unregister(&co_l2c_driver);
}

/* Hook co_edac_init()/co_edac_exit() up as the module's init and exit. */
module_init(co_edac_init);
module_exit(co_edac_exit);

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>");
Loading

0 comments on commit f65aad4

Please sign in to comment.