Skip to content

Commit

Permalink
Merge branch 'pci/aer' into next
Browse files Browse the repository at this point in the history
* pci/aer:
  PCI/AER: Fix aer_probe() kernel-doc comment
  PCI/AER: Cache capability position
  PCI/AER: Avoid memory allocation in interrupt handling path
  ACPI / APEI: Send correct severity to calculate AER severity
  PCI/AER: Remove duplicate AER severity translation
  PCI/AER: Remove aerdriver.forceload kernel parameter
  PCI/AER: Remove aerdriver.nosourceid kernel parameter
  x86/PCI: VMD: Add quirk for AER to ignore source ID
  PCI/AER: Add bus flag to skip source ID matching

Conflicts:
	drivers/pci/probe.c
  • Loading branch information
Bjorn Helgaas committed Oct 3, 2016
2 parents 9e18ad9 + 6b20f72 commit 4dc2db0
Show file tree
Hide file tree
Showing 10 changed files with 67 additions and 89 deletions.
26 changes: 9 additions & 17 deletions Documentation/PCI/pcieaer-howto.txt
Original file line number Diff line number Diff line change
Expand Up @@ -49,25 +49,17 @@ depends on CONFIG_PCIEPORTBUS, so pls. set CONFIG_PCIEPORTBUS=y and
CONFIG_PCIEAER = y.

2.2 Load PCI Express AER Root Driver
There is a case where a system has AER support in BIOS. Enabling the AER
Root driver and having AER support in BIOS may result unpredictable
behavior. To avoid this conflict, a successful load of the AER Root driver
requires ACPI _OSC support in the BIOS to allow the AER Root driver to
request for native control of AER. See the PCI FW 3.0 Specification for
details regarding OSC usage. Currently, lots of firmwares don't provide
_OSC support while they use PCI Express. To support such firmwares,
forceload, a parameter of type bool, could enable AER to continue to
be initiated although firmwares have no _OSC support. To enable the
walkaround, pls. add aerdriver.forceload=y to kernel boot parameter line
when booting kernel. Note that forceload=n by default.

nosourceid, another parameter of type bool, can be used when broken
hardware (mostly chipsets) has root ports that cannot obtain the reporting
source ID. nosourceid=n by default.

Some systems have AER support in firmware. Enabling Linux AER support at
the same time the firmware handles AER may result in unpredictable
behavior. Therefore, Linux does not handle AER events unless the firmware
grants AER control to the OS via the ACPI _OSC method. See the PCI FW 3.0
Specification for details regarding _OSC usage.

2.3 AER error output
When a PCI-E AER error is captured, an error message will be outputted to
console. If it's a correctable error, it is outputted as a warning.

When a PCIe AER error is captured, an error message will be output to
console. If it's a correctable error, it is output as a warning.
Otherwise, it is printed as an error. So users could choose different
log level to filter out correctable error messages.

Expand Down
2 changes: 1 addition & 1 deletion drivers/acpi/apei/ghes.c
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,7 @@ static void ghes_do_proc(struct ghes *ghes,

devfn = PCI_DEVFN(pcie_err->device_id.device,
pcie_err->device_id.function);
aer_severity = cper_severity_to_aer(sev);
aer_severity = cper_severity_to_aer(gdata->error_severity);

/*
* If firmware reset the component to contain
Expand Down
18 changes: 6 additions & 12 deletions drivers/pci/pcie/aer/aerdrv.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ static int pcie_aer_disable;

void pci_no_aer(void)
{
pcie_aer_disable = 1; /* has priority over 'forceload' */
pcie_aer_disable = 1;
}

bool pci_aer_available(void)
Expand Down Expand Up @@ -130,7 +130,7 @@ static void aer_enable_rootport(struct aer_rpc *rpc)
pcie_capability_clear_word(pdev, PCI_EXP_RTCTL,
SYSTEM_ERROR_INTR_ON_MESG_MASK);

aer_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
aer_pos = pdev->aer_cap;
/* Clear error status */
pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, &reg32);
pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, reg32);
Expand Down Expand Up @@ -169,7 +169,7 @@ static void aer_disable_rootport(struct aer_rpc *rpc)
*/
set_downstream_devices_error_reporting(pdev, false);

pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
pos = pdev->aer_cap;
/* Disable Root's interrupt in response to error messages */
pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, &reg32);
reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
Expand All @@ -196,7 +196,7 @@ irqreturn_t aer_irq(int irq, void *context)
unsigned long flags;
int pos;

pos = pci_find_ext_capability(pdev->port, PCI_EXT_CAP_ID_ERR);
pos = pdev->port->aer_cap;
/*
* Must lock access to Root Error Status Reg, Root Error ID Reg,
* and Root error producer/consumer index
Expand Down Expand Up @@ -290,7 +290,6 @@ static void aer_remove(struct pcie_device *dev)
/**
* aer_probe - initialize resources
* @dev: pointer to the pcie_dev data structure
* @id: pointer to the service id data structure
*
* Invoked when PCI Express bus loads AER service driver.
*/
Expand All @@ -300,11 +299,6 @@ static int aer_probe(struct pcie_device *dev)
struct aer_rpc *rpc;
struct device *device = &dev->device;

/* Init */
status = aer_init(dev);
if (status)
return status;

/* Alloc rpc data structure */
rpc = aer_alloc_rpc(dev);
if (!rpc) {
Expand Down Expand Up @@ -339,7 +333,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
u32 reg32;
int pos;

pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
pos = dev->aer_cap;

/* Disable Root's interrupt in response to error messages */
pci_read_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, &reg32);
Expand Down Expand Up @@ -392,7 +386,7 @@ static void aer_error_resume(struct pci_dev *dev)
pcie_capability_write_word(dev, PCI_EXP_DEVSTA, reg16);

/* Clean AER Root Error Status */
pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
pos = dev->aer_cap;
pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask);
if (dev->error_state == pci_channel_io_normal)
Expand Down
9 changes: 1 addition & 8 deletions drivers/pci/pcie/aer/aerdrv.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ struct aer_rpc {
struct pcie_device *rpd; /* Root Port device */
struct work_struct dpc_handler;
struct aer_err_source e_sources[AER_ERROR_SOURCES_MAX];
struct aer_err_info e_info;
unsigned short prod_idx; /* Error Producer Index */
unsigned short cons_idx; /* Error Consumer Index */
int isr;
Expand Down Expand Up @@ -105,7 +106,6 @@ static inline pci_ers_result_t merge_result(enum pci_ers_result orig,
}

extern struct bus_type pcie_port_bus_type;
int aer_init(struct pcie_device *dev);
void aer_isr(struct work_struct *work);
void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info);
Expand All @@ -121,11 +121,4 @@ static inline int pcie_aer_get_firmware_first(struct pci_dev *pci_dev)
return 0;
}
#endif

static inline void pcie_aer_force_firmware_first(struct pci_dev *pci_dev,
int enable)
{
pci_dev->__aer_firmware_first = !!enable;
pci_dev->__aer_firmware_first_valid = 1;
}
#endif /* _AERDRV_H_ */
61 changes: 19 additions & 42 deletions drivers/pci/pcie/aer/aerdrv_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,6 @@
#include <linux/kfifo.h>
#include "aerdrv.h"

static bool forceload;
static bool nosourceid;
module_param(forceload, bool, 0);
module_param(nosourceid, bool, 0);

#define PCI_EXP_AER_FLAGS (PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \
PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE)

Expand All @@ -40,7 +35,7 @@ int pci_enable_pcie_error_reporting(struct pci_dev *dev)
if (pcie_aer_get_firmware_first(dev))
return -EIO;

if (!pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR))
if (!dev->aer_cap)
return -EIO;

return pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS);
Expand All @@ -62,7 +57,7 @@ int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
int pos;
u32 status;

pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
pos = dev->aer_cap;
if (!pos)
return -EIO;

Expand All @@ -83,7 +78,7 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
if (!pci_is_pcie(dev))
return -ENODEV;

pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
pos = dev->aer_cap;
if (!pos)
return -EIO;

Expand All @@ -102,6 +97,12 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
return 0;
}

int pci_aer_init(struct pci_dev *dev)
{
dev->aer_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
return pci_cleanup_aer_error_status_regs(dev);
}

/**
* add_error_device - list device to be handled
* @e_info: pointer to error info
Expand Down Expand Up @@ -132,7 +133,8 @@ static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info)
* When bus id is equal to 0, it might be a bad id
* reported by root port.
*/
if (!nosourceid && (PCI_BUS_NUM(e_info->id) != 0)) {
if ((PCI_BUS_NUM(e_info->id) != 0) &&
!(dev->bus->bus_flags & PCI_BUS_FLAGS_NO_AERSID)) {
/* Device ID match? */
if (e_info->id == ((dev->bus->number << 8) | dev->devfn))
return true;
Expand All @@ -144,10 +146,10 @@ static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info)

/*
* When either
* 1) nosourceid==y;
* 2) bus id is equal to 0. Some ports might lose the bus
* 1) bus id is equal to 0. Some ports might lose the bus
* id of error source id;
* 3) There are multiple errors and prior id comparing fails;
* 2) bus flag PCI_BUS_FLAGS_NO_AERSID is set
* 3) There are multiple errors and prior ID comparing fails;
* We check AER status registers to find possible reporter.
*/
if (atomic_read(&dev->enable_cnt) == 0)
Expand All @@ -158,7 +160,7 @@ static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info)
if (!(reg16 & PCI_EXP_AER_FLAGS))
return false;

pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
pos = dev->aer_cap;
if (!pos)
return false;

Expand Down Expand Up @@ -555,7 +557,7 @@ static void handle_error_source(struct pcie_device *aerdev,
* Correctable error does not need software intervention.
* No need to go through error recovery process.
*/
pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
pos = dev->aer_cap;
if (pos)
pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS,
info->status);
Expand Down Expand Up @@ -647,7 +649,7 @@ static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
info->status = 0;
info->tlp_header_valid = 0;

pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
pos = dev->aer_cap;

/* The device might not support AER */
if (!pos)
Expand Down Expand Up @@ -715,15 +717,8 @@ static inline void aer_process_err_devices(struct pcie_device *p_device,
static void aer_isr_one_error(struct pcie_device *p_device,
struct aer_err_source *e_src)
{
struct aer_err_info *e_info;

/* struct aer_err_info might be big, so we allocate it with slab */
e_info = kmalloc(sizeof(struct aer_err_info), GFP_KERNEL);
if (!e_info) {
dev_printk(KERN_DEBUG, &p_device->port->dev,
"Can't allocate mem when processing AER errors\n");
return;
}
struct aer_rpc *rpc = get_service_data(p_device);
struct aer_err_info *e_info = &rpc->e_info;

/*
* There is a possibility that both correctable error and
Expand Down Expand Up @@ -762,8 +757,6 @@ static void aer_isr_one_error(struct pcie_device *p_device,
if (find_source_device(p_device->port, e_info))
aer_process_err_devices(p_device, e_info);
}

kfree(e_info);
}

/**
Expand Down Expand Up @@ -812,19 +805,3 @@ void aer_isr(struct work_struct *work)
aer_isr_one_error(p_device, &e_src);
mutex_unlock(&rpc->rpc_mutex);
}

/**
* aer_init - provide AER initialization
* @dev: pointer to AER pcie device
*
* Invoked when AER service driver is loaded.
*/
int aer_init(struct pcie_device *dev)
{
if (forceload) {
dev_printk(KERN_DEBUG, &dev->device,
"aerdrv forceload requested.\n");
pcie_aer_force_firmware_first(dev->port, 0);
}
return 0;
}
6 changes: 2 additions & 4 deletions drivers/pci/pcie/aer/aerdrv_errprint.c
Original file line number Diff line number Diff line change
Expand Up @@ -219,15 +219,13 @@ int cper_severity_to_aer(int cper_severity)
}
EXPORT_SYMBOL_GPL(cper_severity_to_aer);

void cper_print_aer(struct pci_dev *dev, int cper_severity,
void cper_print_aer(struct pci_dev *dev, int aer_severity,
struct aer_capability_regs *aer)
{
int aer_severity, layer, agent, status_strs_size, tlp_header_valid = 0;
int layer, agent, status_strs_size, tlp_header_valid = 0;
u32 status, mask;
const char **status_strs;

aer_severity = cper_severity_to_aer(cper_severity);

if (aer_severity == AER_CORRECTABLE) {
status = aer->cor_status;
mask = aer->cor_mask;
Expand Down
5 changes: 3 additions & 2 deletions drivers/pci/probe.c
Original file line number Diff line number Diff line change
Expand Up @@ -1666,10 +1666,11 @@ static void pci_init_capabilities(struct pci_dev *dev)
/* Enable ACS P2P upstream forwarding */
pci_enable_acs(dev);

pci_cleanup_aer_error_status_regs(dev);

/* Precision Time Measurement */
pci_ptm_init(dev);

/* Advanced Error Reporting */
pci_aer_init(dev);
}

/*
Expand Down
17 changes: 17 additions & 0 deletions drivers/pci/quirks.c
Original file line number Diff line number Diff line change
Expand Up @@ -4428,3 +4428,20 @@ static void quirk_intel_qat_vf_cap(struct pci_dev *pdev)
}
}
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x443, quirk_intel_qat_vf_cap);

/*
* VMD-enabled root ports will change the source ID for all messages
* to the VMD device. Rather than doing device matching with the source
* ID, the AER driver should traverse the child device tree, reading
* AER registers to find the faulting device.
*/
static void quirk_no_aersid(struct pci_dev *pdev)
{
/* VMD Domain */
if (pdev->bus->sysdata && pci_domain_nr(pdev->bus) >= 0x10000)
pdev->bus->bus_flags |= PCI_BUS_FLAGS_NO_AERSID;
}
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2030, quirk_no_aersid);
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2031, quirk_no_aersid);
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2032, quirk_no_aersid);
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2033, quirk_no_aersid);
2 changes: 1 addition & 1 deletion include/linux/aer.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ static inline int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
}
#endif

void cper_print_aer(struct pci_dev *dev, int cper_severity,
void cper_print_aer(struct pci_dev *dev, int aer_severity,
struct aer_capability_regs *aer);
int cper_severity_to_aer(int cper_severity);
void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
Expand Down
10 changes: 8 additions & 2 deletions include/linux/pci.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,9 @@ enum pci_irq_reroute_variant {

typedef unsigned short __bitwise pci_bus_flags_t;
enum pci_bus_flags {
PCI_BUS_FLAGS_NO_MSI = (__force pci_bus_flags_t) 1,
PCI_BUS_FLAGS_NO_MMRBC = (__force pci_bus_flags_t) 2,
PCI_BUS_FLAGS_NO_MSI = (__force pci_bus_flags_t) 1,
PCI_BUS_FLAGS_NO_MMRBC = (__force pci_bus_flags_t) 2,
PCI_BUS_FLAGS_NO_AERSID = (__force pci_bus_flags_t) 4,
};

/* These values come from the PCI Express Spec */
Expand Down Expand Up @@ -268,6 +269,9 @@ struct pci_dev {
unsigned int class; /* 3 bytes: (base,sub,prog-if) */
u8 revision; /* PCI revision, low byte of class word */
u8 hdr_type; /* PCI header type (`multi' flag masked out) */
#ifdef CONFIG_PCIEAER
u16 aer_cap; /* AER capability offset */
#endif
u8 pcie_cap; /* PCIe capability offset */
u8 msi_cap; /* MSI capability offset */
u8 msix_cap; /* MSI-X capability offset */
Expand Down Expand Up @@ -1374,9 +1378,11 @@ static inline bool pcie_aspm_support_enabled(void) { return false; }
#ifdef CONFIG_PCIEAER
void pci_no_aer(void);
bool pci_aer_available(void);
int pci_aer_init(struct pci_dev *dev);
#else
static inline void pci_no_aer(void) { }
static inline bool pci_aer_available(void) { return false; }
static inline int pci_aer_init(struct pci_dev *d) { return -ENODEV; }
#endif

#ifdef CONFIG_PCIE_ECRC
Expand Down

0 comments on commit 4dc2db0

Please sign in to comment.