Skip to content

Commit

Permalink
powerpc/eeh: Add a eeh_dev_break debugfs interface
Browse files Browse the repository at this point in the history
Add an interface to debugfs for generating an EEH event on a given device.
This works by disabling memory accesses to and from the device by clearing
the Memory Space Enable bit in its PCI_COMMAND register (or the VF Memory
Space Enable bit in the parent PF's SR-IOV control register).

This is a somewhat portable alternative to using the platform specific
error injection mechanisms since those tend to be either hard to use, or
straight up broken. For pseries the interface also requires the use of
/dev/mem which is probably going to go away in a post-LOCKDOWN world
(and it's a horrific hack to begin with) so moving to a kernel-provided
interface makes sense and provides a sane, cross-platform interface for
userspace so we can write more generic testing scripts.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20190903101605.2890-14-oohall@gmail.com
  • Loading branch information
Oliver O'Halloran authored and Michael Ellerman committed Sep 5, 2019
1 parent 22cda7c commit bd6461c
Showing 1 changed file with 138 additions and 1 deletion.
139 changes: 138 additions & 1 deletion arch/powerpc/kernel/eeh.c
Original file line number Diff line number Diff line change
Expand Up @@ -1892,7 +1892,8 @@ static ssize_t eeh_dev_check_write(struct file *filp,
char buf[20];
int ret;

ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count);
memset(buf, 0, sizeof(buf));
ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
if (!ret)
return -EFAULT;

Expand Down Expand Up @@ -1929,6 +1930,139 @@ static const struct file_operations eeh_dev_check_fops = {
.read = eeh_debugfs_dev_usage,
};

/*
 * eeh_debugfs_break_device - provoke an EEH freeze on a PCI function
 * @pdev: device to break
 *
 * Finds the first assigned memory BAR of @pdev, turns off memory decoding
 * (PCI_COMMAND_MEMORY for a normal function, PCI_SRIOV_CTRL_MSE on the
 * parent PF for a VF), issues one MMIO read to the BAR and then restores
 * the original register value.
 *
 * Returns 0 on success, or -ENXIO if there is no usable memory BAR, the
 * device is a VF without a reachable PF / SR-IOV capability, the control
 * register cannot be read, or the BAR cannot be mapped.
 */
static int eeh_debugfs_break_device(struct pci_dev *pdev)
{
	struct resource *bar = NULL;
	void __iomem *mapped;
	u16 old, bit;
	int i, pos;

	/* Do we have an MMIO BAR to disable? */
	for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
		struct resource *r = &pdev->resource[i];

		if (!r->flags || !r->start)
			continue;
		if (r->flags & IORESOURCE_IO)
			continue;
		if (r->flags & IORESOURCE_UNSET)
			continue;

		bar = r;
		break;
	}

	if (!bar) {
		pci_err(pdev, "Unable to find Memory BAR to cause EEH with\n");
		return -ENXIO;
	}

	pci_err(pdev, "Going to break: %pR\n", bar);

	if (pdev->is_virtfn) {
#ifndef CONFIG_PCI_IOV
		return -ENXIO;
#else
		/*
		 * VFs don't have a per-function COMMAND register, so the best
		 * we can do is clear the Memory Space Enable bit in the PF's
		 * SRIOV control reg.
		 *
		 * Unfortunately, this requires that we have a PF (i.e doesn't
		 * work for a passed-through VF) and it has the potential side
		 * effect of also causing an EEH on every other VF under the
		 * PF. Oh well.
		 */
		pdev = pdev->physfn;
		if (!pdev)
			return -ENXIO; /* passed through VFs have no PF */

		/*
		 * A zero return means the capability was not found; don't
		 * turn that into a write to an unrelated config offset.
		 */
		pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
		if (!pos) {
			pci_err(pdev, "Unable to find SR-IOV capability\n");
			return -ENXIO;
		}
		pos += PCI_SRIOV_CTRL;
		bit = PCI_SRIOV_CTRL_MSE;
#endif /* !CONFIG_PCI_IOV */
	} else {
		bit = PCI_COMMAND_MEMORY;
		pos = PCI_COMMAND;
	}

	/*
	 * Process here is:
	 *
	 * 1. Disable Memory space.
	 *
	 * 2. Perform an MMIO to the device. This should result in an error
	 *    (CA / UR) being raised by the device which results in an EEH
	 *    PE freeze. Using the in_8() accessor skips the EEH detection
	 *    hook, so the EEH detection machinery won't be triggered here.
	 *    This is to match the usual behaviour of EEH where the HW will
	 *    asynchronously freeze a PE and it's up to the kernel to notice
	 *    and deal with it.
	 *
	 * 3. Turn Memory space back on. This is more important for VFs
	 *    since recovery will probably fail if we don't. For normal
	 *    devices the COMMAND register is reset as a part of
	 *    re-initialising the device.
	 *
	 * Breaking stuff is the point so who cares if it's racy ;)
	 */

	/* Without a valid saved value we would write garbage back in step 3. */
	if (pci_read_config_word(pdev, pos, &old)) {
		pci_err(pdev, "Unable to read config space at 0x%x\n", pos);
		return -ENXIO;
	}

	mapped = ioremap(bar->start, PAGE_SIZE);
	if (!mapped) {
		pci_err(pdev, "Unable to map MMIO BAR %pR\n", bar);
		return -ENXIO;
	}

	pci_write_config_word(pdev, pos, old & ~bit);
	in_8(mapped);
	pci_write_config_word(pdev, pos, old);

	iounmap(mapped);

	return 0;
}

/*
 * eeh_dev_break_write - debugfs write handler for "eeh_dev_break"
 * @filp:     file being written (unused)
 * @user_buf: user buffer holding a "<domain>:<bus>:<dev>.<fn>" string (hex)
 * @count:    number of bytes supplied by userspace
 * @ppos:     file position, passed through to simple_write_to_buffer()
 *
 * Parses a PCI address from userspace, looks the device up and hands it to
 * eeh_debugfs_break_device().
 *
 * Returns @count on success. Returns the negative error reported by
 * simple_write_to_buffer(), -EFAULT if nothing was copied, -EINVAL if the
 * address is malformed or out of range, -ENODEV if no such device exists,
 * or the error returned by eeh_debugfs_break_device().
 */
static ssize_t eeh_dev_break_write(struct file *filp,
				   const char __user *user_buf,
				   size_t count, loff_t *ppos)
{
	uint32_t domain, bus, dev, fn;
	struct pci_dev *pdev;
	char buf[20];
	int ret;

	/* Zero-fill and copy at most sizeof(buf)-1 bytes so buf stays NUL-terminated. */
	memset(buf, 0, sizeof(buf));
	ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
	if (ret < 0)
		return ret;	/* propagate the real copy error */
	if (!ret)
		return -EFAULT;

	ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn);
	if (ret != 4) {
		pr_err("%s: expected 4 args, got %d\n", __func__, ret);
		return -EINVAL;
	}

	/*
	 * devfn packs a 5-bit device and a 3-bit function number. Reject
	 * out-of-range values: an oversized fn would otherwise spill into
	 * the device bits and silently select a different device.
	 */
	if (bus > 0xff || dev > 0x1f || fn > 0x7) {
		pr_err("%s: invalid PCI address %x:%x:%x.%x\n",
		       __func__, domain, bus, dev, fn);
		return -EINVAL;
	}

	pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn);
	if (!pdev)
		return -ENODEV;

	ret = eeh_debugfs_break_device(pdev);
	/* Drop the reference taken by pci_get_domain_bus_and_slot(). */
	pci_dev_put(pdev);

	if (ret < 0)
		return ret;

	return count;
}

/*
 * File operations for the "eeh_dev_break" debugfs entry: writes go to
 * eeh_dev_break_write(), reads are served by the same
 * eeh_debugfs_dev_usage() handler that eeh_dev_check uses.
 */
static const struct file_operations eeh_dev_break_fops = {
	.read	= eeh_debugfs_dev_usage,
	.write	= eeh_dev_break_write,
	.llseek	= no_llseek,
	.open	= simple_open,
};

#endif

static int __init eeh_init_proc(void)
Expand All @@ -1947,6 +2081,9 @@ static int __init eeh_init_proc(void)
debugfs_create_file_unsafe("eeh_dev_check", 0600,
powerpc_debugfs_root, NULL,
&eeh_dev_check_fops);
debugfs_create_file_unsafe("eeh_dev_break", 0600,
powerpc_debugfs_root, NULL,
&eeh_dev_break_fops);
debugfs_create_file_unsafe("eeh_force_recover", 0600,
powerpc_debugfs_root, NULL,
&eeh_force_recover_fops);
Expand Down

0 comments on commit bd6461c

Please sign in to comment.