Skip to content

Commit

Permalink
EDAC/ghes: Fix locking and memory barrier issues
Browse files Browse the repository at this point in the history
The ghes registration and refcount is broken in several ways:

 * ghes_edac_register() returns with success for a 2nd instance
   even if a first instance's registration is still running. This is
   not correct as the first instance may fail later. A subsequent
   registration may not finish before the first. Parallel registrations
   must be avoided.

 * The refcount was increased even if a registration failed. This
   leads to stale counters preventing the device from being released.

 * The ghes refcount may not be decremented properly on unregistration.
   Always decrement the refcount once ghes_edac_unregister() is called to
   keep the refcount sane.

 * The ghes_pvt pointer is handed to the irq handler before registration
   has finished.

 * The mci structure could be freed while the irq handler is running.

Fix this by adding a mutex to ghes_edac_register(). This mutex
serializes instances to register and unregister. The refcount is only
increased if the registration succeeded. This makes sure the refcount is
in a consistent state after registering or unregistering a device.

Note: A spinlock cannot be used here as the code section may sleep.

The ghes_pvt is protected by ghes_lock now. This ensures the pointer is
not updated before registration has finished or while the irq handler is
running. It is unset before unregistering the device, including the
necessary (implicit) memory barriers that make the change visible to
other CPUs. Thus, the device cannot be used anymore by an interrupt.

Also, rename ghes_init to ghes_refcount for better readability and
switch to refcount API.

A refcount is needed because there can be multiple GHES structures being
defined (see ACPI 6.3 specification, 18.3.2.7 Generic Hardware Error
Source, "Some platforms may describe multiple Generic Hardware Error
Source structures with different notification types, ...").

An alternative approach -- using the mci's device refcount (get_device())
with a release function -- does not work here. A release function is
called only from device_release(), i.e. with the last put_device() call.
But the device must be deleted *before* that with device_del(). This is
only possible by maintaining a separate refcount.

 [ bp: touchups. ]

Fixes: 0fe5f28 ("EDAC, ghes: Model a single, logical memory controller")
Fixes: 1e72e67 ("EDAC/ghes: Fix Use after free in ghes_edac remove path")
Co-developed-by: James Morse <james.morse@arm.com>
Signed-off-by: James Morse <james.morse@arm.com>
Co-developed-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Robert Richter <rrichter@marvell.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: "linux-edac@vger.kernel.org" <linux-edac@vger.kernel.org>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Tony Luck <tony.luck@intel.com>
Link: https://lkml.kernel.org/r/20191105200732.3053-1-rrichter@marvell.com
  • Loading branch information
Robert Richter authored and Borislav Petkov committed Nov 8, 2019
1 parent 582f94b commit 23f61b9
Showing 1 changed file with 66 additions and 24 deletions.
90 changes: 66 additions & 24 deletions drivers/edac/ghes_edac.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,18 @@ struct ghes_edac_pvt {
char msg[80];
};

/*
 * NOTE(review): this rendered diff shows both sides of the hunk; the
 * atomic_t line is the pre-patch counter which the patch renames to
 * ghes_refcount and converts to the refcount API (see commit message).
 */
static atomic_t ghes_init = ATOMIC_INIT(0);
static refcount_t ghes_refcount = REFCOUNT_INIT(0);

/*
 * Access to ghes_pvt must be protected by ghes_lock. The spinlock
 * also provides the necessary (implicit) memory barrier for the SMP
 * case to make the pointer visible on another CPU.
 */
static struct ghes_edac_pvt *ghes_pvt;

/* GHES registration mutex; serializes register/unregister instances. */
static DEFINE_MUTEX(ghes_reg_mutex);

/*
* Sync with other, potentially concurrent callers of
* ghes_edac_report_mem_error(). We don't know what the
Expand Down Expand Up @@ -79,9 +88,8 @@ static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg)
(*num_dimm)++;
}

static int get_dimm_smbios_index(u16 handle)
static int get_dimm_smbios_index(struct mem_ctl_info *mci, u16 handle)
{
struct mem_ctl_info *mci = ghes_pvt->mci;
int i;

for (i = 0; i < mci->tot_dimms; i++) {
Expand Down Expand Up @@ -198,14 +206,11 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
enum hw_event_mc_err_type type;
struct edac_raw_error_desc *e;
struct mem_ctl_info *mci;
struct ghes_edac_pvt *pvt = ghes_pvt;
struct ghes_edac_pvt *pvt;
unsigned long flags;
char *p;
u8 grain_bits;

if (!pvt)
return;

/*
* We can do the locking below because GHES defers error processing
* from NMI to IRQ context. Whenever that changes, we'd at least
Expand All @@ -216,6 +221,10 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)

spin_lock_irqsave(&ghes_lock, flags);

pvt = ghes_pvt;
if (!pvt)
goto unlock;

mci = pvt->mci;
e = &mci->error_desc;

Expand Down Expand Up @@ -348,7 +357,7 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
p += sprintf(p, "DIMM DMI handle: 0x%.4x ",
mem_err->mem_dev_handle);

index = get_dimm_smbios_index(mem_err->mem_dev_handle);
index = get_dimm_smbios_index(mci, mem_err->mem_dev_handle);
if (index >= 0) {
e->top_layer = index;
e->enable_per_layer_report = true;
Expand Down Expand Up @@ -443,6 +452,8 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
grain_bits, e->syndrome, pvt->detail_location);

edac_raw_mc_handle_error(type, mci, e);

unlock:
spin_unlock_irqrestore(&ghes_lock, flags);
}

Expand All @@ -457,10 +468,12 @@ static struct acpi_platform_list plat_list[] = {
int ghes_edac_register(struct ghes *ghes, struct device *dev)
{
bool fake = false;
int rc, num_dimm = 0;
int rc = 0, num_dimm = 0;
struct mem_ctl_info *mci;
struct ghes_edac_pvt *pvt;
struct edac_mc_layer layers[1];
struct ghes_edac_dimm_fill dimm_fill;
unsigned long flags;
int idx = -1;

if (IS_ENABLED(CONFIG_X86)) {
Expand All @@ -472,11 +485,14 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
idx = 0;
}

/* finish another registration/unregistration instance first */
mutex_lock(&ghes_reg_mutex);

/*
* We have only one logical memory controller to which all DIMMs belong.
*/
if (atomic_inc_return(&ghes_init) > 1)
return 0;
if (refcount_inc_not_zero(&ghes_refcount))
goto unlock;

/* Get the number of DIMMs */
dmi_walk(ghes_edac_count_dimms, &num_dimm);
Expand All @@ -494,12 +510,13 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(struct ghes_edac_pvt));
if (!mci) {
pr_info("Can't allocate memory for EDAC data\n");
return -ENOMEM;
rc = -ENOMEM;
goto unlock;
}

ghes_pvt = mci->pvt_info;
ghes_pvt->ghes = ghes;
ghes_pvt->mci = mci;
pvt = mci->pvt_info;
pvt->ghes = ghes;
pvt->mci = mci;

mci->pdev = dev;
mci->mtype_cap = MEM_FLAG_EMPTY;
Expand Down Expand Up @@ -541,23 +558,48 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
if (rc < 0) {
pr_info("Can't register at EDAC core\n");
edac_mc_free(mci);
return -ENODEV;
rc = -ENODEV;
goto unlock;
}
return 0;

spin_lock_irqsave(&ghes_lock, flags);
ghes_pvt = pvt;
spin_unlock_irqrestore(&ghes_lock, flags);

/* only increment on success */
refcount_inc(&ghes_refcount);

unlock:
mutex_unlock(&ghes_reg_mutex);

return rc;
}

/*
 * Tear down the single logical GHES memory controller once the last
 * GHES instance is unregistered.
 *
 * Serialized against ghes_edac_register() by ghes_reg_mutex. The
 * refcount is always decremented here to keep it consistent; only the
 * call that drops it to zero actually removes the device.
 *
 * (Reconstructed added-lines-only version: the rendered diff above had
 * fused the removed lines back in, which would have returned with
 * ghes_reg_mutex held and deleted/freed the mci twice.)
 */
void ghes_edac_unregister(struct ghes *ghes)
{
	struct mem_ctl_info *mci;
	unsigned long flags;

	mutex_lock(&ghes_reg_mutex);

	/* Other GHES instances still reference the device -- keep it. */
	if (!refcount_dec_and_test(&ghes_refcount))
		goto unlock;

	/*
	 * Wait for the irq handler being finished: clearing ghes_pvt
	 * under ghes_lock guarantees no interrupt handler uses the
	 * device afterwards; the unlock provides the (implicit) memory
	 * barrier making the NULL pointer visible on other CPUs.
	 */
	spin_lock_irqsave(&ghes_lock, flags);
	mci = ghes_pvt ? ghes_pvt->mci : NULL;
	ghes_pvt = NULL;
	spin_unlock_irqrestore(&ghes_lock, flags);

	/* Nothing was registered (or registration failed earlier). */
	if (!mci)
		goto unlock;

	/* Delete the device first; free the mci only if one came back. */
	mci = edac_mc_del_mc(mci->pdev);
	if (mci)
		edac_mc_free(mci);

unlock:
	mutex_unlock(&ghes_reg_mutex);
}

0 comments on commit 23f61b9

Please sign in to comment.