Skip to content

Commit

Permalink
[SCSI] aacraid: fix File System going into read-only mode
Browse files Browse the repository at this point in the history
These particular problems were reported by Cisco and SAP and customers
as well. Cisco reported on RHEL4 U6 and SAP reported on SLES9 SP4 and
SLES10 SP2. We added these fixes on RHEL4 U6 and gave a private build
to IBM and Cisco. Cisco and IBM tested it for more than 15 days and
they reported that they did not see the issue so far. Before the fix,
Cisco used to see the issue within 5 days. We generated a patch for
SLES9 SP4 and SLES10 SP2 and submitted to Novell. Novell applied the
patch and gave a test build to SAP. SAP tested and reported that the
build is working properly.

We also tested in our lab using the tools "dishogsync", which is IO
stress tool and the tool was provided by Cisco.

Issue1:  File System going into read-only mode

Root cause: The driver tends to not free the memory (FIB) when the
management request exits prematurely. The accumulation of such
un-freed memory causes the driver to fail to allocate anymore memory
(FIB) and hence return 0x70000 value to the upper layer, which puts
the file system into read only mode.

Fix details: The fix makes sure to free the memory (FIB) even if the
request exits prematurely hence ensuring the driver wouldn't run out
of memory (FIBs).


Issue2: False Raid Alert occurs

When the Physical Drives and Logical drives are reported as deleted or
added, even though there is no change done on the system

Root cause: Driver IOCTLs is signaled with EINTR while waiting on
response from the lower layers. Returning "EINTR" will never initiate
internal retry.

Fix details: The issue was fixed by replacing "EINTR" with
"ERESTARTSYS" for mid-layer retries.

Signed-off-by: Penchala Narasimha Reddy <ServeRAIDDriver@hcl.in>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
  • Loading branch information
Penchala Narasimha Reddy Chilakala, ERS-HCLTech authored and James Bottomley committed Jan 17, 2010
1 parent e6622df commit cacb6dc
Show file tree
Hide file tree
Showing 6 changed files with 154 additions and 45 deletions.
52 changes: 40 additions & 12 deletions drivers/scsi/aacraid/aachba.c
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,10 @@ int aac_get_config_status(struct aac_dev *dev, int commit_flag)
status = -EINVAL;
}
}
aac_fib_complete(fibptr);
/* Do not set XferState to zero unless receives a response from F/W */
if (status >= 0)
aac_fib_complete(fibptr);

/* Send a CT_COMMIT_CONFIG to enable discovery of devices */
if (status >= 0) {
if ((aac_commit == 1) || commit_flag) {
Expand All @@ -310,13 +313,18 @@ int aac_get_config_status(struct aac_dev *dev, int commit_flag)
FsaNormal,
1, 1,
NULL, NULL);
aac_fib_complete(fibptr);
/* Do not set XferState to zero unless
* receives a response from F/W */
if (status >= 0)
aac_fib_complete(fibptr);
} else if (aac_commit == 0) {
printk(KERN_WARNING
"aac_get_config_status: Foreign device configurations are being ignored\n");
}
}
aac_fib_free(fibptr);
/* FIB should be freed only after getting the response from the F/W */
if (status != -ERESTARTSYS)
aac_fib_free(fibptr);
return status;
}

Expand Down Expand Up @@ -355,7 +363,9 @@ int aac_get_containers(struct aac_dev *dev)
maximum_num_containers = le32_to_cpu(dresp->ContainerSwitchEntries);
aac_fib_complete(fibptr);
}
aac_fib_free(fibptr);
/* FIB should be freed only after getting the response from the F/W */
if (status != -ERESTARTSYS)
aac_fib_free(fibptr);

if (maximum_num_containers < MAXIMUM_NUM_CONTAINERS)
maximum_num_containers = MAXIMUM_NUM_CONTAINERS;
Expand Down Expand Up @@ -1245,8 +1255,12 @@ int aac_get_adapter_info(struct aac_dev* dev)
NULL);

if (rcode < 0) {
aac_fib_complete(fibptr);
aac_fib_free(fibptr);
/* FIB should be freed only after
* getting the response from the F/W */
if (rcode != -ERESTARTSYS) {
aac_fib_complete(fibptr);
aac_fib_free(fibptr);
}
return rcode;
}
memcpy(&dev->adapter_info, info, sizeof(*info));
Expand All @@ -1270,6 +1284,12 @@ int aac_get_adapter_info(struct aac_dev* dev)

if (rcode >= 0)
memcpy(&dev->supplement_adapter_info, sinfo, sizeof(*sinfo));
if (rcode == -ERESTARTSYS) {
fibptr = aac_fib_alloc(dev);
if (!fibptr)
return -ENOMEM;
}

}


Expand Down Expand Up @@ -1470,9 +1490,11 @@ int aac_get_adapter_info(struct aac_dev* dev)
(dev->scsi_host_ptr->sg_tablesize * 8) + 112;
}
}

aac_fib_complete(fibptr);
aac_fib_free(fibptr);
/* FIB should be freed only after getting the response from the F/W */
if (rcode != -ERESTARTSYS) {
aac_fib_complete(fibptr);
aac_fib_free(fibptr);
}

return rcode;
}
Expand Down Expand Up @@ -1633,6 +1655,7 @@ static int aac_read(struct scsi_cmnd * scsicmd)
* Alocate and initialize a Fib
*/
if (!(cmd_fibcontext = aac_fib_alloc(dev))) {
printk(KERN_WARNING "aac_read: fib allocation failed\n");
return -1;
}

Expand Down Expand Up @@ -1712,9 +1735,14 @@ static int aac_write(struct scsi_cmnd * scsicmd)
* Allocate and initialize a Fib then setup a BlockWrite command
*/
if (!(cmd_fibcontext = aac_fib_alloc(dev))) {
scsicmd->result = DID_ERROR << 16;
scsicmd->scsi_done(scsicmd);
return 0;
/* FIB temporarily unavailable,not catastrophic failure */

/* scsicmd->result = DID_ERROR << 16;
* scsicmd->scsi_done(scsicmd);
* return 0;
*/
printk(KERN_WARNING "aac_write: fib allocation failed\n");
return -1;
}

status = aac_adapter_write(cmd_fibcontext, scsicmd, lba, count, fua);
Expand Down
5 changes: 4 additions & 1 deletion drivers/scsi/aacraid/aacraid.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
*----------------------------------------------------------------------------*/

#ifndef AAC_DRIVER_BUILD
# define AAC_DRIVER_BUILD 2461
# define AAC_DRIVER_BUILD 24702
# define AAC_DRIVER_BRANCH "-ms"
#endif
#define MAXIMUM_NUM_CONTAINERS 32
Expand Down Expand Up @@ -1036,6 +1036,9 @@ struct aac_dev
u8 printf_enabled;
u8 in_reset;
u8 msi;
int management_fib_count;
spinlock_t manage_lock;

};

#define aac_adapter_interrupt(dev) \
Expand Down
28 changes: 14 additions & 14 deletions drivers/scsi/aacraid/commctrl.c
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ static int ioctl_send_fib(struct aac_dev * dev, void __user *arg)
fibptr->hw_fib_pa = hw_fib_pa;
fibptr->hw_fib_va = hw_fib;
}
if (retval != -EINTR)
if (retval != -ERESTARTSYS)
aac_fib_free(fibptr);
return retval;
}
Expand Down Expand Up @@ -322,7 +322,7 @@ static int next_getadapter_fib(struct aac_dev * dev, void __user *arg)
}
if (f.wait) {
if(down_interruptible(&fibctx->wait_sem) < 0) {
status = -EINTR;
status = -ERESTARTSYS;
} else {
/* Lock again and retry */
spin_lock_irqsave(&dev->fib_lock, flags);
Expand Down Expand Up @@ -593,10 +593,10 @@ static int aac_send_raw_srb(struct aac_dev* dev, void __user * arg)
u64 addr;
void* p;
if (upsg->sg[i].count >
(dev->adapter_info.options &
((dev->adapter_info.options &
AAC_OPT_NEW_COMM) ?
(dev->scsi_host_ptr->max_sectors << 9) :
65536) {
65536)) {
rcode = -EINVAL;
goto cleanup;
}
Expand Down Expand Up @@ -645,10 +645,10 @@ static int aac_send_raw_srb(struct aac_dev* dev, void __user * arg)
u64 addr;
void* p;
if (usg->sg[i].count >
(dev->adapter_info.options &
((dev->adapter_info.options &
AAC_OPT_NEW_COMM) ?
(dev->scsi_host_ptr->max_sectors << 9) :
65536) {
65536)) {
rcode = -EINVAL;
goto cleanup;
}
Expand Down Expand Up @@ -695,10 +695,10 @@ static int aac_send_raw_srb(struct aac_dev* dev, void __user * arg)
uintptr_t addr;
void* p;
if (usg->sg[i].count >
(dev->adapter_info.options &
((dev->adapter_info.options &
AAC_OPT_NEW_COMM) ?
(dev->scsi_host_ptr->max_sectors << 9) :
65536) {
65536)) {
rcode = -EINVAL;
goto cleanup;
}
Expand Down Expand Up @@ -734,10 +734,10 @@ static int aac_send_raw_srb(struct aac_dev* dev, void __user * arg)
dma_addr_t addr;
void* p;
if (upsg->sg[i].count >
(dev->adapter_info.options &
((dev->adapter_info.options &
AAC_OPT_NEW_COMM) ?
(dev->scsi_host_ptr->max_sectors << 9) :
65536) {
65536)) {
rcode = -EINVAL;
goto cleanup;
}
Expand Down Expand Up @@ -772,8 +772,8 @@ static int aac_send_raw_srb(struct aac_dev* dev, void __user * arg)
psg->count = cpu_to_le32(sg_indx+1);
status = aac_fib_send(ScsiPortCommand, srbfib, actual_fibsize, FsaNormal, 1, 1, NULL, NULL);
}
if (status == -EINTR) {
rcode = -EINTR;
if (status == -ERESTARTSYS) {
rcode = -ERESTARTSYS;
goto cleanup;
}

Expand Down Expand Up @@ -810,7 +810,7 @@ static int aac_send_raw_srb(struct aac_dev* dev, void __user * arg)
for(i=0; i <= sg_indx; i++){
kfree(sg_list[i]);
}
if (rcode != -EINTR) {
if (rcode != -ERESTARTSYS) {
aac_fib_complete(srbfib);
aac_fib_free(srbfib);
}
Expand Down Expand Up @@ -848,7 +848,7 @@ int aac_do_ioctl(struct aac_dev * dev, int cmd, void __user *arg)
*/

status = aac_dev_ioctl(dev, cmd, arg);
if(status != -ENOTTY)
if (status != -ENOTTY)
return status;

switch (cmd) {
Expand Down
6 changes: 5 additions & 1 deletion drivers/scsi/aacraid/comminit.c
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,9 @@ int aac_send_shutdown(struct aac_dev * dev)

if (status >= 0)
aac_fib_complete(fibctx);
aac_fib_free(fibctx);
/* FIB should be freed only after getting the response from the F/W */
if (status != -ERESTARTSYS)
aac_fib_free(fibctx);
return status;
}

Expand Down Expand Up @@ -304,6 +306,8 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev)
/*
* Check the preferred comm settings, defaults from template.
*/
dev->management_fib_count = 0;
spin_lock_init(&dev->manage_lock);
dev->max_fib_size = sizeof(struct hw_fib);
dev->sg_tablesize = host->sg_tablesize = (dev->max_fib_size
- sizeof(struct aac_fibhdr)
Expand Down
72 changes: 61 additions & 11 deletions drivers/scsi/aacraid/commsup.c
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,14 @@ struct fib *aac_fib_alloc(struct aac_dev *dev)

void aac_fib_free(struct fib *fibptr)
{
unsigned long flags;
unsigned long flags, flagsv;

spin_lock_irqsave(&fibptr->event_lock, flagsv);
if (fibptr->done == 2) {
spin_unlock_irqrestore(&fibptr->event_lock, flagsv);
return;
}
spin_unlock_irqrestore(&fibptr->event_lock, flagsv);

spin_lock_irqsave(&fibptr->dev->fib_lock, flags);
if (unlikely(fibptr->flags & FIB_CONTEXT_FLAG_TIMED_OUT))
Expand Down Expand Up @@ -390,6 +397,8 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size,
struct hw_fib * hw_fib = fibptr->hw_fib_va;
unsigned long flags = 0;
unsigned long qflags;
unsigned long mflags = 0;


if (!(hw_fib->header.XferState & cpu_to_le32(HostOwned)))
return -EBUSY;
Expand Down Expand Up @@ -471,9 +480,31 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size,
if (!dev->queues)
return -EBUSY;

if(wait)
if (wait) {

spin_lock_irqsave(&dev->manage_lock, mflags);
if (dev->management_fib_count >= AAC_NUM_MGT_FIB) {
printk(KERN_INFO "No management Fibs Available:%d\n",
dev->management_fib_count);
spin_unlock_irqrestore(&dev->manage_lock, mflags);
return -EBUSY;
}
dev->management_fib_count++;
spin_unlock_irqrestore(&dev->manage_lock, mflags);
spin_lock_irqsave(&fibptr->event_lock, flags);
aac_adapter_deliver(fibptr);
}

if (aac_adapter_deliver(fibptr) != 0) {
printk(KERN_ERR "aac_fib_send: returned -EBUSY\n");
if (wait) {
spin_unlock_irqrestore(&fibptr->event_lock, flags);
spin_lock_irqsave(&dev->manage_lock, mflags);
dev->management_fib_count--;
spin_unlock_irqrestore(&dev->manage_lock, mflags);
}
return -EBUSY;
}


/*
* If the caller wanted us to wait for response wait now.
Expand Down Expand Up @@ -516,14 +547,15 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size,
udelay(5);
}
} else if (down_interruptible(&fibptr->event_wait)) {
fibptr->done = 2;
up(&fibptr->event_wait);
/* Do nothing ... satisfy
* down_interruptible must_check */
}

spin_lock_irqsave(&fibptr->event_lock, flags);
if ((fibptr->done == 0) || (fibptr->done == 2)) {
if (fibptr->done == 0) {
fibptr->done = 2; /* Tell interrupt we aborted */
spin_unlock_irqrestore(&fibptr->event_lock, flags);
return -EINTR;
return -ERESTARTSYS;
}
spin_unlock_irqrestore(&fibptr->event_lock, flags);
BUG_ON(fibptr->done == 0);
Expand Down Expand Up @@ -689,6 +721,7 @@ int aac_fib_adapter_complete(struct fib *fibptr, unsigned short size)

int aac_fib_complete(struct fib *fibptr)
{
unsigned long flags;
struct hw_fib * hw_fib = fibptr->hw_fib_va;

/*
Expand All @@ -709,6 +742,13 @@ int aac_fib_complete(struct fib *fibptr)
* command is complete that we had sent to the adapter and this
* cdb could be reused.
*/
spin_lock_irqsave(&fibptr->event_lock, flags);
if (fibptr->done == 2) {
spin_unlock_irqrestore(&fibptr->event_lock, flags);
return 0;
}
spin_unlock_irqrestore(&fibptr->event_lock, flags);

if((hw_fib->header.XferState & cpu_to_le32(SentFromHost)) &&
(hw_fib->header.XferState & cpu_to_le32(AdapterProcessed)))
{
Expand Down Expand Up @@ -1355,7 +1395,10 @@ int aac_reset_adapter(struct aac_dev * aac, int forced)

if (status >= 0)
aac_fib_complete(fibctx);
aac_fib_free(fibctx);
/* FIB should be freed only after getting
* the response from the F/W */
if (status != -ERESTARTSYS)
aac_fib_free(fibctx);
}
}

Expand Down Expand Up @@ -1759,6 +1802,7 @@ int aac_command_thread(void *data)
struct fib *fibptr;

if ((fibptr = aac_fib_alloc(dev))) {
int status;
__le32 *info;

aac_fib_init(fibptr);
Expand All @@ -1769,15 +1813,21 @@ int aac_command_thread(void *data)

*info = cpu_to_le32(now.tv_sec);

(void)aac_fib_send(SendHostTime,
status = aac_fib_send(SendHostTime,
fibptr,
sizeof(*info),
FsaNormal,
1, 1,
NULL,
NULL);
aac_fib_complete(fibptr);
aac_fib_free(fibptr);
/* Do not set XferState to zero unless
* receives a response from F/W */
if (status >= 0)
aac_fib_complete(fibptr);
/* FIB should be freed only after
* getting the response from the F/W */
if (status != -ERESTARTSYS)
aac_fib_free(fibptr);
}
difference = (long)(unsigned)update_interval*HZ;
} else {
Expand Down
Loading

0 comments on commit cacb6dc

Please sign in to comment.