Skip to content

Commit

Permalink
[S390] zcrypt: fix request timeout handling
Browse files Browse the repository at this point in the history
Under very high load zcrypt requests may timeout while waiting on the
request queue. Modify zcrypt that timeouts are based on crypto adapter
responses. A timeout occurs only if a crypto adapter does not respond
within a given time frame to sumitted requests.

Signed-off-by: Ralph Wuerthner <rwuerthn@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
  • Loading branch information
Ralph Wuerthner authored and Martin Schwidefsky committed Jul 10, 2007
1 parent 987ad70 commit af512ed
Show file tree
Hide file tree
Showing 6 changed files with 140 additions and 90 deletions.
98 changes: 89 additions & 9 deletions drivers/s390/crypto/ap_bus.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ static void ap_poll_all(unsigned long);
static void ap_poll_timeout(unsigned long);
static int ap_poll_thread_start(void);
static void ap_poll_thread_stop(void);
static void ap_request_timeout(unsigned long);

/**
* Module description.
Expand Down Expand Up @@ -189,6 +190,7 @@ int ap_send(ap_qid_t qid, unsigned long long psmid, void *msg, size_t length)
case AP_RESPONSE_NORMAL:
return 0;
case AP_RESPONSE_Q_FULL:
case AP_RESPONSE_RESET_IN_PROGRESS:
return -EBUSY;
default: /* Device is gone. */
return -ENODEV;
Expand Down Expand Up @@ -252,6 +254,8 @@ int ap_recv(ap_qid_t qid, unsigned long long *psmid, void *msg, size_t length)
if (status.queue_empty)
return -ENOENT;
return -EBUSY;
case AP_RESPONSE_RESET_IN_PROGRESS:
return -EBUSY;
default:
return -ENODEV;
}
Expand Down Expand Up @@ -326,11 +330,12 @@ static int ap_init_queue(ap_qid_t qid)
i = AP_MAX_RESET; /* return with -ENODEV */
break;
case AP_RESPONSE_RESET_IN_PROGRESS:
rc = -EBUSY;
case AP_RESPONSE_BUSY:
default:
break;
}
if (rc != -ENODEV)
if (rc != -ENODEV && rc != -EBUSY)
break;
if (i < AP_MAX_RESET - 1) {
udelay(5);
Expand All @@ -340,6 +345,40 @@ static int ap_init_queue(ap_qid_t qid)
return rc;
}

/**
* Arm request timeout if a AP device was idle and a new request is submitted.
*/
static void ap_increase_queue_count(struct ap_device *ap_dev)
{
int timeout = ap_dev->drv->request_timeout;

ap_dev->queue_count++;
if (ap_dev->queue_count == 1) {
mod_timer(&ap_dev->timeout, jiffies + timeout);
ap_dev->reset = AP_RESET_ARMED;
}
}

/**
* AP device is still alive, re-schedule request timeout if there are still
* pending requests.
*/
static void ap_decrease_queue_count(struct ap_device *ap_dev)
{
int timeout = ap_dev->drv->request_timeout;

ap_dev->queue_count--;
if (ap_dev->queue_count > 0)
mod_timer(&ap_dev->timeout, jiffies + timeout);
else
/**
* The timeout timer should to be disabled now - since
* del_timer_sync() is very expensive, we just tell via the
* reset flag to ignore the pending timeout timer.
*/
ap_dev->reset = AP_RESET_IGNORE;
}

/**
* AP device related attributes.
*/
Expand Down Expand Up @@ -498,6 +537,7 @@ static int ap_device_remove(struct device *dev)
struct ap_driver *ap_drv = ap_dev->drv;

ap_flush_queue(ap_dev);
del_timer_sync(&ap_dev->timeout);
if (ap_drv->remove)
ap_drv->remove(ap_dev);
spin_lock_bh(&ap_device_lock);
Expand Down Expand Up @@ -759,17 +799,21 @@ static void ap_scan_bus(struct work_struct *unused)
__ap_scan_bus);
rc = ap_query_queue(qid, &queue_depth, &device_type);
if (dev) {
if (rc == -EBUSY) {
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(AP_RESET_TIMEOUT);
rc = ap_query_queue(qid, &queue_depth,
&device_type);
}
ap_dev = to_ap_dev(dev);
spin_lock_bh(&ap_dev->lock);
if (rc || ap_dev->unregistered) {
spin_unlock_bh(&ap_dev->lock);
put_device(dev);
device_unregister(dev);
put_device(dev);
continue;
} else
spin_unlock_bh(&ap_dev->lock);
}
if (dev) {
}
spin_unlock_bh(&ap_dev->lock);
put_device(dev);
continue;
}
Expand All @@ -788,6 +832,8 @@ static void ap_scan_bus(struct work_struct *unused)
INIT_LIST_HEAD(&ap_dev->pendingq);
INIT_LIST_HEAD(&ap_dev->requestq);
INIT_LIST_HEAD(&ap_dev->list);
setup_timer(&ap_dev->timeout, ap_request_timeout,
(unsigned long) ap_dev);
if (device_type == 0)
ap_probe_device_type(ap_dev);
else
Expand Down Expand Up @@ -853,7 +899,7 @@ static int ap_poll_read(struct ap_device *ap_dev, unsigned long *flags)
switch (status.response_code) {
case AP_RESPONSE_NORMAL:
atomic_dec(&ap_poll_requests);
ap_dev->queue_count--;
ap_decrease_queue_count(ap_dev);
list_for_each_entry(ap_msg, &ap_dev->pendingq, list) {
if (ap_msg->psmid != ap_dev->reply->psmid)
continue;
Expand Down Expand Up @@ -904,7 +950,7 @@ static int ap_poll_write(struct ap_device *ap_dev, unsigned long *flags)
switch (status.response_code) {
case AP_RESPONSE_NORMAL:
atomic_inc(&ap_poll_requests);
ap_dev->queue_count++;
ap_increase_queue_count(ap_dev);
list_move_tail(&ap_msg->list, &ap_dev->pendingq);
ap_dev->requestq_count--;
ap_dev->pendingq_count++;
Expand All @@ -914,6 +960,7 @@ static int ap_poll_write(struct ap_device *ap_dev, unsigned long *flags)
*flags |= 2;
break;
case AP_RESPONSE_Q_FULL:
case AP_RESPONSE_RESET_IN_PROGRESS:
*flags |= 2;
break;
case AP_RESPONSE_MESSAGE_TOO_BIG:
Expand Down Expand Up @@ -960,10 +1007,11 @@ static int __ap_queue_message(struct ap_device *ap_dev, struct ap_message *ap_ms
list_add_tail(&ap_msg->list, &ap_dev->pendingq);
atomic_inc(&ap_poll_requests);
ap_dev->pendingq_count++;
ap_dev->queue_count++;
ap_increase_queue_count(ap_dev);
ap_dev->total_request_count++;
break;
case AP_RESPONSE_Q_FULL:
case AP_RESPONSE_RESET_IN_PROGRESS:
list_add_tail(&ap_msg->list, &ap_dev->requestq);
ap_dev->requestq_count++;
ap_dev->total_request_count++;
Expand Down Expand Up @@ -1045,6 +1093,25 @@ static void ap_poll_timeout(unsigned long unused)
tasklet_schedule(&ap_tasklet);
}

/**
* Reset a not responding AP device and move all requests from the
* pending queue to the request queue.
*/
static void ap_reset(struct ap_device *ap_dev)
{
int rc;

ap_dev->reset = AP_RESET_IGNORE;
atomic_sub(ap_dev->queue_count, &ap_poll_requests);
ap_dev->queue_count = 0;
list_splice_init(&ap_dev->pendingq, &ap_dev->requestq);
ap_dev->requestq_count += ap_dev->pendingq_count;
ap_dev->pendingq_count = 0;
rc = ap_init_queue(ap_dev->qid);
if (rc == -ENODEV)
ap_dev->unregistered = 1;
}

/**
* Poll all AP devices on the bus in a round robin fashion. Continue
* polling until bit 2^0 of the control flags is not set. If bit 2^1
Expand All @@ -1056,6 +1123,8 @@ static int __ap_poll_all(struct ap_device *ap_dev, unsigned long *flags)
if (!ap_dev->unregistered) {
if (ap_poll_queue(ap_dev, flags))
ap_dev->unregistered = 1;
if (ap_dev->reset == AP_RESET_DO)
ap_reset(ap_dev);
}
spin_unlock(&ap_dev->lock);
return 0;
Expand Down Expand Up @@ -1147,6 +1216,17 @@ static void ap_poll_thread_stop(void)
mutex_unlock(&ap_poll_thread_mutex);
}

/**
* Handling of request timeouts
*/
static void ap_request_timeout(unsigned long data)
{
struct ap_device *ap_dev = (struct ap_device *) data;

if (ap_dev->reset == AP_RESET_ARMED)
ap_dev->reset = AP_RESET_DO;
}

static void ap_reset_domain(void)
{
int i;
Expand Down
11 changes: 11 additions & 0 deletions drivers/s390/crypto/ap_bus.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#define AP_DEVICES 64 /* Number of AP devices. */
#define AP_DOMAINS 16 /* Number of AP domains. */
#define AP_MAX_RESET 90 /* Maximum number of resets. */
#define AP_RESET_TIMEOUT (HZ/2) /* Time in ticks for reset timeouts. */
#define AP_CONFIG_TIME 30 /* Time in seconds between AP bus rescans. */
#define AP_POLL_TIME 1 /* Time in ticks between receive polls. */

Expand Down Expand Up @@ -83,6 +84,13 @@ struct ap_queue_status {
#define AP_DEVICE_TYPE_CEX2A 6
#define AP_DEVICE_TYPE_CEX2C 7

/**
* AP reset flag states
*/
#define AP_RESET_IGNORE 0 /* request timeout will be ignored */
#define AP_RESET_ARMED 1 /* request timeout timer is active */
#define AP_RESET_DO 2 /* AP reset required */

struct ap_device;
struct ap_message;

Expand All @@ -95,6 +103,7 @@ struct ap_driver {
/* receive is called from tasklet context */
void (*receive)(struct ap_device *, struct ap_message *,
struct ap_message *);
int request_timeout; /* request timeout in jiffies */
};

#define to_ap_drv(x) container_of((x), struct ap_driver, driver)
Expand All @@ -112,6 +121,8 @@ struct ap_device {
int queue_depth; /* AP queue depth.*/
int device_type; /* AP device type. */
int unregistered; /* marks AP device as unregistered */
struct timer_list timeout; /* Timer for request timeouts. */
int reset; /* Reset required after req. timeout. */

int queue_count; /* # messages currently on AP queue. */

Expand Down
27 changes: 9 additions & 18 deletions drivers/s390/crypto/zcrypt_cex2a.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ static struct ap_driver zcrypt_cex2a_driver = {
.remove = zcrypt_cex2a_remove,
.receive = zcrypt_cex2a_receive,
.ids = zcrypt_cex2a_ids,
.request_timeout = CEX2A_CLEANUP_TIME,
};

/**
Expand Down Expand Up @@ -306,18 +307,13 @@ static long zcrypt_cex2a_modexpo(struct zcrypt_device *zdev,
goto out_free;
init_completion(&work);
ap_queue_message(zdev->ap_dev, &ap_msg);
rc = wait_for_completion_interruptible_timeout(
&work, CEX2A_CLEANUP_TIME);
if (rc > 0)
rc = wait_for_completion_interruptible(&work);
if (rc == 0)
rc = convert_response(zdev, &ap_msg, mex->outputdata,
mex->outputdatalength);
else {
/* Signal pending or message timed out. */
else
/* Signal pending. */
ap_cancel_message(zdev->ap_dev, &ap_msg);
if (rc == 0)
/* Message timed out. */
rc = -ETIME;
}
out_free:
kfree(ap_msg.message);
return rc;
Expand Down Expand Up @@ -348,18 +344,13 @@ static long zcrypt_cex2a_modexpo_crt(struct zcrypt_device *zdev,
goto out_free;
init_completion(&work);
ap_queue_message(zdev->ap_dev, &ap_msg);
rc = wait_for_completion_interruptible_timeout(
&work, CEX2A_CLEANUP_TIME);
if (rc > 0)
rc = wait_for_completion_interruptible(&work);
if (rc == 0)
rc = convert_response(zdev, &ap_msg, crt->outputdata,
crt->outputdatalength);
else {
/* Signal pending or message timed out. */
else
/* Signal pending. */
ap_cancel_message(zdev->ap_dev, &ap_msg);
if (rc == 0)
/* Message timed out. */
rc = -ETIME;
}
out_free:
kfree(ap_msg.message);
return rc;
Expand Down
27 changes: 9 additions & 18 deletions drivers/s390/crypto/zcrypt_pcica.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ static struct ap_driver zcrypt_pcica_driver = {
.remove = zcrypt_pcica_remove,
.receive = zcrypt_pcica_receive,
.ids = zcrypt_pcica_ids,
.request_timeout = PCICA_CLEANUP_TIME,
};

/**
Expand Down Expand Up @@ -290,18 +291,13 @@ static long zcrypt_pcica_modexpo(struct zcrypt_device *zdev,
goto out_free;
init_completion(&work);
ap_queue_message(zdev->ap_dev, &ap_msg);
rc = wait_for_completion_interruptible_timeout(
&work, PCICA_CLEANUP_TIME);
if (rc > 0)
rc = wait_for_completion_interruptible(&work);
if (rc == 0)
rc = convert_response(zdev, &ap_msg, mex->outputdata,
mex->outputdatalength);
else {
/* Signal pending or message timed out. */
else
/* Signal pending. */
ap_cancel_message(zdev->ap_dev, &ap_msg);
if (rc == 0)
/* Message timed out. */
rc = -ETIME;
}
out_free:
kfree(ap_msg.message);
return rc;
Expand Down Expand Up @@ -332,18 +328,13 @@ static long zcrypt_pcica_modexpo_crt(struct zcrypt_device *zdev,
goto out_free;
init_completion(&work);
ap_queue_message(zdev->ap_dev, &ap_msg);
rc = wait_for_completion_interruptible_timeout(
&work, PCICA_CLEANUP_TIME);
if (rc > 0)
rc = wait_for_completion_interruptible(&work);
if (rc == 0)
rc = convert_response(zdev, &ap_msg, crt->outputdata,
crt->outputdatalength);
else {
/* Signal pending or message timed out. */
else
/* Signal pending. */
ap_cancel_message(zdev->ap_dev, &ap_msg);
if (rc == 0)
/* Message timed out. */
rc = -ETIME;
}
out_free:
kfree(ap_msg.message);
return rc;
Expand Down
Loading

0 comments on commit af512ed

Please sign in to comment.