s390/zcrypt: revisit ap device remove procedure

Working with the vfio-ap driver let to some revisit of the way
how an ap (queue) device is removed from the driver.
With the current implementation all the cleanup was done before
the driver even got notified about the removal. Now the ap
queue removal is done in 3 steps:
1) A preparation step, all ap messages within the queue
   are flushed and so the driver does 'receive' them.
   Also a new state AP_STATE_REMOVE assigned to the queue
   makes sure there are no new messages queued in.
2) Now the driver's remove function is invoked and the
   driver should do the job of cleaning up it's internal
   administration lists or whatever. After 2) is done
   it is guaranteed, that the driver is not invoked any
   more. On the other hand the driver has to make sure
   that the APQN is not accessed any more after step 2
   is complete.
3) Now the ap bus code does the job of total cleanup of the
   APQN. A reset with zero is triggered and the state of
   the queue goes to AP_STATE_UNBOUND.
   After step 3) is complete, the ap queue has no pending
   messages and the APQN is cleared and so there are no
   requests and replies lingering around in the firmware
   queue for this APQN. Also the interrupts are disabled.

After these remove steps the ap queue device may be assigned
to another driver.

Stress testing this remove/probe procedure showed a problem with the
correct module reference counting. The actual receive of an reply in
the driver is done asynchronous with completions. So with a driver
change on an ap queue the message flush triggers completions but the
threads waiting for the completions may run at a time where the queue
already has the new driver assigned. So the module_put() at receive
time needs to be done on the driver module which queued the ap
message. This change is also part of this patch.

Signed-off-by: Harald Freudenberger <freude@linux.ibm.com>
Reviewed-by: Ingo Franzki <ifranzki@linux.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
This commit is contained in:
Harald Freudenberger 2019-02-22 17:24:11 +01:00 committed by Martin Schwidefsky
parent cd479eccd2
commit 01396a374c
4 changed files with 50 additions and 19 deletions

View File

@ -810,11 +810,18 @@ static int ap_device_remove(struct device *dev)
struct ap_device *ap_dev = to_ap_dev(dev);
struct ap_driver *ap_drv = ap_dev->drv;
/* prepare ap queue device removal */
if (is_queue_dev(dev))
ap_queue_remove(to_ap_queue(dev));
ap_queue_prepare_remove(to_ap_queue(dev));
/* driver's chance to clean up gracefully */
if (ap_drv->remove)
ap_drv->remove(ap_dev);
/* now do the ap queue device remove */
if (is_queue_dev(dev))
ap_queue_remove(to_ap_queue(dev));
/* Remove queue/card from list of active queues/cards */
spin_lock_bh(&ap_list_lock);
if (is_card_dev(dev))

View File

@ -91,6 +91,7 @@ enum ap_state {
AP_STATE_WORKING,
AP_STATE_QUEUE_FULL,
AP_STATE_SUSPEND_WAIT,
AP_STATE_REMOVE, /* about to be removed from driver */
AP_STATE_UNBOUND, /* momentary not bound to a driver */
AP_STATE_BORKED, /* broken */
NR_AP_STATES
@ -252,6 +253,7 @@ void ap_bus_force_rescan(void);
void ap_queue_init_reply(struct ap_queue *aq, struct ap_message *ap_msg);
struct ap_queue *ap_queue_create(ap_qid_t qid, int device_type);
void ap_queue_prepare_remove(struct ap_queue *aq);
void ap_queue_remove(struct ap_queue *aq);
void ap_queue_suspend(struct ap_device *ap_dev);
void ap_queue_resume(struct ap_device *ap_dev);

View File

@ -420,6 +420,10 @@ static ap_func_t *ap_jumptable[NR_AP_STATES][NR_AP_EVENTS] = {
[AP_EVENT_POLL] = ap_sm_suspend_read,
[AP_EVENT_TIMEOUT] = ap_sm_nop,
},
[AP_STATE_REMOVE] = {
[AP_EVENT_POLL] = ap_sm_nop,
[AP_EVENT_TIMEOUT] = ap_sm_nop,
},
[AP_STATE_UNBOUND] = {
[AP_EVENT_POLL] = ap_sm_nop,
[AP_EVENT_TIMEOUT] = ap_sm_nop,
@ -740,18 +744,31 @@ void ap_flush_queue(struct ap_queue *aq)
}
EXPORT_SYMBOL(ap_flush_queue);
void ap_queue_prepare_remove(struct ap_queue *aq)
{
spin_lock_bh(&aq->lock);
/* flush queue */
__ap_flush_queue(aq);
/* set REMOVE state to prevent new messages are queued in */
aq->state = AP_STATE_REMOVE;
del_timer_sync(&aq->timeout);
spin_unlock_bh(&aq->lock);
}
void ap_queue_remove(struct ap_queue *aq)
{
ap_flush_queue(aq);
del_timer_sync(&aq->timeout);
/* reset with zero, also clears irq registration */
/*
* all messages have been flushed and the state is
* AP_STATE_REMOVE. Now reset with zero which also
* clears the irq registration and move the state
* to AP_STATE_UNBOUND to signal that this queue
* is not used by any driver currently.
*/
spin_lock_bh(&aq->lock);
ap_zapq(aq->qid);
aq->state = AP_STATE_UNBOUND;
spin_unlock_bh(&aq->lock);
}
EXPORT_SYMBOL(ap_queue_remove);
void ap_queue_reinit_state(struct ap_queue *aq)
{
@ -760,4 +777,3 @@ void ap_queue_reinit_state(struct ap_queue *aq)
ap_wait(ap_sm_event(aq, AP_EVENT_POLL));
spin_unlock_bh(&aq->lock);
}
EXPORT_SYMBOL(ap_queue_reinit_state);

View File

@ -586,6 +586,7 @@ static inline bool zcrypt_check_queue(struct ap_perms *perms, int queue)
static inline struct zcrypt_queue *zcrypt_pick_queue(struct zcrypt_card *zc,
struct zcrypt_queue *zq,
struct module **pmod,
unsigned int weight)
{
if (!zq || !try_module_get(zq->queue->ap_dev.drv->driver.owner))
@ -595,15 +596,15 @@ static inline struct zcrypt_queue *zcrypt_pick_queue(struct zcrypt_card *zc,
atomic_add(weight, &zc->load);
atomic_add(weight, &zq->load);
zq->request_count++;
*pmod = zq->queue->ap_dev.drv->driver.owner;
return zq;
}
static inline void zcrypt_drop_queue(struct zcrypt_card *zc,
struct zcrypt_queue *zq,
struct module *mod,
unsigned int weight)
{
struct module *mod = zq->queue->ap_dev.drv->driver.owner;
zq->request_count--;
atomic_sub(weight, &zc->load);
atomic_sub(weight, &zq->load);
@ -653,6 +654,7 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms,
unsigned int weight, pref_weight;
unsigned int func_code;
int qid = 0, rc = -ENODEV;
struct module *mod;
trace_s390_zcrypt_req(mex, TP_ICARSAMODEXPO);
@ -706,7 +708,7 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms,
pref_weight = weight;
}
}
pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, weight);
pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, weight);
spin_unlock(&zcrypt_list_lock);
if (!pref_zq) {
@ -718,7 +720,7 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms,
rc = pref_zq->ops->rsa_modexpo(pref_zq, mex);
spin_lock(&zcrypt_list_lock);
zcrypt_drop_queue(pref_zc, pref_zq, weight);
zcrypt_drop_queue(pref_zc, pref_zq, mod, weight);
spin_unlock(&zcrypt_list_lock);
out:
@ -735,6 +737,7 @@ static long zcrypt_rsa_crt(struct ap_perms *perms,
unsigned int weight, pref_weight;
unsigned int func_code;
int qid = 0, rc = -ENODEV;
struct module *mod;
trace_s390_zcrypt_req(crt, TP_ICARSACRT);
@ -788,7 +791,7 @@ static long zcrypt_rsa_crt(struct ap_perms *perms,
pref_weight = weight;
}
}
pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, weight);
pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, weight);
spin_unlock(&zcrypt_list_lock);
if (!pref_zq) {
@ -800,7 +803,7 @@ static long zcrypt_rsa_crt(struct ap_perms *perms,
rc = pref_zq->ops->rsa_modexpo_crt(pref_zq, crt);
spin_lock(&zcrypt_list_lock);
zcrypt_drop_queue(pref_zc, pref_zq, weight);
zcrypt_drop_queue(pref_zc, pref_zq, mod, weight);
spin_unlock(&zcrypt_list_lock);
out:
@ -819,6 +822,7 @@ static long _zcrypt_send_cprb(struct ap_perms *perms,
unsigned int func_code;
unsigned short *domain;
int qid = 0, rc = -ENODEV;
struct module *mod;
trace_s390_zcrypt_req(xcRB, TB_ZSECSENDCPRB);
@ -865,7 +869,7 @@ static long _zcrypt_send_cprb(struct ap_perms *perms,
pref_weight = weight;
}
}
pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, weight);
pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, weight);
spin_unlock(&zcrypt_list_lock);
if (!pref_zq) {
@ -881,7 +885,7 @@ static long _zcrypt_send_cprb(struct ap_perms *perms,
rc = pref_zq->ops->send_cprb(pref_zq, xcRB, &ap_msg);
spin_lock(&zcrypt_list_lock);
zcrypt_drop_queue(pref_zc, pref_zq, weight);
zcrypt_drop_queue(pref_zc, pref_zq, mod, weight);
spin_unlock(&zcrypt_list_lock);
out:
@ -932,6 +936,7 @@ static long zcrypt_send_ep11_cprb(struct ap_perms *perms,
unsigned int func_code;
struct ap_message ap_msg;
int qid = 0, rc = -ENODEV;
struct module *mod;
trace_s390_zcrypt_req(xcrb, TP_ZSENDEP11CPRB);
@ -1000,7 +1005,7 @@ static long zcrypt_send_ep11_cprb(struct ap_perms *perms,
pref_weight = weight;
}
}
pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, weight);
pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, weight);
spin_unlock(&zcrypt_list_lock);
if (!pref_zq) {
@ -1012,7 +1017,7 @@ static long zcrypt_send_ep11_cprb(struct ap_perms *perms,
rc = pref_zq->ops->send_ep11_cprb(pref_zq, xcrb, &ap_msg);
spin_lock(&zcrypt_list_lock);
zcrypt_drop_queue(pref_zc, pref_zq, weight);
zcrypt_drop_queue(pref_zc, pref_zq, mod, weight);
spin_unlock(&zcrypt_list_lock);
out_free:
@ -1033,6 +1038,7 @@ static long zcrypt_rng(char *buffer)
struct ap_message ap_msg;
unsigned int domain;
int qid = 0, rc = -ENODEV;
struct module *mod;
trace_s390_zcrypt_req(buffer, TP_HWRNGCPRB);
@ -1064,7 +1070,7 @@ static long zcrypt_rng(char *buffer)
pref_weight = weight;
}
}
pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, weight);
pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, weight);
spin_unlock(&zcrypt_list_lock);
if (!pref_zq) {
@ -1076,7 +1082,7 @@ static long zcrypt_rng(char *buffer)
rc = pref_zq->ops->rng(pref_zq, buffer, &ap_msg);
spin_lock(&zcrypt_list_lock);
zcrypt_drop_queue(pref_zc, pref_zq, weight);
zcrypt_drop_queue(pref_zc, pref_zq, mod, weight);
spin_unlock(&zcrypt_list_lock);
out: