Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tcmur_device: add priv lock support #667

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1044,9 +1044,15 @@ static int dev_added(struct tcmu_device *dev)
goto cleanup_format_lock;
}

ret = pthread_cond_init(&rdev->report_event_cond, NULL);
if (ret) {
ret = -ret;
goto cleanup_rdev_lock;
}

ret = setup_io_work_queue(dev);
if (ret < 0)
goto cleanup_rdev_lock;
goto cleanup_event_cond;

ret = setup_aio_tracking(rdev);
if (ret < 0)
Expand Down Expand Up @@ -1088,6 +1094,8 @@ static int dev_added(struct tcmu_device *dev)
cleanup_aio_tracking(rdev);
cleanup_io_work_queue:
cleanup_io_work_queue(dev, true);
cleanup_event_cond:
pthread_cond_destroy(&rdev->report_event_cond);
cleanup_rdev_lock:
pthread_mutex_destroy(&rdev->rdev_lock);
cleanup_format_lock:
Expand Down Expand Up @@ -1130,6 +1138,10 @@ static void dev_removed(struct tcmu_device *dev)

tcmur_destroy_work(rdev->event_work);

ret = pthread_cond_destroy(&rdev->report_event_cond);
if (ret != 0)
tcmu_err("could not cleanup report event cond %d\n", ret);

ret = pthread_mutex_destroy(&rdev->rdev_lock);
if (ret != 0)
tcmu_err("could not cleanup state lock %d\n", ret);
Expand Down
9 changes: 3 additions & 6 deletions rbd.c
Original file line number Diff line number Diff line change
Expand Up @@ -146,17 +146,14 @@ static int tcmu_rbd_service_status_update(struct tcmu_device *dev,

#endif /* RBD_LOCK_ACQUIRE_SUPPORT */

static int tcmu_rbd_report_event(struct tcmu_device *dev)
static int tcmu_rbd_report_event(struct tcmu_device *dev, bool has_lock)
{
struct tcmur_device *rdev = tcmu_dev_get_private(dev);

/*
* We ignore the specific event and report all the current counter
* values, because tools like gwcli/dashboard may not see every
* update, and we do not want one event to overwrite the info.
*/
return tcmu_rbd_service_status_update(dev,
rdev->lock_state == TCMUR_DEV_LOCK_WRITE_LOCKED ? true : false);
return tcmu_rbd_service_status_update(dev, has_lock);
}

static int tcmu_rbd_service_register(struct tcmu_device *dev)
Expand Down Expand Up @@ -215,7 +212,7 @@ static int tcmu_rbd_service_register(struct tcmu_device *dev)
goto free_meta_buf;
}

ret = tcmu_rbd_report_event(dev);
ret = tcmu_rbd_report_event(dev, false);

free_meta_buf:
free(metadata_buf);
Expand Down
6 changes: 6 additions & 0 deletions target.c
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,12 @@ static void tgt_port_grp_recovery_work_fn(void *arg)
*/
list_for_each_safe(&tpg->devs, rdev, tmp_rdev, recovery_entry) {
list_del(&rdev->recovery_entry);

pthread_mutex_lock(&rdev->rdev_lock);
if (rdev->flags & TCMUR_DEV_FLAG_REPORTING_EVENT)
pthread_cond_wait(&rdev->report_event_cond, &rdev->rdev_lock);
pthread_mutex_unlock(&rdev->rdev_lock);

ret = __tcmu_reopen_dev(rdev->dev, -1);
if (ret) {
tcmu_dev_err(rdev->dev, "Could not reinitialize device. (err %d).\n",
Expand Down
2 changes: 1 addition & 1 deletion tcmu-runner.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ struct tcmur_handler {
*
* Return 0 on success and a -Exyz error code on error.
*/
int (*report_event)(struct tcmu_device *dev);
int (*report_event)(struct tcmu_device *dev, bool has_lock);

/*
* If the lock is acquired and the tag is not TCMU_INVALID_LOCK_TAG,
Expand Down
28 changes: 26 additions & 2 deletions tcmur_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@ int __tcmu_reopen_dev(struct tcmu_device *dev, int retries)
rhandler->close(dev);
}

pthread_mutex_lock(&rdev->rdev_lock);
ret = -EIO;
pthread_mutex_lock(&rdev->rdev_lock);
while (ret != 0 && !(rdev->flags & TCMUR_DEV_FLAG_STOPPING) &&
(retries < 0 || attempt <= retries)) {
pthread_mutex_unlock(&rdev->rdev_lock);
Expand Down Expand Up @@ -128,7 +128,10 @@ int tcmu_reopen_dev(struct tcmu_device *dev, int retries)
pthread_mutex_unlock(&rdev->rdev_lock);
return -EBUSY;
}

rdev->flags |= TCMUR_DEV_FLAG_IN_RECOVERY;
if (rdev->flags & TCMUR_DEV_FLAG_REPORTING_EVENT)
pthread_cond_wait(&rdev->report_event_cond, &rdev->rdev_lock);
pthread_mutex_unlock(&rdev->rdev_lock);

return __tcmu_reopen_dev(dev, retries);
Expand Down Expand Up @@ -159,6 +162,7 @@ static void __tcmu_report_event(void *data)
struct tcmu_device *dev = data;
struct tcmur_device *rdev = tcmu_dev_get_private(dev);
struct tcmur_handler *rhandler = tcmu_get_runner_handler(dev);
bool has_lock;
int ret;

/*
Expand All @@ -168,9 +172,29 @@ static void __tcmu_report_event(void *data)
sleep(1);

pthread_mutex_lock(&rdev->rdev_lock);
ret = rhandler->report_event(dev);
/*
* If the device is in recovery, will skip reporting the event
* this time because the event should be report when the device
* is reopening.
*/
if (rdev->flags & (TCMUR_DEV_FLAG_IN_RECOVERY |
TCMUR_DEV_FLAG_STOPPING |
TCMUR_DEV_FLAG_STOPPED)) {
pthread_mutex_unlock(&rdev->rdev_lock);
return;
}

rdev->flags |= TCMUR_DEV_FLAG_REPORTING_EVENT;
has_lock = rdev->lock_state == TCMUR_DEV_LOCK_WRITE_LOCKED;
pthread_mutex_unlock(&rdev->rdev_lock);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is rdev_lock released here? ->report_event() used to be called with it held.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The rdev_lock should always be released when calling the handler's hooks. I think we need to pass a has_lock boolean parameter.


ret = rhandler->report_event(dev, has_lock);
if (ret)
tcmu_dev_err(dev, "Could not report events. Error %d.\n", ret);

pthread_mutex_lock(&rdev->rdev_lock);
rdev->flags &= ~TCMUR_DEV_FLAG_REPORTING_EVENT;
pthread_cond_signal(&rdev->report_event_cond);
pthread_mutex_unlock(&rdev->rdev_lock);
}

Expand Down
3 changes: 3 additions & 0 deletions tcmur_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#define TCMUR_DEV_FLAG_IS_OPEN (1 << 2)
#define TCMUR_DEV_FLAG_STOPPING (1 << 3)
#define TCMUR_DEV_FLAG_STOPPED (1 << 4)
#define TCMUR_DEV_FLAG_REPORTING_EVENT (1 << 5)

#define TCMUR_UA_DEV_SIZE_CHANGED 0

Expand Down Expand Up @@ -83,6 +84,8 @@ struct tcmur_device {

int cmd_time_out;

pthread_cond_t report_event_cond;

pthread_spinlock_t cmds_list_lock; /* protects cmds_list */
struct list_head cmds_list;
};
Expand Down