Mirror of https://github.com/torvalds/linux.git, synced 2025-08-15 14:11:42 +02:00
block: unifying elevator change
Elevator change is one well-defined behavior:

- tear down the current elevator if it exists
- set up the new elevator

It is supposed to cover every case of changing the elevator with a single
internal API, typically the following cases:

- set up the default elevator in add_disk()
- switch to none in del_disk()
- reset the elevator in blk_mq_update_nr_hw_queues()
- switch the elevator in the sysfs `store` elevator attribute

This patch uses elevator_change() to cover all of the above cases:

- every elevator switch is serialized with the others: add_disk()/del_disk()/
  store elevator are serialized already, and blk_mq_update_nr_hw_queues()
  uses srcu to sync with the other three cases

- for both add_disk() and del_disk(), queue freeze works in atomic mode
  or the queue has already been frozen, so the freeze in elevator_change()
  won't add extra delay

- a `struct elev_change_ctx` instance holds all the info for changing the
  elevator

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20250505141805.2751237-17-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
1e9db5c427
commit
1e44bedbc9
4 changed files with 67 additions and 101 deletions
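For orientation before the hunks: a minimal sketch of the resulting call pattern, condensed from the diff below. These are fragments, not a compilable unit; locking and error handling are elided, and the two `elv_change_ctx` fields shown are the only ones visible in this patch.

	/* add_disk() path, in blk_register_queue(): */
	if (queue_is_mq(q))
		elevator_set_default(q);	/* "mq-deadline" where suitable, else "none" */

	/* del_disk() path, in blk_unregister_queue(): */
	if (queue_is_mq(q)) {
		blk_mq_quiesce_queue(q);	/* hold off dispatch during teardown */
		elevator_set_none(q);		/* tear down via the same unified path */
		blk_mq_unquiesce_queue(q);
	}

	/* Both helpers fill a context and funnel into the serialized core: */
	struct elv_change_ctx ctx = {
		.name = "mq-deadline",		/* or "none" */
		.no_uevent = true,		/* default setup emits no uevent */
	};
	err = elevator_change(q, &ctx);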
block/blk-sysfs.c
@@ -869,14 +869,9 @@ int blk_register_queue(struct gendisk *disk)
 	if (ret)
 		goto out_unregister_ia_ranges;
 
+	if (queue_is_mq(q))
+		elevator_set_default(q);
 	mutex_lock(&q->elevator_lock);
-	if (q->elevator) {
-		ret = elv_register_queue(q, false);
-		if (ret) {
-			mutex_unlock(&q->elevator_lock);
-			goto out_crypto_sysfs_unregister;
-		}
-	}
 	wbt_enable_default(disk);
 	mutex_unlock(&q->elevator_lock);
 
@@ -902,8 +897,6 @@ int blk_register_queue(struct gendisk *disk)
 
 	return ret;
 
-out_crypto_sysfs_unregister:
-	blk_crypto_sysfs_unregister(disk);
 out_unregister_ia_ranges:
 	disk_unregister_independent_access_ranges(disk);
 out_debugfs_remove:
@@ -951,9 +944,11 @@ void blk_unregister_queue(struct gendisk *disk)
 	blk_mq_sysfs_unregister(disk);
 	blk_crypto_sysfs_unregister(disk);
 
-	mutex_lock(&q->elevator_lock);
-	elv_unregister_queue(q);
-	mutex_unlock(&q->elevator_lock);
+	if (queue_is_mq(q)) {
+		blk_mq_quiesce_queue(q);
+		elevator_set_none(q);
+		blk_mq_unquiesce_queue(q);
+	}
 
 	mutex_lock(&q->sysfs_lock);
 	disk_unregister_independent_access_ranges(disk);
block/blk.h
@@ -323,9 +323,8 @@ bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
 bool blk_insert_flush(struct request *rq);
 
 void elv_update_nr_hw_queues(struct request_queue *q);
-void elevator_exit(struct request_queue *q);
-int elv_register_queue(struct request_queue *q, bool uevent);
-void elv_unregister_queue(struct request_queue *q);
+void elevator_set_default(struct request_queue *q);
+void elevator_set_none(struct request_queue *q);
 
 ssize_t part_size_show(struct device *dev, struct device_attribute *attr,
 		char *buf);
block/elevator.c (116 changed lines)
@@ -154,7 +154,7 @@ static void elevator_release(struct kobject *kobj)
 	kfree(e);
 }
 
-void elevator_exit(struct request_queue *q)
+static void elevator_exit(struct request_queue *q)
 {
 	struct elevator_queue *e = q->elevator;
 
@@ -458,7 +458,7 @@ static const struct kobj_type elv_ktype = {
 	.release = elevator_release,
 };
 
-int elv_register_queue(struct request_queue *q, bool uevent)
+static int elv_register_queue(struct request_queue *q, bool uevent)
 {
 	struct elevator_queue *e = q->elevator;
 	int error;
@@ -488,7 +488,7 @@ int elv_register_queue(struct request_queue *q, bool uevent)
 	return error;
 }
 
-void elv_unregister_queue(struct request_queue *q)
+static void elv_unregister_queue(struct request_queue *q)
 {
 	struct elevator_queue *e = q->elevator;
 
@@ -561,66 +561,6 @@ void elv_unregister(struct elevator_type *e)
 }
 EXPORT_SYMBOL_GPL(elv_unregister);
 
-/*
- * For single queue devices, default to using mq-deadline. If we have multiple
- * queues or mq-deadline is not available, default to "none".
- */
-static struct elevator_type *elevator_get_default(struct request_queue *q)
-{
-	if (q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT)
-		return NULL;
-
-	if (q->nr_hw_queues != 1 &&
-			!blk_mq_is_shared_tags(q->tag_set->flags))
-		return NULL;
-
-	return elevator_find_get("mq-deadline");
-}
-
-/*
- * Use the default elevator settings. If the chosen elevator initialization
- * fails, fall back to the "none" elevator (no elevator).
- */
-void elevator_init_mq(struct request_queue *q)
-{
-	struct elevator_type *e;
-	unsigned int memflags;
-	int err;
-
-	WARN_ON_ONCE(blk_queue_registered(q));
-
-	if (unlikely(q->elevator))
-		return;
-
-	e = elevator_get_default(q);
-	if (!e)
-		return;
-
-	/*
-	 * We are called before adding disk, when there isn't any FS I/O,
-	 * so freezing queue plus canceling dispatch work is enough to
-	 * drain any dispatch activities originated from passthrough
-	 * requests, then no need to quiesce queue which may add long boot
-	 * latency, especially when lots of disks are involved.
-	 *
-	 * Disk isn't added yet, so verifying queue lock only manually.
-	 */
-	memflags = blk_mq_freeze_queue(q);
-
-	blk_mq_cancel_work_sync(q);
-
-	err = blk_mq_init_sched(q, e);
-
-	blk_mq_unfreeze_queue(q, memflags);
-
-	if (err) {
-		pr_warn("\"%s\" elevator initialization failed, "
-			"falling back to \"none\"\n", e->elevator_name);
-	}
-
-	elevator_put(e);
-}
-
 /*
  * Switch to new_e io scheduler.
  *
@@ -688,6 +628,16 @@ static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx)
 	lockdep_assert_held(&q->tag_set->update_nr_hwq_lock);
 
 	memflags = blk_mq_freeze_queue(q);
+	/*
+	 * May be called before adding disk, when there isn't any FS I/O,
+	 * so freezing queue plus canceling dispatch work is enough to
+	 * drain any dispatch activities originated from passthrough
+	 * requests, then no need to quiesce queue which may add long boot
+	 * latency, especially when lots of disks are involved.
+	 *
+	 * Disk isn't added yet, so verifying queue lock only manually.
+	 */
+	blk_mq_cancel_work_sync(q);
 	mutex_lock(&q->elevator_lock);
 	if (!(q->elevator && elevator_match(q->elevator->type, ctx->name)))
 		ret = elevator_switch(q, ctx);
@@ -716,6 +666,46 @@ void elv_update_nr_hw_queues(struct request_queue *q)
 	mutex_unlock(&q->elevator_lock);
 }
 
+/*
+ * Use the default elevator settings. If the chosen elevator initialization
+ * fails, fall back to the "none" elevator (no elevator).
+ */
+void elevator_set_default(struct request_queue *q)
+{
+	struct elv_change_ctx ctx = {
+		.name = "mq-deadline",
+		.no_uevent = true,
+	};
+	int err = 0;
+
+	if (q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT)
+		return;
+
+	/*
+	 * For single queue devices, default to using mq-deadline. If we
+	 * have multiple queues or mq-deadline is not available, default
+	 * to "none".
+	 */
+	if (elevator_find_get(ctx.name) && (q->nr_hw_queues == 1 ||
+			blk_mq_is_shared_tags(q->tag_set->flags)))
+		err = elevator_change(q, &ctx);
+	if (err < 0)
+		pr_warn("\"%s\" elevator initialization, failed %d, "
+			"falling back to \"none\"\n", ctx.name, err);
+}
+
+void elevator_set_none(struct request_queue *q)
+{
+	struct elv_change_ctx ctx = {
+		.name = "none",
+	};
+	int err;
+
+	err = elevator_change(q, &ctx);
+	if (err < 0)
+		pr_warn("%s: set none elevator failed %d\n", __func__, err);
+}
+
 static void elv_iosched_load_module(const char *elevator_name)
 {
 	struct elevator_type *found;
block/genhd.c
@@ -432,12 +432,6 @@ static int __add_disk(struct device *parent, struct gendisk *disk,
 		 */
 		if (disk->fops->submit_bio || disk->fops->poll_bio)
 			return -EINVAL;
-
-		/*
-		 * Initialize the I/O scheduler code and pick a default one if
-		 * needed.
-		 */
-		elevator_init_mq(disk->queue);
 	} else {
 		if (!disk->fops->submit_bio)
 			return -EINVAL;
@@ -454,7 +448,7 @@ static int __add_disk(struct device *parent, struct gendisk *disk,
 	ret = -EINVAL;
 	if (disk->major) {
 		if (WARN_ON(!disk->minors))
-			goto out_exit_elevator;
+			goto out;
 
 		if (disk->minors > DISK_MAX_PARTS) {
 			pr_err("block: can't allocate more than %d partitions\n",
@@ -464,14 +458,14 @@ static int __add_disk(struct device *parent, struct gendisk *disk,
 		if (disk->first_minor > MINORMASK ||
 		    disk->minors > MINORMASK + 1 ||
 		    disk->first_minor + disk->minors > MINORMASK + 1)
-			goto out_exit_elevator;
+			goto out;
 	} else {
 		if (WARN_ON(disk->minors))
-			goto out_exit_elevator;
+			goto out;
 
 		ret = blk_alloc_ext_minor();
 		if (ret < 0)
-			goto out_exit_elevator;
+			goto out;
 		disk->major = BLOCK_EXT_MAJOR;
 		disk->first_minor = ret;
 	}
@@ -561,12 +555,7 @@ out_device_del:
 out_free_ext_minor:
 	if (disk->major == BLOCK_EXT_MAJOR)
 		blk_free_ext_minor(disk->first_minor);
-out_exit_elevator:
-	if (disk->queue->elevator) {
-		mutex_lock(&disk->queue->elevator_lock);
-		elevator_exit(disk->queue);
-		mutex_unlock(&disk->queue->elevator_lock);
-	}
+out:
 	return ret;
 }
 
@@ -760,14 +749,7 @@ static void __del_gendisk(struct gendisk *disk)
 	if (queue_is_mq(q))
 		blk_mq_cancel_work_sync(q);
 
-	blk_mq_quiesce_queue(q);
-	if (q->elevator) {
-		mutex_lock(&q->elevator_lock);
-		elevator_exit(q);
-		mutex_unlock(&q->elevator_lock);
-	}
 	rq_qos_exit(q);
-	blk_mq_unquiesce_queue(q);
 
 	/*
 	 * If the disk does not own the queue, allow using passthrough requests
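The two remaining cases from the commit message (sysfs `store` and blk_mq_update_nr_hw_queues()) are not part of this hunk set but use the same funnel. A hedged sketch of the store path follows; `sched_store_sketch` is a hypothetical name for illustration, and the real handler lives elsewhere in this series:

	/*
	 * Sketch only: a user-requested switch, e.g.
	 *   echo bfq > /sys/block/<dev>/queue/scheduler
	 * builds a context from the written name and calls the same
	 * serialized core that add_disk()/del_disk() use.
	 */
	static ssize_t sched_store_sketch(struct request_queue *q, const char *name)
	{
		struct elv_change_ctx ctx = {
			.name = name,	/* e.g. "bfq", "kyber", "none" */
		};
		int err = elevator_change(q, &ctx);

		return err;
	}

For nr_hw_queues updates, elv_update_nr_hw_queues(q) (declared in the block/blk.h hunk above) performs the reset while the tag set's update_nr_hwq_lock is held, matching the lockdep assertion visible in elevator_change().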