block: use an xarray for disk->part_tbl

Now that no fast-path lookups in the partition table are left, there is
no point in micro-optimizing the data structure for it.  Just use a
bog-standard xarray.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Christoph Hellwig 2021-01-24 11:02:41 +01:00 committed by Jens Axboe
parent 0470dd9d5f
commit a33df75c63
5 changed files with 22 additions and 193 deletions

View File

@ -865,7 +865,7 @@ void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
* we do nothing special as far as the block layer is concerned. * we do nothing special as far as the block layer is concerned.
*/ */
if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) || if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) ||
disk_has_partitions(disk)) !xa_empty(&disk->part_tbl))
model = BLK_ZONED_NONE; model = BLK_ZONED_NONE;
break; break;
case BLK_ZONED_NONE: case BLK_ZONED_NONE:

View File

@ -345,7 +345,6 @@ int bdev_add_partition(struct block_device *bdev, int partno,
int bdev_del_partition(struct block_device *bdev, int partno); int bdev_del_partition(struct block_device *bdev, int partno);
int bdev_resize_partition(struct block_device *bdev, int partno, int bdev_resize_partition(struct block_device *bdev, int partno,
sector_t start, sector_t length); sector_t start, sector_t length);
int disk_expand_part_tbl(struct gendisk *disk, int target);
int bio_add_hw_page(struct request_queue *q, struct bio *bio, int bio_add_hw_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset, struct page *page, unsigned int len, unsigned int offset,

View File

@ -161,15 +161,6 @@ static void part_in_flight_rw(struct block_device *part,
inflight[1] = 0; inflight[1] = 0;
} }
static struct block_device *__disk_get_part(struct gendisk *disk, int partno)
{
struct disk_part_tbl *ptbl = rcu_dereference(disk->part_tbl);
if (unlikely(partno < 0 || partno >= ptbl->len))
return NULL;
return rcu_dereference(ptbl->part[partno]);
}
/** /**
* disk_part_iter_init - initialize partition iterator * disk_part_iter_init - initialize partition iterator
* @piter: iterator to initialize * @piter: iterator to initialize
@ -204,41 +195,26 @@ void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk,
*/ */
struct block_device *disk_part_iter_next(struct disk_part_iter *piter) struct block_device *disk_part_iter_next(struct disk_part_iter *piter)
{ {
struct disk_part_tbl *ptbl; struct block_device *part;
unsigned long idx;
/* put the last partition */ /* put the last partition */
disk_part_iter_exit(piter); disk_part_iter_exit(piter);
/* get part_tbl */
rcu_read_lock(); rcu_read_lock();
ptbl = rcu_dereference(piter->disk->part_tbl); xa_for_each_start(&piter->disk->part_tbl, idx, part, piter->idx) {
/* iterate to the next partition */
for (; piter->idx != ptbl->len; piter->idx += 1) {
struct block_device *part;
part = rcu_dereference(ptbl->part[piter->idx]);
if (!part)
continue;
piter->part = bdgrab(part);
if (!piter->part)
continue;
if (!bdev_nr_sectors(part) && if (!bdev_nr_sectors(part) &&
!(piter->flags & DISK_PITER_INCL_EMPTY) && !(piter->flags & DISK_PITER_INCL_EMPTY) &&
!(piter->flags & DISK_PITER_INCL_EMPTY_PART0 && !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 &&
piter->idx == 0)) { piter->idx == 0))
bdput(piter->part);
piter->part = NULL;
continue; continue;
}
piter->part = bdgrab(part); piter->part = bdgrab(part);
if (!piter->part) if (!piter->part)
continue; continue;
piter->idx += 1; piter->idx = idx + 1;
break; break;
} }
rcu_read_unlock(); rcu_read_unlock();
return piter->part; return piter->part;
@ -260,42 +236,6 @@ void disk_part_iter_exit(struct disk_part_iter *piter)
piter->part = NULL; piter->part = NULL;
} }
/**
* disk_has_partitions
* @disk: gendisk of interest
*
* Walk through the partition table and check if valid partition exists.
*
* CONTEXT:
* Don't care.
*
* RETURNS:
* True if the gendisk has at least one valid non-zero size partition.
* Otherwise false.
*/
bool disk_has_partitions(struct gendisk *disk)
{
struct disk_part_tbl *ptbl;
int i;
bool ret = false;
rcu_read_lock();
ptbl = rcu_dereference(disk->part_tbl);
/* Iterate partitions skipping the whole device at index 0 */
for (i = 1; i < ptbl->len; i++) {
if (rcu_dereference(ptbl->part[i])) {
ret = true;
break;
}
}
rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(disk_has_partitions);
/* /*
* Can be deleted altogether. Later. * Can be deleted altogether. Later.
* *
@ -858,7 +798,7 @@ struct block_device *bdget_disk(struct gendisk *disk, int partno)
struct block_device *bdev = NULL; struct block_device *bdev = NULL;
rcu_read_lock(); rcu_read_lock();
bdev = __disk_get_part(disk, partno); bdev = xa_load(&disk->part_tbl, partno);
if (bdev && !bdgrab(bdev)) if (bdev && !bdgrab(bdev))
bdev = NULL; bdev = NULL;
rcu_read_unlock(); rcu_read_unlock();
@ -1248,83 +1188,6 @@ static const struct attribute_group *disk_attr_groups[] = {
NULL NULL
}; };
/**
* disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way
* @disk: disk to replace part_tbl for
* @new_ptbl: new part_tbl to install
*
* Replace disk->part_tbl with @new_ptbl in RCU-safe way. The
* original ptbl is freed using RCU callback.
*
* LOCKING:
* Matching bd_mutex locked or the caller is the only user of @disk.
*/
static void disk_replace_part_tbl(struct gendisk *disk,
struct disk_part_tbl *new_ptbl)
{
struct disk_part_tbl *old_ptbl =
rcu_dereference_protected(disk->part_tbl, 1);
rcu_assign_pointer(disk->part_tbl, new_ptbl);
if (old_ptbl) {
rcu_assign_pointer(old_ptbl->last_lookup, NULL);
kfree_rcu(old_ptbl, rcu_head);
}
}
/**
* disk_expand_part_tbl - expand disk->part_tbl
* @disk: disk to expand part_tbl for
* @partno: expand such that this partno can fit in
*
* Expand disk->part_tbl such that @partno can fit in. disk->part_tbl
* uses RCU to allow unlocked dereferencing for stats and other stuff.
*
* LOCKING:
* Matching bd_mutex locked or the caller is the only user of @disk.
* Might sleep.
*
* RETURNS:
* 0 on success, -errno on failure.
*/
int disk_expand_part_tbl(struct gendisk *disk, int partno)
{
struct disk_part_tbl *old_ptbl =
rcu_dereference_protected(disk->part_tbl, 1);
struct disk_part_tbl *new_ptbl;
int len = old_ptbl ? old_ptbl->len : 0;
int i, target;
/*
* check for int overflow, since we can get here from blkpg_ioctl()
* with a user passed 'partno'.
*/
target = partno + 1;
if (target < 0)
return -EINVAL;
/* disk_max_parts() is zero during initialization, ignore if so */
if (disk_max_parts(disk) && target > disk_max_parts(disk))
return -EINVAL;
if (target <= len)
return 0;
new_ptbl = kzalloc_node(struct_size(new_ptbl, part, target), GFP_KERNEL,
disk->node_id);
if (!new_ptbl)
return -ENOMEM;
new_ptbl->len = target;
for (i = 0; i < len; i++)
rcu_assign_pointer(new_ptbl->part[i], old_ptbl->part[i]);
disk_replace_part_tbl(disk, new_ptbl);
return 0;
}
/** /**
* disk_release - releases all allocated resources of the gendisk * disk_release - releases all allocated resources of the gendisk
* @dev: the device representing this disk * @dev: the device representing this disk
@ -1348,7 +1211,7 @@ static void disk_release(struct device *dev)
blk_free_devt(dev->devt); blk_free_devt(dev->devt);
disk_release_events(disk); disk_release_events(disk);
kfree(disk->random); kfree(disk->random);
disk_replace_part_tbl(disk, NULL); xa_destroy(&disk->part_tbl);
bdput(disk->part0); bdput(disk->part0);
if (disk->queue) if (disk->queue)
blk_put_queue(disk->queue); blk_put_queue(disk->queue);
@ -1501,7 +1364,6 @@ dev_t blk_lookup_devt(const char *name, int partno)
struct gendisk *__alloc_disk_node(int minors, int node_id) struct gendisk *__alloc_disk_node(int minors, int node_id)
{ {
struct gendisk *disk; struct gendisk *disk;
struct disk_part_tbl *ptbl;
if (minors > DISK_MAX_PARTS) { if (minors > DISK_MAX_PARTS) {
printk(KERN_ERR printk(KERN_ERR
@ -1519,11 +1381,9 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
goto out_free_disk; goto out_free_disk;
disk->node_id = node_id; disk->node_id = node_id;
if (disk_expand_part_tbl(disk, 0)) xa_init(&disk->part_tbl);
goto out_bdput; if (xa_insert(&disk->part_tbl, 0, disk->part0, GFP_KERNEL))
goto out_destroy_part_tbl;
ptbl = rcu_dereference_protected(disk->part_tbl, 1);
rcu_assign_pointer(ptbl->part[0], disk->part0);
disk->minors = minors; disk->minors = minors;
rand_initialize_disk(disk); rand_initialize_disk(disk);
@ -1532,7 +1392,8 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
device_initialize(disk_to_dev(disk)); device_initialize(disk_to_dev(disk));
return disk; return disk;
out_bdput: out_destroy_part_tbl:
xa_destroy(&disk->part_tbl);
bdput(disk->part0); bdput(disk->part0);
out_free_disk: out_free_disk:
kfree(disk); kfree(disk);

View File

@ -287,13 +287,7 @@ struct device_type part_type = {
*/ */
void delete_partition(struct block_device *part) void delete_partition(struct block_device *part)
{ {
struct gendisk *disk = part->bd_disk; xa_erase(&part->bd_disk->part_tbl, part->bd_partno);
struct disk_part_tbl *ptbl =
rcu_dereference_protected(disk->part_tbl, 1);
rcu_assign_pointer(ptbl->part[part->bd_partno], NULL);
rcu_assign_pointer(ptbl->last_lookup, NULL);
kobject_put(part->bd_holder_dir); kobject_put(part->bd_holder_dir);
device_del(&part->bd_device); device_del(&part->bd_device);
@ -325,7 +319,6 @@ static struct block_device *add_partition(struct gendisk *disk, int partno,
struct device *ddev = disk_to_dev(disk); struct device *ddev = disk_to_dev(disk);
struct device *pdev; struct device *pdev;
struct block_device *bdev; struct block_device *bdev;
struct disk_part_tbl *ptbl;
const char *dname; const char *dname;
int err; int err;
@ -347,12 +340,7 @@ static struct block_device *add_partition(struct gendisk *disk, int partno,
break; break;
} }
err = disk_expand_part_tbl(disk, partno); if (xa_load(&disk->part_tbl, partno))
if (err)
return ERR_PTR(err);
ptbl = rcu_dereference_protected(disk->part_tbl, 1);
if (ptbl->part[partno])
return ERR_PTR(-EBUSY); return ERR_PTR(-EBUSY);
bdev = bdev_alloc(disk, partno); bdev = bdev_alloc(disk, partno);
@ -405,8 +393,10 @@ static struct block_device *add_partition(struct gendisk *disk, int partno,
} }
/* everything is up and running, commence */ /* everything is up and running, commence */
err = xa_insert(&disk->part_tbl, partno, bdev, GFP_KERNEL);
if (err)
goto out_del;
bdev_add(bdev, devt); bdev_add(bdev, devt);
rcu_assign_pointer(ptbl->part[partno], bdev);
/* suppress uevent if the disk suppresses it */ /* suppress uevent if the disk suppresses it */
if (!dev_get_uevent_suppress(ddev)) if (!dev_get_uevent_suppress(ddev))
@ -612,7 +602,7 @@ static bool blk_add_partition(struct gendisk *disk, struct block_device *bdev,
int blk_add_partitions(struct gendisk *disk, struct block_device *bdev) int blk_add_partitions(struct gendisk *disk, struct block_device *bdev)
{ {
struct parsed_partitions *state; struct parsed_partitions *state;
int ret = -EAGAIN, p, highest; int ret = -EAGAIN, p;
if (!disk_part_scan_enabled(disk)) if (!disk_part_scan_enabled(disk))
return 0; return 0;
@ -660,15 +650,6 @@ int blk_add_partitions(struct gendisk *disk, struct block_device *bdev)
/* tell userspace that the media / partition table may have changed */ /* tell userspace that the media / partition table may have changed */
kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
/*
* Detect the highest partition number and preallocate disk->part_tbl.
* This is an optimization and not strictly necessary.
*/
for (p = 1, highest = 0; p < state->limit; p++)
if (state->parts[p].size)
highest = p;
disk_expand_part_tbl(disk, highest);
for (p = 1; p < state->limit; p++) for (p = 1; p < state->limit; p++)
if (!blk_add_partition(disk, bdev, state, p)) if (!blk_add_partition(disk, bdev, state, p))
goto out_free_state; goto out_free_state;

View File

@ -32,6 +32,7 @@ extern struct class block_class;
#include <linux/string.h> #include <linux/string.h>
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/xarray.h>
#define PARTITION_META_INFO_VOLNAMELTH 64 #define PARTITION_META_INFO_VOLNAMELTH 64
/* /*
@ -116,13 +117,6 @@ enum {
DISK_EVENT_FLAG_UEVENT = 1 << 1, DISK_EVENT_FLAG_UEVENT = 1 << 1,
}; };
struct disk_part_tbl {
struct rcu_head rcu_head;
int len;
struct block_device __rcu *last_lookup;
struct block_device __rcu *part[];
};
struct disk_events; struct disk_events;
struct badblocks; struct badblocks;
@ -148,12 +142,7 @@ struct gendisk {
unsigned short events; /* supported events */ unsigned short events; /* supported events */
unsigned short event_flags; /* flags related to event processing */ unsigned short event_flags; /* flags related to event processing */
/* Array of pointers to partitions indexed by partno. struct xarray part_tbl;
* Protected with matching bdev lock but stat and other
* non-critical accesses use RCU. Always access through
* helpers.
*/
struct disk_part_tbl __rcu *part_tbl;
struct block_device *part0; struct block_device *part0;
const struct block_device_operations *fops; const struct block_device_operations *fops;
@ -225,7 +214,7 @@ void disk_uevent(struct gendisk *disk, enum kobject_action action);
struct disk_part_iter { struct disk_part_iter {
struct gendisk *disk; struct gendisk *disk;
struct block_device *part; struct block_device *part;
int idx; unsigned long idx;
unsigned int flags; unsigned int flags;
}; };
@ -233,7 +222,6 @@ extern void disk_part_iter_init(struct disk_part_iter *piter,
struct gendisk *disk, unsigned int flags); struct gendisk *disk, unsigned int flags);
struct block_device *disk_part_iter_next(struct disk_part_iter *piter); struct block_device *disk_part_iter_next(struct disk_part_iter *piter);
extern void disk_part_iter_exit(struct disk_part_iter *piter); extern void disk_part_iter_exit(struct disk_part_iter *piter);
extern bool disk_has_partitions(struct gendisk *disk);
/* block/genhd.c */ /* block/genhd.c */
extern void device_add_disk(struct device *parent, struct gendisk *disk, extern void device_add_disk(struct device *parent, struct gendisk *disk,