Merge branch 'md-6.12-bitmap' into md-6.12

From Yu Kuai (with minor changes by Song Liu):

The background is that the bitmap currently uses a global spin_lock,
causing lock contention and severe IO performance degradation for all
raid levels.

However, a new lock-free bitmap is impossible to implement while
md-bitmap exposes its internal implementation through many exported
APIs. Hence bitmap_operations is introduced to describe the bitmap
core implementation; a new bitmap can then be provided through a new
bitmap_operations, and we only need to switch to it during
initialization.

This also makes it possible to build the bitmap as a kernel module,
though that is not a concern for now.
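
As a rough illustration of the pattern: a second implementation could be
plugged in by providing another ops table and selecting it during
initialization. In this sketch only struct bitmap_operations, bitmap_ops
and mddev_set_bitmap_ops() come from this series; llbitmap_ops, the
llbitmap_* callbacks and want_lockless_bitmap() are hypothetical:

static struct bitmap_operations llbitmap_ops = {	/* hypothetical */
	.enabled	= llbitmap_enabled,
	.create		= llbitmap_create,
	.load		= llbitmap_load,
	/* ... the remaining callbacks ... */
};

void mddev_set_bitmap_ops(struct mddev *mddev)
{
	/* imagined selector; the series always picks the existing ops */
	mddev->bitmap_ops = want_lockless_bitmap(mddev) ? &llbitmap_ops
							: &bitmap_ops;
}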

This version was tested with mdadm tests and lvm2 tests. This set does
not introduce new errors in these tests.

* md-6.12-bitmap: (42 commits)
  md/md-bitmap: make in memory structure internal
  md/md-bitmap: merge md_bitmap_enabled() into bitmap_operations
  md/md-bitmap: merge md_bitmap_wait_behind_writes() into bitmap_operations
  md/md-bitmap: merge md_bitmap_free() into bitmap_operations
  md/md-bitmap: merge md_bitmap_set_pages() into struct bitmap_operations
  md/md-bitmap: merge md_bitmap_copy_from_slot() into struct bitmap_operation.
  md/md-bitmap: merge get_bitmap_from_slot() into bitmap_operations
  md/md-bitmap: merge md_bitmap_resize() into bitmap_operations
  md/md-bitmap: pass in mddev directly for md_bitmap_resize()
  md/md-bitmap: merge md_bitmap_daemon_work() into bitmap_operations
  md/md-bitmap: merge bitmap_unplug() into bitmap_operations
  md/md-bitmap: merge md_bitmap_unplug_async() into md_bitmap_unplug()
  md/md-bitmap: merge md_bitmap_sync_with_cluster() into bitmap_operations
  md/md-bitmap: merge md_bitmap_cond_end_sync() into bitmap_operations
  md/md-bitmap: merge md_bitmap_close_sync() into bitmap_operations
  md/md-bitmap: merge md_bitmap_end_sync() into bitmap_operations
  md/md-bitmap: remove the parameter 'aborted' for md_bitmap_end_sync()
  md/md-bitmap: merge md_bitmap_start_sync() into bitmap_operations
  md/md-bitmap: merge md_bitmap_endwrite() into bitmap_operations
  md/md-bitmap: merge md_bitmap_startwrite() into bitmap_operations
  ...

Signed-off-by: Song Liu <song@kernel.org>
Commit 7f67fdae33 by Song Liu, 2024-08-28 14:55:57 -07:00
11 changed files with 763 additions and 565 deletions

View File

@ -3949,7 +3949,9 @@ static int __load_dirty_region_bitmap(struct raid_set *rs)
/* Try loading the bitmap unless "raid0", which does not have one */
if (!rs_is_raid0(rs) &&
!test_and_set_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags)) {
r = md_bitmap_load(&rs->md);
struct mddev *mddev = &rs->md;
r = mddev->bitmap_ops->load(mddev);
if (r)
DMERR("Failed to load bitmap");
}
@ -4066,7 +4068,8 @@ static int raid_preresume(struct dm_target *ti)
mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)))) {
int chunksize = to_bytes(rs->requested_bitmap_chunk_sectors) ?: mddev->bitmap_info.chunksize;
r = md_bitmap_resize(mddev->bitmap, mddev->dev_sectors, chunksize, 0);
r = mddev->bitmap_ops->resize(mddev, mddev->dev_sectors,
chunksize, false);
if (r)
DMERR("Failed to resize bitmap");
}

View File

@ -32,11 +32,210 @@
#include "md.h"
#include "md-bitmap.h"
#define BITMAP_MAJOR_LO 3
/* version 4 insists the bitmap is in little-endian order
* with version 3, it is host-endian which is non-portable
* Version 5 is currently set only for clustered devices
*/
#define BITMAP_MAJOR_HI 4
#define BITMAP_MAJOR_CLUSTERED 5
#define BITMAP_MAJOR_HOSTENDIAN 3
/*
* in-memory bitmap:
*
* Use 16 bit block counters to track pending writes to each "chunk".
* The 2 high order bits are special-purpose, the first is a flag indicating
* whether a resync is needed. The second is a flag indicating whether a
* resync is active.
* This means that the counter is actually 14 bits:
*
* +--------+--------+------------------------------------------------+
* | resync | resync | counter |
* | needed | active | |
* | (0-1) | (0-1) | (0-16383) |
* +--------+--------+------------------------------------------------+
*
* The "resync needed" bit is set when:
* a '1' bit is read from storage at startup.
* a write request fails on some drives
* a resync is aborted on a chunk with 'resync active' set
* It is cleared (and resync-active set) when a resync starts across all drives
* of the chunk.
*
*
* The "resync active" bit is set when:
* a resync is started on all drives, and resync_needed is set.
* resync_needed will be cleared (as long as resync_active wasn't already set).
* It is cleared when a resync completes.
*
* The counter counts pending write requests, plus the on-disk bit.
* When the counter is '1' and the resync bits are clear, the on-disk
* bit can be cleared as well, thus setting the counter to 0.
* When we set a bit, or in the counter (to start a write), if the field is
* 0, we first set the disk bit and set the counter to 1.
*
* If the counter is 0, the on-disk bit is clear and the stripe is clean
* Anything that dirties the stripe pushes the counter to 2 (at least)
* and sets the on-disk bit (lazily).
* If a periodic sweep finds the counter at 2, it is decremented to 1.
* If the sweep finds the counter at 1, the on-disk bit is cleared and the
* counter goes to zero.
*
* Also, we'll hijack the "map" pointer itself and use it as two 16 bit block
* counters as a fallback when "page" memory cannot be allocated:
*
* Normal case (page memory allocated):
*
* page pointer (32-bit)
*
* [ ] ------+
* |
* +-------> [ ][ ]..[ ] (4096 byte page == 2048 counters)
* c1 c2 c2048
*
* Hijacked case (page memory allocation failed):
*
* hijacked page pointer (32-bit)
*
* [ ][ ] (no page memory allocated)
* counter #1 (16-bit) counter #2 (16-bit)
*
*/
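
The counter layout above can be exercised in isolation. The mask
definitions below mirror the NEEDED_MASK/RESYNC_MASK/COUNTER_MAX macros
that remain in md-bitmap.h; the demo itself is illustrative userspace
code, not part of the series:

#include <stdio.h>

typedef unsigned short bitmap_counter_t;	/* __u16 in the kernel */
#define COUNTER_BITS 16
#define NEEDED_MASK ((bitmap_counter_t)(1 << (COUNTER_BITS - 1)))
#define RESYNC_MASK ((bitmap_counter_t)(1 << (COUNTER_BITS - 2)))
#define COUNTER_MAX ((bitmap_counter_t)RESYNC_MASK - 1)

int main(void)
{
	/* "resync needed" flag set, two pending writes counted */
	bitmap_counter_t bmc = NEEDED_MASK | 2;

	printf("needed=%d resync=%d count=%u\n",
	       !!(bmc & NEEDED_MASK), !!(bmc & RESYNC_MASK),
	       (unsigned)(bmc & COUNTER_MAX));	/* needed=1 resync=0 count=2 */
	return 0;
}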
#define PAGE_BITS (PAGE_SIZE << 3)
#define PAGE_BIT_SHIFT (PAGE_SHIFT + 3)
#define NEEDED(x) (((bitmap_counter_t) x) & NEEDED_MASK)
#define RESYNC(x) (((bitmap_counter_t) x) & RESYNC_MASK)
#define COUNTER(x) (((bitmap_counter_t) x) & COUNTER_MAX)
/* how many counters per page? */
#define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS)
/* same, except a shift value for more efficient bitops */
#define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT)
/* same, except a mask value for more efficient bitops */
#define PAGE_COUNTER_MASK (PAGE_COUNTER_RATIO - 1)
#define BITMAP_BLOCK_SHIFT 9
/*
* bitmap structures:
*/
/* the in-memory bitmap is represented by bitmap_pages */
struct bitmap_page {
/*
* map points to the actual memory page
*/
char *map;
/*
* in emergencies (when map cannot be alloced), hijack the map
* pointer and use it as two counters itself
*/
unsigned int hijacked:1;
/*
* If any counter in this page is '1' or '2' - and so could be
* cleared then that page is marked as 'pending'
*/
unsigned int pending:1;
/*
* count of dirty bits on the page
*/
unsigned int count:30;
};
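
To make the "hijacked" fallback concrete: when no page could be
allocated, the map pointer's own storage holds the two counters. The
helper below is a simplified, hypothetical sketch of the lookup; the
real logic lives in md_bitmap_get_counter():

/* hypothetical helper; see md_bitmap_get_counter() for the real code */
static bitmap_counter_t *counter_slot(struct bitmap_page *bp, int idx)
{
	if (bp->hijacked)	/* no page: only counters 0 and 1 exist */
		return &((bitmap_counter_t *)&bp->map)[idx & 1];
	return &((bitmap_counter_t *)bp->map)[idx];	/* normal case */
}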
/* the main bitmap structure - one per mddev */
struct bitmap {
struct bitmap_counts {
spinlock_t lock;
struct bitmap_page *bp;
/* total number of pages in the bitmap */
unsigned long pages;
/* number of pages not yet allocated */
unsigned long missing_pages;
/* chunksize = 2^chunkshift (for bitops) */
unsigned long chunkshift;
/* total number of data chunks for the array */
unsigned long chunks;
} counts;
struct mddev *mddev; /* the md device that the bitmap is for */
__u64 events_cleared;
int need_sync;
struct bitmap_storage {
/* backing disk file */
struct file *file;
/* cached copy of the bitmap file superblock */
struct page *sb_page;
unsigned long sb_index;
/* list of cache pages for the file */
struct page **filemap;
/* attributes associated filemap pages */
unsigned long *filemap_attr;
/* number of pages in the file */
unsigned long file_pages;
/* total bytes in the bitmap */
unsigned long bytes;
} storage;
unsigned long flags;
int allclean;
atomic_t behind_writes;
/* highest actual value at runtime */
unsigned long behind_writes_used;
/*
* the bitmap daemon - periodically wakes up and sweeps the bitmap
* file, cleaning up bits and flushing out pages to disk as necessary
*/
unsigned long daemon_lastrun; /* jiffies of last run */
/*
* when we lasted called end_sync to update bitmap with resync
* progress.
*/
unsigned long last_end_sync;
/* pending writes to the bitmap file */
atomic_t pending_writes;
wait_queue_head_t write_wait;
wait_queue_head_t overflow_wait;
wait_queue_head_t behind_wait;
struct kernfs_node *sysfs_can_clear;
/* slot offset for clustered env */
int cluster_slot;
};
static int __bitmap_resize(struct bitmap *bitmap, sector_t blocks,
int chunksize, bool init);
static inline char *bmname(struct bitmap *bitmap)
{
return bitmap->mddev ? mdname(bitmap->mddev) : "mdX";
}
static bool __bitmap_enabled(struct bitmap *bitmap)
{
return bitmap->storage.filemap &&
!test_bit(BITMAP_STALE, &bitmap->flags);
}
static bool bitmap_enabled(struct mddev *mddev)
{
struct bitmap *bitmap = mddev->bitmap;
if (!bitmap)
return false;
return __bitmap_enabled(bitmap);
}
/*
* check a page and, if necessary, allocate it (or hijack it if the alloc fails)
*
@ -472,9 +671,10 @@ static void md_bitmap_wait_writes(struct bitmap *bitmap)
/* update the event counter and sync the superblock to disk */
void md_bitmap_update_sb(struct bitmap *bitmap)
static void bitmap_update_sb(void *data)
{
bitmap_super_t *sb;
struct bitmap *bitmap = data;
if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
return;
@ -510,10 +710,8 @@ void md_bitmap_update_sb(struct bitmap *bitmap)
write_sb_page(bitmap, bitmap->storage.sb_index,
bitmap->storage.sb_page, 1);
}
EXPORT_SYMBOL(md_bitmap_update_sb);
/* print out the bitmap file superblock */
void md_bitmap_print_sb(struct bitmap *bitmap)
static void bitmap_print_sb(struct bitmap *bitmap)
{
bitmap_super_t *sb;
@ -760,7 +958,7 @@ out_no_sb:
bitmap->mddev->bitmap_info.space > sectors_reserved)
bitmap->mddev->bitmap_info.space = sectors_reserved;
} else {
md_bitmap_print_sb(bitmap);
bitmap_print_sb(bitmap);
if (bitmap->cluster_slot < 0)
md_cluster_stop(bitmap->mddev);
}
@ -893,7 +1091,7 @@ static void md_bitmap_file_unmap(struct bitmap_storage *store)
static void md_bitmap_file_kick(struct bitmap *bitmap)
{
if (!test_and_set_bit(BITMAP_STALE, &bitmap->flags)) {
md_bitmap_update_sb(bitmap);
bitmap_update_sb(bitmap);
if (bitmap->storage.file) {
pr_warn("%s: kicking failed bitmap file %pD4 from array!\n",
@ -1028,13 +1226,13 @@ static int md_bitmap_file_test_bit(struct bitmap *bitmap, sector_t block)
/* this gets called when the md device is ready to unplug its underlying
* (slave) device queues -- before we let any writes go down, we need to
* sync the dirty pages of the bitmap file to disk */
void md_bitmap_unplug(struct bitmap *bitmap)
static void __bitmap_unplug(struct bitmap *bitmap)
{
unsigned long i;
int dirty, need_write;
int writing = 0;
if (!md_bitmap_enabled(bitmap))
if (!__bitmap_enabled(bitmap))
return;
/* look at each page to see if there are any set bits that need to be
@ -1060,7 +1258,6 @@ void md_bitmap_unplug(struct bitmap *bitmap)
if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
md_bitmap_file_kick(bitmap);
}
EXPORT_SYMBOL(md_bitmap_unplug);
struct bitmap_unplug_work {
struct work_struct work;
@ -1073,11 +1270,11 @@ static void md_bitmap_unplug_fn(struct work_struct *work)
struct bitmap_unplug_work *unplug_work =
container_of(work, struct bitmap_unplug_work, work);
md_bitmap_unplug(unplug_work->bitmap);
__bitmap_unplug(unplug_work->bitmap);
complete(unplug_work->done);
}
void md_bitmap_unplug_async(struct bitmap *bitmap)
static void bitmap_unplug_async(struct bitmap *bitmap)
{
DECLARE_COMPLETION_ONSTACK(done);
struct bitmap_unplug_work unplug_work;
@ -1089,7 +1286,19 @@ void md_bitmap_unplug_async(struct bitmap *bitmap)
queue_work(md_bitmap_wq, &unplug_work.work);
wait_for_completion(&done);
}
EXPORT_SYMBOL(md_bitmap_unplug_async);
static void bitmap_unplug(struct mddev *mddev, bool sync)
{
struct bitmap *bitmap = mddev->bitmap;
if (!bitmap)
return;
if (sync)
__bitmap_unplug(bitmap);
else
bitmap_unplug_async(bitmap);
}
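
The sync flag lets callers pick between the two paths. For example,
raid1 (raid1_prepare_flush_writes(), later in this diff) unplugs
asynchronously when called from inside bio submission, to avoid
deadlocking on bitmap IO:

/* usage taken from raid1_prepare_flush_writes() below */
mddev->bitmap_ops->unplug(mddev, current->bio_list == NULL);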
static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed);
@ -1226,22 +1435,21 @@ static int md_bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
return ret;
}
void md_bitmap_write_all(struct bitmap *bitmap)
/* just flag bitmap pages as needing to be written. */
static void bitmap_write_all(struct mddev *mddev)
{
/* We don't actually write all bitmap blocks here,
* just flag them as needing to be written
*/
int i;
struct bitmap *bitmap = mddev->bitmap;
if (!bitmap || !bitmap->storage.filemap)
return;
if (bitmap->storage.file)
/* Only one copy, so nothing needed */
if (bitmap->storage.file)
return;
for (i = 0; i < bitmap->storage.file_pages; i++)
set_page_attr(bitmap, i,
BITMAP_PAGE_NEEDWRITE);
set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE);
bitmap->allclean = 0;
}
@ -1290,7 +1498,7 @@ out:
* bitmap daemon -- periodically wakes up to clean bits and flush pages
* out to disk
*/
void md_bitmap_daemon_work(struct mddev *mddev)
static void bitmap_daemon_work(struct mddev *mddev)
{
struct bitmap *bitmap;
unsigned long j;
@ -1461,8 +1669,11 @@ __acquires(bitmap->lock)
&(bitmap->bp[page].map[pageoff]);
}
int md_bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind)
static int bitmap_startwrite(struct mddev *mddev, sector_t offset,
unsigned long sectors, bool behind)
{
struct bitmap *bitmap = mddev->bitmap;
if (!bitmap)
return 0;
@ -1523,13 +1734,15 @@ int md_bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long s
}
return 0;
}
EXPORT_SYMBOL(md_bitmap_startwrite);
void md_bitmap_endwrite(struct bitmap *bitmap, sector_t offset,
unsigned long sectors, int success, int behind)
static void bitmap_endwrite(struct mddev *mddev, sector_t offset,
unsigned long sectors, bool success, bool behind)
{
struct bitmap *bitmap = mddev->bitmap;
if (!bitmap)
return;
if (behind) {
if (atomic_dec_and_test(&bitmap->behind_writes))
wake_up(&bitmap->behind_wait);
@ -1576,26 +1789,27 @@ void md_bitmap_endwrite(struct bitmap *bitmap, sector_t offset,
sectors = 0;
}
}
EXPORT_SYMBOL(md_bitmap_endwrite);
static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
int degraded)
static bool __bitmap_start_sync(struct bitmap *bitmap, sector_t offset,
sector_t *blocks, bool degraded)
{
bitmap_counter_t *bmc;
int rv;
bool rv;
if (bitmap == NULL) {/* FIXME or bitmap set as 'failed' */
*blocks = 1024;
return 1; /* always resync if no bitmap */
return true; /* always resync if no bitmap */
}
spin_lock_irq(&bitmap->counts.lock);
rv = false;
bmc = md_bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
rv = 0;
if (bmc) {
/* locked */
if (RESYNC(*bmc))
rv = 1;
else if (NEEDED(*bmc)) {
rv = 1;
if (RESYNC(*bmc)) {
rv = true;
} else if (NEEDED(*bmc)) {
rv = true;
if (!degraded) { /* don't set/clear bits if degraded */
*bmc |= RESYNC_MASK;
*bmc &= ~NEEDED_MASK;
@ -1603,11 +1817,12 @@ static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t
}
}
spin_unlock_irq(&bitmap->counts.lock);
return rv;
}
int md_bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
int degraded)
static bool bitmap_start_sync(struct mddev *mddev, sector_t offset,
sector_t *blocks, bool degraded)
{
/* bitmap_start_sync must always report on multiples of whole
* pages, otherwise resync (which is very PAGE_SIZE based) will
@ -1616,21 +1831,22 @@ int md_bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *block
* At least PAGE_SIZE>>9 blocks are covered.
* Return the 'or' of the result.
*/
int rv = 0;
bool rv = false;
sector_t blocks1;
*blocks = 0;
while (*blocks < (PAGE_SIZE>>9)) {
rv |= __bitmap_start_sync(bitmap, offset,
rv |= __bitmap_start_sync(mddev->bitmap, offset,
&blocks1, degraded);
offset += blocks1;
*blocks += blocks1;
}
return rv;
}
EXPORT_SYMBOL(md_bitmap_start_sync);
void md_bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted)
static void __bitmap_end_sync(struct bitmap *bitmap, sector_t offset,
sector_t *blocks, bool aborted)
{
bitmap_counter_t *bmc;
unsigned long flags;
@ -1659,9 +1875,14 @@ void md_bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks
unlock:
spin_unlock_irqrestore(&bitmap->counts.lock, flags);
}
EXPORT_SYMBOL(md_bitmap_end_sync);
void md_bitmap_close_sync(struct bitmap *bitmap)
static void bitmap_end_sync(struct mddev *mddev, sector_t offset,
sector_t *blocks)
{
__bitmap_end_sync(mddev->bitmap, offset, blocks, true);
}
static void bitmap_close_sync(struct mddev *mddev)
{
/* Sync has finished, and any bitmap chunks that weren't synced
* properly have been aborted. It remains to us to clear the
@ -1669,19 +1890,23 @@ void md_bitmap_close_sync(struct bitmap *bitmap)
*/
sector_t sector = 0;
sector_t blocks;
struct bitmap *bitmap = mddev->bitmap;
if (!bitmap)
return;
while (sector < bitmap->mddev->resync_max_sectors) {
md_bitmap_end_sync(bitmap, sector, &blocks, 0);
__bitmap_end_sync(bitmap, sector, &blocks, false);
sector += blocks;
}
}
EXPORT_SYMBOL(md_bitmap_close_sync);
void md_bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force)
static void bitmap_cond_end_sync(struct mddev *mddev, sector_t sector,
bool force)
{
sector_t s = 0;
sector_t blocks;
struct bitmap *bitmap = mddev->bitmap;
if (!bitmap)
return;
@ -1700,15 +1925,14 @@ void md_bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force)
sector &= ~((1ULL << bitmap->counts.chunkshift) - 1);
s = 0;
while (s < sector && s < bitmap->mddev->resync_max_sectors) {
md_bitmap_end_sync(bitmap, s, &blocks, 0);
__bitmap_end_sync(bitmap, s, &blocks, false);
s += blocks;
}
bitmap->last_end_sync = jiffies;
sysfs_notify_dirent_safe(bitmap->mddev->sysfs_completed);
}
EXPORT_SYMBOL(md_bitmap_cond_end_sync);
void md_bitmap_sync_with_cluster(struct mddev *mddev,
static void bitmap_sync_with_cluster(struct mddev *mddev,
sector_t old_lo, sector_t old_hi,
sector_t new_lo, sector_t new_hi)
{
@ -1716,18 +1940,17 @@ void md_bitmap_sync_with_cluster(struct mddev *mddev,
sector_t sector, blocks = 0;
for (sector = old_lo; sector < new_lo; ) {
md_bitmap_end_sync(bitmap, sector, &blocks, 0);
__bitmap_end_sync(bitmap, sector, &blocks, false);
sector += blocks;
}
WARN((blocks > new_lo) && old_lo, "alignment is not correct for lo\n");
for (sector = old_hi; sector < new_hi; ) {
md_bitmap_start_sync(bitmap, sector, &blocks, 0);
bitmap_start_sync(mddev, sector, &blocks, false);
sector += blocks;
}
WARN((blocks > new_hi) && old_hi, "alignment is not correct for hi\n");
}
EXPORT_SYMBOL(md_bitmap_sync_with_cluster);
static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed)
{
@ -1756,12 +1979,18 @@ static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, in
}
/* dirty the memory and file bits for bitmap chunks "s" to "e" */
void md_bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e)
static void bitmap_dirty_bits(struct mddev *mddev, unsigned long s,
unsigned long e)
{
unsigned long chunk;
struct bitmap *bitmap = mddev->bitmap;
if (!bitmap)
return;
for (chunk = s; chunk <= e; chunk++) {
sector_t sec = (sector_t)chunk << bitmap->counts.chunkshift;
md_bitmap_set_memory_bits(bitmap, sec, 1);
md_bitmap_file_set_bit(bitmap, sec);
if (sec < bitmap->mddev->recovery_cp)
@ -1773,10 +2002,7 @@ void md_bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long
}
}
/*
* flush out any pending updates
*/
void md_bitmap_flush(struct mddev *mddev)
static void bitmap_flush(struct mddev *mddev)
{
struct bitmap *bitmap = mddev->bitmap;
long sleep;
@ -1789,23 +2015,21 @@ void md_bitmap_flush(struct mddev *mddev)
*/
sleep = mddev->bitmap_info.daemon_sleep * 2;
bitmap->daemon_lastrun -= sleep;
md_bitmap_daemon_work(mddev);
bitmap_daemon_work(mddev);
bitmap->daemon_lastrun -= sleep;
md_bitmap_daemon_work(mddev);
bitmap_daemon_work(mddev);
bitmap->daemon_lastrun -= sleep;
md_bitmap_daemon_work(mddev);
bitmap_daemon_work(mddev);
if (mddev->bitmap_info.external)
md_super_wait(mddev);
md_bitmap_update_sb(bitmap);
bitmap_update_sb(bitmap);
}
/*
* free memory that was allocated
*/
void md_bitmap_free(struct bitmap *bitmap)
static void md_bitmap_free(void *data)
{
unsigned long k, pages;
struct bitmap_page *bp;
struct bitmap *bitmap = data;
if (!bitmap) /* there was no bitmap */
return;
@ -1836,9 +2060,8 @@ void md_bitmap_free(struct bitmap *bitmap)
kfree(bp);
kfree(bitmap);
}
EXPORT_SYMBOL(md_bitmap_free);
void md_bitmap_wait_behind_writes(struct mddev *mddev)
static void bitmap_wait_behind_writes(struct mddev *mddev)
{
struct bitmap *bitmap = mddev->bitmap;
@ -1852,14 +2075,14 @@ void md_bitmap_wait_behind_writes(struct mddev *mddev)
}
}
void md_bitmap_destroy(struct mddev *mddev)
static void bitmap_destroy(struct mddev *mddev)
{
struct bitmap *bitmap = mddev->bitmap;
if (!bitmap) /* there was no bitmap */
return;
md_bitmap_wait_behind_writes(mddev);
bitmap_wait_behind_writes(mddev);
if (!mddev->serialize_policy)
mddev_destroy_serial_pool(mddev, NULL);
@ -1878,7 +2101,7 @@ void md_bitmap_destroy(struct mddev *mddev)
* if this returns an error, bitmap_destroy must be called to do clean up
* once mddev->bitmap is set
*/
struct bitmap *md_bitmap_create(struct mddev *mddev, int slot)
static struct bitmap *__bitmap_create(struct mddev *mddev, int slot)
{
struct bitmap *bitmap;
sector_t blocks = mddev->resync_max_sectors;
@ -1948,7 +2171,8 @@ struct bitmap *md_bitmap_create(struct mddev *mddev, int slot)
goto error;
bitmap->daemon_lastrun = jiffies;
err = md_bitmap_resize(bitmap, blocks, mddev->bitmap_info.chunksize, 1);
err = __bitmap_resize(bitmap, blocks, mddev->bitmap_info.chunksize,
true);
if (err)
goto error;
@ -1965,7 +2189,18 @@ struct bitmap *md_bitmap_create(struct mddev *mddev, int slot)
return ERR_PTR(err);
}
int md_bitmap_load(struct mddev *mddev)
static int bitmap_create(struct mddev *mddev, int slot)
{
struct bitmap *bitmap = __bitmap_create(mddev, slot);
if (IS_ERR(bitmap))
return PTR_ERR(bitmap);
mddev->bitmap = bitmap;
return 0;
}
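
Callers that previously juggled the pointer returned by
md_bitmap_create() now go through the ops table; the resulting pattern,
as in set_bitmap_file() and update_array_info() in the md.c hunks below,
is:

/* caller pattern after this change, cf. set_bitmap_file() below */
err = mddev->bitmap_ops->create(mddev, -1);
if (!err)
	err = mddev->bitmap_ops->load(mddev);
if (err)
	mddev->bitmap_ops->destroy(mddev);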
static int bitmap_load(struct mddev *mddev)
{
int err = 0;
sector_t start = 0;
@ -1989,10 +2224,10 @@ int md_bitmap_load(struct mddev *mddev)
*/
while (sector < mddev->resync_max_sectors) {
sector_t blocks;
md_bitmap_start_sync(bitmap, sector, &blocks, 0);
bitmap_start_sync(mddev, sector, &blocks, false);
sector += blocks;
}
md_bitmap_close_sync(bitmap);
bitmap_close_sync(mddev);
if (mddev->degraded == 0
|| bitmap->events_cleared == mddev->events)
@ -2014,22 +2249,21 @@ int md_bitmap_load(struct mddev *mddev)
mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true);
md_wakeup_thread(mddev->thread);
md_bitmap_update_sb(bitmap);
bitmap_update_sb(bitmap);
if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
err = -EIO;
out:
return err;
}
EXPORT_SYMBOL_GPL(md_bitmap_load);
/* caller need to free returned bitmap with md_bitmap_free() */
struct bitmap *get_bitmap_from_slot(struct mddev *mddev, int slot)
static void *bitmap_get_from_slot(struct mddev *mddev, int slot)
{
int rv = 0;
struct bitmap *bitmap;
bitmap = md_bitmap_create(mddev, slot);
bitmap = __bitmap_create(mddev, slot);
if (IS_ERR(bitmap)) {
rv = PTR_ERR(bitmap);
return ERR_PTR(rv);
@ -2043,20 +2277,19 @@ struct bitmap *get_bitmap_from_slot(struct mddev *mddev, int slot)
return bitmap;
}
EXPORT_SYMBOL(get_bitmap_from_slot);
/* Loads the bitmap associated with slot and copies the resync information
* to our bitmap
*/
int md_bitmap_copy_from_slot(struct mddev *mddev, int slot,
sector_t *low, sector_t *high, bool clear_bits)
static int bitmap_copy_from_slot(struct mddev *mddev, int slot, sector_t *low,
sector_t *high, bool clear_bits)
{
int rv = 0, i, j;
sector_t block, lo = 0, hi = 0;
struct bitmap_counts *counts;
struct bitmap *bitmap;
bitmap = get_bitmap_from_slot(mddev, slot);
bitmap = bitmap_get_from_slot(mddev, slot);
if (IS_ERR(bitmap)) {
pr_err("%s can't get bitmap from slot %d\n", __func__, slot);
return -1;
@ -2076,53 +2309,59 @@ int md_bitmap_copy_from_slot(struct mddev *mddev, int slot,
}
if (clear_bits) {
md_bitmap_update_sb(bitmap);
bitmap_update_sb(bitmap);
/* BITMAP_PAGE_PENDING is set, but bitmap_unplug needs
* BITMAP_PAGE_DIRTY or _NEEDWRITE to write ... */
for (i = 0; i < bitmap->storage.file_pages; i++)
if (test_page_attr(bitmap, i, BITMAP_PAGE_PENDING))
set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE);
md_bitmap_unplug(bitmap);
__bitmap_unplug(bitmap);
}
md_bitmap_unplug(mddev->bitmap);
__bitmap_unplug(mddev->bitmap);
*low = lo;
*high = hi;
md_bitmap_free(bitmap);
return rv;
}
EXPORT_SYMBOL_GPL(md_bitmap_copy_from_slot);
void md_bitmap_status(struct seq_file *seq, struct bitmap *bitmap)
static void bitmap_set_pages(void *data, unsigned long pages)
{
unsigned long chunk_kb;
struct bitmap_counts *counts;
struct bitmap *bitmap = data;
if (!bitmap)
return;
counts = &bitmap->counts;
chunk_kb = bitmap->mddev->bitmap_info.chunksize >> 10;
seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], "
"%lu%s chunk",
counts->pages - counts->missing_pages,
counts->pages,
(counts->pages - counts->missing_pages)
<< (PAGE_SHIFT - 10),
chunk_kb ? chunk_kb : bitmap->mddev->bitmap_info.chunksize,
chunk_kb ? "KB" : "B");
if (bitmap->storage.file) {
seq_printf(seq, ", file: ");
seq_file_path(seq, bitmap->storage.file, " \t\n");
}
seq_printf(seq, "\n");
bitmap->counts.pages = pages;
}
int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
int chunksize, int init)
static int bitmap_get_stats(void *data, struct md_bitmap_stats *stats)
{
struct bitmap_storage *storage;
struct bitmap_counts *counts;
struct bitmap *bitmap = data;
bitmap_super_t *sb;
if (!bitmap)
return -ENOENT;
sb = kmap_local_page(bitmap->storage.sb_page);
stats->sync_size = le64_to_cpu(sb->sync_size);
kunmap_local(sb);
counts = &bitmap->counts;
stats->missing_pages = counts->missing_pages;
stats->pages = counts->pages;
storage = &bitmap->storage;
stats->file_pages = storage->file_pages;
stats->file = storage->file;
stats->behind_writes = atomic_read(&bitmap->behind_writes);
stats->behind_wait = wq_has_sleeper(&bitmap->behind_wait);
stats->events_cleared = bitmap->events_cleared;
return 0;
}
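
Code outside md-bitmap.c no longer dereferences struct bitmap; it fills
a struct md_bitmap_stats instead. A minimal consumer, following the
same shape as md_bitmap_status() and super_1_allow_new_offset() in the
md.c hunks below:

/* sketch of a stats consumer; see md_bitmap_status() in md.c below */
struct md_bitmap_stats stats;
int err = mddev->bitmap_ops->get_stats(mddev->bitmap, &stats);

if (!err)
	pr_debug("bitmap: %lu/%lu pages, behind_writes=%d\n",
		 stats.pages - stats.missing_pages, stats.pages,
		 stats.behind_writes);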
static int __bitmap_resize(struct bitmap *bitmap, sector_t blocks,
int chunksize, bool init)
{
/* If chunk_size is 0, choose an appropriate chunk size.
* Then possibly allocate new storage space.
@ -2320,14 +2559,24 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
spin_unlock_irq(&bitmap->counts.lock);
if (!init) {
md_bitmap_unplug(bitmap);
__bitmap_unplug(bitmap);
bitmap->mddev->pers->quiesce(bitmap->mddev, 0);
}
ret = 0;
err:
return ret;
}
EXPORT_SYMBOL_GPL(md_bitmap_resize);
static int bitmap_resize(struct mddev *mddev, sector_t blocks, int chunksize,
bool init)
{
struct bitmap *bitmap = mddev->bitmap;
if (!bitmap)
return 0;
return __bitmap_resize(bitmap, blocks, chunksize, init);
}
static ssize_t
location_show(struct mddev *mddev, char *page)
@ -2367,7 +2616,7 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
goto out;
}
md_bitmap_destroy(mddev);
bitmap_destroy(mddev);
mddev->bitmap_info.offset = 0;
if (mddev->bitmap_info.file) {
struct file *f = mddev->bitmap_info.file;
@ -2377,7 +2626,6 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
} else {
/* No bitmap, OK to set a location */
long long offset;
struct bitmap *bitmap;
if (strncmp(buf, "none", 4) == 0)
/* nothing to be done */;
@ -2404,17 +2652,14 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
}
mddev->bitmap_info.offset = offset;
bitmap = md_bitmap_create(mddev, -1);
if (IS_ERR(bitmap)) {
rv = PTR_ERR(bitmap);
rv = bitmap_create(mddev, -1);
if (rv)
goto out;
}
mddev->bitmap = bitmap;
rv = md_bitmap_load(mddev);
rv = bitmap_load(mddev);
if (rv) {
mddev->bitmap_info.offset = 0;
md_bitmap_destroy(mddev);
bitmap_destroy(mddev);
goto out;
}
}
@ -2450,6 +2695,7 @@ space_show(struct mddev *mddev, char *page)
static ssize_t
space_store(struct mddev *mddev, const char *buf, size_t len)
{
struct bitmap *bitmap;
unsigned long sectors;
int rv;
@ -2460,8 +2706,8 @@ space_store(struct mddev *mddev, const char *buf, size_t len)
if (sectors == 0)
return -EINVAL;
if (mddev->bitmap &&
sectors < (mddev->bitmap->storage.bytes + 511) >> 9)
bitmap = mddev->bitmap;
if (bitmap && sectors < (bitmap->storage.bytes + 511) >> 9)
return -EFBIG; /* Bitmap is too big for this small space */
/* could make sure it isn't too big, but that isn't really
@ -2569,7 +2815,7 @@ backlog_store(struct mddev *mddev, const char *buf, size_t len)
mddev_create_serial_pool(mddev, rdev);
}
if (old_mwb != backlog)
md_bitmap_update_sb(mddev->bitmap);
bitmap_update_sb(mddev->bitmap);
mddev_unlock_and_resume(mddev);
return len;
@ -2638,10 +2884,13 @@ __ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
static ssize_t can_clear_show(struct mddev *mddev, char *page)
{
int len;
struct bitmap *bitmap;
spin_lock(&mddev->lock);
if (mddev->bitmap)
len = sprintf(page, "%s\n", (mddev->bitmap->need_sync ?
"false" : "true"));
bitmap = mddev->bitmap;
if (bitmap)
len = sprintf(page, "%s\n", (bitmap->need_sync ? "false" :
"true"));
else
len = sprintf(page, "\n");
spin_unlock(&mddev->lock);
@ -2650,17 +2899,24 @@ static ssize_t can_clear_show(struct mddev *mddev, char *page)
static ssize_t can_clear_store(struct mddev *mddev, const char *buf, size_t len)
{
if (mddev->bitmap == NULL)
struct bitmap *bitmap = mddev->bitmap;
if (!bitmap)
return -ENOENT;
if (strncmp(buf, "false", 5) == 0)
mddev->bitmap->need_sync = 1;
else if (strncmp(buf, "true", 4) == 0) {
if (strncmp(buf, "false", 5) == 0) {
bitmap->need_sync = 1;
return len;
}
if (strncmp(buf, "true", 4) == 0) {
if (mddev->degraded)
return -EBUSY;
mddev->bitmap->need_sync = 0;
} else
return -EINVAL;
bitmap->need_sync = 0;
return len;
}
return -EINVAL;
}
static struct md_sysfs_entry bitmap_can_clear =
@ -2670,21 +2926,26 @@ static ssize_t
behind_writes_used_show(struct mddev *mddev, char *page)
{
ssize_t ret;
struct bitmap *bitmap;
spin_lock(&mddev->lock);
if (mddev->bitmap == NULL)
bitmap = mddev->bitmap;
if (!bitmap)
ret = sprintf(page, "0\n");
else
ret = sprintf(page, "%lu\n",
mddev->bitmap->behind_writes_used);
ret = sprintf(page, "%lu\n", bitmap->behind_writes_used);
spin_unlock(&mddev->lock);
return ret;
}
static ssize_t
behind_writes_used_reset(struct mddev *mddev, const char *buf, size_t len)
{
if (mddev->bitmap)
mddev->bitmap->behind_writes_used = 0;
struct bitmap *bitmap = mddev->bitmap;
if (bitmap)
bitmap->behind_writes_used = 0;
return len;
}
@ -2707,3 +2968,38 @@ const struct attribute_group md_bitmap_group = {
.name = "bitmap",
.attrs = md_bitmap_attrs,
};
static struct bitmap_operations bitmap_ops = {
.enabled = bitmap_enabled,
.create = bitmap_create,
.resize = bitmap_resize,
.load = bitmap_load,
.destroy = bitmap_destroy,
.flush = bitmap_flush,
.write_all = bitmap_write_all,
.dirty_bits = bitmap_dirty_bits,
.unplug = bitmap_unplug,
.daemon_work = bitmap_daemon_work,
.wait_behind_writes = bitmap_wait_behind_writes,
.startwrite = bitmap_startwrite,
.endwrite = bitmap_endwrite,
.start_sync = bitmap_start_sync,
.end_sync = bitmap_end_sync,
.cond_end_sync = bitmap_cond_end_sync,
.close_sync = bitmap_close_sync,
.update_sb = bitmap_update_sb,
.get_stats = bitmap_get_stats,
.sync_with_cluster = bitmap_sync_with_cluster,
.get_from_slot = bitmap_get_from_slot,
.copy_from_slot = bitmap_copy_from_slot,
.set_pages = bitmap_set_pages,
.free = md_bitmap_free,
};
void mddev_set_bitmap_ops(struct mddev *mddev)
{
mddev->bitmap_ops = &bitmap_ops;
}

View File

@ -7,81 +7,7 @@
#ifndef BITMAP_H
#define BITMAP_H 1
#define BITMAP_MAJOR_LO 3
/* version 4 insists the bitmap is in little-endian order
* with version 3, it is host-endian which is non-portable
* Version 5 is currently set only for clustered devices
*/
#define BITMAP_MAJOR_HI 4
#define BITMAP_MAJOR_CLUSTERED 5
#define BITMAP_MAJOR_HOSTENDIAN 3
/*
* in-memory bitmap:
*
* Use 16 bit block counters to track pending writes to each "chunk".
* The 2 high order bits are special-purpose, the first is a flag indicating
* whether a resync is needed. The second is a flag indicating whether a
* resync is active.
* This means that the counter is actually 14 bits:
*
* +--------+--------+------------------------------------------------+
* | resync | resync | counter |
* | needed | active | |
* | (0-1) | (0-1) | (0-16383) |
* +--------+--------+------------------------------------------------+
*
* The "resync needed" bit is set when:
* a '1' bit is read from storage at startup.
* a write request fails on some drives
* a resync is aborted on a chunk with 'resync active' set
* It is cleared (and resync-active set) when a resync starts across all drives
* of the chunk.
*
*
* The "resync active" bit is set when:
* a resync is started on all drives, and resync_needed is set.
* resync_needed will be cleared (as long as resync_active wasn't already set).
* It is cleared when a resync completes.
*
* The counter counts pending write requests, plus the on-disk bit.
* When the counter is '1' and the resync bits are clear, the on-disk
* bit can be cleared as well, thus setting the counter to 0.
* When we set a bit, or in the counter (to start a write), if the field is
* 0, we first set the disk bit and set the counter to 1.
*
* If the counter is 0, the on-disk bit is clear and the stripe is clean
* Anything that dirties the stripe pushes the counter to 2 (at least)
* and sets the on-disk bit (lazily).
* If a periodic sweep finds the counter at 2, it is decremented to 1.
* If the sweep finds the counter at 1, the on-disk bit is cleared and the
* counter goes to zero.
*
* Also, we'll hijack the "map" pointer itself and use it as two 16 bit block
* counters as a fallback when "page" memory cannot be allocated:
*
* Normal case (page memory allocated):
*
* page pointer (32-bit)
*
* [ ] ------+
* |
* +-------> [ ][ ]..[ ] (4096 byte page == 2048 counters)
* c1 c2 c2048
*
* Hijacked case (page memory allocation failed):
*
* hijacked page pointer (32-bit)
*
* [ ][ ] (no page memory allocated)
* counter #1 (16-bit) counter #2 (16-bit)
*
*/
#ifdef __KERNEL__
#define PAGE_BITS (PAGE_SIZE << 3)
#define PAGE_BIT_SHIFT (PAGE_SHIFT + 3)
#define BITMAP_MAGIC 0x6d746962
typedef __u16 bitmap_counter_t;
#define COUNTER_BITS 16
@ -91,26 +17,6 @@ typedef __u16 bitmap_counter_t;
#define NEEDED_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 1)))
#define RESYNC_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 2)))
#define COUNTER_MAX ((bitmap_counter_t) RESYNC_MASK - 1)
#define NEEDED(x) (((bitmap_counter_t) x) & NEEDED_MASK)
#define RESYNC(x) (((bitmap_counter_t) x) & RESYNC_MASK)
#define COUNTER(x) (((bitmap_counter_t) x) & COUNTER_MAX)
/* how many counters per page? */
#define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS)
/* same, except a shift value for more efficient bitops */
#define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT)
/* same, except a mask value for more efficient bitops */
#define PAGE_COUNTER_MASK (PAGE_COUNTER_RATIO - 1)
#define BITMAP_BLOCK_SHIFT 9
#endif
/*
* bitmap structures:
*/
#define BITMAP_MAGIC 0x6d746962
/* use these for bitmap->flags and bitmap->sb->state bit-fields */
enum bitmap_state {
@ -152,136 +58,58 @@ typedef struct bitmap_super_s {
* devices. For raid10 it is the size of the array.
*/
#ifdef __KERNEL__
struct md_bitmap_stats {
u64 events_cleared;
int behind_writes;
bool behind_wait;
/* the in-memory bitmap is represented by bitmap_pages */
struct bitmap_page {
/*
* map points to the actual memory page
*/
char *map;
/*
* in emergencies (when map cannot be alloced), hijack the map
* pointer and use it as two counters itself
*/
unsigned int hijacked:1;
/*
* If any counter in this page is '1' or '2' - and so could be
* cleared then that page is marked as 'pending'
*/
unsigned int pending:1;
/*
* count of dirty bits on the page
*/
unsigned int count:30;
unsigned long missing_pages;
unsigned long file_pages;
unsigned long sync_size;
unsigned long pages;
struct file *file;
};
/* the main bitmap structure - one per mddev */
struct bitmap {
struct bitmap_operations {
bool (*enabled)(struct mddev *mddev);
int (*create)(struct mddev *mddev, int slot);
int (*resize)(struct mddev *mddev, sector_t blocks, int chunksize,
bool init);
struct bitmap_counts {
spinlock_t lock;
struct bitmap_page *bp;
unsigned long pages; /* total number of pages
* in the bitmap */
unsigned long missing_pages; /* number of pages
* not yet allocated */
unsigned long chunkshift; /* chunksize = 2^chunkshift
* (for bitops) */
unsigned long chunks; /* Total number of data
* chunks for the array */
} counts;
int (*load)(struct mddev *mddev);
void (*destroy)(struct mddev *mddev);
void (*flush)(struct mddev *mddev);
void (*write_all)(struct mddev *mddev);
void (*dirty_bits)(struct mddev *mddev, unsigned long s,
unsigned long e);
void (*unplug)(struct mddev *mddev, bool sync);
void (*daemon_work)(struct mddev *mddev);
void (*wait_behind_writes)(struct mddev *mddev);
struct mddev *mddev; /* the md device that the bitmap is for */
int (*startwrite)(struct mddev *mddev, sector_t offset,
unsigned long sectors, bool behind);
void (*endwrite)(struct mddev *mddev, sector_t offset,
unsigned long sectors, bool success, bool behind);
bool (*start_sync)(struct mddev *mddev, sector_t offset,
sector_t *blocks, bool degraded);
void (*end_sync)(struct mddev *mddev, sector_t offset, sector_t *blocks);
void (*cond_end_sync)(struct mddev *mddev, sector_t sector, bool force);
void (*close_sync)(struct mddev *mddev);
__u64 events_cleared;
int need_sync;
void (*update_sb)(void *data);
int (*get_stats)(void *data, struct md_bitmap_stats *stats);
struct bitmap_storage {
struct file *file; /* backing disk file */
struct page *sb_page; /* cached copy of the bitmap
* file superblock */
unsigned long sb_index;
struct page **filemap; /* list of cache pages for
* the file */
unsigned long *filemap_attr; /* attributes associated
* w/ filemap pages */
unsigned long file_pages; /* number of pages in the file*/
unsigned long bytes; /* total bytes in the bitmap */
} storage;
unsigned long flags;
int allclean;
atomic_t behind_writes;
unsigned long behind_writes_used; /* highest actual value at runtime */
/*
* the bitmap daemon - periodically wakes up and sweeps the bitmap
* file, cleaning up bits and flushing out pages to disk as necessary
*/
unsigned long daemon_lastrun; /* jiffies of last run */
unsigned long last_end_sync; /* when we lasted called end_sync to
* update bitmap with resync progress */
atomic_t pending_writes; /* pending writes to the bitmap file */
wait_queue_head_t write_wait;
wait_queue_head_t overflow_wait;
wait_queue_head_t behind_wait;
struct kernfs_node *sysfs_can_clear;
int cluster_slot; /* Slot offset for clustered env */
void (*sync_with_cluster)(struct mddev *mddev,
sector_t old_lo, sector_t old_hi,
sector_t new_lo, sector_t new_hi);
void *(*get_from_slot)(struct mddev *mddev, int slot);
int (*copy_from_slot)(struct mddev *mddev, int slot, sector_t *lo,
sector_t *hi, bool clear_bits);
void (*set_pages)(void *data, unsigned long pages);
void (*free)(void *data);
};
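
Note the void *data callbacks (update_sb, get_stats, set_pages, free,
together with get_from_slot): they let md-cluster hold and manipulate
bitmaps belonging to other slots without knowing struct bitmap. The
md-cluster.c hunks below reduce to this shape:

/* shape of the md-cluster usage below, cf. cluster_check_sync_size() */
void *bitmap = mddev->bitmap_ops->get_from_slot(mddev, slot);

if (!IS_ERR(bitmap)) {
	mddev->bitmap_ops->update_sb(bitmap);
	mddev->bitmap_ops->free(bitmap);
}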
/* the bitmap API */
/* these are used only by md/bitmap */
struct bitmap *md_bitmap_create(struct mddev *mddev, int slot);
int md_bitmap_load(struct mddev *mddev);
void md_bitmap_flush(struct mddev *mddev);
void md_bitmap_destroy(struct mddev *mddev);
void md_bitmap_print_sb(struct bitmap *bitmap);
void md_bitmap_update_sb(struct bitmap *bitmap);
void md_bitmap_status(struct seq_file *seq, struct bitmap *bitmap);
int md_bitmap_setallbits(struct bitmap *bitmap);
void md_bitmap_write_all(struct bitmap *bitmap);
void md_bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e);
/* these are exported */
int md_bitmap_startwrite(struct bitmap *bitmap, sector_t offset,
unsigned long sectors, int behind);
void md_bitmap_endwrite(struct bitmap *bitmap, sector_t offset,
unsigned long sectors, int success, int behind);
int md_bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int degraded);
void md_bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted);
void md_bitmap_close_sync(struct bitmap *bitmap);
void md_bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force);
void md_bitmap_sync_with_cluster(struct mddev *mddev,
sector_t old_lo, sector_t old_hi,
sector_t new_lo, sector_t new_hi);
void md_bitmap_unplug(struct bitmap *bitmap);
void md_bitmap_unplug_async(struct bitmap *bitmap);
void md_bitmap_daemon_work(struct mddev *mddev);
int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
int chunksize, int init);
struct bitmap *get_bitmap_from_slot(struct mddev *mddev, int slot);
int md_bitmap_copy_from_slot(struct mddev *mddev, int slot,
sector_t *lo, sector_t *hi, bool clear_bits);
void md_bitmap_free(struct bitmap *bitmap);
void md_bitmap_wait_behind_writes(struct mddev *mddev);
static inline bool md_bitmap_enabled(struct bitmap *bitmap)
{
return bitmap && bitmap->storage.filemap &&
!test_bit(BITMAP_STALE, &bitmap->flags);
}
#endif
void mddev_set_bitmap_ops(struct mddev *mddev);
#endif

View File

@ -317,7 +317,7 @@ static void recover_bitmaps(struct md_thread *thread)
str, ret);
goto clear_bit;
}
ret = md_bitmap_copy_from_slot(mddev, slot, &lo, &hi, true);
ret = mddev->bitmap_ops->copy_from_slot(mddev, slot, &lo, &hi, true);
if (ret) {
pr_err("md-cluster: Could not copy data from bitmap %d\n", slot);
goto clear_bit;
@ -497,7 +497,7 @@ static void process_suspend_info(struct mddev *mddev,
* we don't want to trigger lots of WARN.
*/
if (sb && !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE))
md_bitmap_sync_with_cluster(mddev, cinfo->sync_low,
mddev->bitmap_ops->sync_with_cluster(mddev, cinfo->sync_low,
cinfo->sync_hi, lo, hi);
cinfo->sync_low = lo;
cinfo->sync_hi = hi;
@ -628,8 +628,9 @@ static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
break;
case BITMAP_RESIZE:
if (le64_to_cpu(msg->high) != mddev->pers->size(mddev, 0, 0))
ret = md_bitmap_resize(mddev->bitmap,
le64_to_cpu(msg->high), 0, 0);
ret = mddev->bitmap_ops->resize(mddev,
le64_to_cpu(msg->high),
0, false);
break;
default:
ret = -1;
@ -856,7 +857,7 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
}
/* Read the disk bitmap sb and check if it needs recovery */
ret = md_bitmap_copy_from_slot(mddev, i, &lo, &hi, false);
ret = mddev->bitmap_ops->copy_from_slot(mddev, i, &lo, &hi, false);
if (ret) {
pr_warn("md-cluster: Could not gather bitmaps from slot %d", i);
lockres_free(bm_lockres);
@ -1143,13 +1144,16 @@ static int update_bitmap_size(struct mddev *mddev, sector_t size)
static int resize_bitmaps(struct mddev *mddev, sector_t newsize, sector_t oldsize)
{
struct bitmap_counts *counts;
char str[64];
struct dlm_lock_resource *bm_lockres;
struct bitmap *bitmap = mddev->bitmap;
unsigned long my_pages = bitmap->counts.pages;
void *bitmap = mddev->bitmap;
struct md_bitmap_stats stats;
unsigned long my_pages;
int i, rv;
rv = mddev->bitmap_ops->get_stats(bitmap, &stats);
if (rv)
return rv;
my_pages = stats.pages;
/*
* We need to ensure all the nodes can grow to a larger
* bitmap size before make the reshaping.
@ -1159,17 +1163,22 @@ static int resize_bitmaps(struct mddev *mddev, sector_t newsize, sector_t oldsiz
return rv;
for (i = 0; i < mddev->bitmap_info.nodes; i++) {
struct dlm_lock_resource *bm_lockres;
char str[64];
if (i == md_cluster_ops->slot_number(mddev))
continue;
bitmap = get_bitmap_from_slot(mddev, i);
bitmap = mddev->bitmap_ops->get_from_slot(mddev, i);
if (IS_ERR(bitmap)) {
pr_err("can't get bitmap from slot %d\n", i);
bitmap = NULL;
goto out;
}
counts = &bitmap->counts;
rv = mddev->bitmap_ops->get_stats(bitmap, &stats);
if (rv)
goto out;
/*
* If we can hold the bitmap lock of one node then
* the slot is not occupied, update the pages.
@ -1183,21 +1192,21 @@ static int resize_bitmaps(struct mddev *mddev, sector_t newsize, sector_t oldsiz
bm_lockres->flags |= DLM_LKF_NOQUEUE;
rv = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
if (!rv)
counts->pages = my_pages;
mddev->bitmap_ops->set_pages(bitmap, my_pages);
lockres_free(bm_lockres);
if (my_pages != counts->pages)
if (my_pages != stats.pages)
/*
* Let's revert the bitmap size if one node
* can't resize bitmap
*/
goto out;
md_bitmap_free(bitmap);
mddev->bitmap_ops->free(bitmap);
}
return 0;
out:
md_bitmap_free(bitmap);
mddev->bitmap_ops->free(bitmap);
update_bitmap_size(mddev, oldsize);
return -1;
}
@ -1207,24 +1216,27 @@ out:
*/
static int cluster_check_sync_size(struct mddev *mddev)
{
int i, rv;
bitmap_super_t *sb;
unsigned long my_sync_size, sync_size = 0;
int node_num = mddev->bitmap_info.nodes;
int current_slot = md_cluster_ops->slot_number(mddev);
struct bitmap *bitmap = mddev->bitmap;
char str[64];
int node_num = mddev->bitmap_info.nodes;
struct dlm_lock_resource *bm_lockres;
struct md_bitmap_stats stats;
void *bitmap = mddev->bitmap;
unsigned long sync_size = 0;
unsigned long my_sync_size;
char str[64];
int i, rv;
sb = kmap_atomic(bitmap->storage.sb_page);
my_sync_size = sb->sync_size;
kunmap_atomic(sb);
rv = mddev->bitmap_ops->get_stats(bitmap, &stats);
if (rv)
return rv;
my_sync_size = stats.sync_size;
for (i = 0; i < node_num; i++) {
if (i == current_slot)
continue;
bitmap = get_bitmap_from_slot(mddev, i);
bitmap = mddev->bitmap_ops->get_from_slot(mddev, i);
if (IS_ERR(bitmap)) {
pr_err("can't get bitmap from slot %d\n", i);
return -1;
@ -1238,25 +1250,28 @@ static int cluster_check_sync_size(struct mddev *mddev)
bm_lockres = lockres_init(mddev, str, NULL, 1);
if (!bm_lockres) {
pr_err("md-cluster: Cannot initialize %s\n", str);
md_bitmap_free(bitmap);
mddev->bitmap_ops->free(bitmap);
return -1;
}
bm_lockres->flags |= DLM_LKF_NOQUEUE;
rv = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
if (!rv)
md_bitmap_update_sb(bitmap);
mddev->bitmap_ops->update_sb(bitmap);
lockres_free(bm_lockres);
sb = kmap_atomic(bitmap->storage.sb_page);
if (sync_size == 0)
sync_size = sb->sync_size;
else if (sync_size != sb->sync_size) {
kunmap_atomic(sb);
md_bitmap_free(bitmap);
rv = mddev->bitmap_ops->get_stats(bitmap, &stats);
if (rv) {
mddev->bitmap_ops->free(bitmap);
return rv;
}
if (sync_size == 0) {
sync_size = stats.sync_size;
} else if (sync_size != stats.sync_size) {
mddev->bitmap_ops->free(bitmap);
return -1;
}
kunmap_atomic(sb);
md_bitmap_free(bitmap);
mddev->bitmap_ops->free(bitmap);
}
return (my_sync_size == sync_size) ? 0 : -1;
@ -1585,7 +1600,7 @@ static int gather_bitmaps(struct md_rdev *rdev)
for (sn = 0; sn < mddev->bitmap_info.nodes; sn++) {
if (sn == (cinfo->slot_number - 1))
continue;
err = md_bitmap_copy_from_slot(mddev, sn, &lo, &hi, false);
err = mddev->bitmap_ops->copy_from_slot(mddev, sn, &lo, &hi, false);
if (err) {
pr_warn("md-cluster: Could not gather bitmaps from slot %d", sn);
goto out;

View File

@ -664,6 +664,7 @@ int mddev_init(struct mddev *mddev)
mddev->resync_min = 0;
mddev->resync_max = MaxSector;
mddev->level = LEVEL_NONE;
mddev_set_bitmap_ops(mddev);
INIT_WORK(&mddev->sync_work, md_start_sync);
INIT_WORK(&mddev->del_work, mddev_delayed_delete);
@ -1264,6 +1265,18 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
return ret;
}
static u64 md_bitmap_events_cleared(struct mddev *mddev)
{
struct md_bitmap_stats stats;
int err;
err = mddev->bitmap_ops->get_stats(mddev->bitmap, &stats);
if (err)
return 0;
return stats.events_cleared;
}
/*
* validate_super for 0.90.0
* note: we are not using "freshest" for 0.9 superblock
@ -1356,7 +1369,7 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *freshest, stru
/* if adding to array with a bitmap, then we can accept an
* older device ... but not too old.
*/
if (ev1 < mddev->bitmap->events_cleared)
if (ev1 < md_bitmap_events_cleared(mddev))
return 0;
if (ev1 < mddev->events)
set_bit(Bitmap_sync, &rdev->flags);
@ -1883,7 +1896,7 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *freshest, struc
/* If adding to array with a bitmap, then we can accept an
* older device, but not too old.
*/
if (ev1 < mddev->bitmap->events_cleared)
if (ev1 < md_bitmap_events_cleared(mddev))
return 0;
if (ev1 < mddev->events)
set_bit(Bitmap_sync, &rdev->flags);
@ -2215,7 +2228,6 @@ super_1_allow_new_offset(struct md_rdev *rdev,
unsigned long long new_offset)
{
/* All necessary checks on new >= old have been done */
struct bitmap *bitmap;
if (new_offset >= rdev->data_offset)
return 1;
@ -2232,11 +2244,18 @@ super_1_allow_new_offset(struct md_rdev *rdev,
*/
if (rdev->sb_start + (32+4)*2 > new_offset)
return 0;
bitmap = rdev->mddev->bitmap;
if (bitmap && !rdev->mddev->bitmap_info.file &&
rdev->sb_start + rdev->mddev->bitmap_info.offset +
bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
if (!rdev->mddev->bitmap_info.file) {
struct mddev *mddev = rdev->mddev;
struct md_bitmap_stats stats;
int err;
err = mddev->bitmap_ops->get_stats(mddev->bitmap, &stats);
if (!err && rdev->sb_start + mddev->bitmap_info.offset +
stats.file_pages * (PAGE_SIZE >> 9) > new_offset)
return 0;
}
if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
return 0;
@ -2712,7 +2731,7 @@ repeat:
mddev_add_trace_msg(mddev, "md md_update_sb");
rewrite:
md_bitmap_update_sb(mddev->bitmap);
mddev->bitmap_ops->update_sb(mddev->bitmap);
rdev_for_each(rdev, mddev) {
if (rdev->sb_loaded != 1)
continue; /* no noise on spare devices */
@ -4572,17 +4591,23 @@ bitmap_store(struct mddev *mddev, const char *buf, size_t len)
/* buf should be <chunk> <chunk> ... or <chunk>-<chunk> ... (range) */
while (*buf) {
chunk = end_chunk = simple_strtoul(buf, &end, 0);
if (buf == end) break;
if (buf == end)
break;
if (*end == '-') { /* range */
buf = end + 1;
end_chunk = simple_strtoul(buf, &end, 0);
if (buf == end) break;
if (buf == end)
break;
}
if (*end && !isspace(*end)) break;
md_bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
if (*end && !isspace(*end))
break;
mddev->bitmap_ops->dirty_bits(mddev, chunk, end_chunk);
buf = skip_spaces(end);
}
md_bitmap_unplug(mddev->bitmap); /* flush the bits to disk */
mddev->bitmap_ops->unplug(mddev, true); /* flush the bits to disk */
out:
mddev_unlock(mddev);
return len;
@ -6098,16 +6123,10 @@ int md_run(struct mddev *mddev)
}
if (err == 0 && pers->sync_request &&
(mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
struct bitmap *bitmap;
bitmap = md_bitmap_create(mddev, -1);
if (IS_ERR(bitmap)) {
err = PTR_ERR(bitmap);
err = mddev->bitmap_ops->create(mddev, -1);
if (err)
pr_warn("%s: failed to create bitmap (%d)\n",
mdname(mddev), err);
} else
mddev->bitmap = bitmap;
}
if (err)
goto bitmap_abort;
@ -6177,7 +6196,7 @@ bitmap_abort:
pers->free(mddev, mddev->private);
mddev->private = NULL;
module_put(pers->owner);
md_bitmap_destroy(mddev);
mddev->bitmap_ops->destroy(mddev);
abort:
bioset_exit(&mddev->io_clone_set);
exit_sync_set:
@ -6196,9 +6215,10 @@ int do_md_run(struct mddev *mddev)
err = md_run(mddev);
if (err)
goto out;
err = md_bitmap_load(mddev);
err = mddev->bitmap_ops->load(mddev);
if (err) {
md_bitmap_destroy(mddev);
mddev->bitmap_ops->destroy(mddev);
goto out;
}
@ -6342,7 +6362,8 @@ static void __md_stop_writes(struct mddev *mddev)
mddev->pers->quiesce(mddev, 1);
mddev->pers->quiesce(mddev, 0);
}
md_bitmap_flush(mddev);
mddev->bitmap_ops->flush(mddev);
if (md_is_rdwr(mddev) &&
((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
@ -6369,7 +6390,7 @@ EXPORT_SYMBOL_GPL(md_stop_writes);
static void mddev_detach(struct mddev *mddev)
{
md_bitmap_wait_behind_writes(mddev);
mddev->bitmap_ops->wait_behind_writes(mddev);
if (mddev->pers && mddev->pers->quiesce && !is_md_suspended(mddev)) {
mddev->pers->quiesce(mddev, 1);
mddev->pers->quiesce(mddev, 0);
@ -6384,7 +6405,8 @@ static void mddev_detach(struct mddev *mddev)
static void __md_stop(struct mddev *mddev)
{
struct md_personality *pers = mddev->pers;
md_bitmap_destroy(mddev);
mddev->bitmap_ops->destroy(mddev);
mddev_detach(mddev);
spin_lock(&mddev->lock);
mddev->pers = NULL;
@ -7162,22 +7184,19 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
err = 0;
if (mddev->pers) {
if (fd >= 0) {
struct bitmap *bitmap;
err = mddev->bitmap_ops->create(mddev, -1);
if (!err)
err = mddev->bitmap_ops->load(mddev);
bitmap = md_bitmap_create(mddev, -1);
if (!IS_ERR(bitmap)) {
mddev->bitmap = bitmap;
err = md_bitmap_load(mddev);
} else
err = PTR_ERR(bitmap);
if (err) {
md_bitmap_destroy(mddev);
mddev->bitmap_ops->destroy(mddev);
fd = -1;
}
} else if (fd < 0) {
md_bitmap_destroy(mddev);
mddev->bitmap_ops->destroy(mddev);
}
}
if (fd < 0) {
struct file *f = mddev->bitmap_info.file;
if (f) {
@ -7446,7 +7465,6 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
goto err;
}
if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
struct bitmap *bitmap;
/* add the bitmap */
if (mddev->bitmap) {
rv = -EEXIST;
@ -7460,24 +7478,24 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
mddev->bitmap_info.default_offset;
mddev->bitmap_info.space =
mddev->bitmap_info.default_space;
bitmap = md_bitmap_create(mddev, -1);
if (!IS_ERR(bitmap)) {
mddev->bitmap = bitmap;
rv = md_bitmap_load(mddev);
} else
rv = PTR_ERR(bitmap);
rv = mddev->bitmap_ops->create(mddev, -1);
if (!rv)
rv = mddev->bitmap_ops->load(mddev);
if (rv)
md_bitmap_destroy(mddev);
mddev->bitmap_ops->destroy(mddev);
} else {
/* remove the bitmap */
if (!mddev->bitmap) {
rv = -ENOENT;
struct md_bitmap_stats stats;
rv = mddev->bitmap_ops->get_stats(mddev->bitmap, &stats);
if (rv)
goto err;
}
if (mddev->bitmap->storage.file) {
if (stats.file) {
rv = -EINVAL;
goto err;
}
if (mddev->bitmap_info.nodes) {
/* hold PW on all the bitmap lock */
if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) {
@ -7492,7 +7510,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
module_put(md_cluster_mod);
mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
}
md_bitmap_destroy(mddev);
mddev->bitmap_ops->destroy(mddev);
mddev->bitmap_info.offset = 0;
}
}
@ -8262,6 +8280,33 @@ static void md_seq_stop(struct seq_file *seq, void *v)
spin_unlock(&all_mddevs_lock);
}
static void md_bitmap_status(struct seq_file *seq, struct mddev *mddev)
{
struct md_bitmap_stats stats;
unsigned long used_pages;
unsigned long chunk_kb;
int err;
err = mddev->bitmap_ops->get_stats(mddev->bitmap, &stats);
if (err)
return;
chunk_kb = mddev->bitmap_info.chunksize >> 10;
used_pages = stats.pages - stats.missing_pages;
seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], %lu%s chunk",
used_pages, stats.pages, used_pages << (PAGE_SHIFT - 10),
chunk_kb ? chunk_kb : mddev->bitmap_info.chunksize,
chunk_kb ? "KB" : "B");
if (stats.file) {
seq_puts(seq, ", file: ");
seq_file_path(seq, stats.file, " \t\n");
}
seq_putc(seq, '\n');
}
static int md_seq_show(struct seq_file *seq, void *v)
{
struct mddev *mddev;
@ -8345,7 +8390,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
} else
seq_printf(seq, "\n ");
md_bitmap_status(seq, mddev->bitmap);
md_bitmap_status(seq, mddev);
seq_printf(seq, "\n");
}
@ -9397,7 +9442,7 @@ static void md_start_sync(struct work_struct *ws)
* stored on all devices. So make sure all bitmap pages get written.
*/
if (spares)
md_bitmap_write_all(mddev->bitmap);
mddev->bitmap_ops->write_all(mddev);
name = test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ?
"reshape" : "resync";
@ -9485,7 +9530,7 @@ static void unregister_sync_thread(struct mddev *mddev)
void md_check_recovery(struct mddev *mddev)
{
if (mddev->bitmap)
md_bitmap_daemon_work(mddev);
mddev->bitmap_ops->daemon_work(mddev);
if (signal_pending(current)) {
if (mddev->pers->sync_request && !mddev->external) {
@ -9856,7 +9901,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
if (ret)
pr_info("md-cluster: resize failed\n");
else
md_bitmap_update_sb(mddev->bitmap);
mddev->bitmap_ops->update_sb(mddev->bitmap);
}
/* Check for change of roles in the active devices */

View File

@ -535,7 +535,8 @@ struct mddev {
struct percpu_ref writes_pending;
int sync_checkers; /* # of threads checking writes_pending */
struct bitmap *bitmap; /* the bitmap for the device */
void *bitmap; /* the bitmap for the device */
struct bitmap_operations *bitmap_ops;
struct {
struct file *file; /* the bitmap file */
loff_t offset; /* offset from superblock of

View File

@ -140,7 +140,7 @@ static inline bool raid1_add_bio_to_plug(struct mddev *mddev, struct bio *bio,
* If bitmap is not enabled, it's safe to submit the io directly, and
* this can get optimal performance.
*/
if (!md_bitmap_enabled(mddev->bitmap)) {
if (!mddev->bitmap_ops->enabled(mddev)) {
raid1_submit_write(bio);
return true;
}
@ -166,12 +166,9 @@ static inline bool raid1_add_bio_to_plug(struct mddev *mddev, struct bio *bio,
* while current io submission must wait for bitmap io to be done. In order to
* avoid such deadlock, submit bitmap io asynchronously.
*/
static inline void raid1_prepare_flush_writes(struct bitmap *bitmap)
static inline void raid1_prepare_flush_writes(struct mddev *mddev)
{
if (current->bio_list)
md_bitmap_unplug_async(bitmap);
else
md_bitmap_unplug(bitmap);
mddev->bitmap_ops->unplug(mddev, current->bio_list == NULL);
}
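
The sync/async helper pair collapses into a single unplug op; the only thing the caller still decides is whether it may block, which is exactly the current->bio_list == NULL test above. A toy sketch of that dispatch, assuming the async side defers to a daemon as the old md_bitmap_unplug_async() did:

#include <stdbool.h>
#include <stdio.h>

/* stand-ins for the two writeout strategies the real op chooses between */
static void writeout_now(void)      { puts("flush bitmap pages, wait"); }
static void writeout_deferred(void) { puts("hand bitmap flush to daemon"); }

static void bitmap_unplug(bool sync)
{
	if (sync)
		writeout_now();
	else
		writeout_deferred();	/* must not block under current->bio_list */
}

int main(void)
{
	void *bio_list = (void *)1;	/* pretend current->bio_list is set */

	bitmap_unplug(bio_list == NULL);	/* async: defer */
	bio_list = NULL;
	bitmap_unplug(bio_list == NULL);	/* sync: flush inline */
	return 0;
}
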
/*

View File

@ -411,18 +411,20 @@ static void raid1_end_read_request(struct bio *bio)
static void close_write(struct r1bio *r1_bio)
{
struct mddev *mddev = r1_bio->mddev;
/* it really is the end of this request */
if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
bio_free_pages(r1_bio->behind_master_bio);
bio_put(r1_bio->behind_master_bio);
r1_bio->behind_master_bio = NULL;
}
/* clear the bitmap if all writes complete successfully */
md_bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
r1_bio->sectors,
mddev->bitmap_ops->endwrite(mddev, r1_bio->sector, r1_bio->sectors,
!test_bit(R1BIO_Degraded, &r1_bio->state),
test_bit(R1BIO_BehindIO, &r1_bio->state));
md_write_end(r1_bio->mddev);
md_write_end(mddev);
}
static void r1_bio_write_done(struct r1bio *r1_bio)
@ -894,7 +896,7 @@ static void wake_up_barrier(struct r1conf *conf)
static void flush_bio_list(struct r1conf *conf, struct bio *bio)
{
/* flush any pending bitmap writes to disk before proceeding w/ I/O */
raid1_prepare_flush_writes(conf->mddev->bitmap);
raid1_prepare_flush_writes(conf->mddev);
wake_up_barrier(conf);
while (bio) { /* submit pending writes */
@ -1311,7 +1313,6 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
struct r1conf *conf = mddev->private;
struct raid1_info *mirror;
struct bio *read_bio;
struct bitmap *bitmap = mddev->bitmap;
const enum req_op op = bio_op(bio);
const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC;
int max_sectors;
@ -1364,15 +1365,13 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
(unsigned long long)r1_bio->sector,
mirror->rdev->bdev);
if (test_bit(WriteMostly, &mirror->rdev->flags) &&
bitmap) {
if (test_bit(WriteMostly, &mirror->rdev->flags)) {
/*
* Reading from a write-mostly device must take care not to
* over-take any writes that are 'behind'
*/
mddev_add_trace_msg(mddev, "raid1 wait behind writes");
wait_event(bitmap->behind_wait,
atomic_read(&bitmap->behind_writes) == 0);
mddev->bitmap_ops->wait_behind_writes(mddev);
}
if (max_sectors < bio_sectors(bio)) {
@ -1413,7 +1412,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
struct r1conf *conf = mddev->private;
struct r1bio *r1_bio;
int i, disks;
struct bitmap *bitmap = mddev->bitmap;
unsigned long flags;
struct md_rdev *blocked_rdev;
int first_clone;
@ -1566,7 +1564,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
* at a time and thus needs a new bio that can fit the whole payload
* this bio in page sized chunks.
*/
if (write_behind && bitmap)
if (write_behind && mddev->bitmap)
max_sectors = min_t(int, max_sectors,
BIO_MAX_VECS * (PAGE_SIZE >> 9));
if (max_sectors < bio_sectors(bio)) {
@ -1593,18 +1591,22 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
continue;
if (first_clone) {
unsigned long max_write_behind =
mddev->bitmap_info.max_write_behind;
struct md_bitmap_stats stats;
int err;
/* do behind I/O ?
* Not if there are too many, or cannot
* allocate memory, or a reader on WriteMostly
* is waiting for behind writes to flush */
if (bitmap && write_behind &&
(atomic_read(&bitmap->behind_writes)
< mddev->bitmap_info.max_write_behind) &&
!waitqueue_active(&bitmap->behind_wait)) {
err = mddev->bitmap_ops->get_stats(mddev->bitmap, &stats);
if (!err && write_behind && !stats.behind_wait &&
stats.behind_writes < max_write_behind)
alloc_behind_master_bio(r1_bio, bio);
}
md_bitmap_startwrite(bitmap, r1_bio->sector, r1_bio->sectors,
mddev->bitmap_ops->startwrite(
mddev, r1_bio->sector, r1_bio->sectors,
test_bit(R1BIO_BehindIO, &r1_bio->state));
first_clone = 0;
}
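
The behind-write gate above stops reading bitmap->behind_writes and bitmap->behind_wait directly and asks get_stats() instead. Here is the predicate in isolation as a compilable sketch; the two stats fields come from the diff, everything else is scaffolding:

#include <stdbool.h>
#include <stdio.h>

struct md_bitmap_stats {
	unsigned long behind_writes;	/* behind writes in flight */
	bool behind_wait;		/* a reader is waiting for them */
};

/* decide whether this write may take the write-behind route */
static bool can_write_behind(const struct md_bitmap_stats *stats,
			     unsigned long max_write_behind)
{
	return !stats->behind_wait && stats->behind_writes < max_write_behind;
}

int main(void)
{
	struct md_bitmap_stats stats = { .behind_writes = 3, .behind_wait = false };

	printf("%d\n", can_write_behind(&stats, 256));	/* 1: under the limit */
	stats.behind_wait = true;
	printf("%d\n", can_write_behind(&stats, 256));	/* 0: reader is waiting */
	return 0;
}
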
@ -2023,7 +2025,7 @@ static void abort_sync_write(struct mddev *mddev, struct r1bio *r1_bio)
/* make sure these bits don't get cleared. */
do {
md_bitmap_end_sync(mddev->bitmap, s, &sync_blocks, 1);
mddev->bitmap_ops->end_sync(mddev, s, &sync_blocks);
s += sync_blocks;
sectors_to_go -= sync_blocks;
} while (sectors_to_go > 0);
@ -2752,7 +2754,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
int wonly = -1;
int write_targets = 0, read_targets = 0;
sector_t sync_blocks;
int still_degraded = 0;
bool still_degraded = false;
int good_sectors = RESYNC_SECTORS;
int min_bad = 0; /* number of sectors that are bad in all devices */
int idx = sector_to_idx(sector_nr);
@ -2769,12 +2771,12 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
* We can find the current address in mddev->curr_resync
*/
if (mddev->curr_resync < max_sector) /* aborted */
md_bitmap_end_sync(mddev->bitmap, mddev->curr_resync,
&sync_blocks, 1);
mddev->bitmap_ops->end_sync(mddev, mddev->curr_resync,
&sync_blocks);
else /* completed sync */
conf->fullsync = 0;
md_bitmap_close_sync(mddev->bitmap);
mddev->bitmap_ops->close_sync(mddev);
close_sync(conf);
if (mddev_is_clustered(mddev)) {
@ -2794,7 +2796,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
/* before building a request, check if we can skip these blocks..
* This call to bitmap_start_sync doesn't actually record anything
*/
if (!md_bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) &&
if (!mddev->bitmap_ops->start_sync(mddev, sector_nr, &sync_blocks, true) &&
!conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
/* We can skip this block, and probably several more */
*skipped = 1;
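
start_sync() doubles as a query here: called with degraded set to true it records nothing, but reports via *sync_blocks how many sectors share the bitmap state at sector_nr, so clean runs are skipped in one jump. A toy model of that skip loop, at per-sector granularity instead of the real bitmap chunks:

#include <stdbool.h>
#include <stdio.h>

/*
 * Reports via *blocks how many sectors from 'sector' onward share the
 * same dirty state; returns whether that run needs a resync.
 */
static bool start_sync(const bool *dirty, unsigned long nsectors,
		       unsigned long sector, unsigned long *blocks)
{
	unsigned long end = sector;

	while (end < nsectors && dirty[end] == dirty[sector])
		end++;
	*blocks = end - sector;
	return dirty[sector];
}

int main(void)
{
	bool dirty[16] = { [6] = true, [7] = true };
	unsigned long sector = 0, blocks;

	while (sector < 16) {
		bool need = start_sync(dirty, 16, sector, &blocks);

		printf("%s %lu sectors at %lu\n",
		       need ? "resync" : "skip", blocks, sector);
		sector += blocks;	/* jump over the whole run */
	}
	return 0;
}
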
@ -2812,9 +2814,9 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
* sector_nr + two times RESYNC_SECTORS
*/
md_bitmap_cond_end_sync(mddev->bitmap, sector_nr,
mddev_is_clustered(mddev) && (sector_nr + 2 * RESYNC_SECTORS > conf->cluster_sync_high));
mddev->bitmap_ops->cond_end_sync(mddev, sector_nr,
mddev_is_clustered(mddev) &&
(sector_nr + 2 * RESYNC_SECTORS > conf->cluster_sync_high));
if (raise_barrier(conf, sector_nr))
return 0;
@ -2845,7 +2847,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
if (rdev == NULL ||
test_bit(Faulty, &rdev->flags)) {
if (i < conf->raid_disks)
still_degraded = 1;
still_degraded = true;
} else if (!test_bit(In_sync, &rdev->flags)) {
bio->bi_opf = REQ_OP_WRITE;
bio->bi_end_io = end_sync_write;
@ -2969,7 +2971,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
if (len == 0)
break;
if (sync_blocks == 0) {
if (!md_bitmap_start_sync(mddev->bitmap, sector_nr,
if (!mddev->bitmap_ops->start_sync(mddev, sector_nr,
&sync_blocks, still_degraded) &&
!conf->fullsync &&
!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
@ -3294,14 +3296,16 @@ static int raid1_resize(struct mddev *mddev, sector_t sectors)
* worth it.
*/
sector_t newsize = raid1_size(mddev, sectors, 0);
int ret;
if (mddev->external_size &&
mddev->array_sectors > newsize)
return -EINVAL;
if (mddev->bitmap) {
int ret = md_bitmap_resize(mddev->bitmap, newsize, 0, 0);
ret = mddev->bitmap_ops->resize(mddev, newsize, 0, false);
if (ret)
return ret;
}
md_set_array_sectors(mddev, newsize);
if (sectors > mddev->dev_sectors &&
mddev->recovery_cp > mddev->dev_sectors) {

View File

@ -426,12 +426,13 @@ static void raid10_end_read_request(struct bio *bio)
static void close_write(struct r10bio *r10_bio)
{
struct mddev *mddev = r10_bio->mddev;
/* clear the bitmap if all writes complete successfully */
md_bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector,
r10_bio->sectors,
mddev->bitmap_ops->endwrite(mddev, r10_bio->sector, r10_bio->sectors,
!test_bit(R10BIO_Degraded, &r10_bio->state),
0);
md_write_end(r10_bio->mddev);
false);
md_write_end(mddev);
}
static void one_write_done(struct r10bio *r10_bio)
@ -884,7 +885,7 @@ static void flush_pending_writes(struct r10conf *conf)
__set_current_state(TASK_RUNNING);
blk_start_plug(&plug);
raid1_prepare_flush_writes(conf->mddev->bitmap);
raid1_prepare_flush_writes(conf->mddev);
wake_up(&conf->wait_barrier);
while (bio) { /* submit pending writes */
@ -1100,7 +1101,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
/* we aren't scheduling, so we can do the write-out directly. */
bio = bio_list_get(&plug->pending);
raid1_prepare_flush_writes(mddev->bitmap);
raid1_prepare_flush_writes(mddev);
wake_up_barrier(conf);
while (bio) { /* submit pending writes */
@ -1492,7 +1493,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
md_account_bio(mddev, &bio);
r10_bio->master_bio = bio;
atomic_set(&r10_bio->remaining, 1);
md_bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0);
mddev->bitmap_ops->startwrite(mddev, r10_bio->sector, r10_bio->sectors,
false);
for (i = 0; i < conf->copies; i++) {
if (r10_bio->devs[i].bio)
@ -3192,13 +3194,15 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
if (mddev->curr_resync < max_sector) { /* aborted */
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
md_bitmap_end_sync(mddev->bitmap, mddev->curr_resync,
&sync_blocks, 1);
mddev->bitmap_ops->end_sync(mddev,
mddev->curr_resync,
&sync_blocks);
else for (i = 0; i < conf->geo.raid_disks; i++) {
sector_t sect =
raid10_find_virt(conf, mddev->curr_resync, i);
md_bitmap_end_sync(mddev->bitmap, sect,
&sync_blocks, 1);
mddev->bitmap_ops->end_sync(mddev, sect,
&sync_blocks);
}
} else {
/* completed sync */
@ -3218,7 +3222,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
}
conf->fullsync = 0;
}
md_bitmap_close_sync(mddev->bitmap);
mddev->bitmap_ops->close_sync(mddev);
close_sync(conf);
*skipped = 1;
return sectors_skipped;
@ -3287,10 +3291,10 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
r10_bio = NULL;
for (i = 0 ; i < conf->geo.raid_disks; i++) {
int still_degraded;
bool still_degraded;
struct r10bio *rb2;
sector_t sect;
int must_sync;
bool must_sync;
int any_working;
struct raid10_info *mirror = &conf->mirrors[i];
struct md_rdev *mrdev, *mreplace;
@ -3307,7 +3311,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
if (!mrdev && !mreplace)
continue;
still_degraded = 0;
still_degraded = false;
/* want to reconstruct this device */
rb2 = r10_bio;
sect = raid10_find_virt(conf, sector_nr, i);
@ -3320,8 +3324,9 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
* we only need to recover the block if it is set in
* the bitmap
*/
must_sync = md_bitmap_start_sync(mddev->bitmap, sect,
&sync_blocks, 1);
must_sync = mddev->bitmap_ops->start_sync(mddev, sect,
&sync_blocks,
true);
if (sync_blocks < max_sync)
max_sync = sync_blocks;
if (!must_sync &&
@ -3359,12 +3364,12 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
struct md_rdev *rdev = conf->mirrors[j].rdev;
if (rdev == NULL || test_bit(Faulty, &rdev->flags)) {
still_degraded = 1;
still_degraded = true;
break;
}
}
must_sync = md_bitmap_start_sync(mddev->bitmap, sect,
must_sync = mddev->bitmap_ops->start_sync(mddev, sect,
&sync_blocks, still_degraded);
any_working = 0;
@ -3538,12 +3543,13 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
* safety reason, which ensures curr_resync_completed is
* updated in bitmap_cond_end_sync.
*/
md_bitmap_cond_end_sync(mddev->bitmap, sector_nr,
mddev->bitmap_ops->cond_end_sync(mddev, sector_nr,
mddev_is_clustered(mddev) &&
(sector_nr + 2 * RESYNC_SECTORS > conf->cluster_sync_high));
if (!md_bitmap_start_sync(mddev->bitmap, sector_nr,
&sync_blocks, mddev->degraded) &&
if (!mddev->bitmap_ops->start_sync(mddev, sector_nr,
&sync_blocks,
mddev->degraded) &&
!conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED,
&mddev->recovery)) {
/* We can skip this block */
@ -4190,6 +4196,7 @@ static int raid10_resize(struct mddev *mddev, sector_t sectors)
*/
struct r10conf *conf = mddev->private;
sector_t oldsize, size;
int ret;
if (mddev->reshape_position != MaxSector)
return -EBUSY;
@ -4202,11 +4209,11 @@ static int raid10_resize(struct mddev *mddev, sector_t sectors)
if (mddev->external_size &&
mddev->array_sectors > size)
return -EINVAL;
if (mddev->bitmap) {
int ret = md_bitmap_resize(mddev->bitmap, size, 0, 0);
ret = mddev->bitmap_ops->resize(mddev, size, 0, false);
if (ret)
return ret;
}
md_set_array_sectors(mddev, size);
if (sectors > mddev->dev_sectors &&
mddev->recovery_cp > oldsize) {
@ -4472,7 +4479,7 @@ static int raid10_start_reshape(struct mddev *mddev)
newsize = raid10_size(mddev, 0, conf->geo.raid_disks);
if (!mddev_is_clustered(mddev)) {
ret = md_bitmap_resize(mddev->bitmap, newsize, 0, 0);
ret = mddev->bitmap_ops->resize(mddev, newsize, 0, false);
if (ret)
goto abort;
else
@ -4487,20 +4494,20 @@ static int raid10_start_reshape(struct mddev *mddev)
/*
* some node is already performing reshape, and no need to
* call md_bitmap_resize again since it should be called when
* call bitmap_ops->resize again since it should be called when
* receiving BITMAP_RESIZE msg
*/
if ((sb && (le32_to_cpu(sb->feature_map) &
MD_FEATURE_RESHAPE_ACTIVE)) || (oldsize == newsize))
goto out;
ret = md_bitmap_resize(mddev->bitmap, newsize, 0, 0);
ret = mddev->bitmap_ops->resize(mddev, newsize, 0, false);
if (ret)
goto abort;
ret = md_cluster_ops->resize_bitmaps(mddev, newsize, oldsize);
if (ret) {
md_bitmap_resize(mddev->bitmap, oldsize, 0, 0);
mddev->bitmap_ops->resize(mddev, oldsize, 0, false);
goto abort;
}
}
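
The clustered branch resizes the local bitmap first, then asks the peers via resize_bitmaps(), and rolls the local bitmap back to oldsize when the cluster step fails. The same try-then-rollback shape as a minimal sketch with stubbed resize steps:

#include <stdio.h>

/* stand-ins for the local and cluster-wide resize steps */
static int local_resize(unsigned long size)   { (void)size; return 0; }
static int cluster_resize(unsigned long size) { (void)size; return -1; /* a peer failed */ }

static int resize_with_rollback(unsigned long newsize, unsigned long oldsize)
{
	int ret = local_resize(newsize);

	if (ret)
		return ret;
	ret = cluster_resize(newsize);
	if (ret)
		local_resize(oldsize);	/* undo: the peers never switched over */
	return ret;
}

int main(void)
{
	printf("resize: %d\n", resize_with_rollback(2048, 1024));	/* -1, rolled back */
	return 0;
}
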

View File

@ -313,10 +313,10 @@ void r5c_handle_cached_data_endio(struct r5conf *conf,
if (sh->dev[i].written) {
set_bit(R5_UPTODATE, &sh->dev[i].flags);
r5c_return_dev_pending_writes(conf, &sh->dev[i]);
md_bitmap_endwrite(conf->mddev->bitmap, sh->sector,
RAID5_STRIPE_SECTORS(conf),
conf->mddev->bitmap_ops->endwrite(conf->mddev,
sh->sector, RAID5_STRIPE_SECTORS(conf),
!test_bit(STRIPE_DEGRADED, &sh->state),
0);
false);
}
}
}

View File

@ -3563,8 +3563,8 @@ static void __add_stripe_bio(struct stripe_head *sh, struct bio *bi,
*/
set_bit(STRIPE_BITMAP_PENDING, &sh->state);
spin_unlock_irq(&sh->stripe_lock);
md_bitmap_startwrite(conf->mddev->bitmap, sh->sector,
RAID5_STRIPE_SECTORS(conf), 0);
conf->mddev->bitmap_ops->startwrite(conf->mddev, sh->sector,
RAID5_STRIPE_SECTORS(conf), false);
spin_lock_irq(&sh->stripe_lock);
clear_bit(STRIPE_BITMAP_PENDING, &sh->state);
if (!sh->batch_head) {
@ -3663,8 +3663,9 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
bi = nextbi;
}
if (bitmap_end)
md_bitmap_endwrite(conf->mddev->bitmap, sh->sector,
RAID5_STRIPE_SECTORS(conf), 0, 0);
conf->mddev->bitmap_ops->endwrite(conf->mddev,
sh->sector, RAID5_STRIPE_SECTORS(conf),
false, false);
bitmap_end = 0;
/* and fail all 'written' */
bi = sh->dev[i].written;
@ -3709,8 +3710,9 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
}
}
if (bitmap_end)
md_bitmap_endwrite(conf->mddev->bitmap, sh->sector,
RAID5_STRIPE_SECTORS(conf), 0, 0);
conf->mddev->bitmap_ops->endwrite(conf->mddev,
sh->sector, RAID5_STRIPE_SECTORS(conf),
false, false);
/* If we were in the middle of a write the parity block might
* still be locked - so just clear all R5_LOCKED flags
*/
@ -4059,10 +4061,10 @@ returnbi:
bio_endio(wbi);
wbi = wbi2;
}
md_bitmap_endwrite(conf->mddev->bitmap, sh->sector,
RAID5_STRIPE_SECTORS(conf),
conf->mddev->bitmap_ops->endwrite(conf->mddev,
sh->sector, RAID5_STRIPE_SECTORS(conf),
!test_bit(STRIPE_DEGRADED, &sh->state),
0);
false);
if (head_sh->batch_head) {
sh = list_first_entry(&sh->batch_list,
struct stripe_head,
@ -5788,13 +5790,10 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
}
spin_unlock_irq(&sh->stripe_lock);
if (conf->mddev->bitmap) {
for (d = 0;
d < conf->raid_disks - conf->max_degraded;
for (d = 0; d < conf->raid_disks - conf->max_degraded;
d++)
md_bitmap_startwrite(mddev->bitmap,
sh->sector,
RAID5_STRIPE_SECTORS(conf),
0);
mddev->bitmap_ops->startwrite(mddev, sh->sector,
RAID5_STRIPE_SECTORS(conf), false);
sh->bm_seq = conf->seq_flush + 1;
set_bit(STRIPE_BIT_DELAY, &sh->state);
}
@ -6486,7 +6485,7 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n
struct r5conf *conf = mddev->private;
struct stripe_head *sh;
sector_t sync_blocks;
int still_degraded = 0;
bool still_degraded = false;
int i;
if (sector_nr >= max_sector) {
@ -6498,11 +6497,11 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n
}
if (mddev->curr_resync < max_sector) /* aborted */
md_bitmap_end_sync(mddev->bitmap, mddev->curr_resync,
&sync_blocks, 1);
mddev->bitmap_ops->end_sync(mddev, mddev->curr_resync,
&sync_blocks);
else /* completed sync */
conf->fullsync = 0;
md_bitmap_close_sync(mddev->bitmap);
mddev->bitmap_ops->close_sync(mddev);
return 0;
}
@ -6531,7 +6530,8 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n
}
if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
!conf->fullsync &&
!md_bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) &&
!mddev->bitmap_ops->start_sync(mddev, sector_nr, &sync_blocks,
true) &&
sync_blocks >= RAID5_STRIPE_SECTORS(conf)) {
/* we can skip this block, and probably more */
do_div(sync_blocks, RAID5_STRIPE_SECTORS(conf));
@ -6540,7 +6540,7 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n
return sync_blocks * RAID5_STRIPE_SECTORS(conf);
}
md_bitmap_cond_end_sync(mddev->bitmap, sector_nr, false);
mddev->bitmap_ops->cond_end_sync(mddev, sector_nr, false);
sh = raid5_get_active_stripe(conf, NULL, sector_nr,
R5_GAS_NOBLOCK);
@ -6559,10 +6559,11 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n
struct md_rdev *rdev = conf->disks[i].rdev;
if (rdev == NULL || test_bit(Faulty, &rdev->flags))
still_degraded = 1;
still_degraded = true;
}
md_bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded);
mddev->bitmap_ops->start_sync(mddev, sector_nr, &sync_blocks,
still_degraded);
set_bit(STRIPE_SYNC_REQUESTED, &sh->state);
set_bit(STRIPE_HANDLE, &sh->state);
@ -6767,7 +6768,7 @@ static void raid5d(struct md_thread *thread)
/* Now is a good time to flush some bitmap updates */
conf->seq_flush++;
spin_unlock_irq(&conf->device_lock);
md_bitmap_unplug(mddev->bitmap);
mddev->bitmap_ops->unplug(mddev, true);
spin_lock_irq(&conf->device_lock);
conf->seq_write = conf->seq_flush;
activate_bit_delay(conf, conf->temp_inactive_list);
@ -8312,6 +8313,7 @@ static int raid5_resize(struct mddev *mddev, sector_t sectors)
*/
sector_t newsize;
struct r5conf *conf = mddev->private;
int ret;
if (raid5_has_log(conf) || raid5_has_ppl(conf))
return -EINVAL;
@ -8320,11 +8322,11 @@ static int raid5_resize(struct mddev *mddev, sector_t sectors)
if (mddev->external_size &&
mddev->array_sectors > newsize)
return -EINVAL;
if (mddev->bitmap) {
int ret = md_bitmap_resize(mddev->bitmap, sectors, 0, 0);
ret = mddev->bitmap_ops->resize(mddev, sectors, 0, false);
if (ret)
return ret;
}
md_set_array_sectors(mddev, newsize);
if (sectors > mddev->dev_sectors &&
mddev->recovery_cp > mddev->dev_sectors) {