linux/fs/gfs2/meta_io.c
Steven Whitehouse 7276b3b0c7 [GFS2] Tidy up meta_io code
Fix a bug in the directory reading code, where we might have dereferenced
a NULL pointer in case of OOM. Updated the directory code to use the new
& improved version of gfs2_meta_ra() which now returns the first block
that was being read. Previously it was releasing it requiring following
code to grab the block again at each point it was called.

Also turned off readahead on directory lookups since we are reading a
hash table, and therefore reading the entries in order is very
unlikely. Readahead is still used for all other calls to the
directory reading function (e.g. when growing the hash table).

Removed the DIO_START constant. Everywhere this was used, it was
used to unconditionally start i/o aside from a couple of places, so
I've removed it and made the couple of exceptions to this rule into
separate functions.

Also hunted through the other DIO flags and removed them as arguments
from functions which were always called with the same combination of
arguments.

Updated gfs2_meta_indirect_buffer to be a bit more efficient and
hopefully also be a bit easier to read.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
2006-09-21 17:05:23 -04:00

754 lines
16 KiB
C

/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/delay.h>
#include <linux/gfs2_ondisk.h>
#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "log.h"
#include "lops.h"
#include "meta_io.h"
#include "rgrp.h"
#include "trans.h"
#include "util.h"
#include "ops_address.h"
#define buffer_busy(bh) \
((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned)))
#define buffer_in_io(bh) \
((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock)))
static int aspace_get_block(struct inode *inode, sector_t lblock,
struct buffer_head *bh_result, int create)
{
gfs2_assert_warn(inode->i_sb->s_fs_info, 0);
return -EOPNOTSUPP;
}
static int gfs2_aspace_writepage(struct page *page,
struct writeback_control *wbc)
{
return block_write_full_page(page, aspace_get_block, wbc);
}
static const struct address_space_operations aspace_aops = {
.writepage = gfs2_aspace_writepage,
.releasepage = gfs2_releasepage,
};
/**
* gfs2_aspace_get - Create and initialize a struct inode structure
* @sdp: the filesystem the aspace is in
*
* Right now a struct inode is just a struct inode. Maybe Linux
* will supply a more lightweight address space construct (that works)
* in the future.
*
* Make sure pages/buffers in this aspace aren't in high memory.
*
* Returns: the aspace
*/
struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp)
{
struct inode *aspace;
aspace = new_inode(sdp->sd_vfs);
if (aspace) {
mapping_set_gfp_mask(aspace->i_mapping, GFP_NOFS);
aspace->i_mapping->a_ops = &aspace_aops;
aspace->i_size = ~0ULL;
aspace->u.generic_ip = NULL;
insert_inode_hash(aspace);
}
return aspace;
}
void gfs2_aspace_put(struct inode *aspace)
{
remove_inode_hash(aspace);
iput(aspace);
}
/**
* gfs2_ail1_start_one - Start I/O on a part of the AIL
* @sdp: the filesystem
* @tr: the part of the AIL
*
*/
void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
struct gfs2_bufdata *bd, *s;
struct buffer_head *bh;
int retry;
BUG_ON(!spin_is_locked(&sdp->sd_log_lock));
do {
retry = 0;
list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list,
bd_ail_st_list) {
bh = bd->bd_bh;
gfs2_assert(sdp, bd->bd_ail == ai);
if (!buffer_busy(bh)) {
if (!buffer_uptodate(bh)) {
gfs2_log_unlock(sdp);
gfs2_io_error_bh(sdp, bh);
gfs2_log_lock(sdp);
}
list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
continue;
}
if (!buffer_dirty(bh))
continue;
list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
gfs2_log_unlock(sdp);
wait_on_buffer(bh);
ll_rw_block(WRITE, 1, &bh);
gfs2_log_lock(sdp);
retry = 1;
break;
}
} while (retry);
}
/**
* gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced
* @sdp: the filesystem
* @ai: the AIL entry
*
*/
int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int flags)
{
struct gfs2_bufdata *bd, *s;
struct buffer_head *bh;
list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list,
bd_ail_st_list) {
bh = bd->bd_bh;
gfs2_assert(sdp, bd->bd_ail == ai);
if (buffer_busy(bh)) {
if (flags & DIO_ALL)
continue;
else
break;
}
if (!buffer_uptodate(bh))
gfs2_io_error_bh(sdp, bh);
list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
}
return list_empty(&ai->ai_ail1_list);
}
/**
* gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced
* @sdp: the filesystem
* @ai: the AIL entry
*
*/
void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
struct list_head *head = &ai->ai_ail2_list;
struct gfs2_bufdata *bd;
while (!list_empty(head)) {
bd = list_entry(head->prev, struct gfs2_bufdata,
bd_ail_st_list);
gfs2_assert(sdp, bd->bd_ail == ai);
bd->bd_ail = NULL;
list_del(&bd->bd_ail_st_list);
list_del(&bd->bd_ail_gl_list);
atomic_dec(&bd->bd_gl->gl_ail_count);
brelse(bd->bd_bh);
}
}
/**
* ail_empty_gl - remove all buffers for a given lock from the AIL
* @gl: the glock
*
* None of the buffers should be dirty, locked, or pinned.
*/
void gfs2_ail_empty_gl(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
unsigned int blocks;
struct list_head *head = &gl->gl_ail_list;
struct gfs2_bufdata *bd;
struct buffer_head *bh;
u64 blkno;
int error;
blocks = atomic_read(&gl->gl_ail_count);
if (!blocks)
return;
error = gfs2_trans_begin(sdp, 0, blocks);
if (gfs2_assert_withdraw(sdp, !error))
return;
gfs2_log_lock(sdp);
while (!list_empty(head)) {
bd = list_entry(head->next, struct gfs2_bufdata,
bd_ail_gl_list);
bh = bd->bd_bh;
blkno = bh->b_blocknr;
gfs2_assert_withdraw(sdp, !buffer_busy(bh));
bd->bd_ail = NULL;
list_del(&bd->bd_ail_st_list);
list_del(&bd->bd_ail_gl_list);
atomic_dec(&gl->gl_ail_count);
brelse(bh);
gfs2_log_unlock(sdp);
gfs2_trans_add_revoke(sdp, blkno);
gfs2_log_lock(sdp);
}
gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
gfs2_log_unlock(sdp);
gfs2_trans_end(sdp);
gfs2_log_flush(sdp, NULL);
}
/**
* gfs2_meta_inval - Invalidate all buffers associated with a glock
* @gl: the glock
*
*/
void gfs2_meta_inval(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
struct inode *aspace = gl->gl_aspace;
struct address_space *mapping = gl->gl_aspace->i_mapping;
gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
atomic_inc(&aspace->i_writecount);
truncate_inode_pages(mapping, 0);
atomic_dec(&aspace->i_writecount);
gfs2_assert_withdraw(sdp, !mapping->nrpages);
}
/**
* gfs2_meta_sync - Sync all buffers associated with a glock
* @gl: The glock
*
*/
void gfs2_meta_sync(struct gfs2_glock *gl)
{
struct address_space *mapping = gl->gl_aspace->i_mapping;
int error;
filemap_fdatawrite(mapping);
error = filemap_fdatawait(mapping);
if (error)
gfs2_io_error(gl->gl_sbd);
}
/**
* getbuf - Get a buffer with a given address space
* @sdp: the filesystem
* @aspace: the address space
* @blkno: the block number (filesystem scope)
* @create: 1 if the buffer should be created
*
* Returns: the buffer
*/
static struct buffer_head *getbuf(struct gfs2_sbd *sdp, struct inode *aspace,
u64 blkno, int create)
{
struct page *page;
struct buffer_head *bh;
unsigned int shift;
unsigned long index;
unsigned int bufnum;
shift = PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift;
index = blkno >> shift; /* convert block to page */
bufnum = blkno - (index << shift); /* block buf index within page */
if (create) {
for (;;) {
page = grab_cache_page(aspace->i_mapping, index);
if (page)
break;
yield();
}
} else {
page = find_lock_page(aspace->i_mapping, index);
if (!page)
return NULL;
}
if (!page_has_buffers(page))
create_empty_buffers(page, sdp->sd_sb.sb_bsize, 0);
/* Locate header for our buffer within our page */
for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page)
/* Do nothing */;
get_bh(bh);
if (!buffer_mapped(bh))
map_bh(bh, sdp->sd_vfs, blkno);
unlock_page(page);
mark_page_accessed(page);
page_cache_release(page);
return bh;
}
static void meta_prep_new(struct buffer_head *bh)
{
struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
lock_buffer(bh);
clear_buffer_dirty(bh);
set_buffer_uptodate(bh);
unlock_buffer(bh);
mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
}
/**
* gfs2_meta_new - Get a block
* @gl: The glock associated with this block
* @blkno: The block number
*
* Returns: The buffer
*/
struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno)
{
struct buffer_head *bh;
bh = getbuf(gl->gl_sbd, gl->gl_aspace, blkno, CREATE);
meta_prep_new(bh);
return bh;
}
/**
* gfs2_meta_read - Read a block from disk
* @gl: The glock covering the block
* @blkno: The block number
* @flags: flags
* @bhp: the place where the buffer is returned (NULL on failure)
*
* Returns: errno
*/
int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
struct buffer_head **bhp)
{
*bhp = getbuf(gl->gl_sbd, gl->gl_aspace, blkno, CREATE);
if (!buffer_uptodate(*bhp))
ll_rw_block(READ, 1, bhp);
if (flags & DIO_WAIT) {
int error = gfs2_meta_wait(gl->gl_sbd, *bhp);
if (error) {
brelse(*bhp);
return error;
}
}
return 0;
}
/**
* gfs2_meta_wait - Reread a block from disk
* @sdp: the filesystem
* @bh: The block to wait for
*
* Returns: errno
*/
int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
return -EIO;
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
struct gfs2_trans *tr = current->journal_info;
if (tr && tr->tr_touched)
gfs2_io_error_bh(sdp, bh);
return -EIO;
}
if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
return -EIO;
return 0;
}
/**
* gfs2_attach_bufdata - attach a struct gfs2_bufdata structure to a buffer
* @gl: the glock the buffer belongs to
* @bh: The buffer to be attached to
* @meta: Flag to indicate whether its metadata or not
*/
void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
int meta)
{
struct gfs2_bufdata *bd;
if (meta)
lock_page(bh->b_page);
if (bh->b_private) {
if (meta)
unlock_page(bh->b_page);
return;
}
bd = kmem_cache_alloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL),
memset(bd, 0, sizeof(struct gfs2_bufdata));
bd->bd_bh = bh;
bd->bd_gl = gl;
INIT_LIST_HEAD(&bd->bd_list_tr);
if (meta)
lops_init_le(&bd->bd_le, &gfs2_buf_lops);
else
lops_init_le(&bd->bd_le, &gfs2_databuf_lops);
bh->b_private = bd;
if (meta)
unlock_page(bh->b_page);
}
/**
* gfs2_pin - Pin a buffer in memory
* @sdp: the filesystem the buffer belongs to
* @bh: The buffer to be pinned
*
*/
void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
struct gfs2_bufdata *bd = bh->b_private;
gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags));
if (test_set_buffer_pinned(bh))
gfs2_assert_withdraw(sdp, 0);
wait_on_buffer(bh);
/* If this buffer is in the AIL and it has already been written
to in-place disk block, remove it from the AIL. */
gfs2_log_lock(sdp);
if (bd->bd_ail && !buffer_in_io(bh))
list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
gfs2_log_unlock(sdp);
clear_buffer_dirty(bh);
wait_on_buffer(bh);
if (!buffer_uptodate(bh))
gfs2_io_error_bh(sdp, bh);
get_bh(bh);
}
/**
* gfs2_unpin - Unpin a buffer
* @sdp: the filesystem the buffer belongs to
* @bh: The buffer to unpin
* @ai:
*
*/
void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
struct gfs2_ail *ai)
{
struct gfs2_bufdata *bd = bh->b_private;
gfs2_assert_withdraw(sdp, buffer_uptodate(bh));
if (!buffer_pinned(bh))
gfs2_assert_withdraw(sdp, 0);
mark_buffer_dirty(bh);
clear_buffer_pinned(bh);
gfs2_log_lock(sdp);
if (bd->bd_ail) {
list_del(&bd->bd_ail_st_list);
brelse(bh);
} else {
struct gfs2_glock *gl = bd->bd_gl;
list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
atomic_inc(&gl->gl_ail_count);
}
bd->bd_ail = ai;
list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
gfs2_log_unlock(sdp);
}
/**
* gfs2_meta_wipe - make inode's buffers so they aren't dirty/pinned anymore
* @ip: the inode who owns the buffers
* @bstart: the first buffer in the run
* @blen: the number of buffers in the run
*
*/
void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct inode *aspace = ip->i_gl->gl_aspace;
struct buffer_head *bh;
while (blen) {
bh = getbuf(sdp, aspace, bstart, NO_CREATE);
if (bh) {
struct gfs2_bufdata *bd = bh->b_private;
if (test_clear_buffer_pinned(bh)) {
struct gfs2_trans *tr = current->journal_info;
gfs2_log_lock(sdp);
list_del_init(&bd->bd_le.le_list);
gfs2_assert_warn(sdp, sdp->sd_log_num_buf);
sdp->sd_log_num_buf--;
gfs2_log_unlock(sdp);
tr->tr_num_buf_rm++;
brelse(bh);
}
if (bd) {
gfs2_log_lock(sdp);
if (bd->bd_ail) {
u64 blkno = bh->b_blocknr;
bd->bd_ail = NULL;
list_del(&bd->bd_ail_st_list);
list_del(&bd->bd_ail_gl_list);
atomic_dec(&bd->bd_gl->gl_ail_count);
brelse(bh);
gfs2_log_unlock(sdp);
gfs2_trans_add_revoke(sdp, blkno);
} else
gfs2_log_unlock(sdp);
}
lock_buffer(bh);
clear_buffer_dirty(bh);
clear_buffer_uptodate(bh);
unlock_buffer(bh);
brelse(bh);
}
bstart++;
blen--;
}
}
/**
* gfs2_meta_cache_flush - get rid of any references on buffers for this inode
* @ip: The GFS2 inode
*
* This releases buffers that are in the most-recently-used array of
* blocks used for indirect block addressing for this inode.
*/
void gfs2_meta_cache_flush(struct gfs2_inode *ip)
{
struct buffer_head **bh_slot;
unsigned int x;
spin_lock(&ip->i_spin);
for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) {
bh_slot = &ip->i_cache[x];
if (!*bh_slot)
break;
brelse(*bh_slot);
*bh_slot = NULL;
}
spin_unlock(&ip->i_spin);
}
/**
* gfs2_meta_indirect_buffer - Get a metadata buffer
* @ip: The GFS2 inode
* @height: The level of this buf in the metadata (indir addr) tree (if any)
* @num: The block number (device relative) of the buffer
* @new: Non-zero if we may create a new buffer
* @bhp: the buffer is returned here
*
* Try to use the gfs2_inode's MRU metadata tree cache.
*
* Returns: errno
*/
int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
int new, struct buffer_head **bhp)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_glock *gl = ip->i_gl;
struct buffer_head *bh = NULL, **bh_slot = ip->i_cache + height;
int in_cache = 0;
spin_lock(&ip->i_spin);
if (*bh_slot && (*bh_slot)->b_blocknr == num) {
bh = *bh_slot;
get_bh(bh);
in_cache = 1;
}
spin_unlock(&ip->i_spin);
if (!bh)
bh = getbuf(gl->gl_sbd, gl->gl_aspace, num, CREATE);
if (!bh)
return -ENOBUFS;
if (new) {
if (gfs2_assert_warn(sdp, height))
goto err;
meta_prep_new(bh);
gfs2_trans_add_bh(ip->i_gl, bh, 1);
gfs2_metatype_set(bh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
} else {
u32 mtype = height ? GFS2_METATYPE_IN : GFS2_METATYPE_DI;
if (!buffer_uptodate(bh)) {
ll_rw_block(READ, 1, &bh);
if (gfs2_meta_wait(sdp, bh))
goto err;
}
if (gfs2_metatype_check(sdp, bh, mtype))
goto err;
}
if (!in_cache) {
spin_lock(&ip->i_spin);
if (*bh_slot)
brelse(*bh_slot);
*bh_slot = bh;
get_bh(bh);
spin_unlock(&ip->i_spin);
}
*bhp = bh;
return 0;
err:
brelse(bh);
return -EIO;
}
/**
* gfs2_meta_ra - start readahead on an extent of a file
* @gl: the glock the blocks belong to
* @dblock: the starting disk block
* @extlen: the number of blocks in the extent
*
* returns: the first buffer in the extent
*/
struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
struct inode *aspace = gl->gl_aspace;
struct buffer_head *first_bh, *bh;
u32 max_ra = gfs2_tune_get(sdp, gt_max_readahead) >>
sdp->sd_sb.sb_bsize_shift;
BUG_ON(!extlen);
if (max_ra < 1)
max_ra = 1;
if (extlen > max_ra)
extlen = max_ra;
first_bh = getbuf(sdp, aspace, dblock, CREATE);
if (buffer_uptodate(first_bh))
goto out;
if (!buffer_locked(first_bh))
ll_rw_block(READ, 1, &first_bh);
dblock++;
extlen--;
while (extlen) {
bh = getbuf(sdp, aspace, dblock, CREATE);
if (!buffer_uptodate(bh) && !buffer_locked(bh))
ll_rw_block(READA, 1, &bh);
brelse(bh);
dblock++;
extlen--;
if (!buffer_locked(first_bh) && buffer_uptodate(first_bh))
goto out;
}
wait_on_buffer(first_bh);
out:
return first_bh;
}
/**
* gfs2_meta_syncfs - sync all the buffers in a filesystem
* @sdp: the filesystem
*
*/
void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
{
gfs2_log_flush(sdp, NULL);
for (;;) {
gfs2_ail1_start(sdp, DIO_ALL);
if (gfs2_ail1_empty(sdp, DIO_ALL))
break;
msleep(10);
}
}