Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "Lots of bug fixes, including Zheng and Jan's extent status shrinker
  fixes, which should improve CPU utilization and reduce potential soft
  lockups under heavy memory pressure, and Eric Whitney's bigalloc fixes"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (26 commits)
  ext4: ext4_da_convert_inline_data_to_extent drop locked page after error
  ext4: fix suboptimal seek_{data,hole} extents traversial
  ext4: ext4_inline_data_fiemap should respect callers argument
  ext4: prevent fsreentrance deadlock for inline_data
  ext4: forbid journal_async_commit in data=ordered mode
  jbd2: remove unnecessary NULL check before iput()
  ext4: Remove an unnecessary check for NULL before iput()
  ext4: remove unneeded code in ext4_unlink
  ext4: don't count external journal blocks as overhead
  ext4: remove never taken branch from ext4_ext_shift_path_extents()
  ext4: create nojournal_checksum mount option
  ext4: update comments regarding ext4_delete_inode()
  ext4: cleanup GFP flags inside resize path
  ext4: introduce aging to extent status tree
  ext4: cleanup flag definitions for extent status tree
  ext4: limit number of scanned extents in status tree shrinker
  ext4: move handling of list of shrinkable inodes into extent status code
  ext4: change LRU to round-robin in extent status tree shrinker
  ext4: cache extent hole in extent status tree for ext4_da_map_blocks()
  ext4: fix block reservation for bigalloc filesystems
  ...
commit 9bfccec24e
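The headline change in this pull is the extent status tree shrinker rework: instead of keeping shrinkable inodes on an LRU that has to be sorted under load, the shrinker now walks them round-robin and caps how many extents a single pass may scan. The sketch below illustrates only that scanning pattern; it uses a toy array ring and a hypothetical reclaim_extents() callback, not the real ext4 structures or locking.

```c
#include <stdio.h>

struct toy_inode { int reclaimable; };

/*
 * Round-robin scan with a per-pass budget: remember where the previous pass
 * stopped (*cursor) and resume there, so no single inode is rescanned while
 * others are starved.  Simplified illustration, not kernel code.
 */
static int shrink_round_robin(struct toy_inode *ring, int nr_inodes, int *cursor,
			      int nr_to_scan,
			      int (*reclaim_extents)(struct toy_inode *, int *))
{
	int nr_shrunk = 0, walked = 0;

	while (nr_to_scan > 0 && walked++ < nr_inodes) {
		struct toy_inode *ei = &ring[*cursor];

		*cursor = (*cursor + 1) % nr_inodes;	/* next pass resumes here */
		if (!ei->reclaimable)
			continue;
		nr_shrunk += reclaim_extents(ei, &nr_to_scan);	/* spends the budget */
	}
	return nr_shrunk;
}

static int reclaim_all(struct toy_inode *ei, int *nr_to_scan)
{
	int freed = ei->reclaimable;

	*nr_to_scan -= freed;		/* pretend each extent costs one scan unit */
	ei->reclaimable = 0;
	return freed;
}

int main(void)
{
	struct toy_inode ring[3] = { {4}, {0}, {7} };
	int cursor = 0;

	printf("shrunk %d\n", shrink_round_robin(ring, 3, &cursor, 5, reclaim_all));
	return 0;
}
```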
@@ -158,17 +158,8 @@ struct ext4_allocation_request {
#define EXT4_MAP_MAPPED (1 << BH_Mapped)
#define EXT4_MAP_UNWRITTEN (1 << BH_Unwritten)
#define EXT4_MAP_BOUNDARY (1 << BH_Boundary)
/* Sometimes (in the bigalloc case, from ext4_da_get_block_prep) the caller of
 * ext4_map_blocks wants to know whether or not the underlying cluster has
 * already been accounted for. EXT4_MAP_FROM_CLUSTER conveys to the caller that
 * the requested mapping was from previously mapped (or delayed allocated)
 * cluster. We use BH_AllocFromCluster only for this flag. BH_AllocFromCluster
 * should never appear on buffer_head's state flags.
 */
#define EXT4_MAP_FROM_CLUSTER (1 << BH_AllocFromCluster)
#define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
		EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
		EXT4_MAP_FROM_CLUSTER)
		EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY)

struct ext4_map_blocks {
	ext4_fsblk_t m_pblk;
@@ -565,10 +556,8 @@ enum {
#define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080
	/* Do not take i_data_sem locking in ext4_map_blocks */
#define EXT4_GET_BLOCKS_NO_LOCK 0x0100
	/* Do not put hole in extent cache */
#define EXT4_GET_BLOCKS_NO_PUT_HOLE 0x0200
	/* Convert written extents to unwritten */
#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0400
#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0200

/*
 * The bit position of these flags must not overlap with any of the
@@ -889,10 +878,12 @@ struct ext4_inode_info {
	/* extents status tree */
	struct ext4_es_tree i_es_tree;
	rwlock_t i_es_lock;
	struct list_head i_es_lru;
	struct list_head i_es_list;
	unsigned int i_es_all_nr;	/* protected by i_es_lock */
	unsigned int i_es_lru_nr;	/* protected by i_es_lock */
	unsigned long i_touch_when;	/* jiffies of last accessing */
	unsigned int i_es_shk_nr;	/* protected by i_es_lock */
	ext4_lblk_t i_es_shrink_lblk;	/* Offset where we start searching for
					   extents to shrink. Protected by
					   i_es_lock */

	/* ialloc */
	ext4_group_t i_last_alloc_group;
@@ -1337,10 +1328,11 @@ struct ext4_sb_info {

	/* Reclaim extents from extent status tree */
	struct shrinker s_es_shrinker;
	struct list_head s_es_lru;
	struct list_head s_es_list;	/* List of inodes with reclaimable extents */
	long s_es_nr_inode;
	struct ext4_es_stats s_es_stats;
	struct mb_cache *s_mb_cache;
	spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp;
	spinlock_t s_es_lock ____cacheline_aligned_in_smp;

	/* Ratelimit ext4 messages. */
	struct ratelimit_state s_err_ratelimit_state;
@@ -2196,7 +2188,6 @@ extern int ext4_calculate_overhead(struct super_block *sb);
extern void ext4_superblock_csum_set(struct super_block *sb);
extern void *ext4_kvmalloc(size_t size, gfp_t flags);
extern void *ext4_kvzalloc(size_t size, gfp_t flags);
extern void ext4_kvfree(void *ptr);
extern int ext4_alloc_flex_bg_array(struct super_block *sb,
				    ext4_group_t ngroup);
extern const char *ext4_decode_error(struct super_block *sb, int errno,
@@ -2647,7 +2638,7 @@ extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode,
					int *retval);
extern int ext4_inline_data_fiemap(struct inode *inode,
				   struct fiemap_extent_info *fieinfo,
				   int *has_inline);
				   int *has_inline, __u64 start, __u64 len);
extern int ext4_try_to_evict_inline_data(handle_t *handle,
					 struct inode *inode,
					 int needed);
@@ -2794,16 +2785,6 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io,
/* mmp.c */
extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);

/*
 * Note that these flags will never ever appear in a buffer_head's state flag.
 * See EXT4_MAP_... to see where this is used.
 */
enum ext4_state_bits {
	BH_AllocFromCluster	/* allocated blocks were part of already
				 * allocated cluster. */
		= BH_JBDPrivateStart
};

/*
 * Add new method to test whether block and inode bitmaps are properly
 * initialized. With uninit_bg reading the block from disk is not enough
@ -2306,16 +2306,16 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
|
||||
ext4_lblk_t block)
|
||||
{
|
||||
int depth = ext_depth(inode);
|
||||
unsigned long len = 0;
|
||||
ext4_lblk_t lblock = 0;
|
||||
ext4_lblk_t len;
|
||||
ext4_lblk_t lblock;
|
||||
struct ext4_extent *ex;
|
||||
struct extent_status es;
|
||||
|
||||
ex = path[depth].p_ext;
|
||||
if (ex == NULL) {
|
||||
/*
|
||||
* there is no extent yet, so gap is [0;-] and we
|
||||
* don't cache it
|
||||
*/
|
||||
/* there is no extent yet, so gap is [0;-] */
|
||||
lblock = 0;
|
||||
len = EXT_MAX_BLOCKS;
|
||||
ext_debug("cache gap(whole file):");
|
||||
} else if (block < le32_to_cpu(ex->ee_block)) {
|
||||
lblock = block;
|
||||
@ -2324,9 +2324,6 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
|
||||
block,
|
||||
le32_to_cpu(ex->ee_block),
|
||||
ext4_ext_get_actual_len(ex));
|
||||
if (!ext4_find_delalloc_range(inode, lblock, lblock + len - 1))
|
||||
ext4_es_insert_extent(inode, lblock, len, ~0,
|
||||
EXTENT_STATUS_HOLE);
|
||||
} else if (block >= le32_to_cpu(ex->ee_block)
|
||||
+ ext4_ext_get_actual_len(ex)) {
|
||||
ext4_lblk_t next;
|
||||
@ -2340,14 +2337,19 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
|
||||
block);
|
||||
BUG_ON(next == lblock);
|
||||
len = next - lblock;
|
||||
if (!ext4_find_delalloc_range(inode, lblock, lblock + len - 1))
|
||||
ext4_es_insert_extent(inode, lblock, len, ~0,
|
||||
EXTENT_STATUS_HOLE);
|
||||
} else {
|
||||
BUG();
|
||||
}
|
||||
|
||||
ext_debug(" -> %u:%lu\n", lblock, len);
|
||||
ext4_es_find_delayed_extent_range(inode, lblock, lblock + len - 1, &es);
|
||||
if (es.es_len) {
|
||||
/* There's delayed extent containing lblock? */
|
||||
if (es.es_lblk <= lblock)
|
||||
return;
|
||||
len = min(es.es_lblk - lblock, len);
|
||||
}
|
||||
ext_debug(" -> %u:%u\n", lblock, len);
|
||||
ext4_es_insert_extent(inode, lblock, len, ~0, EXTENT_STATUS_HOLE);
|
||||
}
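In the rewritten ext4_ext_put_gap_in_cache() above, the gap found in the extent tree is no longer thrown away whenever delalloc may be present; it is trimmed against the first delayed extent in the range, and only the prefix before that extent is cached as a hole. A small stand-alone illustration of that trimming arithmetic (plain C with made-up names, not the ext4 helpers):

```c
#include <stdio.h>

/* Hole found in the extent tree: logical blocks [lblock, lblock + len - 1]. */
static unsigned int trim_hole_against_delayed(unsigned int lblock, unsigned int len,
					      unsigned int es_lblk, unsigned int es_len)
{
	if (es_len == 0)
		return len;		/* no delayed extent in the range */
	if (es_lblk <= lblock)
		return 0;		/* delayed data covers the start: cache nothing */
	if (es_lblk - lblock < len)
		len = es_lblk - lblock;	/* keep only the part before the delayed extent */
	return len;
}

int main(void)
{
	/* gap [100, 199], delayed extent starting at 150 -> cache hole [100, 149] */
	printf("%u\n", trim_hole_against_delayed(100, 100, 150, 30));	/* prints 50 */
	return 0;
}
```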
|
||||
|
||||
/*
|
||||
@ -2481,7 +2483,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
|
||||
ext4_lblk_t from, ext4_lblk_t to)
|
||||
{
|
||||
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||
unsigned short ee_len = ext4_ext_get_actual_len(ex);
|
||||
unsigned short ee_len = ext4_ext_get_actual_len(ex);
|
||||
ext4_fsblk_t pblk;
|
||||
int flags = get_default_free_blocks_flags(inode);
|
||||
|
||||
@ -2490,7 +2492,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
|
||||
* at the beginning of the extent. Instead, we make a note
|
||||
* that we tried freeing the cluster, and check to see if we
|
||||
* need to free it on a subsequent call to ext4_remove_blocks,
|
||||
* or at the end of the ext4_truncate() operation.
|
||||
* or at the end of ext4_ext_rm_leaf or ext4_ext_remove_space.
|
||||
*/
|
||||
flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER;
|
||||
|
||||
@ -2501,8 +2503,8 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
|
||||
* partial cluster here.
|
||||
*/
|
||||
pblk = ext4_ext_pblock(ex) + ee_len - 1;
|
||||
if ((*partial_cluster > 0) &&
|
||||
(EXT4_B2C(sbi, pblk) != *partial_cluster)) {
|
||||
if (*partial_cluster > 0 &&
|
||||
*partial_cluster != (long long) EXT4_B2C(sbi, pblk)) {
|
||||
ext4_free_blocks(handle, inode, NULL,
|
||||
EXT4_C2B(sbi, *partial_cluster),
|
||||
sbi->s_cluster_ratio, flags);
|
||||
@ -2528,7 +2530,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
|
||||
&& to == le32_to_cpu(ex->ee_block) + ee_len - 1) {
|
||||
/* tail removal */
|
||||
ext4_lblk_t num;
|
||||
unsigned int unaligned;
|
||||
long long first_cluster;
|
||||
|
||||
num = le32_to_cpu(ex->ee_block) + ee_len - from;
|
||||
pblk = ext4_ext_pblock(ex) + ee_len - num;
|
||||
@ -2538,7 +2540,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
|
||||
* used by any other extent (partial_cluster is negative).
|
||||
*/
|
||||
if (*partial_cluster < 0 &&
|
||||
-(*partial_cluster) == EXT4_B2C(sbi, pblk + num - 1))
|
||||
*partial_cluster == -(long long) EXT4_B2C(sbi, pblk+num-1))
|
||||
flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER;
|
||||
|
||||
ext_debug("free last %u blocks starting %llu partial %lld\n",
|
||||
@ -2549,21 +2551,24 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
|
||||
* beginning of a cluster, and we removed the entire
|
||||
* extent and the cluster is not used by any other extent,
|
||||
* save the partial cluster here, since we might need to
|
||||
* delete if we determine that the truncate operation has
|
||||
* removed all of the blocks in the cluster.
|
||||
* delete if we determine that the truncate or punch hole
|
||||
* operation has removed all of the blocks in the cluster.
|
||||
* If that cluster is used by another extent, preserve its
|
||||
* negative value so it isn't freed later on.
|
||||
*
|
||||
* On the other hand, if we did not manage to free the whole
|
||||
* extent, we have to mark the cluster as used (store negative
|
||||
* cluster number in partial_cluster).
|
||||
* If the whole extent wasn't freed, we've reached the
|
||||
* start of the truncated/punched region and have finished
|
||||
* removing blocks. If there's a partial cluster here it's
|
||||
* shared with the remainder of the extent and is no longer
|
||||
* a candidate for removal.
|
||||
*/
|
||||
unaligned = EXT4_PBLK_COFF(sbi, pblk);
|
||||
if (unaligned && (ee_len == num) &&
|
||||
(*partial_cluster != -((long long)EXT4_B2C(sbi, pblk))))
|
||||
*partial_cluster = EXT4_B2C(sbi, pblk);
|
||||
else if (unaligned)
|
||||
*partial_cluster = -((long long)EXT4_B2C(sbi, pblk));
|
||||
else if (*partial_cluster > 0)
|
||||
if (EXT4_PBLK_COFF(sbi, pblk) && ee_len == num) {
|
||||
first_cluster = (long long) EXT4_B2C(sbi, pblk);
|
||||
if (first_cluster != -*partial_cluster)
|
||||
*partial_cluster = first_cluster;
|
||||
} else {
|
||||
*partial_cluster = 0;
|
||||
}
|
||||
} else
|
||||
ext4_error(sbi->s_sb, "strange request: removal(2) "
|
||||
"%u-%u from %u:%u\n",
|
||||
@ -2574,15 +2579,16 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
|
||||
|
||||
/*
|
||||
* ext4_ext_rm_leaf() Removes the extents associated with the
|
||||
* blocks appearing between "start" and "end", and splits the extents
|
||||
* if "start" and "end" appear in the same extent
|
||||
* blocks appearing between "start" and "end". Both "start"
|
||||
* and "end" must appear in the same extent or EIO is returned.
|
||||
*
|
||||
* @handle: The journal handle
|
||||
* @inode: The files inode
|
||||
* @path: The path to the leaf
|
||||
* @partial_cluster: The cluster which we'll have to free if all extents
|
||||
* has been released from it. It gets negative in case
|
||||
* that the cluster is still used.
|
||||
* has been released from it. However, if this value is
|
||||
* negative, it's a cluster just to the right of the
|
||||
* punched region and it must not be freed.
|
||||
* @start: The first block to remove
|
||||
* @end: The last block to remove
|
||||
*/
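The @partial_cluster convention documented above (a positive value names a cluster that may still need freeing, a negative value marks one that is known to be shared and must be kept) leans on bigalloc's block/cluster arithmetic. The following stand-alone sketch mirrors what EXT4_B2C() and EXT4_PBLK_COFF() compute for a power-of-two cluster ratio; the function names and the worked numbers are illustrative only, not the kernel macros.

```c
#include <stdio.h>

/* With a cluster ratio of 2^cluster_bits blocks per cluster: */
static unsigned long long block_to_cluster(unsigned long long pblk, int cluster_bits)
{
	return pblk >> cluster_bits;			/* ~ EXT4_B2C() */
}

static unsigned long long block_cluster_offset(unsigned long long pblk, int cluster_bits)
{
	return pblk & ((1ULL << cluster_bits) - 1);	/* ~ EXT4_PBLK_COFF() */
}

int main(void)
{
	int bits = 4;			/* 16 blocks per cluster */
	unsigned long long pblk = 1000;

	/* Block 1000 lives in cluster 62 at offset 8, so an extent starting there
	 * shares the front of cluster 62 with earlier blocks: the caller records
	 * it as -62 ("in use, keep"), not +62 ("candidate for freeing"). */
	printf("cluster %llu offset %llu\n",
	       block_to_cluster(pblk, bits), block_cluster_offset(pblk, bits));
	return 0;
}
```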
|
||||
@ -2621,27 +2627,6 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
|
||||
ex_ee_block = le32_to_cpu(ex->ee_block);
|
||||
ex_ee_len = ext4_ext_get_actual_len(ex);
|
||||
|
||||
/*
|
||||
* If we're starting with an extent other than the last one in the
|
||||
* node, we need to see if it shares a cluster with the extent to
|
||||
* the right (towards the end of the file). If its leftmost cluster
|
||||
* is this extent's rightmost cluster and it is not cluster aligned,
|
||||
* we'll mark it as a partial that is not to be deallocated.
|
||||
*/
|
||||
|
||||
if (ex != EXT_LAST_EXTENT(eh)) {
|
||||
ext4_fsblk_t current_pblk, right_pblk;
|
||||
long long current_cluster, right_cluster;
|
||||
|
||||
current_pblk = ext4_ext_pblock(ex) + ex_ee_len - 1;
|
||||
current_cluster = (long long)EXT4_B2C(sbi, current_pblk);
|
||||
right_pblk = ext4_ext_pblock(ex + 1);
|
||||
right_cluster = (long long)EXT4_B2C(sbi, right_pblk);
|
||||
if (current_cluster == right_cluster &&
|
||||
EXT4_PBLK_COFF(sbi, right_pblk))
|
||||
*partial_cluster = -right_cluster;
|
||||
}
|
||||
|
||||
trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster);
|
||||
|
||||
while (ex >= EXT_FIRST_EXTENT(eh) &&
|
||||
@ -2666,14 +2651,16 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
|
||||
if (end < ex_ee_block) {
|
||||
/*
|
||||
* We're going to skip this extent and move to another,
|
||||
* so if this extent is not cluster aligned we have
|
||||
* to mark the current cluster as used to avoid
|
||||
* accidentally freeing it later on
|
||||
* so note that its first cluster is in use to avoid
|
||||
* freeing it when removing blocks. Eventually, the
|
||||
* right edge of the truncated/punched region will
|
||||
* be just to the left.
|
||||
*/
|
||||
pblk = ext4_ext_pblock(ex);
|
||||
if (EXT4_PBLK_COFF(sbi, pblk))
|
||||
if (sbi->s_cluster_ratio > 1) {
|
||||
pblk = ext4_ext_pblock(ex);
|
||||
*partial_cluster =
|
||||
-((long long)EXT4_B2C(sbi, pblk));
|
||||
-(long long) EXT4_B2C(sbi, pblk);
|
||||
}
|
||||
ex--;
|
||||
ex_ee_block = le32_to_cpu(ex->ee_block);
|
||||
ex_ee_len = ext4_ext_get_actual_len(ex);
|
||||
@ -2749,8 +2736,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
|
||||
sizeof(struct ext4_extent));
|
||||
}
|
||||
le16_add_cpu(&eh->eh_entries, -1);
|
||||
} else if (*partial_cluster > 0)
|
||||
*partial_cluster = 0;
|
||||
}
|
||||
|
||||
err = ext4_ext_dirty(handle, inode, path + depth);
|
||||
if (err)
|
||||
@ -2769,20 +2755,18 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
|
||||
/*
|
||||
* If there's a partial cluster and at least one extent remains in
|
||||
* the leaf, free the partial cluster if it isn't shared with the
|
||||
* current extent. If there's a partial cluster and no extents
|
||||
* remain in the leaf, it can't be freed here. It can only be
|
||||
* freed when it's possible to determine if it's not shared with
|
||||
* any other extent - when the next leaf is processed or when space
|
||||
* removal is complete.
|
||||
* current extent. If it is shared with the current extent
|
||||
* we zero partial_cluster because we've reached the start of the
|
||||
* truncated/punched region and we're done removing blocks.
|
||||
*/
|
||||
if (*partial_cluster > 0 && eh->eh_entries &&
|
||||
(EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) !=
|
||||
*partial_cluster)) {
|
||||
int flags = get_default_free_blocks_flags(inode);
|
||||
|
||||
ext4_free_blocks(handle, inode, NULL,
|
||||
EXT4_C2B(sbi, *partial_cluster),
|
||||
sbi->s_cluster_ratio, flags);
|
||||
if (*partial_cluster > 0 && ex >= EXT_FIRST_EXTENT(eh)) {
|
||||
pblk = ext4_ext_pblock(ex) + ex_ee_len - 1;
|
||||
if (*partial_cluster != (long long) EXT4_B2C(sbi, pblk)) {
|
||||
ext4_free_blocks(handle, inode, NULL,
|
||||
EXT4_C2B(sbi, *partial_cluster),
|
||||
sbi->s_cluster_ratio,
|
||||
get_default_free_blocks_flags(inode));
|
||||
}
|
||||
*partial_cluster = 0;
|
||||
}
|
||||
|
||||
@ -2819,7 +2803,7 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path)
|
||||
int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
|
||||
ext4_lblk_t end)
|
||||
{
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||
int depth = ext_depth(inode);
|
||||
struct ext4_ext_path *path = NULL;
|
||||
long long partial_cluster = 0;
|
||||
@ -2845,9 +2829,10 @@ again:
|
||||
*/
|
||||
if (end < EXT_MAX_BLOCKS - 1) {
|
||||
struct ext4_extent *ex;
|
||||
ext4_lblk_t ee_block;
|
||||
ext4_lblk_t ee_block, ex_end, lblk;
|
||||
ext4_fsblk_t pblk;
|
||||
|
||||
/* find extent for this block */
|
||||
/* find extent for or closest extent to this block */
|
||||
path = ext4_find_extent(inode, end, NULL, EXT4_EX_NOCACHE);
|
||||
if (IS_ERR(path)) {
|
||||
ext4_journal_stop(handle);
|
||||
@ -2867,6 +2852,7 @@ again:
|
||||
}
|
||||
|
||||
ee_block = le32_to_cpu(ex->ee_block);
|
||||
ex_end = ee_block + ext4_ext_get_actual_len(ex) - 1;
|
||||
|
||||
/*
|
||||
* See if the last block is inside the extent, if so split
|
||||
@ -2874,8 +2860,19 @@ again:
|
||||
* tail of the first part of the split extent in
|
||||
* ext4_ext_rm_leaf().
|
||||
*/
|
||||
if (end >= ee_block &&
|
||||
end < ee_block + ext4_ext_get_actual_len(ex) - 1) {
|
||||
if (end >= ee_block && end < ex_end) {
|
||||
|
||||
/*
|
||||
* If we're going to split the extent, note that
|
||||
* the cluster containing the block after 'end' is
|
||||
* in use to avoid freeing it when removing blocks.
|
||||
*/
|
||||
if (sbi->s_cluster_ratio > 1) {
|
||||
pblk = ext4_ext_pblock(ex) + end - ee_block + 2;
|
||||
partial_cluster =
|
||||
-(long long) EXT4_B2C(sbi, pblk);
|
||||
}
|
||||
|
||||
/*
|
||||
* Split the extent in two so that 'end' is the last
|
||||
* block in the first new extent. Also we should not
|
||||
@ -2886,6 +2883,24 @@ again:
|
||||
end + 1, 1);
|
||||
if (err < 0)
|
||||
goto out;
|
||||
|
||||
} else if (sbi->s_cluster_ratio > 1 && end >= ex_end) {
|
||||
/*
|
||||
* If there's an extent to the right its first cluster
|
||||
* contains the immediate right boundary of the
|
||||
* truncated/punched region. Set partial_cluster to
|
||||
* its negative value so it won't be freed if shared
|
||||
* with the current extent. The end < ee_block case
|
||||
* is handled in ext4_ext_rm_leaf().
|
||||
*/
|
||||
lblk = ex_end + 1;
|
||||
err = ext4_ext_search_right(inode, path, &lblk, &pblk,
|
||||
&ex);
|
||||
if (err)
|
||||
goto out;
|
||||
if (pblk)
|
||||
partial_cluster =
|
||||
-(long long) EXT4_B2C(sbi, pblk);
|
||||
}
|
||||
}
|
||||
/*
|
||||
@ -2996,16 +3011,18 @@ again:
|
||||
trace_ext4_ext_remove_space_done(inode, start, end, depth,
|
||||
partial_cluster, path->p_hdr->eh_entries);
|
||||
|
||||
/* If we still have something in the partial cluster and we have removed
|
||||
/*
|
||||
* If we still have something in the partial cluster and we have removed
|
||||
* even the first extent, then we should free the blocks in the partial
|
||||
* cluster as well. */
|
||||
if (partial_cluster > 0 && path->p_hdr->eh_entries == 0) {
|
||||
int flags = get_default_free_blocks_flags(inode);
|
||||
|
||||
* cluster as well. (This code will only run when there are no leaves
|
||||
* to the immediate left of the truncated/punched region.)
|
||||
*/
|
||||
if (partial_cluster > 0 && err == 0) {
|
||||
/* don't zero partial_cluster since it's not used afterwards */
|
||||
ext4_free_blocks(handle, inode, NULL,
|
||||
EXT4_C2B(EXT4_SB(sb), partial_cluster),
|
||||
EXT4_SB(sb)->s_cluster_ratio, flags);
|
||||
partial_cluster = 0;
|
||||
EXT4_C2B(sbi, partial_cluster),
|
||||
sbi->s_cluster_ratio,
|
||||
get_default_free_blocks_flags(inode));
|
||||
}
|
||||
|
||||
/* TODO: flexible tree reduction should be here */
|
||||
@ -4267,6 +4284,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
|
||||
ext4_io_end_t *io = ext4_inode_aio(inode);
|
||||
ext4_lblk_t cluster_offset;
|
||||
int set_unwritten = 0;
|
||||
bool map_from_cluster = false;
|
||||
|
||||
ext_debug("blocks %u/%u requested for inode %lu\n",
|
||||
map->m_lblk, map->m_len, inode->i_ino);
|
||||
@ -4343,10 +4361,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
|
||||
}
|
||||
}
|
||||
|
||||
if ((sbi->s_cluster_ratio > 1) &&
|
||||
ext4_find_delalloc_cluster(inode, map->m_lblk))
|
||||
map->m_flags |= EXT4_MAP_FROM_CLUSTER;
|
||||
|
||||
/*
|
||||
* requested block isn't allocated yet;
|
||||
* we couldn't try to create block if create flag is zero
|
||||
@ -4356,15 +4370,13 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
|
||||
* put just found gap into cache to speed up
|
||||
* subsequent requests
|
||||
*/
|
||||
if ((flags & EXT4_GET_BLOCKS_NO_PUT_HOLE) == 0)
|
||||
ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
|
||||
ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
|
||||
goto out2;
|
||||
}
|
||||
|
||||
/*
|
||||
* Okay, we need to do block allocation.
|
||||
*/
|
||||
map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
|
||||
newex.ee_block = cpu_to_le32(map->m_lblk);
|
||||
cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
|
||||
|
||||
@ -4376,7 +4388,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
|
||||
get_implied_cluster_alloc(inode->i_sb, map, ex, path)) {
|
||||
ar.len = allocated = map->m_len;
|
||||
newblock = map->m_pblk;
|
||||
map->m_flags |= EXT4_MAP_FROM_CLUSTER;
|
||||
map_from_cluster = true;
|
||||
goto got_allocated_blocks;
|
||||
}
|
||||
|
||||
@ -4397,7 +4409,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
|
||||
get_implied_cluster_alloc(inode->i_sb, map, ex2, path)) {
|
||||
ar.len = allocated = map->m_len;
|
||||
newblock = map->m_pblk;
|
||||
map->m_flags |= EXT4_MAP_FROM_CLUSTER;
|
||||
map_from_cluster = true;
|
||||
goto got_allocated_blocks;
|
||||
}
|
||||
|
||||
@ -4523,7 +4535,7 @@ got_allocated_blocks:
|
||||
*/
|
||||
reserved_clusters = get_reserved_cluster_alloc(inode,
|
||||
map->m_lblk, allocated);
|
||||
if (map->m_flags & EXT4_MAP_FROM_CLUSTER) {
|
||||
if (map_from_cluster) {
|
||||
if (reserved_clusters) {
|
||||
/*
|
||||
* We have clusters reserved for this range.
|
||||
@ -4620,7 +4632,6 @@ out2:
|
||||
|
||||
trace_ext4_ext_map_blocks_exit(inode, flags, map,
|
||||
err ? err : allocated);
|
||||
ext4_es_lru_add(inode);
|
||||
return err ? err : allocated;
|
||||
}
|
||||
|
||||
@ -5140,7 +5151,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
||||
if (ext4_has_inline_data(inode)) {
|
||||
int has_inline = 1;
|
||||
|
||||
error = ext4_inline_data_fiemap(inode, fieinfo, &has_inline);
|
||||
error = ext4_inline_data_fiemap(inode, fieinfo, &has_inline,
|
||||
start, len);
|
||||
|
||||
if (has_inline)
|
||||
return error;
|
||||
@ -5154,8 +5166,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
||||
|
||||
/* fallback to generic here if not in extents fmt */
|
||||
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
|
||||
return generic_block_fiemap(inode, fieinfo, start, len,
|
||||
ext4_get_block);
|
||||
return __generic_block_fiemap(inode, fieinfo, start, len,
|
||||
ext4_get_block);
|
||||
|
||||
if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS))
|
||||
return -EBADR;
|
||||
@ -5179,7 +5191,6 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
||||
error = ext4_fill_fiemap_extents(inode, start_blk,
|
||||
len_blks, fieinfo);
|
||||
}
|
||||
ext4_es_lru_add(inode);
|
||||
return error;
|
||||
}
|
||||
|
||||
@ -5239,8 +5250,6 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
|
||||
return -EIO;
|
||||
|
||||
ex_last = EXT_LAST_EXTENT(path[depth].p_hdr);
|
||||
if (!ex_last)
|
||||
return -EIO;
|
||||
|
||||
err = ext4_access_path(handle, inode, path + depth);
|
||||
if (err)
|
||||
|
@ -147,10 +147,9 @@ static struct kmem_cache *ext4_es_cachep;
|
||||
static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
|
||||
static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||
ext4_lblk_t end);
|
||||
static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
|
||||
int nr_to_scan);
|
||||
static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
|
||||
struct ext4_inode_info *locked_ei);
|
||||
static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan);
|
||||
static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
|
||||
struct ext4_inode_info *locked_ei);
|
||||
|
||||
int __init ext4_init_es(void)
|
||||
{
|
||||
@ -298,6 +297,36 @@ out:
|
||||
trace_ext4_es_find_delayed_extent_range_exit(inode, es);
|
||||
}
|
||||
|
||||
static void ext4_es_list_add(struct inode *inode)
|
||||
{
|
||||
struct ext4_inode_info *ei = EXT4_I(inode);
|
||||
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||
|
||||
if (!list_empty(&ei->i_es_list))
|
||||
return;
|
||||
|
||||
spin_lock(&sbi->s_es_lock);
|
||||
if (list_empty(&ei->i_es_list)) {
|
||||
list_add_tail(&ei->i_es_list, &sbi->s_es_list);
|
||||
sbi->s_es_nr_inode++;
|
||||
}
|
||||
spin_unlock(&sbi->s_es_lock);
|
||||
}
|
||||
|
||||
static void ext4_es_list_del(struct inode *inode)
|
||||
{
|
||||
struct ext4_inode_info *ei = EXT4_I(inode);
|
||||
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||
|
||||
spin_lock(&sbi->s_es_lock);
|
||||
if (!list_empty(&ei->i_es_list)) {
|
||||
list_del_init(&ei->i_es_list);
|
||||
sbi->s_es_nr_inode--;
|
||||
WARN_ON_ONCE(sbi->s_es_nr_inode < 0);
|
||||
}
|
||||
spin_unlock(&sbi->s_es_lock);
|
||||
}
|
||||
|
||||
static struct extent_status *
|
||||
ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
|
||||
ext4_fsblk_t pblk)
|
||||
@ -314,9 +343,10 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
|
||||
* We don't count delayed extent because we never try to reclaim them
|
||||
*/
|
||||
if (!ext4_es_is_delayed(es)) {
|
||||
EXT4_I(inode)->i_es_lru_nr++;
|
||||
if (!EXT4_I(inode)->i_es_shk_nr++)
|
||||
ext4_es_list_add(inode);
|
||||
percpu_counter_inc(&EXT4_SB(inode->i_sb)->
|
||||
s_es_stats.es_stats_lru_cnt);
|
||||
s_es_stats.es_stats_shk_cnt);
|
||||
}
|
||||
|
||||
EXT4_I(inode)->i_es_all_nr++;
|
||||
@ -330,12 +360,13 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
|
||||
EXT4_I(inode)->i_es_all_nr--;
|
||||
percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
|
||||
|
||||
/* Decrease the lru counter when this es is not delayed */
|
||||
/* Decrease the shrink counter when this es is not delayed */
|
||||
if (!ext4_es_is_delayed(es)) {
|
||||
BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0);
|
||||
EXT4_I(inode)->i_es_lru_nr--;
|
||||
BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0);
|
||||
if (!--EXT4_I(inode)->i_es_shk_nr)
|
||||
ext4_es_list_del(inode);
|
||||
percpu_counter_dec(&EXT4_SB(inode->i_sb)->
|
||||
s_es_stats.es_stats_lru_cnt);
|
||||
s_es_stats.es_stats_shk_cnt);
|
||||
}
|
||||
|
||||
kmem_cache_free(ext4_es_cachep, es);
|
||||
@ -351,7 +382,7 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
|
||||
static int ext4_es_can_be_merged(struct extent_status *es1,
|
||||
struct extent_status *es2)
|
||||
{
|
||||
if (ext4_es_status(es1) != ext4_es_status(es2))
|
||||
if (ext4_es_type(es1) != ext4_es_type(es2))
|
||||
return 0;
|
||||
|
||||
if (((__u64) es1->es_len) + es2->es_len > EXT_MAX_BLOCKS) {
|
||||
@ -394,6 +425,8 @@ ext4_es_try_to_merge_left(struct inode *inode, struct extent_status *es)
|
||||
es1 = rb_entry(node, struct extent_status, rb_node);
|
||||
if (ext4_es_can_be_merged(es1, es)) {
|
||||
es1->es_len += es->es_len;
|
||||
if (ext4_es_is_referenced(es))
|
||||
ext4_es_set_referenced(es1);
|
||||
rb_erase(&es->rb_node, &tree->root);
|
||||
ext4_es_free_extent(inode, es);
|
||||
es = es1;
|
||||
@ -416,6 +449,8 @@ ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es)
|
||||
es1 = rb_entry(node, struct extent_status, rb_node);
|
||||
if (ext4_es_can_be_merged(es, es1)) {
|
||||
es->es_len += es1->es_len;
|
||||
if (ext4_es_is_referenced(es1))
|
||||
ext4_es_set_referenced(es);
|
||||
rb_erase(node, &tree->root);
|
||||
ext4_es_free_extent(inode, es1);
|
||||
}
|
||||
@ -683,8 +718,8 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||
goto error;
|
||||
retry:
|
||||
err = __es_insert_extent(inode, &newes);
|
||||
if (err == -ENOMEM && __ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
|
||||
EXT4_I(inode)))
|
||||
if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb),
|
||||
128, EXT4_I(inode)))
|
||||
goto retry;
|
||||
if (err == -ENOMEM && !ext4_es_is_delayed(&newes))
|
||||
err = 0;
|
||||
@ -782,6 +817,8 @@ out:
|
||||
es->es_lblk = es1->es_lblk;
|
||||
es->es_len = es1->es_len;
|
||||
es->es_pblk = es1->es_pblk;
|
||||
if (!ext4_es_is_referenced(es))
|
||||
ext4_es_set_referenced(es);
|
||||
stats->es_stats_cache_hits++;
|
||||
} else {
|
||||
stats->es_stats_cache_misses++;
|
||||
@ -841,8 +878,8 @@ retry:
|
||||
es->es_lblk = orig_es.es_lblk;
|
||||
es->es_len = orig_es.es_len;
|
||||
if ((err == -ENOMEM) &&
|
||||
__ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
|
||||
EXT4_I(inode)))
|
||||
__es_shrink(EXT4_SB(inode->i_sb),
|
||||
128, EXT4_I(inode)))
|
||||
goto retry;
|
||||
goto out;
|
||||
}
|
||||
@ -914,6 +951,11 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||
end = lblk + len - 1;
|
||||
BUG_ON(end < lblk);
|
||||
|
||||
/*
|
||||
* ext4_clear_inode() depends on us taking i_es_lock unconditionally
|
||||
* so that we are sure __es_shrink() is done with the inode before it
|
||||
* is reclaimed.
|
||||
*/
|
||||
write_lock(&EXT4_I(inode)->i_es_lock);
|
||||
err = __es_remove_extent(inode, lblk, end);
|
||||
write_unlock(&EXT4_I(inode)->i_es_lock);
|
||||
@ -921,114 +963,75 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a,
|
||||
struct list_head *b)
|
||||
{
|
||||
struct ext4_inode_info *eia, *eib;
|
||||
eia = list_entry(a, struct ext4_inode_info, i_es_lru);
|
||||
eib = list_entry(b, struct ext4_inode_info, i_es_lru);
|
||||
|
||||
if (ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
|
||||
!ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
|
||||
return 1;
|
||||
if (!ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
|
||||
ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
|
||||
return -1;
|
||||
if (eia->i_touch_when == eib->i_touch_when)
|
||||
return 0;
|
||||
if (time_after(eia->i_touch_when, eib->i_touch_when))
|
||||
return 1;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
|
||||
struct ext4_inode_info *locked_ei)
|
||||
static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
|
||||
struct ext4_inode_info *locked_ei)
|
||||
{
|
||||
struct ext4_inode_info *ei;
|
||||
struct ext4_es_stats *es_stats;
|
||||
struct list_head *cur, *tmp;
|
||||
LIST_HEAD(skipped);
|
||||
ktime_t start_time;
|
||||
u64 scan_time;
|
||||
int nr_to_walk;
|
||||
int nr_shrunk = 0;
|
||||
int retried = 0, skip_precached = 1, nr_skipped = 0;
|
||||
int retried = 0, nr_skipped = 0;
|
||||
|
||||
es_stats = &sbi->s_es_stats;
|
||||
start_time = ktime_get();
|
||||
spin_lock(&sbi->s_es_lru_lock);
|
||||
|
||||
retry:
|
||||
list_for_each_safe(cur, tmp, &sbi->s_es_lru) {
|
||||
int shrunk;
|
||||
spin_lock(&sbi->s_es_lock);
|
||||
nr_to_walk = sbi->s_es_nr_inode;
|
||||
while (nr_to_walk-- > 0) {
|
||||
if (list_empty(&sbi->s_es_list)) {
|
||||
spin_unlock(&sbi->s_es_lock);
|
||||
goto out;
|
||||
}
|
||||
ei = list_first_entry(&sbi->s_es_list, struct ext4_inode_info,
|
||||
i_es_list);
|
||||
/* Move the inode to the tail */
|
||||
list_move_tail(&ei->i_es_list, &sbi->s_es_list);
|
||||
|
||||
/*
|
||||
* If we have already reclaimed all extents from extent
|
||||
* status tree, just stop the loop immediately.
|
||||
* Normally we try hard to avoid shrinking precached inodes,
|
||||
* but we will as a last resort.
|
||||
*/
|
||||
if (percpu_counter_read_positive(
|
||||
&es_stats->es_stats_lru_cnt) == 0)
|
||||
break;
|
||||
|
||||
ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
|
||||
|
||||
/*
|
||||
* Skip the inode that is newer than the last_sorted
|
||||
* time. Normally we try hard to avoid shrinking
|
||||
* precached inodes, but we will as a last resort.
|
||||
*/
|
||||
if ((es_stats->es_stats_last_sorted < ei->i_touch_when) ||
|
||||
(skip_precached && ext4_test_inode_state(&ei->vfs_inode,
|
||||
EXT4_STATE_EXT_PRECACHED))) {
|
||||
if (!retried && ext4_test_inode_state(&ei->vfs_inode,
|
||||
EXT4_STATE_EXT_PRECACHED)) {
|
||||
nr_skipped++;
|
||||
list_move_tail(cur, &skipped);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ei->i_es_lru_nr == 0 || ei == locked_ei ||
|
||||
!write_trylock(&ei->i_es_lock))
|
||||
if (ei == locked_ei || !write_trylock(&ei->i_es_lock)) {
|
||||
nr_skipped++;
|
||||
continue;
|
||||
}
|
||||
/*
|
||||
* Now we hold i_es_lock which protects us from inode reclaim
|
||||
* freeing inode under us
|
||||
*/
|
||||
spin_unlock(&sbi->s_es_lock);
|
||||
|
||||
shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan);
|
||||
if (ei->i_es_lru_nr == 0)
|
||||
list_del_init(&ei->i_es_lru);
|
||||
nr_shrunk += es_reclaim_extents(ei, &nr_to_scan);
|
||||
write_unlock(&ei->i_es_lock);
|
||||
|
||||
nr_shrunk += shrunk;
|
||||
nr_to_scan -= shrunk;
|
||||
if (nr_to_scan == 0)
|
||||
break;
|
||||
if (nr_to_scan <= 0)
|
||||
goto out;
|
||||
spin_lock(&sbi->s_es_lock);
|
||||
}
|
||||
|
||||
/* Move the newer inodes into the tail of the LRU list. */
|
||||
list_splice_tail(&skipped, &sbi->s_es_lru);
|
||||
INIT_LIST_HEAD(&skipped);
|
||||
spin_unlock(&sbi->s_es_lock);
|
||||
|
||||
/*
|
||||
* If we skipped any inodes, and we weren't able to make any
|
||||
* forward progress, sort the list and try again.
|
||||
* forward progress, try again to scan precached inodes.
|
||||
*/
|
||||
if ((nr_shrunk == 0) && nr_skipped && !retried) {
|
||||
retried++;
|
||||
list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp);
|
||||
es_stats->es_stats_last_sorted = jiffies;
|
||||
ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info,
|
||||
i_es_lru);
|
||||
/*
|
||||
* If there are no non-precached inodes left on the
|
||||
* list, start releasing precached extents.
|
||||
*/
|
||||
if (ext4_test_inode_state(&ei->vfs_inode,
|
||||
EXT4_STATE_EXT_PRECACHED))
|
||||
skip_precached = 0;
|
||||
goto retry;
|
||||
}
|
||||
|
||||
spin_unlock(&sbi->s_es_lru_lock);
|
||||
|
||||
if (locked_ei && nr_shrunk == 0)
|
||||
nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan);
|
||||
nr_shrunk = es_reclaim_extents(locked_ei, &nr_to_scan);
|
||||
|
||||
out:
|
||||
scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
|
||||
if (likely(es_stats->es_stats_scan_time))
|
||||
es_stats->es_stats_scan_time = (scan_time +
|
||||
@ -1043,7 +1046,7 @@ retry:
|
||||
else
|
||||
es_stats->es_stats_shrunk = nr_shrunk;
|
||||
|
||||
trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time, skip_precached,
|
||||
trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time,
|
||||
nr_skipped, retried);
|
||||
return nr_shrunk;
|
||||
}
|
||||
@ -1055,7 +1058,7 @@ static unsigned long ext4_es_count(struct shrinker *shrink,
|
||||
struct ext4_sb_info *sbi;
|
||||
|
||||
sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker);
|
||||
nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt);
|
||||
nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
|
||||
trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr);
|
||||
return nr;
|
||||
}
|
||||
@ -1068,13 +1071,13 @@ static unsigned long ext4_es_scan(struct shrinker *shrink,
|
||||
int nr_to_scan = sc->nr_to_scan;
|
||||
int ret, nr_shrunk;
|
||||
|
||||
ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt);
|
||||
ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
|
||||
trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret);
|
||||
|
||||
if (!nr_to_scan)
|
||||
return ret;
|
||||
|
||||
nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL);
|
||||
nr_shrunk = __es_shrink(sbi, nr_to_scan, NULL);
|
||||
|
||||
trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret);
|
||||
return nr_shrunk;
|
||||
@ -1102,28 +1105,24 @@ static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v)
|
||||
return 0;
|
||||
|
||||
/* here we just find an inode that has the max nr. of objects */
|
||||
spin_lock(&sbi->s_es_lru_lock);
|
||||
list_for_each_entry(ei, &sbi->s_es_lru, i_es_lru) {
|
||||
spin_lock(&sbi->s_es_lock);
|
||||
list_for_each_entry(ei, &sbi->s_es_list, i_es_list) {
|
||||
inode_cnt++;
|
||||
if (max && max->i_es_all_nr < ei->i_es_all_nr)
|
||||
max = ei;
|
||||
else if (!max)
|
||||
max = ei;
|
||||
}
|
||||
spin_unlock(&sbi->s_es_lru_lock);
|
||||
spin_unlock(&sbi->s_es_lock);
|
||||
|
||||
seq_printf(seq, "stats:\n %lld objects\n %lld reclaimable objects\n",
|
||||
percpu_counter_sum_positive(&es_stats->es_stats_all_cnt),
|
||||
percpu_counter_sum_positive(&es_stats->es_stats_lru_cnt));
|
||||
percpu_counter_sum_positive(&es_stats->es_stats_shk_cnt));
|
||||
seq_printf(seq, " %lu/%lu cache hits/misses\n",
|
||||
es_stats->es_stats_cache_hits,
|
||||
es_stats->es_stats_cache_misses);
|
||||
if (es_stats->es_stats_last_sorted != 0)
|
||||
seq_printf(seq, " %u ms last sorted interval\n",
|
||||
jiffies_to_msecs(jiffies -
|
||||
es_stats->es_stats_last_sorted));
|
||||
if (inode_cnt)
|
||||
seq_printf(seq, " %d inodes on lru list\n", inode_cnt);
|
||||
seq_printf(seq, " %d inodes on list\n", inode_cnt);
|
||||
|
||||
seq_printf(seq, "average:\n %llu us scan time\n",
|
||||
div_u64(es_stats->es_stats_scan_time, 1000));
|
||||
@ -1132,7 +1131,7 @@ static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v)
|
||||
seq_printf(seq,
|
||||
"maximum:\n %lu inode (%u objects, %u reclaimable)\n"
|
||||
" %llu us max scan time\n",
|
||||
max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_lru_nr,
|
||||
max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_shk_nr,
|
||||
div_u64(es_stats->es_stats_max_scan_time, 1000));
|
||||
|
||||
return 0;
|
||||
@ -1181,9 +1180,11 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
|
||||
{
|
||||
int err;
|
||||
|
||||
INIT_LIST_HEAD(&sbi->s_es_lru);
|
||||
spin_lock_init(&sbi->s_es_lru_lock);
|
||||
sbi->s_es_stats.es_stats_last_sorted = 0;
|
||||
/* Make sure we have enough bits for physical block number */
|
||||
BUILD_BUG_ON(ES_SHIFT < 48);
|
||||
INIT_LIST_HEAD(&sbi->s_es_list);
|
||||
sbi->s_es_nr_inode = 0;
|
||||
spin_lock_init(&sbi->s_es_lock);
|
||||
sbi->s_es_stats.es_stats_shrunk = 0;
|
||||
sbi->s_es_stats.es_stats_cache_hits = 0;
|
||||
sbi->s_es_stats.es_stats_cache_misses = 0;
|
||||
@ -1192,7 +1193,7 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
|
||||
err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL);
|
||||
if (err)
|
||||
return err;
|
||||
err = percpu_counter_init(&sbi->s_es_stats.es_stats_lru_cnt, 0, GFP_KERNEL);
|
||||
err = percpu_counter_init(&sbi->s_es_stats.es_stats_shk_cnt, 0, GFP_KERNEL);
|
||||
if (err)
|
||||
goto err1;
|
||||
|
||||
@ -1210,7 +1211,7 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
|
||||
return 0;
|
||||
|
||||
err2:
|
||||
percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt);
|
||||
percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
|
||||
err1:
|
||||
percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
|
||||
return err;
|
||||
@ -1221,71 +1222,83 @@ void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
|
||||
if (sbi->s_proc)
|
||||
remove_proc_entry("es_shrinker_info", sbi->s_proc);
|
||||
percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
|
||||
percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt);
|
||||
percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
|
||||
unregister_shrinker(&sbi->s_es_shrinker);
|
||||
}
|
||||
|
||||
void ext4_es_lru_add(struct inode *inode)
|
||||
{
|
||||
struct ext4_inode_info *ei = EXT4_I(inode);
|
||||
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||
|
||||
ei->i_touch_when = jiffies;
|
||||
|
||||
if (!list_empty(&ei->i_es_lru))
|
||||
return;
|
||||
|
||||
spin_lock(&sbi->s_es_lru_lock);
|
||||
if (list_empty(&ei->i_es_lru))
|
||||
list_add_tail(&ei->i_es_lru, &sbi->s_es_lru);
|
||||
spin_unlock(&sbi->s_es_lru_lock);
|
||||
}
|
||||
|
||||
void ext4_es_lru_del(struct inode *inode)
|
||||
{
|
||||
struct ext4_inode_info *ei = EXT4_I(inode);
|
||||
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||
|
||||
spin_lock(&sbi->s_es_lru_lock);
|
||||
if (!list_empty(&ei->i_es_lru))
|
||||
list_del_init(&ei->i_es_lru);
|
||||
spin_unlock(&sbi->s_es_lru_lock);
|
||||
}
|
||||
|
||||
static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
|
||||
int nr_to_scan)
|
||||
/*
|
||||
* Shrink extents in given inode from ei->i_es_shrink_lblk till end. Scan at
|
||||
* most *nr_to_scan extents, update *nr_to_scan accordingly.
|
||||
*
|
||||
* Return 0 if we hit end of tree / interval, 1 if we exhausted nr_to_scan.
|
||||
* Increment *nr_shrunk by the number of reclaimed extents. Also update
|
||||
* ei->i_es_shrink_lblk to where we should continue scanning.
|
||||
*/
|
||||
static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end,
|
||||
int *nr_to_scan, int *nr_shrunk)
|
||||
{
|
||||
struct inode *inode = &ei->vfs_inode;
|
||||
struct ext4_es_tree *tree = &ei->i_es_tree;
|
||||
struct rb_node *node;
|
||||
struct extent_status *es;
|
||||
unsigned long nr_shrunk = 0;
|
||||
struct rb_node *node;
|
||||
|
||||
es = __es_tree_search(&tree->root, ei->i_es_shrink_lblk);
|
||||
if (!es)
|
||||
goto out_wrap;
|
||||
node = &es->rb_node;
|
||||
while (*nr_to_scan > 0) {
|
||||
if (es->es_lblk > end) {
|
||||
ei->i_es_shrink_lblk = end + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
(*nr_to_scan)--;
|
||||
node = rb_next(&es->rb_node);
|
||||
/*
|
||||
* We can't reclaim delayed extent from status tree because
|
||||
* fiemap, bigallic, and seek_data/hole need to use it.
|
||||
*/
|
||||
if (ext4_es_is_delayed(es))
|
||||
goto next;
|
||||
if (ext4_es_is_referenced(es)) {
|
||||
ext4_es_clear_referenced(es);
|
||||
goto next;
|
||||
}
|
||||
|
||||
rb_erase(&es->rb_node, &tree->root);
|
||||
ext4_es_free_extent(inode, es);
|
||||
(*nr_shrunk)++;
|
||||
next:
|
||||
if (!node)
|
||||
goto out_wrap;
|
||||
es = rb_entry(node, struct extent_status, rb_node);
|
||||
}
|
||||
ei->i_es_shrink_lblk = es->es_lblk;
|
||||
return 1;
|
||||
out_wrap:
|
||||
ei->i_es_shrink_lblk = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan)
|
||||
{
|
||||
struct inode *inode = &ei->vfs_inode;
|
||||
int nr_shrunk = 0;
|
||||
ext4_lblk_t start = ei->i_es_shrink_lblk;
|
||||
static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
|
||||
DEFAULT_RATELIMIT_BURST);
|
||||
|
||||
if (ei->i_es_lru_nr == 0)
|
||||
if (ei->i_es_shk_nr == 0)
|
||||
return 0;
|
||||
|
||||
if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) &&
|
||||
__ratelimit(&_rs))
|
||||
ext4_warning(inode->i_sb, "forced shrink of precached extents");
|
||||
|
||||
node = rb_first(&tree->root);
|
||||
while (node != NULL) {
|
||||
es = rb_entry(node, struct extent_status, rb_node);
|
||||
node = rb_next(&es->rb_node);
|
||||
/*
|
||||
* We can't reclaim delayed extent from status tree because
|
||||
* fiemap, bigallic, and seek_data/hole need to use it.
|
||||
*/
|
||||
if (!ext4_es_is_delayed(es)) {
|
||||
rb_erase(&es->rb_node, &tree->root);
|
||||
ext4_es_free_extent(inode, es);
|
||||
nr_shrunk++;
|
||||
if (--nr_to_scan == 0)
|
||||
break;
|
||||
}
|
||||
}
|
||||
tree->cache_es = NULL;
|
||||
if (!es_do_reclaim_extents(ei, EXT_MAX_BLOCKS, nr_to_scan, &nr_shrunk) &&
|
||||
start != 0)
|
||||
es_do_reclaim_extents(ei, start - 1, nr_to_scan, &nr_shrunk);
|
||||
|
||||
ei->i_es_tree.cache_es = NULL;
|
||||
return nr_shrunk;
|
||||
}
|
||||
|
@ -29,25 +29,28 @@
|
||||
/*
|
||||
* These flags live in the high bits of extent_status.es_pblk
|
||||
*/
|
||||
#define ES_SHIFT 60
|
||||
enum {
|
||||
ES_WRITTEN_B,
|
||||
ES_UNWRITTEN_B,
|
||||
ES_DELAYED_B,
|
||||
ES_HOLE_B,
|
||||
ES_REFERENCED_B,
|
||||
ES_FLAGS
|
||||
};
|
||||
|
||||
#define EXTENT_STATUS_WRITTEN (1 << 3)
|
||||
#define EXTENT_STATUS_UNWRITTEN (1 << 2)
|
||||
#define EXTENT_STATUS_DELAYED (1 << 1)
|
||||
#define EXTENT_STATUS_HOLE (1 << 0)
|
||||
#define ES_SHIFT (sizeof(ext4_fsblk_t)*8 - ES_FLAGS)
|
||||
#define ES_MASK (~((ext4_fsblk_t)0) << ES_SHIFT)
|
||||
|
||||
#define EXTENT_STATUS_FLAGS (EXTENT_STATUS_WRITTEN | \
|
||||
EXTENT_STATUS_UNWRITTEN | \
|
||||
EXTENT_STATUS_DELAYED | \
|
||||
EXTENT_STATUS_HOLE)
|
||||
#define EXTENT_STATUS_WRITTEN (1 << ES_WRITTEN_B)
|
||||
#define EXTENT_STATUS_UNWRITTEN (1 << ES_UNWRITTEN_B)
|
||||
#define EXTENT_STATUS_DELAYED (1 << ES_DELAYED_B)
|
||||
#define EXTENT_STATUS_HOLE (1 << ES_HOLE_B)
|
||||
#define EXTENT_STATUS_REFERENCED (1 << ES_REFERENCED_B)
|
||||
|
||||
#define ES_WRITTEN (1ULL << 63)
|
||||
#define ES_UNWRITTEN (1ULL << 62)
|
||||
#define ES_DELAYED (1ULL << 61)
|
||||
#define ES_HOLE (1ULL << 60)
|
||||
|
||||
#define ES_MASK (ES_WRITTEN | ES_UNWRITTEN | \
|
||||
ES_DELAYED | ES_HOLE)
|
||||
#define ES_TYPE_MASK ((ext4_fsblk_t)(EXTENT_STATUS_WRITTEN | \
|
||||
EXTENT_STATUS_UNWRITTEN | \
|
||||
EXTENT_STATUS_DELAYED | \
|
||||
EXTENT_STATUS_HOLE) << ES_SHIFT)
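With the cleanup above, the extent type bits and the new "referenced" aging bit all live in the top ES_FLAGS bits of the 64-bit es_pblk word, and ES_SHIFT is derived from the flag count instead of being hard-coded to 60. A stand-alone sketch of the same packing scheme, with the widths written out explicitly rather than taken from the kernel headers:

```c
#include <stdint.h>
#include <stdio.h>

enum { WRITTEN_B, UNWRITTEN_B, DELAYED_B, HOLE_B, REFERENCED_B, NFLAGS };

#define SHIFT	(64 - NFLAGS)		/* flags occupy bits 59..63 */
#define MASK	(~UINT64_C(0) << SHIFT)

static uint64_t store(uint64_t pblk, unsigned status)
{
	return (((uint64_t)status << SHIFT) & MASK) | (pblk & ~MASK);
}

static unsigned status_of(uint64_t packed) { return (unsigned)(packed >> SHIFT); }
static uint64_t pblock_of(uint64_t packed) { return packed & ~MASK; }

int main(void)
{
	uint64_t es_pblk = store(123456, (1u << UNWRITTEN_B) | (1u << REFERENCED_B));

	/* prints: pblk=123456 status=0x12 */
	printf("pblk=%llu status=0x%x\n",
	       (unsigned long long)pblock_of(es_pblk), status_of(es_pblk));
	return 0;
}
```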
|
||||
|
||||
struct ext4_sb_info;
|
||||
struct ext4_extent;
|
||||
@ -65,14 +68,13 @@ struct ext4_es_tree {
|
||||
};
|
||||
|
||||
struct ext4_es_stats {
|
||||
unsigned long es_stats_last_sorted;
|
||||
unsigned long es_stats_shrunk;
|
||||
unsigned long es_stats_cache_hits;
|
||||
unsigned long es_stats_cache_misses;
|
||||
u64 es_stats_scan_time;
|
||||
u64 es_stats_max_scan_time;
|
||||
struct percpu_counter es_stats_all_cnt;
|
||||
struct percpu_counter es_stats_lru_cnt;
|
||||
struct percpu_counter es_stats_shk_cnt;
|
||||
};
|
||||
|
||||
extern int __init ext4_init_es(void);
|
||||
@ -93,29 +95,49 @@ extern void ext4_es_find_delayed_extent_range(struct inode *inode,
|
||||
extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||
struct extent_status *es);
|
||||
|
||||
static inline unsigned int ext4_es_status(struct extent_status *es)
|
||||
{
|
||||
return es->es_pblk >> ES_SHIFT;
|
||||
}
|
||||
|
||||
static inline unsigned int ext4_es_type(struct extent_status *es)
|
||||
{
|
||||
return (es->es_pblk & ES_TYPE_MASK) >> ES_SHIFT;
|
||||
}
|
||||
|
||||
static inline int ext4_es_is_written(struct extent_status *es)
|
||||
{
|
||||
return (es->es_pblk & ES_WRITTEN) != 0;
|
||||
return (ext4_es_type(es) & EXTENT_STATUS_WRITTEN) != 0;
|
||||
}
|
||||
|
||||
static inline int ext4_es_is_unwritten(struct extent_status *es)
|
||||
{
|
||||
return (es->es_pblk & ES_UNWRITTEN) != 0;
|
||||
return (ext4_es_type(es) & EXTENT_STATUS_UNWRITTEN) != 0;
|
||||
}
|
||||
|
||||
static inline int ext4_es_is_delayed(struct extent_status *es)
|
||||
{
|
||||
return (es->es_pblk & ES_DELAYED) != 0;
|
||||
return (ext4_es_type(es) & EXTENT_STATUS_DELAYED) != 0;
|
||||
}
|
||||
|
||||
static inline int ext4_es_is_hole(struct extent_status *es)
|
||||
{
|
||||
return (es->es_pblk & ES_HOLE) != 0;
|
||||
return (ext4_es_type(es) & EXTENT_STATUS_HOLE) != 0;
|
||||
}
|
||||
|
||||
static inline unsigned int ext4_es_status(struct extent_status *es)
|
||||
static inline void ext4_es_set_referenced(struct extent_status *es)
|
||||
{
|
||||
return es->es_pblk >> ES_SHIFT;
|
||||
es->es_pblk |= ((ext4_fsblk_t)EXTENT_STATUS_REFERENCED) << ES_SHIFT;
|
||||
}
|
||||
|
||||
static inline void ext4_es_clear_referenced(struct extent_status *es)
|
||||
{
|
||||
es->es_pblk &= ~(((ext4_fsblk_t)EXTENT_STATUS_REFERENCED) << ES_SHIFT);
|
||||
}
|
||||
|
||||
static inline int ext4_es_is_referenced(struct extent_status *es)
|
||||
{
|
||||
return (ext4_es_status(es) & EXTENT_STATUS_REFERENCED) != 0;
|
||||
}
|
||||
|
||||
static inline ext4_fsblk_t ext4_es_pblock(struct extent_status *es)
|
||||
@ -135,23 +157,19 @@ static inline void ext4_es_store_pblock(struct extent_status *es,
|
||||
static inline void ext4_es_store_status(struct extent_status *es,
|
||||
unsigned int status)
|
||||
{
|
||||
es->es_pblk = (((ext4_fsblk_t)
|
||||
(status & EXTENT_STATUS_FLAGS) << ES_SHIFT) |
|
||||
(es->es_pblk & ~ES_MASK));
|
||||
es->es_pblk = (((ext4_fsblk_t)status << ES_SHIFT) & ES_MASK) |
|
||||
(es->es_pblk & ~ES_MASK);
|
||||
}
|
||||
|
||||
static inline void ext4_es_store_pblock_status(struct extent_status *es,
|
||||
ext4_fsblk_t pb,
|
||||
unsigned int status)
|
||||
{
|
||||
es->es_pblk = (((ext4_fsblk_t)
|
||||
(status & EXTENT_STATUS_FLAGS) << ES_SHIFT) |
|
||||
(pb & ~ES_MASK));
|
||||
es->es_pblk = (((ext4_fsblk_t)status << ES_SHIFT) & ES_MASK) |
|
||||
(pb & ~ES_MASK);
|
||||
}
|
||||
|
||||
extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi);
|
||||
extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi);
|
||||
extern void ext4_es_lru_add(struct inode *inode);
|
||||
extern void ext4_es_lru_del(struct inode *inode);
|
||||
|
||||
#endif /* _EXT4_EXTENTS_STATUS_H */
|
||||
|
222
fs/ext4/file.c
222
fs/ext4/file.c
@ -273,24 +273,19 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
|
||||
* we determine this extent as a data or a hole according to whether the
|
||||
* page cache has data or not.
|
||||
*/
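Per the comment above, data sitting in the page cache makes the corresponding part of an unwritten extent count as data for SEEK_DATA/SEEK_HOLE. The effect is observable from user space with lseek(); the snippet below is a generic illustration of those whence flags, not ext4 internals, and the file name is arbitrary.

```c
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("testfile", O_RDWR | O_CREAT | O_TRUNC, 0600);
	off_t data, hole;

	if (fd < 0)
		return 1;
	if (write(fd, "hello", 5) != 5)
		return 1;

	/* SEEK_DATA finds the next offset backed by data (including dirty page
	 * cache over an unwritten extent); SEEK_HOLE finds the next gap. */
	data = lseek(fd, 0, SEEK_DATA);
	hole = lseek(fd, 0, SEEK_HOLE);
	printf("first data at %lld, first hole at %lld\n",
	       (long long)data, (long long)hole);
	close(fd);
	return 0;
}
```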
|
||||
static int ext4_find_unwritten_pgoff(struct inode *inode,
|
||||
int whence,
|
||||
struct ext4_map_blocks *map,
|
||||
loff_t *offset)
|
||||
static int ext4_find_unwritten_pgoff(struct inode *inode, int whence,
|
||||
loff_t endoff, loff_t *offset)
|
||||
{
|
||||
struct pagevec pvec;
|
||||
unsigned int blkbits;
|
||||
pgoff_t index;
|
||||
pgoff_t end;
|
||||
loff_t endoff;
|
||||
loff_t startoff;
|
||||
loff_t lastoff;
|
||||
int found = 0;
|
||||
|
||||
blkbits = inode->i_sb->s_blocksize_bits;
|
||||
startoff = *offset;
|
||||
lastoff = startoff;
|
||||
endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits;
|
||||
|
||||
|
||||
index = startoff >> PAGE_CACHE_SHIFT;
|
||||
end = endoff >> PAGE_CACHE_SHIFT;
|
||||
@ -408,147 +403,144 @@ out:
|
||||
static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
|
||||
{
|
||||
struct inode *inode = file->f_mapping->host;
|
||||
struct ext4_map_blocks map;
|
||||
struct extent_status es;
|
||||
ext4_lblk_t start, last, end;
|
||||
loff_t dataoff, isize;
|
||||
int blkbits;
|
||||
int ret = 0;
|
||||
struct fiemap_extent_info fie;
|
||||
struct fiemap_extent ext[2];
|
||||
loff_t next;
|
||||
int i, ret = 0;
|
||||
|
||||
mutex_lock(&inode->i_mutex);
|
||||
|
||||
isize = i_size_read(inode);
|
||||
if (offset >= isize) {
|
||||
if (offset >= inode->i_size) {
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
return -ENXIO;
|
||||
}
|
||||
fie.fi_flags = 0;
|
||||
fie.fi_extents_max = 2;
|
||||
fie.fi_extents_start = (struct fiemap_extent __user *) &ext;
|
||||
while (1) {
|
||||
mm_segment_t old_fs = get_fs();
|
||||
|
||||
blkbits = inode->i_sb->s_blocksize_bits;
|
||||
start = offset >> blkbits;
|
||||
last = start;
|
||||
end = isize >> blkbits;
|
||||
dataoff = offset;
|
||||
fie.fi_extents_mapped = 0;
|
||||
memset(ext, 0, sizeof(*ext) * fie.fi_extents_max);
|
||||
|
||||
do {
|
||||
map.m_lblk = last;
|
||||
map.m_len = end - last + 1;
|
||||
ret = ext4_map_blocks(NULL, inode, &map, 0);
|
||||
if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
|
||||
if (last != start)
|
||||
dataoff = (loff_t)last << blkbits;
|
||||
set_fs(get_ds());
|
||||
ret = ext4_fiemap(inode, &fie, offset, maxsize - offset);
|
||||
set_fs(old_fs);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
/* No extents found, EOF */
|
||||
if (!fie.fi_extents_mapped) {
|
||||
ret = -ENXIO;
|
||||
break;
|
||||
}
|
||||
for (i = 0; i < fie.fi_extents_mapped; i++) {
|
||||
next = (loff_t)(ext[i].fe_length + ext[i].fe_logical);
|
||||
|
||||
/*
|
||||
* If there is a delay extent at this offset,
|
||||
* it will be as a data.
|
||||
*/
|
||||
ext4_es_find_delayed_extent_range(inode, last, last, &es);
|
||||
if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
|
||||
if (last != start)
|
||||
dataoff = (loff_t)last << blkbits;
|
||||
break;
|
||||
if (offset < (loff_t)ext[i].fe_logical)
|
||||
offset = (loff_t)ext[i].fe_logical;
|
||||
/*
|
||||
* If extent is not unwritten, then it contains valid
|
||||
* data, mapped or delayed.
|
||||
*/
|
||||
if (!(ext[i].fe_flags & FIEMAP_EXTENT_UNWRITTEN))
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* If there is a unwritten extent at this offset,
|
||||
* it will be as a data or a hole according to page
|
||||
* cache that has data or not.
|
||||
*/
|
||||
if (ext4_find_unwritten_pgoff(inode, SEEK_DATA,
|
||||
next, &offset))
|
||||
goto out;
|
||||
|
||||
if (ext[i].fe_flags & FIEMAP_EXTENT_LAST) {
|
||||
ret = -ENXIO;
|
||||
goto out;
|
||||
}
|
||||
offset = next;
|
||||
}
|
||||
|
||||
/*
|
||||
* If there is a unwritten extent at this offset,
|
||||
* it will be as a data or a hole according to page
|
||||
* cache that has data or not.
|
||||
*/
|
||||
if (map.m_flags & EXT4_MAP_UNWRITTEN) {
|
||||
int unwritten;
|
||||
unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA,
|
||||
&map, &dataoff);
|
||||
if (unwritten)
|
||||
break;
|
||||
}
|
||||
|
||||
last++;
|
||||
dataoff = (loff_t)last << blkbits;
|
||||
} while (last <= end);
|
||||
|
||||
}
|
||||
if (offset > inode->i_size)
|
||||
offset = inode->i_size;
|
||||
out:
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (dataoff > isize)
|
||||
return -ENXIO;
|
||||
|
||||
return vfs_setpos(file, dataoff, maxsize);
|
||||
return vfs_setpos(file, offset, maxsize);
|
||||
}

/*
* ext4_seek_hole() retrieves the offset for SEEK_HOLE.
* ext4_seek_hole() retrieves the offset for SEEK_HOLE
*/
static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
{
struct inode *inode = file->f_mapping->host;
struct ext4_map_blocks map;
struct extent_status es;
ext4_lblk_t start, last, end;
loff_t holeoff, isize;
int blkbits;
int ret = 0;
struct fiemap_extent_info fie;
struct fiemap_extent ext[2];
loff_t next;
int i, ret = 0;

mutex_lock(&inode->i_mutex);

isize = i_size_read(inode);
if (offset >= isize) {
if (offset >= inode->i_size) {
mutex_unlock(&inode->i_mutex);
return -ENXIO;
}

blkbits = inode->i_sb->s_blocksize_bits;
start = offset >> blkbits;
last = start;
end = isize >> blkbits;
holeoff = offset;
fie.fi_flags = 0;
fie.fi_extents_max = 2;
fie.fi_extents_start = (struct fiemap_extent __user *)&ext;
while (1) {
mm_segment_t old_fs = get_fs();

do {
map.m_lblk = last;
map.m_len = end - last + 1;
ret = ext4_map_blocks(NULL, inode, &map, 0);
if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
last += ret;
holeoff = (loff_t)last << blkbits;
continue;
}
fie.fi_extents_mapped = 0;
memset(ext, 0, sizeof(*ext));

/*
* If there is a delay extent at this offset,
* we will skip this extent.
*/
ext4_es_find_delayed_extent_range(inode, last, last, &es);
if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
last = es.es_lblk + es.es_len;
holeoff = (loff_t)last << blkbits;
continue;
}
set_fs(get_ds());
ret = ext4_fiemap(inode, &fie, offset, maxsize - offset);
set_fs(old_fs);
if (ret)
break;

/*
* If there is a unwritten extent at this offset,
* it will be as a data or a hole according to page
* cache that has data or not.
*/
if (map.m_flags & EXT4_MAP_UNWRITTEN) {
int unwritten;
unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
&map, &holeoff);
if (!unwritten) {
last += ret;
holeoff = (loff_t)last << blkbits;
/* No extents found */
if (!fie.fi_extents_mapped)
break;

for (i = 0; i < fie.fi_extents_mapped; i++) {
next = (loff_t)(ext[i].fe_logical + ext[i].fe_length);
/*
* If extent is not unwritten, then it contains valid
* data, mapped or delayed.
*/
if (!(ext[i].fe_flags & FIEMAP_EXTENT_UNWRITTEN)) {
if (offset < (loff_t)ext[i].fe_logical)
goto out;
offset = next;
continue;
}
/*
* If there is a unwritten extent at this offset,
* it will be as a data or a hole according to page
* cache that has data or not.
*/
if (ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
next, &offset))
goto out;

offset = next;
if (ext[i].fe_flags & FIEMAP_EXTENT_LAST)
goto out;
}

/* find a hole */
break;
} while (last <= end);

}
if (offset > inode->i_size)
offset = inode->i_size;
out:
mutex_unlock(&inode->i_mutex);
if (ret)
return ret;

if (holeoff > isize)
holeoff = isize;

return vfs_setpos(file, holeoff, maxsize);
return vfs_setpos(file, offset, maxsize);
}
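As an aside, the reworked seek paths above lean on the same fiemap machinery that userspace can reach through the FS_IOC_FIEMAP ioctl. A rough sketch of such a caller follows (error handling trimmed; the extent count of 32 is an arbitrary choice for the sketch):

/* Sketch: dump the extents of a file with FS_IOC_FIEMAP. */
#include <fcntl.h>
#include <linux/fiemap.h>
#include <linux/fs.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(int argc, char **argv)
{
    if (argc < 2) { fprintf(stderr, "usage: %s <file>\n", argv[0]); return 1; }
    int fd = open(argv[1], O_RDONLY);
    if (fd < 0) { perror("open"); return 1; }

    unsigned int count = 32;   /* arbitrary upper bound for this sketch */
    struct fiemap *fm = calloc(1, sizeof(*fm) + count * sizeof(struct fiemap_extent));
    if (!fm) return 1;
    fm->fm_start = 0;
    fm->fm_length = FIEMAP_MAX_OFFSET;
    fm->fm_extent_count = count;

    if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0) { perror("FS_IOC_FIEMAP"); return 1; }

    for (unsigned int i = 0; i < fm->fm_mapped_extents; i++) {
        struct fiemap_extent *fe = &fm->fm_extents[i];
        printf("logical %llu physical %llu length %llu flags 0x%x\n",
               (unsigned long long)fe->fe_logical,
               (unsigned long long)fe->fe_physical,
               (unsigned long long)fe->fe_length, fe->fe_flags);
    }
    free(fm);
    close(fd);
    return 0;
}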
@@ -811,8 +811,11 @@ static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping,
ret = __block_write_begin(page, 0, inline_size,
ext4_da_get_block_prep);
if (ret) {
up_read(&EXT4_I(inode)->xattr_sem);
unlock_page(page);
page_cache_release(page);
ext4_truncate_failed_write(inode);
goto out;
return ret;
}

SetPageDirty(page);
@@ -870,6 +873,12 @@ retry_journal:
goto out_journal;
}

/*
* We cannot recurse into the filesystem as the transaction
* is already started.
*/
flags |= AOP_FLAG_NOFS;

if (ret == -ENOSPC) {
ret = ext4_da_convert_inline_data_to_extent(mapping,
inode,
@@ -882,11 +891,6 @@ retry_journal:
goto out;
}

/*
* We cannot recurse into the filesystem as the transaction
* is already started.
*/
flags |= AOP_FLAG_NOFS;

page = grab_cache_page_write_begin(mapping, 0, flags);
if (!page) {
@@ -1807,11 +1811,12 @@ int ext4_destroy_inline_data(handle_t *handle, struct inode *inode)

int ext4_inline_data_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo,
int *has_inline)
int *has_inline, __u64 start, __u64 len)
{
__u64 physical = 0;
__u64 length;
__u32 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_LAST;
__u64 inline_len;
__u32 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED |
FIEMAP_EXTENT_LAST;
int error = 0;
struct ext4_iloc iloc;

@@ -1820,6 +1825,13 @@ int ext4_inline_data_fiemap(struct inode *inode,
*has_inline = 0;
goto out;
}
inline_len = min_t(size_t, ext4_get_inline_size(inode),
i_size_read(inode));
if (start >= inline_len)
goto out;
if (start + len < inline_len)
inline_len = start + len;
inline_len -= start;

error = ext4_get_inode_loc(inode, &iloc);
if (error)
@@ -1828,11 +1840,10 @@ int ext4_inline_data_fiemap(struct inode *inode,
physical = (__u64)iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits;
physical += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data;
physical += offsetof(struct ext4_inode, i_block);
length = i_size_read(inode);

if (physical)
error = fiemap_fill_next_extent(fieinfo, 0, physical,
length, flags);
error = fiemap_fill_next_extent(fieinfo, start, physical,
inline_len, flags);
brelse(iloc.bh);
out:
up_read(&EXT4_I(inode)->xattr_sem);
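The clamping that the added lines perform against the inline area is easier to see in isolation. Below is a small standalone model of the same arithmetic (plain C, hypothetical function name; it ignores the extra clamp against i_size that the real code gets via min_t):

#include <stdio.h>

/* Model of how a fiemap request (start, len) is clamped against an
 * inline data region of inline_size bytes, mirroring the hunk above. */
static unsigned long long clamp_inline(unsigned long long inline_size,
                                       unsigned long long start,
                                       unsigned long long len)
{
    unsigned long long inline_len = inline_size;

    if (start >= inline_len)
        return 0;                    /* request begins past the inline area */
    if (start + len < inline_len)
        inline_len = start + len;    /* request ends inside the inline area */
    return inline_len - start;       /* bytes of inline data to report */
}

int main(void)
{
    /* e.g. 60 bytes of inline data, caller asks for bytes [16, 48) */
    printf("%llu\n", clamp_inline(60, 16, 32));   /* prints 32 */
    printf("%llu\n", clamp_inline(60, 16, 100));  /* prints 44 */
    printf("%llu\n", clamp_inline(60, 64, 10));   /* prints 0  */
    return 0;
}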
@@ -416,11 +416,6 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
}
if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
up_read((&EXT4_I(inode)->i_data_sem));
/*
* Clear EXT4_MAP_FROM_CLUSTER and EXT4_MAP_BOUNDARY flag
* because it shouldn't be marked in es_map->m_flags.
*/
map->m_flags &= ~(EXT4_MAP_FROM_CLUSTER | EXT4_MAP_BOUNDARY);

/*
* We don't check m_len because extent will be collpased in status
@@ -491,7 +486,6 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,

/* Lookup extent status tree firstly */
if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
ext4_es_lru_add(inode);
if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
map->m_pblk = ext4_es_pblock(&es) +
map->m_lblk - es.es_lblk;
@@ -1393,7 +1387,6 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,

/* Lookup extent status tree firstly */
if (ext4_es_lookup_extent(inode, iblock, &es)) {
ext4_es_lru_add(inode);
if (ext4_es_is_hole(&es)) {
retval = 0;
down_read(&EXT4_I(inode)->i_data_sem);
@@ -1434,24 +1427,12 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
* file system block.
*/
down_read(&EXT4_I(inode)->i_data_sem);
if (ext4_has_inline_data(inode)) {
/*
* We will soon create blocks for this page, and let
* us pretend as if the blocks aren't allocated yet.
* In case of clusters, we have to handle the work
* of mapping from cluster so that the reserved space
* is calculated properly.
*/
if ((EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) &&
ext4_find_delalloc_cluster(inode, map->m_lblk))
map->m_flags |= EXT4_MAP_FROM_CLUSTER;
if (ext4_has_inline_data(inode))
retval = 0;
} else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
retval = ext4_ext_map_blocks(NULL, inode, map,
EXT4_GET_BLOCKS_NO_PUT_HOLE);
else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
retval = ext4_ext_map_blocks(NULL, inode, map, 0);
else
retval = ext4_ind_map_blocks(NULL, inode, map,
EXT4_GET_BLOCKS_NO_PUT_HOLE);
retval = ext4_ind_map_blocks(NULL, inode, map, 0);

add_delayed:
if (retval == 0) {
@@ -1465,7 +1446,8 @@ add_delayed:
* then we don't need to reserve it again. However we still need
* to reserve metadata for every block we're going to write.
*/
if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
if (EXT4_SB(inode->i_sb)->s_cluster_ratio <= 1 ||
!ext4_find_delalloc_cluster(inode, map->m_lblk)) {
ret = ext4_da_reserve_space(inode, iblock);
if (ret) {
/* not enough space to reserve */
@@ -1481,11 +1463,6 @@ add_delayed:
goto out_unlock;
}

/* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served
* and it should not appear on the bh->b_state.
*/
map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;

map_bh(bh, inode->i_sb, invalid_block);
set_buffer_new(bh);
set_buffer_delay(bh);
@@ -3643,7 +3620,7 @@ out_stop:
* If this was a simple ftruncate() and the file will remain alive,
* then we need to clear up the orphan record which we created above.
* However, if this was a real unlink then we were called by
* ext4_delete_inode(), and we allow that function to clean up the
* ext4_evict_inode(), and we allow that function to clean up the
* orphan info for us.
*/
if (inode->i_nlink)
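With EXT4_MAP_FROM_CLUSTER gone, the ext4_da_map_blocks() hunks above make the delayed-allocation path decide directly whether a new cluster reservation is needed. A simplified standalone model of that decision (hypothetical names, not the kernel code path):

#include <stdbool.h>
#include <stdio.h>

/* Simplified model: reserve space for a delayed-allocated block only if the
 * filesystem is not bigalloc (cluster_ratio <= 1) or the block's cluster has
 * no delayed allocation charged against it yet. */
static bool need_cluster_reservation(unsigned int cluster_ratio,
                                     bool cluster_already_delalloc)
{
    return cluster_ratio <= 1 || !cluster_already_delalloc;
}

int main(void)
{
    printf("%d\n", need_cluster_reservation(1, false));  /* 1: plain ext4, always reserve */
    printf("%d\n", need_cluster_reservation(16, true));  /* 0: cluster already reserved */
    printf("%d\n", need_cluster_reservation(16, false)); /* 1: first block in this cluster */
    return 0;
}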
@@ -78,8 +78,6 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2)
memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize));
ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS);
ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS);
ext4_es_lru_del(inode1);
ext4_es_lru_del(inode2);

isize = i_size_read(inode1);
i_size_write(inode1, i_size_read(inode2));
@@ -2358,7 +2358,7 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
if (sbi->s_group_info) {
memcpy(new_groupinfo, sbi->s_group_info,
sbi->s_group_info_size * sizeof(*sbi->s_group_info));
ext4_kvfree(sbi->s_group_info);
kvfree(sbi->s_group_info);
}
sbi->s_group_info = new_groupinfo;
sbi->s_group_info_size = size / sizeof(*sbi->s_group_info);
@@ -2385,7 +2385,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
metalen = sizeof(*meta_group_info) <<
EXT4_DESC_PER_BLOCK_BITS(sb);
meta_group_info = kmalloc(metalen, GFP_KERNEL);
meta_group_info = kmalloc(metalen, GFP_NOFS);
if (meta_group_info == NULL) {
ext4_msg(sb, KERN_ERR, "can't allocate mem "
"for a buddy group");
@@ -2399,7 +2399,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);

meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_KERNEL);
meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS);
if (meta_group_info[i] == NULL) {
ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
goto exit_group_info;
@@ -2428,7 +2428,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
{
struct buffer_head *bh;
meta_group_info[i]->bb_bitmap =
kmalloc(sb->s_blocksize, GFP_KERNEL);
kmalloc(sb->s_blocksize, GFP_NOFS);
BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
bh = ext4_read_block_bitmap(sb, group);
BUG_ON(bh == NULL);
@@ -2495,7 +2495,7 @@ err_freebuddy:
kfree(sbi->s_group_info[i]);
iput(sbi->s_buddy_cache);
err_freesgi:
ext4_kvfree(sbi->s_group_info);
kvfree(sbi->s_group_info);
return -ENOMEM;
}

@@ -2708,12 +2708,11 @@ int ext4_mb_release(struct super_block *sb)
EXT4_DESC_PER_BLOCK_BITS(sb);
for (i = 0; i < num_meta_group_infos; i++)
kfree(sbi->s_group_info[i]);
ext4_kvfree(sbi->s_group_info);
kvfree(sbi->s_group_info);
}
kfree(sbi->s_mb_offsets);
kfree(sbi->s_mb_maxs);
if (sbi->s_buddy_cache)
iput(sbi->s_buddy_cache);
iput(sbi->s_buddy_cache);
if (sbi->s_mb_stats) {
ext4_msg(sb, KERN_INFO,
"mballoc: %u blocks %u reqs (%u success)",
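The GFP_KERNEL to GFP_NOFS switches above follow the usual rule that an allocation made while the filesystem already holds locks (here, group-info allocation that can also run during online resize) must not let direct reclaim re-enter filesystem code. A generic illustration of the idiom, with a hypothetical helper that is not from this patch:

#include <linux/errno.h>
#include <linux/slab.h>

/* Illustration only: allocate while filesystem state is held, so reclaim
 * triggered by this allocation cannot recurse back into the filesystem. */
static int grow_private_table(void **table, size_t size)
{
	void *p = kmalloc(size, GFP_NOFS);

	if (!p)
		return -ENOMEM;
	*table = p;
	return 0;
}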
@@ -592,7 +592,7 @@ err_out:

/*
* set the i_blocks count to zero
* so that the ext4_delete_inode does the
* so that the ext4_evict_inode() does the
* right job
*
* We don't need to take the i_lock because
@@ -273,6 +273,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
int replaced_count = 0;
int from = data_offset_in_page << orig_inode->i_blkbits;
int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
struct super_block *sb = orig_inode->i_sb;

/*
* It needs twice the amount of ordinary journal buffers because
@@ -405,10 +406,13 @@ unlock_pages:
page_cache_release(pagep[1]);
stop_journal:
ext4_journal_stop(handle);
if (*err == -ENOSPC &&
ext4_should_retry_alloc(sb, &retries))
goto again;
/* Buffer was busy because probably is pinned to journal transaction,
* force transaction commit may help to free it. */
if (*err == -EBUSY && ext4_should_retry_alloc(orig_inode->i_sb,
&retries))
if (*err == -EBUSY && retries++ < 4 && EXT4_SB(sb)->s_journal &&
jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal))
goto again;
return replaced_count;
@@ -2814,7 +2814,6 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
ext4_orphan_add(handle, inode);
inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
retval = 0;

end_unlink:
brelse(bh);
@@ -856,7 +856,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
n_group_desc[gdb_num] = gdb_bh;
EXT4_SB(sb)->s_group_desc = n_group_desc;
EXT4_SB(sb)->s_gdb_count++;
ext4_kvfree(o_group_desc);
kvfree(o_group_desc);

le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
err = ext4_handle_dirty_super(handle, sb);
@@ -866,7 +866,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
return err;

exit_inode:
ext4_kvfree(n_group_desc);
kvfree(n_group_desc);
brelse(iloc.bh);
exit_dind:
brelse(dind);
@@ -909,7 +909,7 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
n_group_desc[gdb_num] = gdb_bh;
EXT4_SB(sb)->s_group_desc = n_group_desc;
EXT4_SB(sb)->s_gdb_count++;
ext4_kvfree(o_group_desc);
kvfree(o_group_desc);
BUFFER_TRACE(gdb_bh, "get_write_access");
err = ext4_journal_get_write_access(handle, gdb_bh);
if (unlikely(err))
@@ -176,15 +176,6 @@ void *ext4_kvzalloc(size_t size, gfp_t flags)
return ret;
}

void ext4_kvfree(void *ptr)
{
if (is_vmalloc_addr(ptr))
vfree(ptr);
else
kfree(ptr);

}

ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
struct ext4_group_desc *bg)
{
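Many of the hunks in this series simply replace the private ext4_kvfree() helper (removed above) with the generic kvfree() now provided by mm. The underlying pattern, sketched outside of ext4 with illustrative names (kernel context, not part of this diff):

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

/* Allocate a table with kmalloc, falling back to vmalloc for large sizes. */
static void *alloc_table(size_t size)
{
	void *p = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);

	if (!p)
		p = vzalloc(size);
	return p;
}

/* kvfree() handles both kmalloc'ed and vmalloc'ed memory, so callers no
 * longer need to remember how the buffer was allocated. */
static void free_table(void *p)
{
	kvfree(p);
}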
@@ -811,8 +802,8 @@ static void ext4_put_super(struct super_block *sb)

for (i = 0; i < sbi->s_gdb_count; i++)
brelse(sbi->s_group_desc[i]);
ext4_kvfree(sbi->s_group_desc);
ext4_kvfree(sbi->s_flex_groups);
kvfree(sbi->s_group_desc);
kvfree(sbi->s_flex_groups);
percpu_counter_destroy(&sbi->s_freeclusters_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -880,10 +871,10 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
spin_lock_init(&ei->i_prealloc_lock);
ext4_es_init_tree(&ei->i_es_tree);
rwlock_init(&ei->i_es_lock);
INIT_LIST_HEAD(&ei->i_es_lru);
INIT_LIST_HEAD(&ei->i_es_list);
ei->i_es_all_nr = 0;
ei->i_es_lru_nr = 0;
ei->i_touch_when = 0;
ei->i_es_shk_nr = 0;
ei->i_es_shrink_lblk = 0;
ei->i_reserved_data_blocks = 0;
ei->i_reserved_meta_blocks = 0;
ei->i_allocated_meta_blocks = 0;
@@ -973,7 +964,6 @@ void ext4_clear_inode(struct inode *inode)
dquot_drop(inode);
ext4_discard_preallocations(inode);
ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
ext4_es_lru_del(inode);
if (EXT4_I(inode)->jinode) {
jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
EXT4_I(inode)->jinode);
@@ -1153,7 +1143,7 @@ enum {
Opt_inode_readahead_blks, Opt_journal_ioprio,
Opt_dioread_nolock, Opt_dioread_lock,
Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
Opt_max_dir_size_kb,
Opt_max_dir_size_kb, Opt_nojournal_checksum,
};

static const match_table_t tokens = {
@@ -1187,6 +1177,7 @@ static const match_table_t tokens = {
{Opt_journal_dev, "journal_dev=%u"},
{Opt_journal_path, "journal_path=%s"},
{Opt_journal_checksum, "journal_checksum"},
{Opt_nojournal_checksum, "nojournal_checksum"},
{Opt_journal_async_commit, "journal_async_commit"},
{Opt_abort, "abort"},
{Opt_data_journal, "data=journal"},
@@ -1368,6 +1359,8 @@ static const struct mount_opts {
MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
{Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
MOPT_EXT4_ONLY | MOPT_CLEAR},
{Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
MOPT_EXT4_ONLY | MOPT_CLEAR},
{Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
MOPT_EXT4_ONLY | MOPT_SET},
{Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
@@ -1709,6 +1702,12 @@ static int parse_options(char *options, struct super_block *sb,
return 0;
}
}
if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
ext4_msg(sb, KERN_ERR, "can't mount with journal_async_commit "
"in data=ordered mode");
return 0;
}
return 1;
}
|
||||
memcpy(new_groups, sbi->s_flex_groups,
|
||||
(sbi->s_flex_groups_allocated *
|
||||
sizeof(struct flex_groups)));
|
||||
ext4_kvfree(sbi->s_flex_groups);
|
||||
kvfree(sbi->s_flex_groups);
|
||||
}
|
||||
sbi->s_flex_groups = new_groups;
|
||||
sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups);
|
||||
@ -3317,7 +3316,7 @@ int ext4_calculate_overhead(struct super_block *sb)
|
||||
struct ext4_super_block *es = sbi->s_es;
|
||||
ext4_group_t i, ngroups = ext4_get_groups_count(sb);
|
||||
ext4_fsblk_t overhead = 0;
|
||||
char *buf = (char *) get_zeroed_page(GFP_KERNEL);
|
||||
char *buf = (char *) get_zeroed_page(GFP_NOFS);
|
||||
|
||||
if (!buf)
|
||||
return -ENOMEM;
|
||||
@ -3345,8 +3344,8 @@ int ext4_calculate_overhead(struct super_block *sb)
|
||||
memset(buf, 0, PAGE_SIZE);
|
||||
cond_resched();
|
||||
}
|
||||
/* Add the journal blocks as well */
|
||||
if (sbi->s_journal)
|
||||
/* Add the internal journal blocks as well */
|
||||
if (sbi->s_journal && !sbi->journal_bdev)
|
||||
overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen);
|
||||
|
||||
sbi->s_overhead = overhead;
|
||||
@ -4232,7 +4231,7 @@ failed_mount7:
|
||||
failed_mount6:
|
||||
ext4_mb_release(sb);
|
||||
if (sbi->s_flex_groups)
|
||||
ext4_kvfree(sbi->s_flex_groups);
|
||||
kvfree(sbi->s_flex_groups);
|
||||
percpu_counter_destroy(&sbi->s_freeclusters_counter);
|
||||
percpu_counter_destroy(&sbi->s_freeinodes_counter);
|
||||
percpu_counter_destroy(&sbi->s_dirs_counter);
|
||||
@ -4261,7 +4260,7 @@ failed_mount3:
|
||||
failed_mount2:
|
||||
for (i = 0; i < db_count; i++)
|
||||
brelse(sbi->s_group_desc[i]);
|
||||
ext4_kvfree(sbi->s_group_desc);
|
||||
kvfree(sbi->s_group_desc);
|
||||
failed_mount:
|
||||
if (sbi->s_chksum_driver)
|
||||
crypto_free_shash(sbi->s_chksum_driver);
|
||||
@ -4862,6 +4861,14 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
|
||||
goto restore_opts;
|
||||
}
|
||||
|
||||
if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
|
||||
test_opt(sb, JOURNAL_CHECKSUM)) {
|
||||
ext4_msg(sb, KERN_ERR, "changing journal_checksum "
|
||||
"during remount not supported");
|
||||
err = -EINVAL;
|
||||
goto restore_opts;
|
||||
}
|
||||
|
||||
if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
|
||||
if (test_opt2(sb, EXPLICIT_DELALLOC)) {
|
||||
ext4_msg(sb, KERN_ERR, "can't mount with "
|
||||
|
@ -1714,8 +1714,7 @@ int jbd2_journal_destroy(journal_t *journal)
|
||||
|
||||
if (journal->j_proc_entry)
|
||||
jbd2_stats_proc_exit(journal);
|
||||
if (journal->j_inode)
|
||||
iput(journal->j_inode);
|
||||
iput(journal->j_inode);
|
||||
if (journal->j_revoke)
|
||||
jbd2_journal_destroy_revoke(journal);
|
||||
if (journal->j_chksum_driver)
|
||||
|
@ -43,15 +43,13 @@ struct extent_status;
|
||||
{ EXT4_GET_BLOCKS_METADATA_NOFAIL, "METADATA_NOFAIL" }, \
|
||||
{ EXT4_GET_BLOCKS_NO_NORMALIZE, "NO_NORMALIZE" }, \
|
||||
{ EXT4_GET_BLOCKS_KEEP_SIZE, "KEEP_SIZE" }, \
|
||||
{ EXT4_GET_BLOCKS_NO_LOCK, "NO_LOCK" }, \
|
||||
{ EXT4_GET_BLOCKS_NO_PUT_HOLE, "NO_PUT_HOLE" })
|
||||
{ EXT4_GET_BLOCKS_NO_LOCK, "NO_LOCK" })
|
||||
|
||||
#define show_mflags(flags) __print_flags(flags, "", \
|
||||
{ EXT4_MAP_NEW, "N" }, \
|
||||
{ EXT4_MAP_MAPPED, "M" }, \
|
||||
{ EXT4_MAP_UNWRITTEN, "U" }, \
|
||||
{ EXT4_MAP_BOUNDARY, "B" }, \
|
||||
{ EXT4_MAP_FROM_CLUSTER, "C" })
|
||||
{ EXT4_MAP_BOUNDARY, "B" })
|
||||
|
||||
#define show_free_flags(flags) __print_flags(flags, "|", \
|
||||
{ EXT4_FREE_BLOCKS_METADATA, "METADATA" }, \
|
||||
@ -2452,15 +2450,14 @@ TRACE_EVENT(ext4_collapse_range,
|
||||
|
||||
TRACE_EVENT(ext4_es_shrink,
|
||||
TP_PROTO(struct super_block *sb, int nr_shrunk, u64 scan_time,
|
||||
int skip_precached, int nr_skipped, int retried),
|
||||
int nr_skipped, int retried),
|
||||
|
||||
TP_ARGS(sb, nr_shrunk, scan_time, skip_precached, nr_skipped, retried),
|
||||
TP_ARGS(sb, nr_shrunk, scan_time, nr_skipped, retried),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( dev_t, dev )
|
||||
__field( int, nr_shrunk )
|
||||
__field( unsigned long long, scan_time )
|
||||
__field( int, skip_precached )
|
||||
__field( int, nr_skipped )
|
||||
__field( int, retried )
|
||||
),
|
||||
@ -2469,16 +2466,14 @@ TRACE_EVENT(ext4_es_shrink,
|
||||
__entry->dev = sb->s_dev;
|
||||
__entry->nr_shrunk = nr_shrunk;
|
||||
__entry->scan_time = div_u64(scan_time, 1000);
|
||||
__entry->skip_precached = skip_precached;
|
||||
__entry->nr_skipped = nr_skipped;
|
||||
__entry->retried = retried;
|
||||
),
|
||||
|
||||
TP_printk("dev %d,%d nr_shrunk %d, scan_time %llu skip_precached %d "
|
||||
TP_printk("dev %d,%d nr_shrunk %d, scan_time %llu "
|
||||
"nr_skipped %d retried %d",
|
||||
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nr_shrunk,
|
||||
__entry->scan_time, __entry->skip_precached,
|
||||
__entry->nr_skipped, __entry->retried)
|
||||
__entry->scan_time, __entry->nr_skipped, __entry->retried)
|
||||
);
|
||||
|
||||
#endif /* _TRACE_EXT4_H */
|
||||
|