ext4: Add multi block allocator for ext4
Signed-off-by: Alex Tomas <alex@clusterfs.com> Signed-off-by: Andreas Dilger <adilger@clusterfs.com> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Eric Sandeen <sandeen@redhat.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
This commit is contained in:
		
							parent
							
								
									1988b51e47
								
							
						
					
					
						commit
						c9de560ded
					
				| @ -86,9 +86,11 @@ Alex is working on a new set of patches right now. | ||||
| When mounting an ext4 filesystem, the following options are accepted: | ||||
| (*) == default | ||||
| 
 | ||||
| extents			ext4 will use extents to address file data.  The | ||||
| extents		(*)	ext4 will use extents to address file data.  The | ||||
| 			file system will no longer be mountable by ext3. | ||||
| 
 | ||||
| noextents		ext4 will not use extents for newly created files | ||||
| 
 | ||||
| journal_checksum	Enable checksumming of the journal transactions. | ||||
| 			This will allow the recovery code in e2fsck and the | ||||
| 			kernel to detect corruption in the kernel.  It is a | ||||
| @ -206,6 +208,12 @@ nobh			(a) cache disk block mapping information | ||||
| 			"nobh" option tries to avoid associating buffer | ||||
| 			heads (supported only for "writeback" mode). | ||||
| 
 | ||||
| mballoc		(*)	Use the multiple block allocator for block allocation | ||||
| nomballoc		Disable the multiple block allocator for block allocation. | ||||
| stripe=n		Number of filesystem blocks that mballoc will try | ||||
| 			to use for allocation size and alignment. For RAID5/6 | ||||
| 			systems this should be the number of data | ||||
| 			disks *  RAID chunk size in file system blocks. | ||||
| 
 | ||||
| Data Mode | ||||
| --------- | ||||
|  | ||||
| @ -857,6 +857,45 @@ CPUs. | ||||
| The   "procs_blocked" line gives  the  number of  processes currently blocked, | ||||
| waiting for I/O to complete. | ||||
| 
 | ||||
| 1.9 Ext4 file system parameters | ||||
| ------------------------------ | ||||
| Each ext4 file system has one directory per partition under /proc/fs/ext4/ | ||||
| # ls /proc/fs/ext4/hdc/ | ||||
| group_prealloc  max_to_scan  mb_groups  mb_history  min_to_scan  order2_req | ||||
| stats  stream_req | ||||
| 
 | ||||
| mb_groups: | ||||
| This file gives the details of the multiblock allocator's buddy cache of free blocks. | ||||
| 
 | ||||
| mb_history: | ||||
| Multiblock allocation history. | ||||
| 
 | ||||
| stats: | ||||
| This file indicates whether the multiblock allocator should start collecting | ||||
| statistics. The statistics are shown during unmount. | ||||
| 
 | ||||
| group_prealloc: | ||||
| The multiblock allocator normalizes the block allocation request to | ||||
| group_prealloc filesystem blocks if we don't have a stripe value set. | ||||
| The stripe value can be specified at mount time or during mke2fs. | ||||
| 
 | ||||
| max_to_scan: | ||||
| How long the multiblock allocator can look for the best extent (in found extents). | ||||
| 
 | ||||
| min_to_scan: | ||||
| How long the multiblock allocator must look for the best extent. | ||||
| 
 | ||||
| order2_req: | ||||
| The multiblock allocator uses the 2^N search using buddies only for requests | ||||
| of order greater than or equal to order2_req. The request size is specified in | ||||
| file system blocks. A value of 2 means the buddy search is used only for | ||||
| requests greater than or equal to 4 blocks. | ||||
| 
 | ||||
| stream_req: | ||||
| Files smaller than stream_req are served by the stream allocator, whose | ||||
| purpose is to pack requests as close to each other as possible to | ||||
| produce smooth I/O traffic. A value of 16 indicates that files smaller than 16 | ||||
| filesystem blocks will use group-based preallocation. | ||||
| 
 | ||||
| ------------------------------------------------------------------------------ | ||||
| Summary | ||||
|  | ||||
| @ -6,7 +6,7 @@ obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o | ||||
| 
 | ||||
| ext4dev-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
 | ||||
| 		   ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
 | ||||
| 		   ext4_jbd2.o migrate.o | ||||
| 		   ext4_jbd2.o migrate.o mballoc.o | ||||
| 
 | ||||
| ext4dev-$(CONFIG_EXT4DEV_FS_XATTR)	+= xattr.o xattr_user.o xattr_trusted.o | ||||
| ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL)	+= acl.o | ||||
|  | ||||
| @ -577,6 +577,8 @@ void ext4_discard_reservation(struct inode *inode) | ||||
| 	struct ext4_reserve_window_node *rsv; | ||||
| 	spinlock_t *rsv_lock = &EXT4_SB(inode->i_sb)->s_rsv_window_lock; | ||||
| 
 | ||||
| 	ext4_mb_discard_inode_preallocations(inode); | ||||
| 
 | ||||
| 	if (!block_i) | ||||
| 		return; | ||||
| 
 | ||||
| @ -785,19 +787,29 @@ error_return: | ||||
|  * @inode:		inode | ||||
|  * @block:		start physical block to free | ||||
|  * @count:		number of blocks to count | ||||
|  * @metadata: 		Are these metadata blocks | ||||
|  */ | ||||
| void ext4_free_blocks(handle_t *handle, struct inode *inode, | ||||
| 			ext4_fsblk_t block, unsigned long count) | ||||
| 			ext4_fsblk_t block, unsigned long count, | ||||
| 			int metadata) | ||||
| { | ||||
| 	struct super_block * sb; | ||||
| 	unsigned long dquot_freed_blocks; | ||||
| 
 | ||||
| 	/* this isn't the right place to decide whether block is metadata
 | ||||
| 	 * inode.c/extents.c knows better, but for safety ... */ | ||||
| 	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) || | ||||
| 			ext4_should_journal_data(inode)) | ||||
| 		metadata = 1; | ||||
| 
 | ||||
| 	sb = inode->i_sb; | ||||
| 	if (!sb) { | ||||
| 		printk ("ext4_free_blocks: nonexistent device"); | ||||
| 		return; | ||||
| 	} | ||||
| 	ext4_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); | ||||
| 
 | ||||
| 	if (!test_opt(sb, MBALLOC) || !EXT4_SB(sb)->s_group_info) | ||||
| 		ext4_free_blocks_sb(handle, sb, block, count, | ||||
| 						&dquot_freed_blocks); | ||||
| 	else | ||||
| 		ext4_mb_free_blocks(handle, inode, block, count, | ||||
| 						metadata, &dquot_freed_blocks); | ||||
| 	if (dquot_freed_blocks) | ||||
| 		DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); | ||||
| 	return; | ||||
| @ -1576,7 +1588,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries) | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * ext4_new_blocks() -- core block(s) allocation function | ||||
|  * ext4_new_blocks_old() -- core block(s) allocation function | ||||
|  * @handle:		handle to this transaction | ||||
|  * @inode:		file inode | ||||
|  * @goal:		given target block(filesystem wide) | ||||
| @ -1589,7 +1601,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries) | ||||
|  * any specific goal block. | ||||
|  * | ||||
|  */ | ||||
| ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, | ||||
| ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode, | ||||
| 			ext4_fsblk_t goal, unsigned long *count, int *errp) | ||||
| { | ||||
| 	struct buffer_head *bitmap_bh = NULL; | ||||
| @ -1849,13 +1861,46 @@ out: | ||||
| } | ||||
| 
 | ||||
| ext4_fsblk_t ext4_new_block(handle_t *handle, struct inode *inode, | ||||
| 			ext4_fsblk_t goal, int *errp) | ||||
| 		ext4_fsblk_t goal, int *errp) | ||||
| { | ||||
| 	unsigned long count = 1; | ||||
| 	struct ext4_allocation_request ar; | ||||
| 	ext4_fsblk_t ret; | ||||
| 
 | ||||
| 	return ext4_new_blocks(handle, inode, goal, &count, errp); | ||||
| 	if (!test_opt(inode->i_sb, MBALLOC)) { | ||||
| 		unsigned long count = 1; | ||||
| 		ret = ext4_new_blocks_old(handle, inode, goal, &count, errp); | ||||
| 		return ret; | ||||
| 	} | ||||
| 
 | ||||
| 	memset(&ar, 0, sizeof(ar)); | ||||
| 	ar.inode = inode; | ||||
| 	ar.goal = goal; | ||||
| 	ar.len = 1; | ||||
| 	ret = ext4_mb_new_blocks(handle, &ar, errp); | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, | ||||
| 		ext4_fsblk_t goal, unsigned long *count, int *errp) | ||||
| { | ||||
| 	struct ext4_allocation_request ar; | ||||
| 	ext4_fsblk_t ret; | ||||
| 
 | ||||
| 	if (!test_opt(inode->i_sb, MBALLOC)) { | ||||
| 		ret = ext4_new_blocks_old(handle, inode, goal, count, errp); | ||||
| 		return ret; | ||||
| 	} | ||||
| 
 | ||||
| 	memset(&ar, 0, sizeof(ar)); | ||||
| 	ar.inode = inode; | ||||
| 	ar.goal = goal; | ||||
| 	ar.len = *count; | ||||
| 	ret = ext4_mb_new_blocks(handle, &ar, errp); | ||||
| 	*count = ar.len; | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| /**
 | ||||
|  * ext4_count_free_blocks() -- count filesystem free blocks | ||||
|  * @sb:		superblock | ||||
|  | ||||
| @ -853,7 +853,7 @@ cleanup: | ||||
| 		for (i = 0; i < depth; i++) { | ||||
| 			if (!ablocks[i]) | ||||
| 				continue; | ||||
| 			ext4_free_blocks(handle, inode, ablocks[i], 1); | ||||
| 			ext4_free_blocks(handle, inode, ablocks[i], 1, 1); | ||||
| 		} | ||||
| 	} | ||||
| 	kfree(ablocks); | ||||
| @ -1698,7 +1698,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, | ||||
| 	ext_debug("index is empty, remove it, free block %llu\n", leaf); | ||||
| 	bh = sb_find_get_block(inode->i_sb, leaf); | ||||
| 	ext4_forget(handle, 1, inode, bh, leaf); | ||||
| 	ext4_free_blocks(handle, inode, leaf, 1); | ||||
| 	ext4_free_blocks(handle, inode, leaf, 1, 1); | ||||
| 	return err; | ||||
| } | ||||
| 
 | ||||
| @ -1759,8 +1759,10 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | ||||
| { | ||||
| 	struct buffer_head *bh; | ||||
| 	unsigned short ee_len =  ext4_ext_get_actual_len(ex); | ||||
| 	int i; | ||||
| 	int i, metadata = 0; | ||||
| 
 | ||||
| 	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | ||||
| 		metadata = 1; | ||||
| #ifdef EXTENTS_STATS | ||||
| 	{ | ||||
| 		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||||
| @ -1789,7 +1791,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | ||||
| 			bh = sb_find_get_block(inode->i_sb, start + i); | ||||
| 			ext4_forget(handle, 0, inode, bh, start + i); | ||||
| 		} | ||||
| 		ext4_free_blocks(handle, inode, start, num); | ||||
| 		ext4_free_blocks(handle, inode, start, num, metadata); | ||||
| 	} else if (from == le32_to_cpu(ex->ee_block) | ||||
| 		   && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { | ||||
| 		printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n", | ||||
| @ -2287,6 +2289,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | ||||
| 	ext4_fsblk_t goal, newblock; | ||||
| 	int err = 0, depth, ret; | ||||
| 	unsigned long allocated = 0; | ||||
| 	struct ext4_allocation_request ar; | ||||
| 
 | ||||
| 	__clear_bit(BH_New, &bh_result->b_state); | ||||
| 	ext_debug("blocks %u/%lu requested for inode %u\n", | ||||
| @ -2397,8 +2400,15 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | ||||
| 	if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info)) | ||||
| 		ext4_init_block_alloc_info(inode); | ||||
| 
 | ||||
| 	/* allocate new block */ | ||||
| 	goal = ext4_ext_find_goal(inode, path, iblock); | ||||
| 	/* find neighbour allocated blocks */ | ||||
| 	ar.lleft = iblock; | ||||
| 	err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft); | ||||
| 	if (err) | ||||
| 		goto out2; | ||||
| 	ar.lright = iblock; | ||||
| 	err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright); | ||||
| 	if (err) | ||||
| 		goto out2; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * See if request is beyond maximum number of blocks we can have in | ||||
| @ -2421,7 +2431,18 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | ||||
| 		allocated = le16_to_cpu(newex.ee_len); | ||||
| 	else | ||||
| 		allocated = max_blocks; | ||||
| 	newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err); | ||||
| 
 | ||||
| 	/* allocate new block */ | ||||
| 	ar.inode = inode; | ||||
| 	ar.goal = ext4_ext_find_goal(inode, path, iblock); | ||||
| 	ar.logical = iblock; | ||||
| 	ar.len = allocated; | ||||
| 	if (S_ISREG(inode->i_mode)) | ||||
| 		ar.flags = EXT4_MB_HINT_DATA; | ||||
| 	else | ||||
| 		/* disable in-core preallocation for non-regular files */ | ||||
| 		ar.flags = 0; | ||||
| 	newblock = ext4_mb_new_blocks(handle, &ar, &err); | ||||
| 	if (!newblock) | ||||
| 		goto out2; | ||||
| 	ext_debug("allocate new block: goal %llu, found %llu/%lu\n", | ||||
| @ -2429,14 +2450,17 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | ||||
| 
 | ||||
| 	/* try to insert new extent into found leaf and return */ | ||||
| 	ext4_ext_store_pblock(&newex, newblock); | ||||
| 	newex.ee_len = cpu_to_le16(allocated); | ||||
| 	newex.ee_len = cpu_to_le16(ar.len); | ||||
| 	if (create == EXT4_CREATE_UNINITIALIZED_EXT)  /* Mark uninitialized */ | ||||
| 		ext4_ext_mark_uninitialized(&newex); | ||||
| 	err = ext4_ext_insert_extent(handle, inode, path, &newex); | ||||
| 	if (err) { | ||||
| 		/* free data blocks we just allocated */ | ||||
| 		/* not a good idea to call discard here directly,
 | ||||
| 		 * but otherwise we'd need to call it every free() */ | ||||
| 		ext4_mb_discard_inode_preallocations(inode); | ||||
| 		ext4_free_blocks(handle, inode, ext_pblock(&newex), | ||||
| 					le16_to_cpu(newex.ee_len)); | ||||
| 					le16_to_cpu(newex.ee_len), 0); | ||||
| 		goto out2; | ||||
| 	} | ||||
| 
 | ||||
| @ -2445,6 +2469,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | ||||
| 
 | ||||
| 	/* previous routine could use block we allocated */ | ||||
| 	newblock = ext_pblock(&newex); | ||||
| 	allocated = le16_to_cpu(newex.ee_len); | ||||
| outnew: | ||||
| 	__set_bit(BH_New, &bh_result->b_state); | ||||
| 
 | ||||
| @ -2496,6 +2521,8 @@ void ext4_ext_truncate(struct inode * inode, struct page *page) | ||||
| 	down_write(&EXT4_I(inode)->i_data_sem); | ||||
| 	ext4_ext_invalidate_cache(inode); | ||||
| 
 | ||||
| 	ext4_mb_discard_inode_preallocations(inode); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * TODO: optimization is possible here. | ||||
| 	 * Probably we need not scan at all, | ||||
|  | ||||
| @ -551,7 +551,7 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | ||||
| 	return ret; | ||||
| failed_out: | ||||
| 	for (i = 0; i <index; i++) | ||||
| 		ext4_free_blocks(handle, inode, new_blocks[i], 1); | ||||
| 		ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| @ -650,9 +650,9 @@ failed: | ||||
| 		ext4_journal_forget(handle, branch[i].bh); | ||||
| 	} | ||||
| 	for (i = 0; i <indirect_blks; i++) | ||||
| 		ext4_free_blocks(handle, inode, new_blocks[i], 1); | ||||
| 		ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); | ||||
| 
 | ||||
| 	ext4_free_blocks(handle, inode, new_blocks[i], num); | ||||
| 	ext4_free_blocks(handle, inode, new_blocks[i], num, 0); | ||||
| 
 | ||||
| 	return err; | ||||
| } | ||||
| @ -749,9 +749,10 @@ err_out: | ||||
| 	for (i = 1; i <= num; i++) { | ||||
| 		BUFFER_TRACE(where[i].bh, "call jbd2_journal_forget"); | ||||
| 		ext4_journal_forget(handle, where[i].bh); | ||||
| 		ext4_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1); | ||||
| 		ext4_free_blocks(handle, inode, | ||||
| 					le32_to_cpu(where[i-1].key), 1, 0); | ||||
| 	} | ||||
| 	ext4_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks); | ||||
| 	ext4_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks, 0); | ||||
| 
 | ||||
| 	return err; | ||||
| } | ||||
| @ -2052,7 +2053,7 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	ext4_free_blocks(handle, inode, block_to_free, count); | ||||
| 	ext4_free_blocks(handle, inode, block_to_free, count, 0); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
| @ -2225,7 +2226,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | ||||
| 				ext4_journal_test_restart(handle, inode); | ||||
| 			} | ||||
| 
 | ||||
| 			ext4_free_blocks(handle, inode, nr, 1); | ||||
| 			ext4_free_blocks(handle, inode, nr, 1, 1); | ||||
| 
 | ||||
| 			if (parent_bh) { | ||||
| 				/*
 | ||||
|  | ||||
							
								
								
									
										4552
									
								
								fs/ext4/mballoc.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										4552
									
								
								fs/ext4/mballoc.c
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -236,10 +236,10 @@ static int free_dind_blocks(handle_t *handle, | ||||
| 	for (i = 0; i < max_entries; i++) { | ||||
| 		if (tmp_idata[i]) | ||||
| 			ext4_free_blocks(handle, inode, | ||||
| 					le32_to_cpu(tmp_idata[i]), 1); | ||||
| 					le32_to_cpu(tmp_idata[i]), 1, 1); | ||||
| 	} | ||||
| 	put_bh(bh); | ||||
| 	ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1); | ||||
| 	ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| @ -267,7 +267,7 @@ static int free_tind_blocks(handle_t *handle, | ||||
| 		} | ||||
| 	} | ||||
| 	put_bh(bh); | ||||
| 	ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1); | ||||
| 	ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| @ -278,7 +278,7 @@ static int free_ind_block(handle_t *handle, struct inode *inode) | ||||
| 
 | ||||
| 	if (ei->i_data[EXT4_IND_BLOCK]) | ||||
| 		ext4_free_blocks(handle, inode, | ||||
| 				le32_to_cpu(ei->i_data[EXT4_IND_BLOCK]), 1); | ||||
| 				le32_to_cpu(ei->i_data[EXT4_IND_BLOCK]), 1, 1); | ||||
| 
 | ||||
| 	if (ei->i_data[EXT4_DIND_BLOCK]) { | ||||
| 		retval = free_dind_blocks(handle, inode, | ||||
| @ -365,7 +365,7 @@ static int free_ext_idx(handle_t *handle, struct inode *inode, | ||||
| 		} | ||||
| 	} | ||||
| 	put_bh(bh); | ||||
| 	ext4_free_blocks(handle, inode, block, 1); | ||||
| 	ext4_free_blocks(handle, inode, block, 1, 1); | ||||
| 	return retval; | ||||
| } | ||||
| 
 | ||||
|  | ||||
| @ -503,6 +503,7 @@ static void ext4_put_super (struct super_block * sb) | ||||
| 	struct ext4_super_block *es = sbi->s_es; | ||||
| 	int i; | ||||
| 
 | ||||
| 	ext4_mb_release(sb); | ||||
| 	ext4_ext_release(sb); | ||||
| 	ext4_xattr_put_super(sb); | ||||
| 	jbd2_journal_destroy(sbi->s_journal); | ||||
| @ -569,6 +570,8 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | ||||
| 	ei->i_block_alloc_info = NULL; | ||||
| 	ei->vfs_inode.i_version = 1; | ||||
| 	memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); | ||||
| 	INIT_LIST_HEAD(&ei->i_prealloc_list); | ||||
| 	spin_lock_init(&ei->i_prealloc_lock); | ||||
| 	return &ei->vfs_inode; | ||||
| } | ||||
| 
 | ||||
| @ -881,6 +884,7 @@ enum { | ||||
| 	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, | ||||
| 	Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, | ||||
| 	Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, | ||||
| 	Opt_mballoc, Opt_nomballoc, Opt_stripe, | ||||
| }; | ||||
| 
 | ||||
| static match_table_t tokens = { | ||||
| @ -935,6 +939,9 @@ static match_table_t tokens = { | ||||
| 	{Opt_extents, "extents"}, | ||||
| 	{Opt_noextents, "noextents"}, | ||||
| 	{Opt_i_version, "i_version"}, | ||||
| 	{Opt_mballoc, "mballoc"}, | ||||
| 	{Opt_nomballoc, "nomballoc"}, | ||||
| 	{Opt_stripe, "stripe=%u"}, | ||||
| 	{Opt_err, NULL}, | ||||
| 	{Opt_resize, "resize"}, | ||||
| }; | ||||
| @ -1284,6 +1291,19 @@ clear_qf_name: | ||||
| 			set_opt(sbi->s_mount_opt, I_VERSION); | ||||
| 			sb->s_flags |= MS_I_VERSION; | ||||
| 			break; | ||||
| 		case Opt_mballoc: | ||||
| 			set_opt(sbi->s_mount_opt, MBALLOC); | ||||
| 			break; | ||||
| 		case Opt_nomballoc: | ||||
| 			clear_opt(sbi->s_mount_opt, MBALLOC); | ||||
| 			break; | ||||
| 		case Opt_stripe: | ||||
| 			if (match_int(&args[0], &option)) | ||||
| 				return 0; | ||||
| 			if (option < 0) | ||||
| 				return 0; | ||||
| 			sbi->s_stripe = option; | ||||
| 			break; | ||||
| 		default: | ||||
| 			printk (KERN_ERR | ||||
| 				"EXT4-fs: Unrecognized mount option \"%s\" " | ||||
| @ -1742,6 +1762,34 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb, | ||||
| 	return (has_super + ext4_group_first_block_no(sb, bg)); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * ext4_get_stripe_size: Get the stripe size. | ||||
|  * @sbi: In memory super block info | ||||
|  * | ||||
|  * If we have specified it via mount option, then | ||||
|  * use the mount option value. If the value specified at mount time is | ||||
|  * greater than the blocks per group use the super block value. | ||||
|  * If the super block value is greater than blocks per group return 0. | ||||
|  * Allocator needs it be less than blocks per group. | ||||
|  * | ||||
|  */ | ||||
| static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) | ||||
| { | ||||
| 	unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride); | ||||
| 	unsigned long stripe_width = | ||||
| 			le32_to_cpu(sbi->s_es->s_raid_stripe_width); | ||||
| 
 | ||||
| 	if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) | ||||
| 		return sbi->s_stripe; | ||||
| 
 | ||||
| 	if (stripe_width <= sbi->s_blocks_per_group) | ||||
| 		return stripe_width; | ||||
| 
 | ||||
| 	if (stride <= sbi->s_blocks_per_group) | ||||
| 		return stride; | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static int ext4_fill_super (struct super_block *sb, void *data, int silent) | ||||
| 				__releases(kernel_sem) | ||||
| @ -2091,6 +2139,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) | ||||
| 	sbi->s_rsv_window_head.rsv_goal_size = 0; | ||||
| 	ext4_rsv_window_add(sb, &sbi->s_rsv_window_head); | ||||
| 
 | ||||
| 	sbi->s_stripe = ext4_get_stripe_size(sbi); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * set up enough so that it can read an inode | ||||
| 	 */ | ||||
| @ -2250,6 +2300,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) | ||||
| 		"writeback"); | ||||
| 
 | ||||
| 	ext4_ext_init(sb); | ||||
| 	ext4_mb_init(sb, needs_recovery); | ||||
| 
 | ||||
| 	lock_kernel(); | ||||
| 	return 0; | ||||
| @ -3232,9 +3283,15 @@ static struct file_system_type ext4dev_fs_type = { | ||||
| 
 | ||||
| static int __init init_ext4_fs(void) | ||||
| { | ||||
| 	int err = init_ext4_xattr(); | ||||
| 	int err; | ||||
| 
 | ||||
| 	err = init_ext4_mballoc(); | ||||
| 	if (err) | ||||
| 		return err; | ||||
| 
 | ||||
| 	err = init_ext4_xattr(); | ||||
| 	if (err) | ||||
| 		goto out2; | ||||
| 	err = init_inodecache(); | ||||
| 	if (err) | ||||
| 		goto out1; | ||||
| @ -3246,6 +3303,8 @@ out: | ||||
| 	destroy_inodecache(); | ||||
| out1: | ||||
| 	exit_ext4_xattr(); | ||||
| out2: | ||||
| 	exit_ext4_mballoc(); | ||||
| 	return err; | ||||
| } | ||||
| 
 | ||||
| @ -3254,6 +3313,7 @@ static void __exit exit_ext4_fs(void) | ||||
| 	unregister_filesystem(&ext4dev_fs_type); | ||||
| 	destroy_inodecache(); | ||||
| 	exit_ext4_xattr(); | ||||
| 	exit_ext4_mballoc(); | ||||
| } | ||||
| 
 | ||||
| MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); | ||||
|  | ||||
| @ -480,7 +480,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, | ||||
| 		ea_bdebug(bh, "refcount now=0; freeing"); | ||||
| 		if (ce) | ||||
| 			mb_cache_entry_free(ce); | ||||
| 		ext4_free_blocks(handle, inode, bh->b_blocknr, 1); | ||||
| 		ext4_free_blocks(handle, inode, bh->b_blocknr, 1, 1); | ||||
| 		get_bh(bh); | ||||
| 		ext4_forget(handle, 1, inode, bh, bh->b_blocknr); | ||||
| 	} else { | ||||
| @ -821,7 +821,7 @@ inserted: | ||||
| 			new_bh = sb_getblk(sb, block); | ||||
| 			if (!new_bh) { | ||||
| getblk_failed: | ||||
| 				ext4_free_blocks(handle, inode, block, 1); | ||||
| 				ext4_free_blocks(handle, inode, block, 1, 1); | ||||
| 				error = -EIO; | ||||
| 				goto cleanup; | ||||
| 			} | ||||
|  | ||||
| @ -20,6 +20,8 @@ | ||||
| #include <linux/blkdev.h> | ||||
| #include <linux/magic.h> | ||||
| 
 | ||||
| #include <linux/ext4_fs_i.h> | ||||
| 
 | ||||
| /*
 | ||||
|  * The second extended filesystem constants/structures | ||||
|  */ | ||||
| @ -51,6 +53,50 @@ | ||||
| #define ext4_debug(f, a...)	do {} while (0) | ||||
| #endif | ||||
| 
 | ||||
| #define EXT4_MULTIBLOCK_ALLOCATOR	1 | ||||
| 
 | ||||
| /* prefer goal again. length */ | ||||
| #define EXT4_MB_HINT_MERGE		1 | ||||
| /* blocks already reserved */ | ||||
| #define EXT4_MB_HINT_RESERVED		2 | ||||
| /* metadata is being allocated */ | ||||
| #define EXT4_MB_HINT_METADATA		4 | ||||
| /* first blocks in the file */ | ||||
| #define EXT4_MB_HINT_FIRST		8 | ||||
| /* search for the best chunk */ | ||||
| #define EXT4_MB_HINT_BEST		16 | ||||
| /* data is being allocated */ | ||||
| #define EXT4_MB_HINT_DATA		32 | ||||
| /* don't preallocate (for tails) */ | ||||
| #define EXT4_MB_HINT_NOPREALLOC		64 | ||||
| /* allocate for locality group */ | ||||
| #define EXT4_MB_HINT_GROUP_ALLOC	128 | ||||
| /* allocate goal blocks or none */ | ||||
| #define EXT4_MB_HINT_GOAL_ONLY		256 | ||||
| /* goal is meaningful */ | ||||
| #define EXT4_MB_HINT_TRY_GOAL		512 | ||||
| 
 | ||||
| struct ext4_allocation_request { | ||||
| 	/* target inode for block we're allocating */ | ||||
| 	struct inode *inode; | ||||
| 	/* logical block in target inode */ | ||||
| 	ext4_lblk_t logical; | ||||
| 	/* phys. target (a hint) */ | ||||
| 	ext4_fsblk_t goal; | ||||
| 	/* the closest logical allocated block to the left */ | ||||
| 	ext4_lblk_t lleft; | ||||
| 	/* phys. block for ^^^ */ | ||||
| 	ext4_fsblk_t pleft; | ||||
| 	/* the closest logical allocated block to the right */ | ||||
| 	ext4_lblk_t lright; | ||||
| 	/* phys. block for ^^^ */ | ||||
| 	ext4_fsblk_t pright; | ||||
| 	/* how many blocks we want to allocate */ | ||||
| 	unsigned long len; | ||||
| 	/* flags. see above EXT4_MB_HINT_* */ | ||||
| 	unsigned long flags; | ||||
| }; | ||||
| 
 | ||||
| /*
 | ||||
|  * Special inodes numbers | ||||
|  */ | ||||
| @ -474,6 +520,7 @@ do {									       \ | ||||
| #define EXT4_MOUNT_JOURNAL_CHECKSUM	0x800000 /* Journal checksums */ | ||||
| #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT	0x1000000 /* Journal Async Commit */ | ||||
| #define EXT4_MOUNT_I_VERSION            0x2000000 /* i_version support */ | ||||
| #define EXT4_MOUNT_MBALLOC		0x4000000 /* Buddy allocation support */ | ||||
| /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ | ||||
| #ifndef _LINUX_EXT2_FS_H | ||||
| #define clear_opt(o, opt)		o &= ~EXT4_MOUNT_##opt | ||||
| @ -912,7 +959,7 @@ extern ext4_fsblk_t ext4_new_blocks (handle_t *handle, struct inode *inode, | ||||
| extern ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode, | ||||
| 			ext4_fsblk_t goal, unsigned long *count, int *errp); | ||||
| extern void ext4_free_blocks (handle_t *handle, struct inode *inode, | ||||
| 			ext4_fsblk_t block, unsigned long count); | ||||
| 			ext4_fsblk_t block, unsigned long count, int metadata); | ||||
| extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb, | ||||
| 				 ext4_fsblk_t block, unsigned long count, | ||||
| 				unsigned long *pdquot_freed_blocks); | ||||
| @ -950,6 +997,20 @@ extern unsigned long ext4_count_dirs (struct super_block *); | ||||
| extern void ext4_check_inodes_bitmap (struct super_block *); | ||||
| extern unsigned long ext4_count_free (struct buffer_head *, unsigned); | ||||
| 
 | ||||
| /* mballoc.c */ | ||||
| extern long ext4_mb_stats; | ||||
| extern long ext4_mb_max_to_scan; | ||||
| extern int ext4_mb_init(struct super_block *, int); | ||||
| extern int ext4_mb_release(struct super_block *); | ||||
| extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, | ||||
| 				struct ext4_allocation_request *, int *); | ||||
| extern int ext4_mb_reserve_blocks(struct super_block *, int); | ||||
| extern void ext4_mb_discard_inode_preallocations(struct inode *); | ||||
| extern int __init init_ext4_mballoc(void); | ||||
| extern void exit_ext4_mballoc(void); | ||||
| extern void ext4_mb_free_blocks(handle_t *, struct inode *, | ||||
| 		unsigned long, unsigned long, int, unsigned long *); | ||||
| 
 | ||||
| 
 | ||||
| /* inode.c */ | ||||
| int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, | ||||
| @ -1080,6 +1141,19 @@ static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size) | ||||
| 	raw_inode->i_size_high = cpu_to_le32(i_size >> 32); | ||||
| } | ||||
| 
 | ||||
| static inline | ||||
| struct ext4_group_info *ext4_get_group_info(struct super_block *sb, | ||||
| 							ext4_group_t group) | ||||
| { | ||||
| 	 struct ext4_group_info ***grp_info; | ||||
| 	 long indexv, indexh; | ||||
| 	 grp_info = EXT4_SB(sb)->s_group_info; | ||||
| 	 indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb)); | ||||
| 	 indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1); | ||||
| 	 return grp_info[indexv][indexh]; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| #define ext4_std_error(sb, errno)				\ | ||||
| do {								\ | ||||
| 	if ((errno))						\ | ||||
|  | ||||
| @ -158,6 +158,10 @@ struct ext4_inode_info { | ||||
| 	 * struct timespec i_{a,c,m}time in the generic inode. | ||||
| 	 */ | ||||
| 	struct timespec i_crtime; | ||||
| 
 | ||||
| 	/* mballoc */ | ||||
| 	struct list_head i_prealloc_list; | ||||
| 	spinlock_t i_prealloc_lock; | ||||
| }; | ||||
| 
 | ||||
| #endif	/* _LINUX_EXT4_FS_I */ | ||||
|  | ||||
| @ -91,6 +91,58 @@ struct ext4_sb_info { | ||||
| 	unsigned long s_ext_blocks; | ||||
| 	unsigned long s_ext_extents; | ||||
| #endif | ||||
| 
 | ||||
| 	/* for buddy allocator */ | ||||
| 	struct ext4_group_info ***s_group_info; | ||||
| 	struct inode *s_buddy_cache; | ||||
| 	long s_blocks_reserved; | ||||
| 	spinlock_t s_reserve_lock; | ||||
| 	struct list_head s_active_transaction; | ||||
| 	struct list_head s_closed_transaction; | ||||
| 	struct list_head s_committed_transaction; | ||||
| 	spinlock_t s_md_lock; | ||||
| 	tid_t s_last_transaction; | ||||
| 	unsigned short *s_mb_offsets, *s_mb_maxs; | ||||
| 
 | ||||
| 	/* tunables */ | ||||
| 	unsigned long s_stripe; | ||||
| 	unsigned long s_mb_stream_request; | ||||
| 	unsigned long s_mb_max_to_scan; | ||||
| 	unsigned long s_mb_min_to_scan; | ||||
| 	unsigned long s_mb_stats; | ||||
| 	unsigned long s_mb_order2_reqs; | ||||
| 	unsigned long s_mb_group_prealloc; | ||||
| 	/* where last allocation was done - for stream allocation */ | ||||
| 	unsigned long s_mb_last_group; | ||||
| 	unsigned long s_mb_last_start; | ||||
| 
 | ||||
| 	/* history to debug policy */ | ||||
| 	struct ext4_mb_history *s_mb_history; | ||||
| 	int s_mb_history_cur; | ||||
| 	int s_mb_history_max; | ||||
| 	int s_mb_history_num; | ||||
| 	struct proc_dir_entry *s_mb_proc; | ||||
| 	spinlock_t s_mb_history_lock; | ||||
| 	int s_mb_history_filter; | ||||
| 
 | ||||
| 	/* stats for buddy allocator */ | ||||
| 	spinlock_t s_mb_pa_lock; | ||||
| 	atomic_t s_bal_reqs;	/* number of reqs with len > 1 */ | ||||
| 	atomic_t s_bal_success;	/* we found long enough chunks */ | ||||
| 	atomic_t s_bal_allocated;	/* in blocks */ | ||||
| 	atomic_t s_bal_ex_scanned;	/* total extents scanned */ | ||||
| 	atomic_t s_bal_goals;	/* goal hits */ | ||||
| 	atomic_t s_bal_breaks;	/* too long searches */ | ||||
| 	atomic_t s_bal_2orders;	/* 2^order hits */ | ||||
| 	spinlock_t s_bal_lock; | ||||
| 	unsigned long s_mb_buddies_generated; | ||||
| 	unsigned long long s_mb_generation_time; | ||||
| 	atomic_t s_mb_lost_chunks; | ||||
| 	atomic_t s_mb_preallocated; | ||||
| 	atomic_t s_mb_discarded; | ||||
| 
 | ||||
| 	/* locality groups */ | ||||
| 	struct ext4_locality_group *s_locality_groups; | ||||
| }; | ||||
| 
 | ||||
| #endif	/* _LINUX_EXT4_FS_SB */ | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user