ceph: punch hole support
This patch implements fallocate and punch hole support for Ceph kernel client. Signed-off-by: Li Wang <liwang@ubuntukylin.com> Signed-off-by: Yunchuan Wen <yunchuanwen@ubuntukylin.com>
This commit is contained in:
		
							parent
							
								
									3871cbb9a4
								
							
						
					
					
						commit
						ad7a60de88
					
				
							
								
								
									
										196
									
								
								fs/ceph/file.c
									
									
									
									
									
								
							
							
						
						
									
										196
									
								
								fs/ceph/file.c
									
									
									
									
									
								
							| @ -8,6 +8,7 @@ | ||||
| #include <linux/namei.h> | ||||
| #include <linux/writeback.h> | ||||
| #include <linux/aio.h> | ||||
| #include <linux/falloc.h> | ||||
| 
 | ||||
| #include "super.h" | ||||
| #include "mds_client.h" | ||||
| @ -874,6 +875,200 @@ out: | ||||
| 	return offset; | ||||
| } | ||||
| 
 | ||||
| static inline void ceph_zero_partial_page( | ||||
| 	struct inode *inode, loff_t offset, unsigned size) | ||||
| { | ||||
| 	struct page *page; | ||||
| 	pgoff_t index = offset >> PAGE_CACHE_SHIFT; | ||||
| 
 | ||||
| 	page = find_lock_page(inode->i_mapping, index); | ||||
| 	if (page) { | ||||
| 		wait_on_page_writeback(page); | ||||
| 		zero_user(page, offset & (PAGE_CACHE_SIZE - 1), size); | ||||
| 		unlock_page(page); | ||||
| 		page_cache_release(page); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static void ceph_zero_pagecache_range(struct inode *inode, loff_t offset, | ||||
| 				      loff_t length) | ||||
| { | ||||
| 	loff_t nearly = round_up(offset, PAGE_CACHE_SIZE); | ||||
| 	if (offset < nearly) { | ||||
| 		loff_t size = nearly - offset; | ||||
| 		if (length < size) | ||||
| 			size = length; | ||||
| 		ceph_zero_partial_page(inode, offset, size); | ||||
| 		offset += size; | ||||
| 		length -= size; | ||||
| 	} | ||||
| 	if (length >= PAGE_CACHE_SIZE) { | ||||
| 		loff_t size = round_down(length, PAGE_CACHE_SIZE); | ||||
| 		truncate_pagecache_range(inode, offset, offset + size - 1); | ||||
| 		offset += size; | ||||
| 		length -= size; | ||||
| 	} | ||||
| 	if (length) | ||||
| 		ceph_zero_partial_page(inode, offset, length); | ||||
| } | ||||
| 
 | ||||
| static int ceph_zero_partial_object(struct inode *inode, | ||||
| 				    loff_t offset, loff_t *length) | ||||
| { | ||||
| 	struct ceph_inode_info *ci = ceph_inode(inode); | ||||
| 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode); | ||||
| 	struct ceph_osd_request *req; | ||||
| 	int ret = 0; | ||||
| 	loff_t zero = 0; | ||||
| 	int op; | ||||
| 
 | ||||
| 	if (!length) { | ||||
| 		op = offset ? CEPH_OSD_OP_DELETE : CEPH_OSD_OP_TRUNCATE; | ||||
| 		length = &zero; | ||||
| 	} else { | ||||
| 		op = CEPH_OSD_OP_ZERO; | ||||
| 	} | ||||
| 
 | ||||
| 	req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, | ||||
| 					ceph_vino(inode), | ||||
| 					offset, length, | ||||
| 					1, op, | ||||
| 					CEPH_OSD_FLAG_WRITE | | ||||
| 					CEPH_OSD_FLAG_ONDISK, | ||||
| 					NULL, 0, 0, false); | ||||
| 	if (IS_ERR(req)) { | ||||
| 		ret = PTR_ERR(req); | ||||
| 		goto out; | ||||
| 	} | ||||
| 
 | ||||
| 	ceph_osdc_build_request(req, offset, NULL, ceph_vino(inode).snap, | ||||
| 				&inode->i_mtime); | ||||
| 
 | ||||
| 	ret = ceph_osdc_start_request(&fsc->client->osdc, req, false); | ||||
| 	if (!ret) { | ||||
| 		ret = ceph_osdc_wait_request(&fsc->client->osdc, req); | ||||
| 		if (ret == -ENOENT) | ||||
| 			ret = 0; | ||||
| 	} | ||||
| 	ceph_osdc_put_request(req); | ||||
| 
 | ||||
| out: | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| static int ceph_zero_objects(struct inode *inode, loff_t offset, loff_t length) | ||||
| { | ||||
| 	int ret = 0; | ||||
| 	struct ceph_inode_info *ci = ceph_inode(inode); | ||||
| 	__s32 stripe_unit = ceph_file_layout_su(ci->i_layout); | ||||
| 	__s32 stripe_count = ceph_file_layout_stripe_count(ci->i_layout); | ||||
| 	__s32 object_size = ceph_file_layout_object_size(ci->i_layout); | ||||
| 	loff_t object_set_size = (loff_t)object_size * stripe_count; | ||||
| 
 | ||||
| 	loff_t nearly = (offset + object_set_size - 1) | ||||
| 			/ object_set_size * object_set_size; | ||||
| 	while (length && offset < nearly) { | ||||
| 		loff_t size = length; | ||||
| 		ret = ceph_zero_partial_object(inode, offset, &size); | ||||
| 		if (ret < 0) | ||||
| 			return ret; | ||||
| 		offset += size; | ||||
| 		length -= size; | ||||
| 	} | ||||
| 	while (length >= object_set_size) { | ||||
| 		int i; | ||||
| 		loff_t pos = offset; | ||||
| 		for (i = 0; i < stripe_count; ++i) { | ||||
| 			ret = ceph_zero_partial_object(inode, pos, NULL); | ||||
| 			if (ret < 0) | ||||
| 				return ret; | ||||
| 			pos += stripe_unit; | ||||
| 		} | ||||
| 		offset += object_set_size; | ||||
| 		length -= object_set_size; | ||||
| 	} | ||||
| 	while (length) { | ||||
| 		loff_t size = length; | ||||
| 		ret = ceph_zero_partial_object(inode, offset, &size); | ||||
| 		if (ret < 0) | ||||
| 			return ret; | ||||
| 		offset += size; | ||||
| 		length -= size; | ||||
| 	} | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| static long ceph_fallocate(struct file *file, int mode, | ||||
| 				loff_t offset, loff_t length) | ||||
| { | ||||
| 	struct ceph_file_info *fi = file->private_data; | ||||
| 	struct inode *inode = file->f_dentry->d_inode; | ||||
| 	struct ceph_inode_info *ci = ceph_inode(inode); | ||||
| 	struct ceph_osd_client *osdc = | ||||
| 		&ceph_inode_to_client(inode)->client->osdc; | ||||
| 	int want, got = 0; | ||||
| 	int dirty; | ||||
| 	int ret = 0; | ||||
| 	loff_t endoff = 0; | ||||
| 	loff_t size; | ||||
| 
 | ||||
| 	if (!S_ISREG(inode->i_mode)) | ||||
| 		return -EOPNOTSUPP; | ||||
| 
 | ||||
| 	if (IS_SWAPFILE(inode)) | ||||
| 		return -ETXTBSY; | ||||
| 
 | ||||
| 	mutex_lock(&inode->i_mutex); | ||||
| 
 | ||||
| 	if (ceph_snap(inode) != CEPH_NOSNAP) { | ||||
| 		ret = -EROFS; | ||||
| 		goto unlock; | ||||
| 	} | ||||
| 
 | ||||
| 	if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL) && | ||||
| 		!(mode & FALLOC_FL_PUNCH_HOLE)) { | ||||
| 		ret = -ENOSPC; | ||||
| 		goto unlock; | ||||
| 	} | ||||
| 
 | ||||
| 	size = i_size_read(inode); | ||||
| 	if (!(mode & FALLOC_FL_KEEP_SIZE)) | ||||
| 		endoff = offset + length; | ||||
| 
 | ||||
| 	if (fi->fmode & CEPH_FILE_MODE_LAZY) | ||||
| 		want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO; | ||||
| 	else | ||||
| 		want = CEPH_CAP_FILE_BUFFER; | ||||
| 
 | ||||
| 	ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff); | ||||
| 	if (ret < 0) | ||||
| 		goto unlock; | ||||
| 
 | ||||
| 	if (mode & FALLOC_FL_PUNCH_HOLE) { | ||||
| 		if (offset < size) | ||||
| 			ceph_zero_pagecache_range(inode, offset, length); | ||||
| 		ret = ceph_zero_objects(inode, offset, length); | ||||
| 	} else if (endoff > size) { | ||||
| 		truncate_pagecache_range(inode, size, -1); | ||||
| 		if (ceph_inode_set_size(inode, endoff)) | ||||
| 			ceph_check_caps(ceph_inode(inode), | ||||
| 				CHECK_CAPS_AUTHONLY, NULL); | ||||
| 	} | ||||
| 
 | ||||
| 	if (!ret) { | ||||
| 		spin_lock(&ci->i_ceph_lock); | ||||
| 		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); | ||||
| 		spin_unlock(&ci->i_ceph_lock); | ||||
| 		if (dirty) | ||||
| 			__mark_inode_dirty(inode, dirty); | ||||
| 	} | ||||
| 
 | ||||
| 	ceph_put_cap_refs(ci, got); | ||||
| unlock: | ||||
| 	mutex_unlock(&inode->i_mutex); | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| const struct file_operations ceph_file_fops = { | ||||
| 	.open = ceph_open, | ||||
| 	.release = ceph_release, | ||||
| @ -890,5 +1085,6 @@ const struct file_operations ceph_file_fops = { | ||||
| 	.splice_write = generic_file_splice_write, | ||||
| 	.unlocked_ioctl = ceph_ioctl, | ||||
| 	.compat_ioctl	= ceph_ioctl, | ||||
| 	.fallocate	= ceph_fallocate, | ||||
| }; | ||||
| 
 | ||||
|  | ||||
| @ -503,7 +503,9 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req, | ||||
| 	struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); | ||||
| 	size_t payload_len = 0; | ||||
| 
 | ||||
| 	BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE); | ||||
| 	BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE && | ||||
| 	       opcode != CEPH_OSD_OP_DELETE && opcode != CEPH_OSD_OP_ZERO && | ||||
| 	       opcode != CEPH_OSD_OP_TRUNCATE); | ||||
| 
 | ||||
| 	op->extent.offset = offset; | ||||
| 	op->extent.length = length; | ||||
| @ -631,6 +633,9 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req, | ||||
| 		break; | ||||
| 	case CEPH_OSD_OP_READ: | ||||
| 	case CEPH_OSD_OP_WRITE: | ||||
| 	case CEPH_OSD_OP_ZERO: | ||||
| 	case CEPH_OSD_OP_DELETE: | ||||
| 	case CEPH_OSD_OP_TRUNCATE: | ||||
| 		if (src->op == CEPH_OSD_OP_WRITE) | ||||
| 			request_data_len = src->extent.length; | ||||
| 		dst->extent.offset = cpu_to_le64(src->extent.offset); | ||||
| @ -715,7 +720,9 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | ||||
| 	u64 object_base; | ||||
| 	int r; | ||||
| 
 | ||||
| 	BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE); | ||||
| 	BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE && | ||||
| 	       opcode != CEPH_OSD_OP_DELETE && opcode != CEPH_OSD_OP_ZERO && | ||||
| 	       opcode != CEPH_OSD_OP_TRUNCATE); | ||||
| 
 | ||||
| 	req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool, | ||||
| 					GFP_NOFS); | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user