mirror of
https://github.com/torvalds/linux.git
synced 2024-11-18 10:01:43 +00:00
bc07c10a36
There are at least 3 advantages to use direct I/O and AIO on read/write loop's backing file: 1) double cache can be avoided, then memory usage gets decreased a lot 2) not like user space direct I/O, there isn't cost of pinning pages 3) avoid context switch for obtaining good throughput - in buffered file read, random I/O top throughput is often obtained only if they are submitted concurrently from lots of tasks; but for sequential I/O, most of times they can be hit from page cache, so concurrent submissions often introduce unnecessary context switch and can't improve throughput much. There was such discussion[1] to use non-blocking I/O to improve the problem for application. - with direct I/O and AIO, concurrent submissions can be avoided and random read throughput can't be affected meantime xfstests(-g auto, ext4) is basically passed when running with direct I/O(aio), one exception is generic/232, but it failed in loop buffered I/O(4.2-rc6-next-20150814) too. Follows the fio test result for performance purpose: 4 jobs fio test inside ext4 file system over loop block 1) How to run - KVM: 4 VCPUs, 2G RAM - linux kernel: 4.2-rc6-next-20150814(base) with the patchset - the loop block is over one image on SSD. - linux psync, 4 jobs, size 1500M, ext4 over loop block - test result: IOPS from fio output 2) Throughput(IOPS) becomes a bit better with direct I/O(aio) ------------------------------------------------------------- test cases |randread |read |randwrite |write | ------------------------------------------------------------- base |8015 |113811 |67442 |106978 ------------------------------------------------------------- base+loop aio |8136 |125040 |67811 |111376 ------------------------------------------------------------- - somehow, it should be caused by more page cache avaiable for application or one extra page copy is avoided in case of direct I/O 3) context switch - context switch decreased by ~50% with loop direct I/O(aio) compared with loop buffered I/O(4.2-rc6-next-20150814) 4) memory usage from /proc/meminfo ------------------------------------------------------------- | Buffers | Cached ------------------------------------------------------------- base | > 760MB | ~950MB ------------------------------------------------------------- base+loop direct I/O(aio) | < 5MB | ~1.6GB ------------------------------------------------------------- - so there are much more page caches available for application with direct I/O [1] https://lwn.net/Articles/612483/ Signed-off-by: Ming Lei <ming.lei@canonical.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Jens Axboe <axboe@fb.com>
94 lines
2.3 KiB
C
94 lines
2.3 KiB
C
/*
|
|
* loop.h
|
|
*
|
|
* Written by Theodore Ts'o, 3/29/93.
|
|
*
|
|
* Copyright 1993 by Theodore Ts'o. Redistribution of this file is
|
|
* permitted under the GNU General Public License.
|
|
*/
|
|
#ifndef _LINUX_LOOP_H
|
|
#define _LINUX_LOOP_H
|
|
|
|
#include <linux/bio.h>
|
|
#include <linux/blkdev.h>
|
|
#include <linux/blk-mq.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/mutex.h>
|
|
#include <linux/kthread.h>
|
|
#include <uapi/linux/loop.h>
|
|
|
|
/* Possible states of device */
|
|
enum {
|
|
Lo_unbound,
|
|
Lo_bound,
|
|
Lo_rundown,
|
|
};
|
|
|
|
struct loop_func_table;
|
|
|
|
struct loop_device {
|
|
int lo_number;
|
|
atomic_t lo_refcnt;
|
|
loff_t lo_offset;
|
|
loff_t lo_sizelimit;
|
|
int lo_flags;
|
|
int (*transfer)(struct loop_device *, int cmd,
|
|
struct page *raw_page, unsigned raw_off,
|
|
struct page *loop_page, unsigned loop_off,
|
|
int size, sector_t real_block);
|
|
char lo_file_name[LO_NAME_SIZE];
|
|
char lo_crypt_name[LO_NAME_SIZE];
|
|
char lo_encrypt_key[LO_KEY_SIZE];
|
|
int lo_encrypt_key_size;
|
|
struct loop_func_table *lo_encryption;
|
|
__u32 lo_init[2];
|
|
kuid_t lo_key_owner; /* Who set the key */
|
|
int (*ioctl)(struct loop_device *, int cmd,
|
|
unsigned long arg);
|
|
|
|
struct file * lo_backing_file;
|
|
struct block_device *lo_device;
|
|
unsigned lo_blocksize;
|
|
void *key_data;
|
|
|
|
gfp_t old_gfp_mask;
|
|
|
|
spinlock_t lo_lock;
|
|
int lo_state;
|
|
struct mutex lo_ctl_mutex;
|
|
struct kthread_worker worker;
|
|
struct task_struct *worker_task;
|
|
bool use_dio;
|
|
|
|
struct request_queue *lo_queue;
|
|
struct blk_mq_tag_set tag_set;
|
|
struct gendisk *lo_disk;
|
|
};
|
|
|
|
struct loop_cmd {
|
|
struct kthread_work work;
|
|
struct request *rq;
|
|
struct list_head list;
|
|
bool use_aio; /* use AIO interface to handle I/O */
|
|
struct kiocb iocb;
|
|
};
|
|
|
|
/* Support for loadable transfer modules */
|
|
struct loop_func_table {
|
|
int number; /* filter type */
|
|
int (*transfer)(struct loop_device *lo, int cmd,
|
|
struct page *raw_page, unsigned raw_off,
|
|
struct page *loop_page, unsigned loop_off,
|
|
int size, sector_t real_block);
|
|
int (*init)(struct loop_device *, const struct loop_info64 *);
|
|
/* release is called from loop_unregister_transfer or clr_fd */
|
|
int (*release)(struct loop_device *);
|
|
int (*ioctl)(struct loop_device *, int cmd, unsigned long arg);
|
|
struct module *owner;
|
|
};
|
|
|
|
int loop_register_transfer(struct loop_func_table *funcs);
|
|
int loop_unregister_transfer(int number);
|
|
|
|
#endif
|