mirror of
https://github.com/torvalds/linux.git
synced 2024-11-10 22:21:40 +00:00
- virtio_pmem: The new virtio_pmem facility introduces a paravirtualized
persistent memory device that allows a guest VM to use DAX mechanisms to access a host-file with host-page-cache. It arranges for MAP_SYNC to be disabled and instead triggers a host fsync() when a 'write-cache flush' command is sent to the virtual disk device. - Miscellaneous small fixups. -----BEGIN PGP SIGNATURE----- iQIcBAABAgAGBQJdMHwpAAoJEB7SkWpmfYgCUYoP/3vcgYBAaXNksyALF0iowPoP z4J0KoaOA1CzRFEQtCWUQa84CWj+XoSewwSeyrIkqKQvx/gghXblK+GVjVzBn0BD hmmiKr8af4DdxfzYdEXJp65cCpIiVMaJiGr20Aj9ObwvWJb4QZbz9q7hnPt6KgiI jVND3BpP3OERb4ZFcibdmJT5foKooMcXVG6+luVe+hc1+ZZQxJBsBaqie4brQIFq j59NX3HfHH2fr1vVwnVH0CO4tgbgYg9wZ2EivGu6wBWvORjrr7KiSSbOYP68EBtd lUoNps+vQtGnfXGwNzAjp1wuknrQYYh4/KMKjep7hiZD39rgyvBpbHbyynKzQCWV REe8cXr/nwphsENvBAUBiqY999EWVIxdT2iaVaSA6K/31JQAC5AFyxVK/P2Ke1SK rvePZ++iLQ1o4phTxQPNlVUqF9jOrFVVICGwMDqaqSkOsD9YKQdFClfOF/1ntlDz V0bs+Y0Pe8AJCd9ESep4X+vHAWRRIb4EQIuwLaX8RJoY+r1fGye9RPthpYYzvXKp DI2iJztFO3anzj2i9htNPUFIaiUmIhzEvG32O2If2yc5FL02hMpHPoFx6vHhe6s3 f8OJ+olsJK+/IIrV8+DHqYvhzylOYIhmRTvIxIxaNDPHkhR1i2RDQ6KKK1YZmsr8 MjAZ+Ym0GadDivs+wcM6 =uAMG -----END PGP SIGNATURE----- Merge tag 'libnvdimm-for-5.3' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm Pull libnvdimm updates from Dan Williams: "Primarily just the virtio_pmem driver: - virtio_pmem The new virtio_pmem facility introduces a paravirtualized persistent memory device that allows a guest VM to use DAX mechanisms to access a host-file with host-page-cache. It arranges for MAP_SYNC to be disabled and instead triggers a host fsync() when a 'write-cache flush' command is sent to the virtual disk device. 
- Miscellaneous small fixups" * tag 'libnvdimm-for-5.3' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: virtio_pmem: fix sparse warning xfs: disable map_sync for async flush ext4: disable map_sync for async flush dax: check synchronous mapping is supported dm: enable synchronous dax libnvdimm: add dax_dev sync flag virtio-pmem: Add virtio pmem driver libnvdimm: nd_region flush callback support libnvdimm, namespace: Drop uuid_t implementation detail
This commit is contained in:
commit
f8c3500cd1
@ -2426,7 +2426,7 @@ static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
|
||||
offset = to_interleave_offset(offset, mmio);
|
||||
|
||||
writeq(cmd, mmio->addr.base + offset);
|
||||
nvdimm_flush(nfit_blk->nd_region);
|
||||
nvdimm_flush(nfit_blk->nd_region, NULL);
|
||||
|
||||
if (nfit_blk->dimm_flags & NFIT_BLK_DCR_LATCH)
|
||||
readq(mmio->addr.base + offset);
|
||||
@ -2475,7 +2475,7 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
|
||||
}
|
||||
|
||||
if (rw)
|
||||
nvdimm_flush(nfit_blk->nd_region);
|
||||
nvdimm_flush(nfit_blk->nd_region, NULL);
|
||||
|
||||
rc = read_blk_stat(nfit_blk, lane) ? -EIO : 0;
|
||||
return rc;
|
||||
|
@ -388,7 +388,7 @@ struct dev_dax *__devm_create_dev_dax(struct dax_region *dax_region, int id,
|
||||
* No 'host' or dax_operations since there is no access to this
|
||||
* device outside of mmap of the resulting character device.
|
||||
*/
|
||||
dax_dev = alloc_dax(dev_dax, NULL, NULL);
|
||||
dax_dev = alloc_dax(dev_dax, NULL, NULL, DAXDEV_F_SYNC);
|
||||
if (!dax_dev)
|
||||
goto err;
|
||||
|
||||
|
@ -195,6 +195,8 @@ enum dax_device_flags {
|
||||
DAXDEV_ALIVE,
|
||||
/* gate whether dax_flush() calls the low level flush routine */
|
||||
DAXDEV_WRITE_CACHE,
|
||||
/* flag to check if device supports synchronous flush */
|
||||
DAXDEV_SYNC,
|
||||
};
|
||||
|
||||
/**
|
||||
@ -372,6 +374,18 @@ bool dax_write_cache_enabled(struct dax_device *dax_dev)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_write_cache_enabled);
|
||||
|
||||
bool __dax_synchronous(struct dax_device *dax_dev)
|
||||
{
|
||||
return test_bit(DAXDEV_SYNC, &dax_dev->flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__dax_synchronous);
|
||||
|
||||
void __set_dax_synchronous(struct dax_device *dax_dev)
|
||||
{
|
||||
set_bit(DAXDEV_SYNC, &dax_dev->flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__set_dax_synchronous);
|
||||
|
||||
bool dax_alive(struct dax_device *dax_dev)
|
||||
{
|
||||
lockdep_assert_held(&dax_srcu);
|
||||
@ -526,7 +540,7 @@ static void dax_add_host(struct dax_device *dax_dev, const char *host)
|
||||
}
|
||||
|
||||
struct dax_device *alloc_dax(void *private, const char *__host,
|
||||
const struct dax_operations *ops)
|
||||
const struct dax_operations *ops, unsigned long flags)
|
||||
{
|
||||
struct dax_device *dax_dev;
|
||||
const char *host;
|
||||
@ -549,6 +563,9 @@ struct dax_device *alloc_dax(void *private, const char *__host,
|
||||
dax_add_host(dax_dev, host);
|
||||
dax_dev->ops = ops;
|
||||
dax_dev->private = private;
|
||||
if (flags & DAXDEV_F_SYNC)
|
||||
set_dax_synchronous(dax_dev);
|
||||
|
||||
return dax_dev;
|
||||
|
||||
err_dev:
|
||||
|
@ -881,7 +881,7 @@ void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type)
|
||||
EXPORT_SYMBOL_GPL(dm_table_set_type);
|
||||
|
||||
/* validate the dax capability of the target device span */
|
||||
static int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
|
||||
int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
|
||||
sector_t start, sector_t len, void *data)
|
||||
{
|
||||
int blocksize = *(int *) data;
|
||||
@ -890,7 +890,15 @@ static int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
|
||||
start, len);
|
||||
}
|
||||
|
||||
bool dm_table_supports_dax(struct dm_table *t, int blocksize)
|
||||
/* Check devices support synchronous DAX */
|
||||
/*
 * iterate_devices callout: returns the result of dax_synchronous() for the
 * dax device attached to @dev.  @ti, @start, @len and @data are not used.
 */
static int device_synchronous(struct dm_target *ti, struct dm_dev *dev,
			      sector_t start, sector_t len, void *data)
{
	struct dax_device *target_dax = dev->dax_dev;

	return dax_synchronous(target_dax);
}
|
||||
|
||||
bool dm_table_supports_dax(struct dm_table *t,
|
||||
iterate_devices_callout_fn iterate_fn, int *blocksize)
|
||||
{
|
||||
struct dm_target *ti;
|
||||
unsigned i;
|
||||
@ -903,8 +911,7 @@ bool dm_table_supports_dax(struct dm_table *t, int blocksize)
|
||||
return false;
|
||||
|
||||
if (!ti->type->iterate_devices ||
|
||||
!ti->type->iterate_devices(ti, device_supports_dax,
|
||||
&blocksize))
|
||||
!ti->type->iterate_devices(ti, iterate_fn, blocksize))
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -940,6 +947,7 @@ static int dm_table_determine_type(struct dm_table *t)
|
||||
struct dm_target *tgt;
|
||||
struct list_head *devices = dm_table_get_devices(t);
|
||||
enum dm_queue_mode live_md_type = dm_get_md_type(t->md);
|
||||
int page_size = PAGE_SIZE;
|
||||
|
||||
if (t->type != DM_TYPE_NONE) {
|
||||
/* target already set the table's type */
|
||||
@ -984,7 +992,7 @@ static int dm_table_determine_type(struct dm_table *t)
|
||||
verify_bio_based:
|
||||
/* We must use this table as bio-based */
|
||||
t->type = DM_TYPE_BIO_BASED;
|
||||
if (dm_table_supports_dax(t, PAGE_SIZE) ||
|
||||
if (dm_table_supports_dax(t, device_supports_dax, &page_size) ||
|
||||
(list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) {
|
||||
t->type = DM_TYPE_DAX_BIO_BASED;
|
||||
} else {
|
||||
@ -1883,6 +1891,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
|
||||
struct queue_limits *limits)
|
||||
{
|
||||
bool wc = false, fua = false;
|
||||
int page_size = PAGE_SIZE;
|
||||
|
||||
/*
|
||||
* Copy table's limits to the DM device's request_queue
|
||||
@ -1910,8 +1919,11 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
|
||||
}
|
||||
blk_queue_write_cache(q, wc, fua);
|
||||
|
||||
if (dm_table_supports_dax(t, PAGE_SIZE))
|
||||
if (dm_table_supports_dax(t, device_supports_dax, &page_size)) {
|
||||
blk_queue_flag_set(QUEUE_FLAG_DAX, q);
|
||||
if (dm_table_supports_dax(t, device_synchronous, NULL))
|
||||
set_dax_synchronous(t->md->dax_dev);
|
||||
}
|
||||
else
|
||||
blk_queue_flag_clear(QUEUE_FLAG_DAX, q);
|
||||
|
||||
|
@ -1117,7 +1117,7 @@ static bool dm_dax_supported(struct dax_device *dax_dev, struct block_device *bd
|
||||
if (!map)
|
||||
return false;
|
||||
|
||||
ret = dm_table_supports_dax(map, blocksize);
|
||||
ret = dm_table_supports_dax(map, device_supports_dax, &blocksize);
|
||||
|
||||
dm_put_live_table(md, srcu_idx);
|
||||
|
||||
@ -1989,7 +1989,8 @@ static struct mapped_device *alloc_dev(int minor)
|
||||
sprintf(md->disk->disk_name, "dm-%d", minor);
|
||||
|
||||
if (IS_ENABLED(CONFIG_DAX_DRIVER)) {
|
||||
md->dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops);
|
||||
md->dax_dev = alloc_dax(md, md->disk->disk_name,
|
||||
&dm_dax_ops, 0);
|
||||
if (!md->dax_dev)
|
||||
goto bad;
|
||||
}
|
||||
|
@ -72,7 +72,10 @@ bool dm_table_bio_based(struct dm_table *t);
|
||||
bool dm_table_request_based(struct dm_table *t);
|
||||
void dm_table_free_md_mempools(struct dm_table *t);
|
||||
struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
|
||||
bool dm_table_supports_dax(struct dm_table *t, int blocksize);
|
||||
bool dm_table_supports_dax(struct dm_table *t, iterate_devices_callout_fn fn,
|
||||
int *blocksize);
|
||||
int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
|
||||
sector_t start, sector_t len, void *data);
|
||||
|
||||
void dm_lock_md_type(struct mapped_device *md);
|
||||
void dm_unlock_md_type(struct mapped_device *md);
|
||||
|
@ -5,6 +5,7 @@ obj-$(CONFIG_ND_BTT) += nd_btt.o
|
||||
obj-$(CONFIG_ND_BLK) += nd_blk.o
|
||||
obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o
|
||||
obj-$(CONFIG_OF_PMEM) += of_pmem.o
|
||||
obj-$(CONFIG_VIRTIO_PMEM) += virtio_pmem.o nd_virtio.o
|
||||
|
||||
nd_pmem-y := pmem.o
|
||||
|
||||
|
@ -255,7 +255,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
|
||||
struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
|
||||
unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512);
|
||||
sector_t sector = offset >> 9;
|
||||
int rc = 0;
|
||||
int rc = 0, ret = 0;
|
||||
|
||||
if (unlikely(!size))
|
||||
return 0;
|
||||
@ -293,7 +293,9 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
|
||||
}
|
||||
|
||||
memcpy_flushcache(nsio->addr + offset, buf, size);
|
||||
nvdimm_flush(to_nd_region(ndns->dev.parent));
|
||||
ret = nvdimm_flush(to_nd_region(ndns->dev.parent), NULL);
|
||||
if (ret)
|
||||
rc = ret;
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
@ -1822,8 +1822,8 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
|
||||
&& !guid_equal(&nd_set->type_guid,
|
||||
&nd_label->type_guid)) {
|
||||
dev_dbg(ndd->dev, "expect type_guid %pUb got %pUb\n",
|
||||
nd_set->type_guid.b,
|
||||
nd_label->type_guid.b);
|
||||
&nd_set->type_guid,
|
||||
&nd_label->type_guid);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -2227,8 +2227,8 @@ static struct device *create_namespace_blk(struct nd_region *nd_region,
|
||||
if (namespace_label_has(ndd, type_guid)) {
|
||||
if (!guid_equal(&nd_set->type_guid, &nd_label->type_guid)) {
|
||||
dev_dbg(ndd->dev, "expect type_guid %pUb got %pUb\n",
|
||||
nd_set->type_guid.b,
|
||||
nd_label->type_guid.b);
|
||||
&nd_set->type_guid,
|
||||
&nd_label->type_guid);
|
||||
return ERR_PTR(-EAGAIN);
|
||||
}
|
||||
|
||||
|
@ -155,6 +155,7 @@ struct nd_region {
|
||||
struct badblocks bb;
|
||||
struct nd_interleave_set *nd_set;
|
||||
struct nd_percpu_lane __percpu *lane;
|
||||
int (*flush)(struct nd_region *nd_region, struct bio *bio);
|
||||
struct nd_mapping mapping[0];
|
||||
};
|
||||
|
||||
|
125
drivers/nvdimm/nd_virtio.c
Normal file
125
drivers/nvdimm/nd_virtio.c
Normal file
@ -0,0 +1,125 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* nd_virtio.c: Virtio pmem Driver
|
||||
*
|
||||
* Discovers persistent memory range information
|
||||
* from host and provides a virtio based flushing
|
||||
* interface.
|
||||
*/
|
||||
#include "virtio_pmem.h"
|
||||
#include "nd.h"
|
||||
|
||||
/* The interrupt handler */
|
||||
void virtio_pmem_host_ack(struct virtqueue *vq)
|
||||
{
|
||||
struct virtio_pmem *vpmem = vq->vdev->priv;
|
||||
struct virtio_pmem_request *req_data, *req_buf;
|
||||
unsigned long flags;
|
||||
unsigned int len;
|
||||
|
||||
spin_lock_irqsave(&vpmem->pmem_lock, flags);
|
||||
while ((req_data = virtqueue_get_buf(vq, &len)) != NULL) {
|
||||
req_data->done = true;
|
||||
wake_up(&req_data->host_acked);
|
||||
|
||||
if (!list_empty(&vpmem->req_list)) {
|
||||
req_buf = list_first_entry(&vpmem->req_list,
|
||||
struct virtio_pmem_request, list);
|
||||
req_buf->wq_buf_avail = true;
|
||||
wake_up(&req_buf->wq_buf);
|
||||
list_del(&req_buf->list);
|
||||
}
|
||||
}
|
||||
spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(virtio_pmem_host_ack);
|
||||
|
||||
/* The request submission function */
|
||||
static int virtio_pmem_flush(struct nd_region *nd_region)
|
||||
{
|
||||
struct virtio_device *vdev = nd_region->provider_data;
|
||||
struct virtio_pmem *vpmem = vdev->priv;
|
||||
struct virtio_pmem_request *req_data;
|
||||
struct scatterlist *sgs[2], sg, ret;
|
||||
unsigned long flags;
|
||||
int err, err1;
|
||||
|
||||
might_sleep();
|
||||
req_data = kmalloc(sizeof(*req_data), GFP_KERNEL);
|
||||
if (!req_data)
|
||||
return -ENOMEM;
|
||||
|
||||
req_data->done = false;
|
||||
init_waitqueue_head(&req_data->host_acked);
|
||||
init_waitqueue_head(&req_data->wq_buf);
|
||||
INIT_LIST_HEAD(&req_data->list);
|
||||
req_data->req.type = cpu_to_le32(VIRTIO_PMEM_REQ_TYPE_FLUSH);
|
||||
sg_init_one(&sg, &req_data->req, sizeof(req_data->req));
|
||||
sgs[0] = &sg;
|
||||
sg_init_one(&ret, &req_data->resp.ret, sizeof(req_data->resp));
|
||||
sgs[1] = &ret;
|
||||
|
||||
spin_lock_irqsave(&vpmem->pmem_lock, flags);
|
||||
/*
|
||||
* If virtqueue_add_sgs returns -ENOSPC then req_vq virtual
|
||||
* queue does not have free descriptor. We add the request
|
||||
* to req_list and wait for host_ack to wake us up when free
|
||||
* slots are available.
|
||||
*/
|
||||
while ((err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req_data,
|
||||
GFP_ATOMIC)) == -ENOSPC) {
|
||||
|
||||
dev_info(&vdev->dev, "failed to send command to virtio pmem device, no free slots in the virtqueue\n");
|
||||
req_data->wq_buf_avail = false;
|
||||
list_add_tail(&req_data->list, &vpmem->req_list);
|
||||
spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
|
||||
|
||||
/* A host response results in "host_ack" getting called */
|
||||
wait_event(req_data->wq_buf, req_data->wq_buf_avail);
|
||||
spin_lock_irqsave(&vpmem->pmem_lock, flags);
|
||||
}
|
||||
err1 = virtqueue_kick(vpmem->req_vq);
|
||||
spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
|
||||
/*
|
||||
* virtqueue_add_sgs failed with error different than -ENOSPC, we can't
|
||||
* do anything about that.
|
||||
*/
|
||||
if (err || !err1) {
|
||||
dev_info(&vdev->dev, "failed to send command to virtio pmem device\n");
|
||||
err = -EIO;
|
||||
} else {
|
||||
/* A host repsonse results in "host_ack" getting called */
|
||||
wait_event(req_data->host_acked, req_data->done);
|
||||
err = le32_to_cpu(req_data->resp.ret);
|
||||
}
|
||||
|
||||
kfree(req_data);
|
||||
return err;
|
||||
};
|
||||
|
||||
/* The asynchronous flush callback function */
|
||||
int async_pmem_flush(struct nd_region *nd_region, struct bio *bio)
|
||||
{
|
||||
/*
|
||||
* Create child bio for asynchronous flush and chain with
|
||||
* parent bio. Otherwise directly call nd_region flush.
|
||||
*/
|
||||
if (bio && bio->bi_iter.bi_sector != -1) {
|
||||
struct bio *child = bio_alloc(GFP_ATOMIC, 0);
|
||||
|
||||
if (!child)
|
||||
return -ENOMEM;
|
||||
bio_copy_dev(child, bio);
|
||||
child->bi_opf = REQ_PREFLUSH;
|
||||
child->bi_iter.bi_sector = -1;
|
||||
bio_chain(child, bio);
|
||||
submit_bio(child);
|
||||
return 0;
|
||||
}
|
||||
if (virtio_pmem_flush(nd_region))
|
||||
return -EIO;
|
||||
|
||||
return 0;
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(async_pmem_flush);
|
||||
MODULE_LICENSE("GPL");
|
@ -184,6 +184,7 @@ static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
|
||||
|
||||
static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
|
||||
{
|
||||
int ret = 0;
|
||||
blk_status_t rc = 0;
|
||||
bool do_acct;
|
||||
unsigned long start;
|
||||
@ -193,7 +194,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
|
||||
struct nd_region *nd_region = to_region(pmem);
|
||||
|
||||
if (bio->bi_opf & REQ_PREFLUSH)
|
||||
nvdimm_flush(nd_region);
|
||||
ret = nvdimm_flush(nd_region, bio);
|
||||
|
||||
do_acct = nd_iostat_start(bio, &start);
|
||||
bio_for_each_segment(bvec, bio, iter) {
|
||||
@ -208,7 +209,10 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
|
||||
nd_iostat_end(bio, start);
|
||||
|
||||
if (bio->bi_opf & REQ_FUA)
|
||||
nvdimm_flush(nd_region);
|
||||
ret = nvdimm_flush(nd_region, bio);
|
||||
|
||||
if (ret)
|
||||
bio->bi_status = errno_to_blk_status(ret);
|
||||
|
||||
bio_endio(bio);
|
||||
return BLK_QC_T_NONE;
|
||||
@ -362,6 +366,7 @@ static int pmem_attach_disk(struct device *dev,
|
||||
struct gendisk *disk;
|
||||
void *addr;
|
||||
int rc;
|
||||
unsigned long flags = 0UL;
|
||||
|
||||
pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
|
||||
if (!pmem)
|
||||
@ -457,14 +462,15 @@ static int pmem_attach_disk(struct device *dev,
|
||||
nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_res);
|
||||
disk->bb = &pmem->bb;
|
||||
|
||||
dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops);
|
||||
if (is_nvdimm_sync(nd_region))
|
||||
flags = DAXDEV_F_SYNC;
|
||||
dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops, flags);
|
||||
if (!dax_dev) {
|
||||
put_disk(disk);
|
||||
return -ENOMEM;
|
||||
}
|
||||
dax_write_cache(dax_dev, nvdimm_has_cache(nd_region));
|
||||
pmem->dax_dev = dax_dev;
|
||||
|
||||
gendev = disk_to_dev(disk);
|
||||
gendev->groups = pmem_attribute_groups;
|
||||
|
||||
@ -522,14 +528,14 @@ static int nd_pmem_remove(struct device *dev)
|
||||
sysfs_put(pmem->bb_state);
|
||||
pmem->bb_state = NULL;
|
||||
}
|
||||
nvdimm_flush(to_nd_region(dev->parent));
|
||||
nvdimm_flush(to_nd_region(dev->parent), NULL);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void nd_pmem_shutdown(struct device *dev)
|
||||
{
|
||||
nvdimm_flush(to_nd_region(dev->parent));
|
||||
nvdimm_flush(to_nd_region(dev->parent), NULL);
|
||||
}
|
||||
|
||||
static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
|
||||
|
@ -287,7 +287,9 @@ static ssize_t deep_flush_store(struct device *dev, struct device_attribute *att
|
||||
return rc;
|
||||
if (!flush)
|
||||
return -EINVAL;
|
||||
nvdimm_flush(nd_region);
|
||||
rc = nvdimm_flush(nd_region, NULL);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
return len;
|
||||
}
|
||||
@ -1077,6 +1079,11 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
|
||||
dev->of_node = ndr_desc->of_node;
|
||||
nd_region->ndr_size = resource_size(ndr_desc->res);
|
||||
nd_region->ndr_start = ndr_desc->res->start;
|
||||
if (ndr_desc->flush)
|
||||
nd_region->flush = ndr_desc->flush;
|
||||
else
|
||||
nd_region->flush = NULL;
|
||||
|
||||
nd_device_register(dev);
|
||||
|
||||
return nd_region;
|
||||
@ -1117,11 +1124,24 @@ struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create);
|
||||
|
||||
int nvdimm_flush(struct nd_region *nd_region, struct bio *bio)
|
||||
{
|
||||
int rc = 0;
|
||||
|
||||
if (!nd_region->flush)
|
||||
rc = generic_nvdimm_flush(nd_region);
|
||||
else {
|
||||
if (nd_region->flush(nd_region, bio))
|
||||
rc = -EIO;
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
/**
|
||||
* generic_nvdimm_flush - flush any posted write queues between the cpu and pmem media
|
||||
* @nd_region: blk or interleaved pmem region
|
||||
*/
|
||||
void nvdimm_flush(struct nd_region *nd_region)
|
||||
int generic_nvdimm_flush(struct nd_region *nd_region)
|
||||
{
|
||||
struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev);
|
||||
int i, idx;
|
||||
@ -1145,6 +1165,8 @@ void nvdimm_flush(struct nd_region *nd_region)
|
||||
if (ndrd_get_flush_wpq(ndrd, i, 0))
|
||||
writeq(1, ndrd_get_flush_wpq(ndrd, i, idx));
|
||||
wmb();
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvdimm_flush);
|
||||
|
||||
@ -1189,6 +1211,13 @@ int nvdimm_has_cache(struct nd_region *nd_region)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvdimm_has_cache);
|
||||
|
||||
bool is_nvdimm_sync(struct nd_region *nd_region)
|
||||
{
|
||||
return is_nd_pmem(&nd_region->dev) &&
|
||||
!test_bit(ND_REGION_ASYNC, &nd_region->flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(is_nvdimm_sync);
|
||||
|
||||
struct conflict_context {
|
||||
struct nd_region *nd_region;
|
||||
resource_size_t start, size;
|
||||
|
122
drivers/nvdimm/virtio_pmem.c
Normal file
122
drivers/nvdimm/virtio_pmem.c
Normal file
@ -0,0 +1,122 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* virtio_pmem.c: Virtio pmem Driver
|
||||
*
|
||||
* Discovers persistent memory range information
|
||||
* from host and registers the virtual pmem device
|
||||
* with libnvdimm core.
|
||||
*/
|
||||
#include "virtio_pmem.h"
|
||||
#include "nd.h"
|
||||
|
||||
static struct virtio_device_id id_table[] = {
|
||||
{ VIRTIO_ID_PMEM, VIRTIO_DEV_ANY_ID },
|
||||
{ 0 },
|
||||
};
|
||||
|
||||
/* Initialize virt queue */
|
||||
static int init_vq(struct virtio_pmem *vpmem)
|
||||
{
|
||||
/* single vq */
|
||||
vpmem->req_vq = virtio_find_single_vq(vpmem->vdev,
|
||||
virtio_pmem_host_ack, "flush_queue");
|
||||
if (IS_ERR(vpmem->req_vq))
|
||||
return PTR_ERR(vpmem->req_vq);
|
||||
|
||||
spin_lock_init(&vpmem->pmem_lock);
|
||||
INIT_LIST_HEAD(&vpmem->req_list);
|
||||
|
||||
return 0;
|
||||
};
|
||||
|
||||
static int virtio_pmem_probe(struct virtio_device *vdev)
|
||||
{
|
||||
struct nd_region_desc ndr_desc = {};
|
||||
int nid = dev_to_node(&vdev->dev);
|
||||
struct nd_region *nd_region;
|
||||
struct virtio_pmem *vpmem;
|
||||
struct resource res;
|
||||
int err = 0;
|
||||
|
||||
if (!vdev->config->get) {
|
||||
dev_err(&vdev->dev, "%s failure: config access disabled\n",
|
||||
__func__);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
vpmem = devm_kzalloc(&vdev->dev, sizeof(*vpmem), GFP_KERNEL);
|
||||
if (!vpmem) {
|
||||
err = -ENOMEM;
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
vpmem->vdev = vdev;
|
||||
vdev->priv = vpmem;
|
||||
err = init_vq(vpmem);
|
||||
if (err) {
|
||||
dev_err(&vdev->dev, "failed to initialize virtio pmem vq's\n");
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
virtio_cread(vpmem->vdev, struct virtio_pmem_config,
|
||||
start, &vpmem->start);
|
||||
virtio_cread(vpmem->vdev, struct virtio_pmem_config,
|
||||
size, &vpmem->size);
|
||||
|
||||
res.start = vpmem->start;
|
||||
res.end = vpmem->start + vpmem->size - 1;
|
||||
vpmem->nd_desc.provider_name = "virtio-pmem";
|
||||
vpmem->nd_desc.module = THIS_MODULE;
|
||||
|
||||
vpmem->nvdimm_bus = nvdimm_bus_register(&vdev->dev,
|
||||
&vpmem->nd_desc);
|
||||
if (!vpmem->nvdimm_bus) {
|
||||
dev_err(&vdev->dev, "failed to register device with nvdimm_bus\n");
|
||||
err = -ENXIO;
|
||||
goto out_vq;
|
||||
}
|
||||
|
||||
dev_set_drvdata(&vdev->dev, vpmem->nvdimm_bus);
|
||||
|
||||
ndr_desc.res = &res;
|
||||
ndr_desc.numa_node = nid;
|
||||
ndr_desc.flush = async_pmem_flush;
|
||||
set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
|
||||
set_bit(ND_REGION_ASYNC, &ndr_desc.flags);
|
||||
nd_region = nvdimm_pmem_region_create(vpmem->nvdimm_bus, &ndr_desc);
|
||||
if (!nd_region) {
|
||||
dev_err(&vdev->dev, "failed to create nvdimm region\n");
|
||||
err = -ENXIO;
|
||||
goto out_nd;
|
||||
}
|
||||
nd_region->provider_data = dev_to_virtio(nd_region->dev.parent->parent);
|
||||
return 0;
|
||||
out_nd:
|
||||
nvdimm_bus_unregister(vpmem->nvdimm_bus);
|
||||
out_vq:
|
||||
vdev->config->del_vqs(vdev);
|
||||
out_err:
|
||||
return err;
|
||||
}
|
||||
|
||||
static void virtio_pmem_remove(struct virtio_device *vdev)
|
||||
{
|
||||
struct nvdimm_bus *nvdimm_bus = dev_get_drvdata(&vdev->dev);
|
||||
|
||||
nvdimm_bus_unregister(nvdimm_bus);
|
||||
vdev->config->del_vqs(vdev);
|
||||
vdev->config->reset(vdev);
|
||||
}
|
||||
|
||||
static struct virtio_driver virtio_pmem_driver = {
|
||||
.driver.name = KBUILD_MODNAME,
|
||||
.driver.owner = THIS_MODULE,
|
||||
.id_table = id_table,
|
||||
.probe = virtio_pmem_probe,
|
||||
.remove = virtio_pmem_remove,
|
||||
};
|
||||
|
||||
module_virtio_driver(virtio_pmem_driver);
|
||||
MODULE_DEVICE_TABLE(virtio, id_table);
|
||||
MODULE_DESCRIPTION("Virtio pmem driver");
|
||||
MODULE_LICENSE("GPL");
|
55
drivers/nvdimm/virtio_pmem.h
Normal file
55
drivers/nvdimm/virtio_pmem.h
Normal file
@ -0,0 +1,55 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* virtio_pmem.h: virtio pmem Driver
|
||||
*
|
||||
* Discovers persistent memory range information
|
||||
* from host and provides a virtio based flushing
|
||||
* interface.
|
||||
**/
|
||||
|
||||
#ifndef _LINUX_VIRTIO_PMEM_H
|
||||
#define _LINUX_VIRTIO_PMEM_H
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <uapi/linux/virtio_pmem.h>
|
||||
#include <linux/libnvdimm.h>
|
||||
#include <linux/spinlock.h>
|
||||
|
||||
/*
 * One in-flight flush request.  Allocated and freed by virtio_pmem_flush();
 * completed by virtio_pmem_host_ack().
 */
struct virtio_pmem_request {
|
||||
/* Request buffer handed to the host; type is set to VIRTIO_PMEM_REQ_TYPE_FLUSH */
struct virtio_pmem_req req;
|
||||
/* Response buffer; resp.ret is read back once the request is done */
struct virtio_pmem_resp resp;
|
||||
|
||||
/* Wait queue to process deferred work after ack from host */
|
||||
wait_queue_head_t host_acked;
|
||||
/* Set by virtio_pmem_host_ack() when the host has returned this request */
bool done;
|
||||
|
||||
/* Wait queue to process deferred work after virt queue buffer avail */
|
||||
wait_queue_head_t wq_buf;
|
||||
/* Set by virtio_pmem_host_ack() to release a submitter that hit -ENOSPC */
bool wq_buf_avail;
|
||||
/* Links the request into virtio_pmem.req_list while it waits for a slot */
struct list_head list;
|
||||
};
|
||||
|
||||
/* Per-device driver state, allocated in probe and stored in vdev->priv. */
struct virtio_pmem {
|
||||
/* Backing virtio device */
struct virtio_device *vdev;
|
||||
|
||||
/* Virtio pmem request queue */
|
||||
struct virtqueue *req_vq;
|
||||
|
||||
/* nvdimm bus registers virtio pmem device */
|
||||
struct nvdimm_bus *nvdimm_bus;
|
||||
struct nvdimm_bus_descriptor nd_desc;
|
||||
|
||||
/* List to store deferred work if virtqueue is full */
|
||||
struct list_head req_list;
|
||||
|
||||
/* Synchronize virtqueue data */
|
||||
/* Held around virtqueue add/kick/get_buf calls and req_list updates */
spinlock_t pmem_lock;
|
||||
|
||||
/* Memory region information */
|
||||
/* Both values are read from struct virtio_pmem_config in probe */
__u64 start;
|
||||
__u64 size;
|
||||
};
|
||||
|
||||
void virtio_pmem_host_ack(struct virtqueue *vq);
|
||||
int async_pmem_flush(struct nd_region *nd_region, struct bio *bio);
|
||||
#endif
|
@ -679,7 +679,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
|
||||
goto put_dev;
|
||||
|
||||
dev_info->dax_dev = alloc_dax(dev_info, dev_info->gd->disk_name,
|
||||
&dcssblk_dax_ops);
|
||||
&dcssblk_dax_ops, DAXDEV_F_SYNC);
|
||||
if (!dev_info->dax_dev) {
|
||||
rc = -ENOMEM;
|
||||
goto put_dev;
|
||||
|
@ -43,6 +43,17 @@ config VIRTIO_PCI_LEGACY
|
||||
|
||||
If unsure, say Y.
|
||||
|
||||
config VIRTIO_PMEM
|
||||
tristate "Support for virtio pmem driver"
|
||||
depends on VIRTIO
|
||||
depends on LIBNVDIMM
|
||||
help
|
||||
This driver provides access to virtio-pmem devices, storage devices
|
||||
that are mapped into the physical address space - similar to NVDIMMs
|
||||
- with a virtio-based flushing interface.
|
||||
|
||||
If unsure, say Y.
|
||||
|
||||
config VIRTIO_BALLOON
|
||||
tristate "Virtio balloon driver"
|
||||
depends on VIRTIO
|
||||
|
@ -371,15 +371,17 @@ static const struct vm_operations_struct ext4_file_vm_ops = {
|
||||
static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
{
|
||||
struct inode *inode = file->f_mapping->host;
|
||||
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||
struct dax_device *dax_dev = sbi->s_daxdev;
|
||||
|
||||
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
|
||||
if (unlikely(ext4_forced_shutdown(sbi)))
|
||||
return -EIO;
|
||||
|
||||
/*
|
||||
* We don't support synchronous mappings for non-DAX files. At least
|
||||
* until someone comes with a sensible use case.
|
||||
* We don't support synchronous mappings for non-DAX files and
|
||||
* for DAX files if the underlying dax_device is not synchronous.
|
||||
*/
|
||||
if (!IS_DAX(file_inode(file)) && (vma->vm_flags & VM_SYNC))
|
||||
if (!daxdev_mapping_supported(vma, dax_dev))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
file_accessed(file);
|
||||
|
@ -1197,11 +1197,14 @@ xfs_file_mmap(
|
||||
struct file *filp,
|
||||
struct vm_area_struct *vma)
|
||||
{
|
||||
struct dax_device *dax_dev;
|
||||
|
||||
dax_dev = xfs_find_daxdev_for_inode(file_inode(filp));
|
||||
/*
|
||||
* We don't support synchronous mappings for non-DAX files. At least
|
||||
* until someone comes with a sensible use case.
|
||||
* We don't support synchronous mappings for non-DAX files and
|
||||
* for DAX files if the underlying dax_device is not synchronous.
|
||||
*/
|
||||
if (!IS_DAX(file_inode(filp)) && (vma->vm_flags & VM_SYNC))
|
||||
if (!daxdev_mapping_supported(vma, dax_dev))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
file_accessed(filp);
|
||||
|
@ -7,6 +7,9 @@
|
||||
#include <linux/radix-tree.h>
|
||||
#include <asm/pgtable.h>
|
||||
|
||||
/* Flag for synchronous flush */
|
||||
#define DAXDEV_F_SYNC (1UL << 0)
|
||||
|
||||
typedef unsigned long dax_entry_t;
|
||||
|
||||
struct iomap_ops;
|
||||
@ -38,18 +41,40 @@ extern struct attribute_group dax_attribute_group;
|
||||
#if IS_ENABLED(CONFIG_DAX)
|
||||
struct dax_device *dax_get_by_host(const char *host);
|
||||
struct dax_device *alloc_dax(void *private, const char *host,
|
||||
const struct dax_operations *ops);
|
||||
const struct dax_operations *ops, unsigned long flags);
|
||||
void put_dax(struct dax_device *dax_dev);
|
||||
void kill_dax(struct dax_device *dax_dev);
|
||||
void dax_write_cache(struct dax_device *dax_dev, bool wc);
|
||||
bool dax_write_cache_enabled(struct dax_device *dax_dev);
|
||||
bool __dax_synchronous(struct dax_device *dax_dev);
|
||||
static inline bool dax_synchronous(struct dax_device *dax_dev)
|
||||
{
|
||||
return __dax_synchronous(dax_dev);
|
||||
}
|
||||
void __set_dax_synchronous(struct dax_device *dax_dev);
|
||||
static inline void set_dax_synchronous(struct dax_device *dax_dev)
|
||||
{
|
||||
__set_dax_synchronous(dax_dev);
|
||||
}
|
||||
/*
|
||||
* Check if given mapping is supported by the file / underlying device.
|
||||
*/
|
||||
static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
|
||||
struct dax_device *dax_dev)
|
||||
{
|
||||
if (!(vma->vm_flags & VM_SYNC))
|
||||
return true;
|
||||
if (!IS_DAX(file_inode(vma->vm_file)))
|
||||
return false;
|
||||
return dax_synchronous(dax_dev);
|
||||
}
|
||||
#else
|
||||
static inline struct dax_device *dax_get_by_host(const char *host)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
static inline struct dax_device *alloc_dax(void *private, const char *host,
|
||||
const struct dax_operations *ops)
|
||||
const struct dax_operations *ops, unsigned long flags)
|
||||
{
|
||||
/*
|
||||
* Callers should check IS_ENABLED(CONFIG_DAX) to know if this
|
||||
@ -70,6 +95,18 @@ static inline bool dax_write_cache_enabled(struct dax_device *dax_dev)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static inline bool dax_synchronous(struct dax_device *dax_dev)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
static inline void set_dax_synchronous(struct dax_device *dax_dev)
|
||||
{
|
||||
}
|
||||
static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
|
||||
struct dax_device *dax_dev)
|
||||
{
|
||||
return !(vma->vm_flags & VM_SYNC);
|
||||
}
|
||||
#endif
|
||||
|
||||
struct writeback_control;
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <linux/types.h>
|
||||
#include <linux/uuid.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/bio.h>
|
||||
|
||||
struct badrange_entry {
|
||||
u64 start;
|
||||
@ -57,6 +58,9 @@ enum {
|
||||
*/
|
||||
ND_REGION_PERSIST_MEMCTRL = 2,
|
||||
|
||||
/* Platform provides asynchronous flush mechanism */
|
||||
ND_REGION_ASYNC = 3,
|
||||
|
||||
/* mark newly adjusted resources as requiring a label update */
|
||||
DPA_RESOURCE_ADJUSTED = 1 << 0,
|
||||
};
|
||||
@ -113,6 +117,7 @@ struct nd_mapping_desc {
|
||||
int position;
|
||||
};
|
||||
|
||||
struct nd_region;
|
||||
struct nd_region_desc {
|
||||
struct resource *res;
|
||||
struct nd_mapping_desc *mapping;
|
||||
@ -125,6 +130,7 @@ struct nd_region_desc {
|
||||
int target_node;
|
||||
unsigned long flags;
|
||||
struct device_node *of_node;
|
||||
int (*flush)(struct nd_region *nd_region, struct bio *bio);
|
||||
};
|
||||
|
||||
struct device;
|
||||
@ -252,10 +258,12 @@ unsigned long nd_blk_memremap_flags(struct nd_blk_region *ndbr);
|
||||
unsigned int nd_region_acquire_lane(struct nd_region *nd_region);
|
||||
void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane);
|
||||
u64 nd_fletcher64(void *addr, size_t len, bool le);
|
||||
void nvdimm_flush(struct nd_region *nd_region);
|
||||
int nvdimm_flush(struct nd_region *nd_region, struct bio *bio);
|
||||
int generic_nvdimm_flush(struct nd_region *nd_region);
|
||||
int nvdimm_has_flush(struct nd_region *nd_region);
|
||||
int nvdimm_has_cache(struct nd_region *nd_region);
|
||||
int nvdimm_in_overwrite(struct nvdimm *nvdimm);
|
||||
bool is_nvdimm_sync(struct nd_region *nd_region);
|
||||
|
||||
static inline int nvdimm_ctl(struct nvdimm *nvdimm, unsigned int cmd, void *buf,
|
||||
unsigned int buf_len, int *cmd_rc)
|
||||
|
@ -44,5 +44,6 @@
|
||||
#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */
|
||||
#define VIRTIO_ID_CRYPTO 20 /* virtio crypto */
|
||||
#define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */
|
||||
#define VIRTIO_ID_PMEM 27 /* virtio pmem */
|
||||
|
||||
#endif /* _LINUX_VIRTIO_IDS_H */
|
||||
|
34
include/uapi/linux/virtio_pmem.h
Normal file
34
include/uapi/linux/virtio_pmem.h
Normal file
@ -0,0 +1,34 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
|
||||
/*
|
||||
* Definitions for virtio-pmem devices.
|
||||
*
|
||||
* Copyright (C) 2019 Red Hat, Inc.
|
||||
*
|
||||
* Author(s): Pankaj Gupta <pagupta@redhat.com>
|
||||
*/
|
||||
|
||||
#ifndef _UAPI_LINUX_VIRTIO_PMEM_H
|
||||
#define _UAPI_LINUX_VIRTIO_PMEM_H
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/virtio_ids.h>
|
||||
#include <linux/virtio_config.h>
|
||||
|
||||
struct virtio_pmem_config {
|
||||
__u64 start;
|
||||
__u64 size;
|
||||
};
|
||||
|
||||
#define VIRTIO_PMEM_REQ_TYPE_FLUSH 0
|
||||
|
||||
struct virtio_pmem_resp {
|
||||
/* Host return status corresponding to flush request */
|
||||
__le32 ret;
|
||||
};
|
||||
|
||||
struct virtio_pmem_req {
|
||||
/* command type */
|
||||
__le32 type;
|
||||
};
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user