dax + libnvdimm for v5.17

- Simplify the dax_operations API
  - Eliminate bdev_dax_pgoff() in favor of the filesystem maintaining
    and applying a partition offset to all its DAX iomap operations.
  - Remove wrappers and device-mapper stacked callbacks for
    ->copy_from_iter() and ->copy_to_iter() in favor of moving
    block_device relative offset responsibility to the
    dax_direct_access() caller.
  - Remove the need for an @bdev in filesystem-DAX infrastructure
  - Remove unused uio helpers copy_from_iter_flushcache() and
    copy_mc_to_iter() as only the non-check_copy_size() versions are
    used for DAX.
- Prepare XFS for the pending (next merge window) DAX+reflink support
- Remove deprecated DEV_DAX_PMEM_COMPAT support
- Cleanup a straggling misuse of the GUID api
 
 Tags offered after the branch was cut:
 Reviewed-by: Mike Snitzer <snitzer@redhat.com>
 Link: https://lore.kernel.org/r/Ydb/3P+8nvjCjYfO@redhat.com
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYIAB0WIQSbo+XnGs+rwLz9XGXfioYZHlFsZwUCYd3dTAAKCRDfioYZHlFs
 Z//UAP9zetoTE+O7zJG7CXja4jSopSadbdbh6QKSXaqfKBPvQQD+N4US3wA2bGv8
 f/qCY62j2Hj3hUTGHs9RvTyw3JsSYAA=
 =QvDs
 -----END PGP SIGNATURE-----

Merge tag 'libnvdimm-for-5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull dax and libnvdimm updates from Dan Williams:
 "The bulk of this is a rework of the dax_operations API after
  discovering the obstacles it posed to the work-in-progress DAX+reflink
  support for XFS and other copy-on-write filesystem mechanics.

  Primarily the need to plumb a block_device through the API to handle
  partition offsets was a sticking point and Christoph untangled that
  dependency in addition to other cleanups to make landing the
  DAX+reflink support easier.

  The DAX_PMEM_COMPAT option has been around for 4 years and not only
  are distributions shipping userspace that understands the current
  configuration API, but some are not even bothering to turn this option
  on anymore, so it seems a good time to remove it per the deprecation
  schedule. Recall that this was added after the device-dax subsystem
  moved from /sys/class/dax to /sys/bus/dax for its sysfs organization.
  All recent functionality depends on /sys/bus/dax.

  Some other miscellaneous cleanups and reflink prep patches are
  included as well.

  Summary:

   - Simplify the dax_operations API:

      - Eliminate bdev_dax_pgoff() in favor of the filesystem
        maintaining and applying a partition offset to all its DAX iomap
        operations.

      - Remove wrappers and device-mapper stacked callbacks for
        ->copy_from_iter() and ->copy_to_iter() in favor of moving
        block_device relative offset responsibility to the
        dax_direct_access() caller.

      - Remove the need for an @bdev in filesystem-DAX infrastructure

      - Remove unused uio helpers copy_from_iter_flushcache() and
        copy_mc_to_iter() as only the non-check_copy_size() versions are
        used for DAX.

   - Prepare XFS for the pending (next merge window) DAX+reflink support

   - Remove deprecated DEV_DAX_PMEM_COMPAT support

   - Cleanup a straggling misuse of the GUID api"
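
To make the reworked calling convention concrete, here is a minimal, hypothetical
consumer-side sketch. It is not taken from the series; the example_* names are
invented, and it only assumes the post-merge interfaces that appear in the diffs
further down (fs_dax_get_by_bdev() reporting the partition start offset,
dax_iomap_pgoff()-style offset math in fs/dax.c, and dax_copy_to_iter()
internally selecting _copy_mc_to_iter() when DAXDEV_NOMC is set):

#include <linux/blkdev.h>
#include <linux/dax.h>
#include <linux/iomap.h>
#include <linux/uio.h>

struct example_fs {
	struct dax_device *dax_dev;
	u64 dax_part_off;	/* partition start in bytes, from fs_dax_get_by_bdev() */
};

/* At mount time: no bdev_dax_pgoff() any more, the offset is captured once. */
static int example_fs_setup_dax(struct example_fs *fs, struct block_device *bdev)
{
	fs->dax_dev = fs_dax_get_by_bdev(bdev, &fs->dax_part_off);
	return fs->dax_dev ? 0 : -EOPNOTSUPP;
}

/*
 * The filesystem folds dax_part_off into iomap->addr when it builds a DAX
 * iomap, so file position -> dax_device pgoff needs no block_device at all
 * (this mirrors dax_iomap_pgoff() in the fs/dax.c hunk at the end).
 */
static pgoff_t example_dax_pgoff(const struct iomap *iomap, loff_t pos)
{
	return PHYS_PFN(iomap->addr + (pos & PAGE_MASK) - iomap->offset);
}

static ssize_t example_dax_read(struct example_fs *fs, const struct iomap *iomap,
				loff_t pos, size_t len, struct iov_iter *iter)
{
	pgoff_t pgoff = example_dax_pgoff(iomap, pos);
	size_t off = pos & ~PAGE_MASK;
	void *kaddr;
	long nr;
	int id;

	id = dax_read_lock();
	nr = dax_direct_access(fs->dax_dev, pgoff,
			       PHYS_PFN(PAGE_ALIGN(off + len)), &kaddr, NULL);
	if (nr < 0) {
		dax_read_unlock(id);
		return nr;
	}
	len = min_t(size_t, len, PFN_PHYS(nr) - off);
	/* dax_copy_to_iter() picks _copy_mc_to_iter() itself when DAXDEV_NOMC is set */
	len = dax_copy_to_iter(fs->dax_dev, pgoff, kaddr + off, len, iter);
	dax_read_unlock(id);
	return len;
}

The point is that the partition offset is captured once, when the filesystem
looks up its dax_device, and folded into the iomap, so no block_device is
needed on the access path.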

* tag 'libnvdimm-for-5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (38 commits)
  iomap: Fix error handling in iomap_zero_iter()
  ACPI: NFIT: Import GUID before use
  dax: remove the copy_from_iter and copy_to_iter methods
  dax: remove the DAXDEV_F_SYNC flag
  dax: simplify dax_synchronous and set_dax_synchronous
  uio: remove copy_from_iter_flushcache() and copy_mc_to_iter()
  iomap: turn the byte variable in iomap_zero_iter into a ssize_t
  memremap: remove support for external pgmap refcounts
  fsdax: don't require CONFIG_BLOCK
  iomap: build the block based code conditionally
  dax: fix up some of the block device related ifdefs
  fsdax: shift partition offset handling into the file systems
  dax: return the partition offset from fs_dax_get_by_bdev
  iomap: add a IOMAP_DAX flag
  xfs: pass the mapping flags to xfs_bmbt_to_iomap
  xfs: use xfs_direct_write_iomap_ops for DAX zeroing
  xfs: move dax device handling into xfs_{alloc,free}_buftarg
  ext4: cleanup the dax handling in ext4_fill_super
  ext2: cleanup the dax handling in ext2_fill_super
  fsdax: decouple zeroing from the iomap buffered I/O code
  ...
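
The provider side of the same rework is visible in the pmem and dcssblk diffs
below: alloc_dax() loses its host-name and flags arguments, the per-driver
copy_from_iter()/copy_to_iter() and dax_supported() methods go away, and
drivers instead set behaviour bits with set_dax_*() and bind the dax_device to
their gendisk with dax_add_host(). A rough, hypothetical sketch of that pattern
(example_* names are invented, error paths trimmed):

#include <linux/blkdev.h>
#include <linux/dax.h>
#include <linux/err.h>
#include <linux/pfn_t.h>

struct example_dev {
	struct dax_device *dax_dev;
};

static long example_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
				      long nr_pages, void **kaddr, pfn_t *pfn)
{
	return -EIO;	/* a real driver resolves pgoff to its own memory here */
}

static int example_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
				       size_t nr_pages)
{
	return -EIO;	/* a real driver clears the backing pages here */
}

/* No ->dax_supported and no ->copy_{from,to}_iter methods any more. */
static const struct dax_operations example_dax_ops = {
	.direct_access		= example_dax_direct_access,
	.zero_page_range	= example_dax_zero_page_range,
};

static int example_attach_dax(struct example_dev *edev, struct gendisk *disk,
			      bool synchronous)
{
	struct dax_device *dax_dev;
	int rc;

	dax_dev = alloc_dax(edev, &example_dax_ops);
	if (IS_ERR(dax_dev))
		return PTR_ERR(dax_dev);

	set_dax_nocache(dax_dev);	/* writes use _copy_from_iter_flushcache() */
	set_dax_nomc(dax_dev);		/* reads use _copy_mc_to_iter() */
	if (synchronous)
		set_dax_synchronous(dax_dev);	/* replaces the DAXDEV_F_SYNC flag */

	rc = dax_add_host(dax_dev, disk);
	if (rc) {
		kill_dax(dax_dev);
		put_dax(dax_dev);
		return rc;
	}
	edev->dax_dev = dax_dev;
	return 0;
}

This follows the shape of what pmem_attach_disk() and dcssblk_add_store() now
do in the diffs further down.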
Merged by Linus Torvalds on 2022-01-12 15:46:11 -08:00 (commit 3acbdbf42e)
63 changed files with 567 additions and 1186 deletions

@ -1,22 +0,0 @@
What: /sys/class/dax/
Date: May, 2016
KernelVersion: v4.7
Contact: nvdimm@lists.linux.dev
Description: Device DAX is the device-centric analogue of Filesystem
DAX (CONFIG_FS_DAX). It allows memory ranges to be
allocated and mapped without need of an intervening file
system. Device DAX is strict, precise and predictable.
Specifically this interface:
1. Guarantees fault granularity with respect to a given
page size (pte, pmd, or pud) set at configuration time.
2. Enforces deterministic behavior by being strict about
what fault scenarios are supported.
The /sys/class/dax/ interface enumerates all the
device-dax instances in the system. The ABI is
deprecated and will be removed after 2020. It is
replaced with the DAX bus interface /sys/bus/dax/ where
device-dax instances can be found under
/sys/bus/dax/devices/


@@ -678,10 +678,12 @@ static const char *spa_type_name(u16 type)
 int nfit_spa_type(struct acpi_nfit_system_address *spa)
 {
+	guid_t guid;
 	int i;
 
+	import_guid(&guid, spa->range_guid);
 	for (i = 0; i < NFIT_UUID_MAX; i++)
-		if (guid_equal(to_nfit_uuid(i), (guid_t *)&spa->range_guid))
+		if (guid_equal(to_nfit_uuid(i), &guid))
 			return i;
 	return -1;
 }


@@ -1,8 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
-config DAX_DRIVER
-	select DAX
-	bool
-
 menuconfig DAX
 	tristate "DAX: direct access to differentiated memory"
 	select SRCU
@@ -70,13 +66,4 @@ config DEV_DAX_KMEM
 	  Say N if unsure.
 
-config DEV_DAX_PMEM_COMPAT
-	tristate "PMEM DAX: support the deprecated /sys/class/dax interface"
-	depends on m && DEV_DAX_PMEM=m
-	default DEV_DAX_PMEM
-	help
-	  Older versions of the libdaxctl library expect to find all
-	  device-dax instances under /sys/class/dax. If libdaxctl in
-	  your distribution is older than v58 say M, otherwise say N.
-
 endif


@@ -2,10 +2,11 @@
 obj-$(CONFIG_DAX) += dax.o
 obj-$(CONFIG_DEV_DAX) += device_dax.o
 obj-$(CONFIG_DEV_DAX_KMEM) += kmem.o
+obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
 
 dax-y := super.o
 dax-y += bus.o
 device_dax-y := device.o
+dax_pmem-y := pmem.o
 
-obj-y += pmem/
 obj-y += hmem/


@@ -10,8 +10,6 @@
 #include "dax-private.h"
 #include "bus.h"
 
-static struct class *dax_class;
-
 static DEFINE_MUTEX(dax_bus_lock);
 
 #define DAX_NAME_LEN 30
@@ -1323,14 +1321,17 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data)
 	}
 
 	/*
-	 * No 'host' or dax_operations since there is no access to this
-	 * device outside of mmap of the resulting character device.
+	 * No dax_operations since there is no access to this device outside of
+	 * mmap of the resulting character device.
 	 */
-	dax_dev = alloc_dax(dev_dax, NULL, NULL, DAXDEV_F_SYNC);
+	dax_dev = alloc_dax(dev_dax, NULL);
 	if (IS_ERR(dax_dev)) {
 		rc = PTR_ERR(dax_dev);
 		goto err_alloc_dax;
 	}
+	set_dax_synchronous(dax_dev);
+	set_dax_nocache(dax_dev);
+	set_dax_nomc(dax_dev);
 
 	/* a device_dax instance is dead while the driver is not attached */
 	kill_dax(dax_dev);
@@ -1343,10 +1344,7 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data)
 	inode = dax_inode(dax_dev);
 	dev->devt = inode->i_rdev;
-	if (data->subsys == DEV_DAX_BUS)
-		dev->bus = &dax_bus_type;
-	else
-		dev->class = dax_class;
+	dev->bus = &dax_bus_type;
 	dev->parent = parent;
 	dev->type = &dev_dax_type;
@@ -1445,22 +1443,10 @@ EXPORT_SYMBOL_GPL(dax_driver_unregister);
 int __init dax_bus_init(void)
 {
-	int rc;
-
-	if (IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)) {
-		dax_class = class_create(THIS_MODULE, "dax");
-		if (IS_ERR(dax_class))
-			return PTR_ERR(dax_class);
-	}
-
-	rc = bus_register(&dax_bus_type);
-	if (rc)
-		class_destroy(dax_class);
-	return rc;
+	return bus_register(&dax_bus_type);
 }
 
 void __exit dax_bus_exit(void)
 {
 	bus_unregister(&dax_bus_type);
-	class_destroy(dax_class);
 }


@@ -16,24 +16,15 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id,
 		struct range *range, int target_node, unsigned int align,
 		unsigned long flags);
 
-enum dev_dax_subsys {
-	DEV_DAX_BUS = 0, /* zeroed dev_dax_data picks this by default */
-	DEV_DAX_CLASS,
-};
-
 struct dev_dax_data {
 	struct dax_region *dax_region;
 	struct dev_pagemap *pgmap;
-	enum dev_dax_subsys subsys;
 	resource_size_t size;
 	int id;
 };
 
 struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data);
 
-/* to be deleted when DEV_DAX_CLASS is removed */
-struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys);
-
 struct dax_device_driver {
 	struct device_driver drv;
 	struct list_head ids;
@@ -49,10 +40,6 @@ int __dax_driver_register(struct dax_device_driver *dax_drv,
 void dax_driver_unregister(struct dax_device_driver *dax_drv);
 void kill_dev_dax(struct dev_dax *dev_dax);
 
-#if IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)
-int dev_dax_probe(struct dev_dax *dev_dax);
-#endif
-
 /*
  * While run_dax() is potentially a generic operation that could be
  * defined in include/linux/dax.h we don't want to grow any users


@@ -433,11 +433,7 @@ int dev_dax_probe(struct dev_dax *dev_dax)
 	inode = dax_inode(dax_dev);
 	cdev = inode->i_cdev;
 	cdev_init(cdev, &dax_fops);
-	if (dev->class) {
-		/* for the CONFIG_DEV_DAX_PMEM_COMPAT case */
-		cdev->owner = dev->parent->driver->owner;
-	} else
-		cdev->owner = dev->driver->owner;
+	cdev->owner = dev->driver->owner;
 	cdev_set_parent(cdev, &dev->kobj);
 	rc = cdev_add(cdev, dev->devt, 1);
 	if (rc)


@@ -3,11 +3,11 @@
 #include <linux/memremap.h>
 #include <linux/module.h>
 #include <linux/pfn_t.h>
-#include "../../nvdimm/pfn.h"
-#include "../../nvdimm/nd.h"
-#include "../bus.h"
+#include "../nvdimm/pfn.h"
+#include "../nvdimm/nd.h"
+#include "bus.h"
 
-struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys)
+static struct dev_dax *__dax_pmem_probe(struct device *dev)
 {
 	struct range range;
 	int rc, id, region_id;
@@ -63,7 +63,6 @@ struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys)
 		.dax_region = dax_region,
 		.id = id,
 		.pgmap = &pgmap,
-		.subsys = subsys,
 		.size = range_len(&range),
 	};
 	dev_dax = devm_create_dev_dax(&data);
@@ -73,7 +72,32 @@ struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys)
 
 	return dev_dax;
 }
-EXPORT_SYMBOL_GPL(__dax_pmem_probe);
+
+static int dax_pmem_probe(struct device *dev)
+{
+	return PTR_ERR_OR_ZERO(__dax_pmem_probe(dev));
+}
+
+static struct nd_device_driver dax_pmem_driver = {
+	.probe = dax_pmem_probe,
+	.drv = {
+		.name = "dax_pmem",
+	},
+	.type = ND_DRIVER_DAX_PMEM,
+};
+
+static int __init dax_pmem_init(void)
+{
+	return nd_driver_register(&dax_pmem_driver);
+}
+module_init(dax_pmem_init);
+
+static void __exit dax_pmem_exit(void)
+{
+	driver_unregister(&dax_pmem_driver.drv);
+}
+module_exit(dax_pmem_exit);
 
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Intel Corporation");
+MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DAX_PMEM);


@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
 obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem_core.o
-obj-$(CONFIG_DEV_DAX_PMEM_COMPAT) += dax_pmem_compat.o
 
 dax_pmem-y := pmem.o
 dax_pmem_core-y := core.o


@ -1,72 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2016 - 2018 Intel Corporation. All rights reserved. */
#include <linux/percpu-refcount.h>
#include <linux/memremap.h>
#include <linux/module.h>
#include <linux/pfn_t.h>
#include <linux/nd.h>
#include "../bus.h"
/* we need the private definitions to implement compat suport */
#include "../dax-private.h"
static int dax_pmem_compat_probe(struct device *dev)
{
struct dev_dax *dev_dax = __dax_pmem_probe(dev, DEV_DAX_CLASS);
int rc;
if (IS_ERR(dev_dax))
return PTR_ERR(dev_dax);
if (!devres_open_group(&dev_dax->dev, dev_dax, GFP_KERNEL))
return -ENOMEM;
device_lock(&dev_dax->dev);
rc = dev_dax_probe(dev_dax);
device_unlock(&dev_dax->dev);
devres_close_group(&dev_dax->dev, dev_dax);
if (rc)
devres_release_group(&dev_dax->dev, dev_dax);
return rc;
}
static int dax_pmem_compat_release(struct device *dev, void *data)
{
device_lock(dev);
devres_release_group(dev, to_dev_dax(dev));
device_unlock(dev);
return 0;
}
static void dax_pmem_compat_remove(struct device *dev)
{
device_for_each_child(dev, NULL, dax_pmem_compat_release);
}
static struct nd_device_driver dax_pmem_compat_driver = {
.probe = dax_pmem_compat_probe,
.remove = dax_pmem_compat_remove,
.drv = {
.name = "dax_pmem_compat",
},
.type = ND_DRIVER_DAX_PMEM,
};
static int __init dax_pmem_compat_init(void)
{
return nd_driver_register(&dax_pmem_compat_driver);
}
module_init(dax_pmem_compat_init);
static void __exit dax_pmem_compat_exit(void)
{
driver_unregister(&dax_pmem_compat_driver.drv);
}
module_exit(dax_pmem_compat_exit);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DAX_PMEM);


@@ -7,34 +7,4 @@
 #include <linux/nd.h>
 #include "../bus.h"
 
-static int dax_pmem_probe(struct device *dev)
-{
-	return PTR_ERR_OR_ZERO(__dax_pmem_probe(dev, DEV_DAX_BUS));
-}
-
-static struct nd_device_driver dax_pmem_driver = {
-	.probe = dax_pmem_probe,
-	.drv = {
-		.name = "dax_pmem",
-	},
-	.type = ND_DRIVER_DAX_PMEM,
-};
-
-static int __init dax_pmem_init(void)
-{
-	return nd_driver_register(&dax_pmem_driver);
-}
-module_init(dax_pmem_init);
-
-static void __exit dax_pmem_exit(void)
-{
-	driver_unregister(&dax_pmem_driver.drv);
-}
-module_exit(dax_pmem_exit);
-
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Intel Corporation");
-#if !IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)
-/* For compat builds, don't load this module by default */
-MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DAX_PMEM);
-#endif


@ -7,10 +7,8 @@
#include <linux/mount.h> #include <linux/mount.h>
#include <linux/pseudo_fs.h> #include <linux/pseudo_fs.h>
#include <linux/magic.h> #include <linux/magic.h>
#include <linux/genhd.h>
#include <linux/pfn_t.h> #include <linux/pfn_t.h>
#include <linux/cdev.h> #include <linux/cdev.h>
#include <linux/hash.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/uio.h> #include <linux/uio.h>
#include <linux/dax.h> #include <linux/dax.h>
@ -21,15 +19,12 @@
* struct dax_device - anchor object for dax services * struct dax_device - anchor object for dax services
* @inode: core vfs * @inode: core vfs
* @cdev: optional character interface for "device dax" * @cdev: optional character interface for "device dax"
* @host: optional name for lookups where the device path is not available
* @private: dax driver private data * @private: dax driver private data
* @flags: state and boolean properties * @flags: state and boolean properties
*/ */
struct dax_device { struct dax_device {
struct hlist_node list;
struct inode inode; struct inode inode;
struct cdev cdev; struct cdev cdev;
const char *host;
void *private; void *private;
unsigned long flags; unsigned long flags;
const struct dax_operations *ops; const struct dax_operations *ops;
@ -42,10 +37,6 @@ static DEFINE_IDA(dax_minor_ida);
static struct kmem_cache *dax_cache __read_mostly; static struct kmem_cache *dax_cache __read_mostly;
static struct super_block *dax_superblock __read_mostly; static struct super_block *dax_superblock __read_mostly;
#define DAX_HASH_SIZE (PAGE_SIZE / sizeof(struct hlist_head))
static struct hlist_head dax_host_list[DAX_HASH_SIZE];
static DEFINE_SPINLOCK(dax_host_lock);
int dax_read_lock(void) int dax_read_lock(void)
{ {
return srcu_read_lock(&dax_srcu); return srcu_read_lock(&dax_srcu);
@ -58,169 +49,54 @@ void dax_read_unlock(int id)
} }
EXPORT_SYMBOL_GPL(dax_read_unlock); EXPORT_SYMBOL_GPL(dax_read_unlock);
static int dax_host_hash(const char *host) #if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX)
{
return hashlen_hash(hashlen_string("DAX", host)) % DAX_HASH_SIZE;
}
#ifdef CONFIG_BLOCK
#include <linux/blkdev.h> #include <linux/blkdev.h>
int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size, static DEFINE_XARRAY(dax_hosts);
pgoff_t *pgoff)
int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk)
{ {
sector_t start_sect = bdev ? get_start_sect(bdev) : 0; return xa_insert(&dax_hosts, (unsigned long)disk, dax_dev, GFP_KERNEL);
phys_addr_t phys_off = (start_sect + sector) * 512;
if (pgoff)
*pgoff = PHYS_PFN(phys_off);
if (phys_off % PAGE_SIZE || size % PAGE_SIZE)
return -EINVAL;
return 0;
} }
EXPORT_SYMBOL(bdev_dax_pgoff); EXPORT_SYMBOL_GPL(dax_add_host);
void dax_remove_host(struct gendisk *disk)
{
xa_erase(&dax_hosts, (unsigned long)disk);
}
EXPORT_SYMBOL_GPL(dax_remove_host);
#if IS_ENABLED(CONFIG_FS_DAX)
/** /**
* dax_get_by_host() - temporary lookup mechanism for filesystem-dax * fs_dax_get_by_bdev() - temporary lookup mechanism for filesystem-dax
* @host: alternate name for the device registered by a dax driver * @bdev: block device to find a dax_device for
* @start_off: returns the byte offset into the dax_device that @bdev starts
*/ */
static struct dax_device *dax_get_by_host(const char *host) struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev, u64 *start_off)
{ {
struct dax_device *dax_dev, *found = NULL; struct dax_device *dax_dev;
int hash, id; u64 part_size;
if (!host)
return NULL;
hash = dax_host_hash(host);
id = dax_read_lock();
spin_lock(&dax_host_lock);
hlist_for_each_entry(dax_dev, &dax_host_list[hash], list) {
if (!dax_alive(dax_dev)
|| strcmp(host, dax_dev->host) != 0)
continue;
if (igrab(&dax_dev->inode))
found = dax_dev;
break;
}
spin_unlock(&dax_host_lock);
dax_read_unlock(id);
return found;
}
struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
{
if (!blk_queue_dax(bdev->bd_disk->queue))
return NULL;
return dax_get_by_host(bdev->bd_disk->disk_name);
}
EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
bool generic_fsdax_supported(struct dax_device *dax_dev,
struct block_device *bdev, int blocksize, sector_t start,
sector_t sectors)
{
bool dax_enabled = false;
pgoff_t pgoff, pgoff_end;
void *kaddr, *end_kaddr;
pfn_t pfn, end_pfn;
sector_t last_page;
long len, len2;
int err, id;
if (blocksize != PAGE_SIZE) {
pr_info("%pg: error: unsupported blocksize for dax\n", bdev);
return false;
}
if (!dax_dev) {
pr_debug("%pg: error: dax unsupported by block device\n", bdev);
return false;
}
err = bdev_dax_pgoff(bdev, start, PAGE_SIZE, &pgoff);
if (err) {
pr_info("%pg: error: unaligned partition for dax\n", bdev);
return false;
}
last_page = PFN_DOWN((start + sectors - 1) * 512) * PAGE_SIZE / 512;
err = bdev_dax_pgoff(bdev, last_page, PAGE_SIZE, &pgoff_end);
if (err) {
pr_info("%pg: error: unaligned partition for dax\n", bdev);
return false;
}
id = dax_read_lock();
len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn);
if (len < 1 || len2 < 1) {
pr_info("%pg: error: dax access failed (%ld)\n",
bdev, len < 1 ? len : len2);
dax_read_unlock(id);
return false;
}
if (IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn)) {
/*
* An arch that has enabled the pmem api should also
* have its drivers support pfn_t_devmap()
*
* This is a developer warning and should not trigger in
* production. dax_flush() will crash since it depends
* on being able to do (page_address(pfn_to_page())).
*/
WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API));
dax_enabled = true;
} else if (pfn_t_devmap(pfn) && pfn_t_devmap(end_pfn)) {
struct dev_pagemap *pgmap, *end_pgmap;
pgmap = get_dev_pagemap(pfn_t_to_pfn(pfn), NULL);
end_pgmap = get_dev_pagemap(pfn_t_to_pfn(end_pfn), NULL);
if (pgmap && pgmap == end_pgmap && pgmap->type == MEMORY_DEVICE_FS_DAX
&& pfn_t_to_page(pfn)->pgmap == pgmap
&& pfn_t_to_page(end_pfn)->pgmap == pgmap
&& pfn_t_to_pfn(pfn) == PHYS_PFN(__pa(kaddr))
&& pfn_t_to_pfn(end_pfn) == PHYS_PFN(__pa(end_kaddr)))
dax_enabled = true;
put_dev_pagemap(pgmap);
put_dev_pagemap(end_pgmap);
}
dax_read_unlock(id);
if (!dax_enabled) {
pr_info("%pg: error: dax support not enabled\n", bdev);
return false;
}
return true;
}
EXPORT_SYMBOL_GPL(generic_fsdax_supported);
bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
int blocksize, sector_t start, sector_t len)
{
bool ret = false;
int id; int id;
if (!dax_dev) if (!blk_queue_dax(bdev->bd_disk->queue))
return false; return NULL;
*start_off = get_start_sect(bdev) * SECTOR_SIZE;
part_size = bdev_nr_sectors(bdev) * SECTOR_SIZE;
if (*start_off % PAGE_SIZE || part_size % PAGE_SIZE) {
pr_info("%pg: error: unaligned partition for dax\n", bdev);
return NULL;
}
id = dax_read_lock(); id = dax_read_lock();
if (dax_alive(dax_dev) && dax_dev->ops->dax_supported) dax_dev = xa_load(&dax_hosts, (unsigned long)bdev->bd_disk);
ret = dax_dev->ops->dax_supported(dax_dev, bdev, blocksize, if (!dax_dev || !dax_alive(dax_dev) || !igrab(&dax_dev->inode))
start, len); dax_dev = NULL;
dax_read_unlock(id); dax_read_unlock(id);
return ret;
return dax_dev;
} }
EXPORT_SYMBOL_GPL(dax_supported); EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
#endif /* CONFIG_FS_DAX */ #endif /* CONFIG_BLOCK && CONFIG_FS_DAX */
#endif /* CONFIG_BLOCK */
enum dax_device_flags { enum dax_device_flags {
/* !alive + rcu grace period == no new operations / mappings */ /* !alive + rcu grace period == no new operations / mappings */
@ -229,6 +105,10 @@ enum dax_device_flags {
DAXDEV_WRITE_CACHE, DAXDEV_WRITE_CACHE,
/* flag to check if device supports synchronous flush */ /* flag to check if device supports synchronous flush */
DAXDEV_SYNC, DAXDEV_SYNC,
/* do not leave the caches dirty after writes */
DAXDEV_NOCACHE,
/* handle CPU fetch exceptions during reads */
DAXDEV_NOMC,
}; };
/** /**
@ -270,9 +150,15 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
if (!dax_alive(dax_dev)) if (!dax_alive(dax_dev))
return 0; return 0;
return dax_dev->ops->copy_from_iter(dax_dev, pgoff, addr, bytes, i); /*
* The userspace address for the memory copy has already been validated
* via access_ok() in vfs_write, so use the 'no check' version to bypass
* the HARDENED_USERCOPY overhead.
*/
if (test_bit(DAXDEV_NOCACHE, &dax_dev->flags))
return _copy_from_iter_flushcache(addr, bytes, i);
return _copy_from_iter(addr, bytes, i);
} }
EXPORT_SYMBOL_GPL(dax_copy_from_iter);
size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
size_t bytes, struct iov_iter *i) size_t bytes, struct iov_iter *i)
@ -280,9 +166,15 @@ size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
if (!dax_alive(dax_dev)) if (!dax_alive(dax_dev))
return 0; return 0;
return dax_dev->ops->copy_to_iter(dax_dev, pgoff, addr, bytes, i); /*
* The userspace address for the memory copy has already been validated
* via access_ok() in vfs_red, so use the 'no check' version to bypass
* the HARDENED_USERCOPY overhead.
*/
if (test_bit(DAXDEV_NOMC, &dax_dev->flags))
return _copy_mc_to_iter(addr, bytes, i);
return _copy_to_iter(addr, bytes, i);
} }
EXPORT_SYMBOL_GPL(dax_copy_to_iter);
int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
size_t nr_pages) size_t nr_pages)
@ -332,17 +224,29 @@ bool dax_write_cache_enabled(struct dax_device *dax_dev)
} }
EXPORT_SYMBOL_GPL(dax_write_cache_enabled); EXPORT_SYMBOL_GPL(dax_write_cache_enabled);
bool __dax_synchronous(struct dax_device *dax_dev) bool dax_synchronous(struct dax_device *dax_dev)
{ {
return test_bit(DAXDEV_SYNC, &dax_dev->flags); return test_bit(DAXDEV_SYNC, &dax_dev->flags);
} }
EXPORT_SYMBOL_GPL(__dax_synchronous); EXPORT_SYMBOL_GPL(dax_synchronous);
void __set_dax_synchronous(struct dax_device *dax_dev) void set_dax_synchronous(struct dax_device *dax_dev)
{ {
set_bit(DAXDEV_SYNC, &dax_dev->flags); set_bit(DAXDEV_SYNC, &dax_dev->flags);
} }
EXPORT_SYMBOL_GPL(__set_dax_synchronous); EXPORT_SYMBOL_GPL(set_dax_synchronous);
void set_dax_nocache(struct dax_device *dax_dev)
{
set_bit(DAXDEV_NOCACHE, &dax_dev->flags);
}
EXPORT_SYMBOL_GPL(set_dax_nocache);
void set_dax_nomc(struct dax_device *dax_dev)
{
set_bit(DAXDEV_NOMC, &dax_dev->flags);
}
EXPORT_SYMBOL_GPL(set_dax_nomc);
bool dax_alive(struct dax_device *dax_dev) bool dax_alive(struct dax_device *dax_dev)
{ {
@ -363,12 +267,7 @@ void kill_dax(struct dax_device *dax_dev)
return; return;
clear_bit(DAXDEV_ALIVE, &dax_dev->flags); clear_bit(DAXDEV_ALIVE, &dax_dev->flags);
synchronize_srcu(&dax_srcu); synchronize_srcu(&dax_srcu);
spin_lock(&dax_host_lock);
hlist_del_init(&dax_dev->list);
spin_unlock(&dax_host_lock);
} }
EXPORT_SYMBOL_GPL(kill_dax); EXPORT_SYMBOL_GPL(kill_dax);
@ -400,8 +299,6 @@ static struct dax_device *to_dax_dev(struct inode *inode)
static void dax_free_inode(struct inode *inode) static void dax_free_inode(struct inode *inode)
{ {
struct dax_device *dax_dev = to_dax_dev(inode); struct dax_device *dax_dev = to_dax_dev(inode);
kfree(dax_dev->host);
dax_dev->host = NULL;
if (inode->i_rdev) if (inode->i_rdev)
ida_simple_remove(&dax_minor_ida, iminor(inode)); ida_simple_remove(&dax_minor_ida, iminor(inode));
kmem_cache_free(dax_cache, dax_dev); kmem_cache_free(dax_cache, dax_dev);
@ -476,65 +373,30 @@ static struct dax_device *dax_dev_get(dev_t devt)
return dax_dev; return dax_dev;
} }
static void dax_add_host(struct dax_device *dax_dev, const char *host) struct dax_device *alloc_dax(void *private, const struct dax_operations *ops)
{
int hash;
/*
* Unconditionally init dax_dev since it's coming from a
* non-zeroed slab cache
*/
INIT_HLIST_NODE(&dax_dev->list);
dax_dev->host = host;
if (!host)
return;
hash = dax_host_hash(host);
spin_lock(&dax_host_lock);
hlist_add_head(&dax_dev->list, &dax_host_list[hash]);
spin_unlock(&dax_host_lock);
}
struct dax_device *alloc_dax(void *private, const char *__host,
const struct dax_operations *ops, unsigned long flags)
{ {
struct dax_device *dax_dev; struct dax_device *dax_dev;
const char *host;
dev_t devt; dev_t devt;
int minor; int minor;
if (ops && !ops->zero_page_range) { if (WARN_ON_ONCE(ops && !ops->zero_page_range))
pr_debug("%s: error: device does not provide dax"
" operation zero_page_range()\n",
__host ? __host : "Unknown");
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
}
host = kstrdup(__host, GFP_KERNEL);
if (__host && !host)
return ERR_PTR(-ENOMEM);
minor = ida_simple_get(&dax_minor_ida, 0, MINORMASK+1, GFP_KERNEL); minor = ida_simple_get(&dax_minor_ida, 0, MINORMASK+1, GFP_KERNEL);
if (minor < 0) if (minor < 0)
goto err_minor; return ERR_PTR(-ENOMEM);
devt = MKDEV(MAJOR(dax_devt), minor); devt = MKDEV(MAJOR(dax_devt), minor);
dax_dev = dax_dev_get(devt); dax_dev = dax_dev_get(devt);
if (!dax_dev) if (!dax_dev)
goto err_dev; goto err_dev;
dax_add_host(dax_dev, host);
dax_dev->ops = ops; dax_dev->ops = ops;
dax_dev->private = private; dax_dev->private = private;
if (flags & DAXDEV_F_SYNC)
set_dax_synchronous(dax_dev);
return dax_dev; return dax_dev;
err_dev: err_dev:
ida_simple_remove(&dax_minor_ida, minor); ida_simple_remove(&dax_minor_ida, minor);
err_minor:
kfree(host);
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
} }
EXPORT_SYMBOL_GPL(alloc_dax); EXPORT_SYMBOL_GPL(alloc_dax);


@ -162,71 +162,34 @@ static int linear_iterate_devices(struct dm_target *ti,
return fn(ti, lc->dev, lc->start, ti->len, data); return fn(ti, lc->dev, lc->start, ti->len, data);
} }
#if IS_ENABLED(CONFIG_DAX_DRIVER) #if IS_ENABLED(CONFIG_FS_DAX)
static struct dax_device *linear_dax_pgoff(struct dm_target *ti, pgoff_t *pgoff)
{
struct linear_c *lc = ti->private;
sector_t sector = linear_map_sector(ti, *pgoff << PAGE_SECTORS_SHIFT);
*pgoff = (get_start_sect(lc->dev->bdev) + sector) >> PAGE_SECTORS_SHIFT;
return lc->dev->dax_dev;
}
static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
long nr_pages, void **kaddr, pfn_t *pfn) long nr_pages, void **kaddr, pfn_t *pfn)
{ {
long ret; struct dax_device *dax_dev = linear_dax_pgoff(ti, &pgoff);
struct linear_c *lc = ti->private;
struct block_device *bdev = lc->dev->bdev;
struct dax_device *dax_dev = lc->dev->dax_dev;
sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
dev_sector = linear_map_sector(ti, sector);
ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages * PAGE_SIZE, &pgoff);
if (ret)
return ret;
return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn); return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
} }
static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
struct linear_c *lc = ti->private;
struct block_device *bdev = lc->dev->bdev;
struct dax_device *dax_dev = lc->dev->dax_dev;
sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
dev_sector = linear_map_sector(ti, sector);
if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
return 0;
return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
}
static size_t linear_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
struct linear_c *lc = ti->private;
struct block_device *bdev = lc->dev->bdev;
struct dax_device *dax_dev = lc->dev->dax_dev;
sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
dev_sector = linear_map_sector(ti, sector);
if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
return 0;
return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i);
}
static int linear_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff, static int linear_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
size_t nr_pages) size_t nr_pages)
{ {
int ret; struct dax_device *dax_dev = linear_dax_pgoff(ti, &pgoff);
struct linear_c *lc = ti->private;
struct block_device *bdev = lc->dev->bdev;
struct dax_device *dax_dev = lc->dev->dax_dev;
sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
dev_sector = linear_map_sector(ti, sector);
ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages << PAGE_SHIFT, &pgoff);
if (ret)
return ret;
return dax_zero_page_range(dax_dev, pgoff, nr_pages); return dax_zero_page_range(dax_dev, pgoff, nr_pages);
} }
#else #else
#define linear_dax_direct_access NULL #define linear_dax_direct_access NULL
#define linear_dax_copy_from_iter NULL
#define linear_dax_copy_to_iter NULL
#define linear_dax_zero_page_range NULL #define linear_dax_zero_page_range NULL
#endif #endif
@ -244,8 +207,6 @@ static struct target_type linear_target = {
.prepare_ioctl = linear_prepare_ioctl, .prepare_ioctl = linear_prepare_ioctl,
.iterate_devices = linear_iterate_devices, .iterate_devices = linear_iterate_devices,
.direct_access = linear_dax_direct_access, .direct_access = linear_dax_direct_access,
.dax_copy_from_iter = linear_dax_copy_from_iter,
.dax_copy_to_iter = linear_dax_copy_to_iter,
.dax_zero_page_range = linear_dax_zero_page_range, .dax_zero_page_range = linear_dax_zero_page_range,
}; };


@ -901,120 +901,34 @@ static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limit
limits->io_min = limits->physical_block_size; limits->io_min = limits->physical_block_size;
} }
#if IS_ENABLED(CONFIG_DAX_DRIVER) #if IS_ENABLED(CONFIG_FS_DAX)
static int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes, static struct dax_device *log_writes_dax_pgoff(struct dm_target *ti,
struct iov_iter *i) pgoff_t *pgoff)
{ {
struct pending_block *block; struct log_writes_c *lc = ti->private;
if (!bytes) *pgoff += (get_start_sect(lc->dev->bdev) >> PAGE_SECTORS_SHIFT);
return 0; return lc->dev->dax_dev;
block = kzalloc(sizeof(struct pending_block), GFP_KERNEL);
if (!block) {
DMERR("Error allocating dax pending block");
return -ENOMEM;
}
block->data = kzalloc(bytes, GFP_KERNEL);
if (!block->data) {
DMERR("Error allocating dax data space");
kfree(block);
return -ENOMEM;
}
/* write data provided via the iterator */
if (!copy_from_iter(block->data, bytes, i)) {
DMERR("Error copying dax data");
kfree(block->data);
kfree(block);
return -EIO;
}
/* rewind the iterator so that the block driver can use it */
iov_iter_revert(i, bytes);
block->datalen = bytes;
block->sector = bio_to_dev_sectors(lc, sector);
block->nr_sectors = ALIGN(bytes, lc->sectorsize) >> lc->sectorshift;
atomic_inc(&lc->pending_blocks);
spin_lock_irq(&lc->blocks_lock);
list_add_tail(&block->list, &lc->unflushed_blocks);
spin_unlock_irq(&lc->blocks_lock);
wake_up_process(lc->log_kthread);
return 0;
} }
static long log_writes_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, static long log_writes_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
long nr_pages, void **kaddr, pfn_t *pfn) long nr_pages, void **kaddr, pfn_t *pfn)
{ {
struct log_writes_c *lc = ti->private; struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff);
sector_t sector = pgoff * PAGE_SECTORS;
int ret;
ret = bdev_dax_pgoff(lc->dev->bdev, sector, nr_pages * PAGE_SIZE, &pgoff); return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
if (ret)
return ret;
return dax_direct_access(lc->dev->dax_dev, pgoff, nr_pages, kaddr, pfn);
}
static size_t log_writes_dax_copy_from_iter(struct dm_target *ti,
pgoff_t pgoff, void *addr, size_t bytes,
struct iov_iter *i)
{
struct log_writes_c *lc = ti->private;
sector_t sector = pgoff * PAGE_SECTORS;
int err;
if (bdev_dax_pgoff(lc->dev->bdev, sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
return 0;
/* Don't bother doing anything if logging has been disabled */
if (!lc->logging_enabled)
goto dax_copy;
err = log_dax(lc, sector, bytes, i);
if (err) {
DMWARN("Error %d logging DAX write", err);
return 0;
}
dax_copy:
return dax_copy_from_iter(lc->dev->dax_dev, pgoff, addr, bytes, i);
}
static size_t log_writes_dax_copy_to_iter(struct dm_target *ti,
pgoff_t pgoff, void *addr, size_t bytes,
struct iov_iter *i)
{
struct log_writes_c *lc = ti->private;
sector_t sector = pgoff * PAGE_SECTORS;
if (bdev_dax_pgoff(lc->dev->bdev, sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
return 0;
return dax_copy_to_iter(lc->dev->dax_dev, pgoff, addr, bytes, i);
} }
static int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff, static int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
size_t nr_pages) size_t nr_pages)
{ {
int ret; struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff);
struct log_writes_c *lc = ti->private;
sector_t sector = pgoff * PAGE_SECTORS;
ret = bdev_dax_pgoff(lc->dev->bdev, sector, nr_pages << PAGE_SHIFT, return dax_zero_page_range(dax_dev, pgoff, nr_pages << PAGE_SHIFT);
&pgoff);
if (ret)
return ret;
return dax_zero_page_range(lc->dev->dax_dev, pgoff,
nr_pages << PAGE_SHIFT);
} }
#else #else
#define log_writes_dax_direct_access NULL #define log_writes_dax_direct_access NULL
#define log_writes_dax_copy_from_iter NULL
#define log_writes_dax_copy_to_iter NULL
#define log_writes_dax_zero_page_range NULL #define log_writes_dax_zero_page_range NULL
#endif #endif
@ -1032,8 +946,6 @@ static struct target_type log_writes_target = {
.iterate_devices = log_writes_iterate_devices, .iterate_devices = log_writes_iterate_devices,
.io_hints = log_writes_io_hints, .io_hints = log_writes_io_hints,
.direct_access = log_writes_dax_direct_access, .direct_access = log_writes_dax_direct_access,
.dax_copy_from_iter = log_writes_dax_copy_from_iter,
.dax_copy_to_iter = log_writes_dax_copy_to_iter,
.dax_zero_page_range = log_writes_dax_zero_page_range, .dax_zero_page_range = log_writes_dax_zero_page_range,
}; };


@ -300,91 +300,40 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
return DM_MAPIO_REMAPPED; return DM_MAPIO_REMAPPED;
} }
#if IS_ENABLED(CONFIG_DAX_DRIVER) #if IS_ENABLED(CONFIG_FS_DAX)
static struct dax_device *stripe_dax_pgoff(struct dm_target *ti, pgoff_t *pgoff)
{
struct stripe_c *sc = ti->private;
struct block_device *bdev;
sector_t dev_sector;
uint32_t stripe;
stripe_map_sector(sc, *pgoff * PAGE_SECTORS, &stripe, &dev_sector);
dev_sector += sc->stripe[stripe].physical_start;
bdev = sc->stripe[stripe].dev->bdev;
*pgoff = (get_start_sect(bdev) + dev_sector) >> PAGE_SECTORS_SHIFT;
return sc->stripe[stripe].dev->dax_dev;
}
static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
long nr_pages, void **kaddr, pfn_t *pfn) long nr_pages, void **kaddr, pfn_t *pfn)
{ {
sector_t dev_sector, sector = pgoff * PAGE_SECTORS; struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff);
struct stripe_c *sc = ti->private;
struct dax_device *dax_dev;
struct block_device *bdev;
uint32_t stripe;
long ret;
stripe_map_sector(sc, sector, &stripe, &dev_sector);
dev_sector += sc->stripe[stripe].physical_start;
dax_dev = sc->stripe[stripe].dev->dax_dev;
bdev = sc->stripe[stripe].dev->bdev;
ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages * PAGE_SIZE, &pgoff);
if (ret)
return ret;
return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn); return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
} }
static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
struct stripe_c *sc = ti->private;
struct dax_device *dax_dev;
struct block_device *bdev;
uint32_t stripe;
stripe_map_sector(sc, sector, &stripe, &dev_sector);
dev_sector += sc->stripe[stripe].physical_start;
dax_dev = sc->stripe[stripe].dev->dax_dev;
bdev = sc->stripe[stripe].dev->bdev;
if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
return 0;
return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
}
static size_t stripe_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
struct stripe_c *sc = ti->private;
struct dax_device *dax_dev;
struct block_device *bdev;
uint32_t stripe;
stripe_map_sector(sc, sector, &stripe, &dev_sector);
dev_sector += sc->stripe[stripe].physical_start;
dax_dev = sc->stripe[stripe].dev->dax_dev;
bdev = sc->stripe[stripe].dev->bdev;
if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
return 0;
return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i);
}
static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff, static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
size_t nr_pages) size_t nr_pages)
{ {
int ret; struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff);
sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
struct stripe_c *sc = ti->private;
struct dax_device *dax_dev;
struct block_device *bdev;
uint32_t stripe;
stripe_map_sector(sc, sector, &stripe, &dev_sector);
dev_sector += sc->stripe[stripe].physical_start;
dax_dev = sc->stripe[stripe].dev->dax_dev;
bdev = sc->stripe[stripe].dev->bdev;
ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages << PAGE_SHIFT, &pgoff);
if (ret)
return ret;
return dax_zero_page_range(dax_dev, pgoff, nr_pages); return dax_zero_page_range(dax_dev, pgoff, nr_pages);
} }
#else #else
#define stripe_dax_direct_access NULL #define stripe_dax_direct_access NULL
#define stripe_dax_copy_from_iter NULL
#define stripe_dax_copy_to_iter NULL
#define stripe_dax_zero_page_range NULL #define stripe_dax_zero_page_range NULL
#endif #endif
@ -521,8 +470,6 @@ static struct target_type stripe_target = {
.iterate_devices = stripe_iterate_devices, .iterate_devices = stripe_iterate_devices,
.io_hints = stripe_io_hints, .io_hints = stripe_io_hints,
.direct_access = stripe_dax_direct_access, .direct_access = stripe_dax_direct_access,
.dax_copy_from_iter = stripe_dax_copy_from_iter,
.dax_copy_to_iter = stripe_dax_copy_to_iter,
.dax_zero_page_range = stripe_dax_zero_page_range, .dax_zero_page_range = stripe_dax_zero_page_range,
}; };


@ -806,12 +806,14 @@ void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type)
EXPORT_SYMBOL_GPL(dm_table_set_type); EXPORT_SYMBOL_GPL(dm_table_set_type);
/* validate the dax capability of the target device span */ /* validate the dax capability of the target device span */
int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev, static int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev,
sector_t start, sector_t len, void *data) sector_t start, sector_t len, void *data)
{ {
int blocksize = *(int *) data; if (dev->dax_dev)
return false;
return !dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len); DMDEBUG("%pg: error: dax unsupported by block device", dev->bdev);
return true;
} }
/* Check devices support synchronous DAX */ /* Check devices support synchronous DAX */
@ -821,8 +823,8 @@ static int device_not_dax_synchronous_capable(struct dm_target *ti, struct dm_de
return !dev->dax_dev || !dax_synchronous(dev->dax_dev); return !dev->dax_dev || !dax_synchronous(dev->dax_dev);
} }
bool dm_table_supports_dax(struct dm_table *t, static bool dm_table_supports_dax(struct dm_table *t,
iterate_devices_callout_fn iterate_fn, int *blocksize) iterate_devices_callout_fn iterate_fn)
{ {
struct dm_target *ti; struct dm_target *ti;
unsigned i; unsigned i;
@ -835,7 +837,7 @@ bool dm_table_supports_dax(struct dm_table *t,
return false; return false;
if (!ti->type->iterate_devices || if (!ti->type->iterate_devices ||
ti->type->iterate_devices(ti, iterate_fn, blocksize)) ti->type->iterate_devices(ti, iterate_fn, NULL))
return false; return false;
} }
@ -862,7 +864,6 @@ static int dm_table_determine_type(struct dm_table *t)
struct dm_target *tgt; struct dm_target *tgt;
struct list_head *devices = dm_table_get_devices(t); struct list_head *devices = dm_table_get_devices(t);
enum dm_queue_mode live_md_type = dm_get_md_type(t->md); enum dm_queue_mode live_md_type = dm_get_md_type(t->md);
int page_size = PAGE_SIZE;
if (t->type != DM_TYPE_NONE) { if (t->type != DM_TYPE_NONE) {
/* target already set the table's type */ /* target already set the table's type */
@ -906,7 +907,7 @@ static int dm_table_determine_type(struct dm_table *t)
verify_bio_based: verify_bio_based:
/* We must use this table as bio-based */ /* We must use this table as bio-based */
t->type = DM_TYPE_BIO_BASED; t->type = DM_TYPE_BIO_BASED;
if (dm_table_supports_dax(t, device_not_dax_capable, &page_size) || if (dm_table_supports_dax(t, device_not_dax_capable) ||
(list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) { (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) {
t->type = DM_TYPE_DAX_BIO_BASED; t->type = DM_TYPE_DAX_BIO_BASED;
} }
@ -1976,7 +1977,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
struct queue_limits *limits) struct queue_limits *limits)
{ {
bool wc = false, fua = false; bool wc = false, fua = false;
int page_size = PAGE_SIZE;
int r; int r;
/* /*
@ -2010,9 +2010,9 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
} }
blk_queue_write_cache(q, wc, fua); blk_queue_write_cache(q, wc, fua);
if (dm_table_supports_dax(t, device_not_dax_capable, &page_size)) { if (dm_table_supports_dax(t, device_not_dax_capable)) {
blk_queue_flag_set(QUEUE_FLAG_DAX, q); blk_queue_flag_set(QUEUE_FLAG_DAX, q);
if (dm_table_supports_dax(t, device_not_dax_synchronous_capable, NULL)) if (dm_table_supports_dax(t, device_not_dax_synchronous_capable))
set_dax_synchronous(t->md->dax_dev); set_dax_synchronous(t->md->dax_dev);
} }
else else


@@ -38,7 +38,7 @@
 #define BITMAP_GRANULARITY	PAGE_SIZE
 #endif
 
-#if IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API) && IS_ENABLED(CONFIG_DAX_DRIVER)
+#if IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API) && IS_ENABLED(CONFIG_FS_DAX)
 #define DM_WRITECACHE_HAS_PMEM
 #endif


@ -637,7 +637,7 @@ static int open_table_device(struct table_device *td, dev_t dev,
struct mapped_device *md) struct mapped_device *md)
{ {
struct block_device *bdev; struct block_device *bdev;
u64 part_off;
int r; int r;
BUG_ON(td->dm_dev.bdev); BUG_ON(td->dm_dev.bdev);
@ -653,7 +653,7 @@ static int open_table_device(struct table_device *td, dev_t dev,
} }
td->dm_dev.bdev = bdev; td->dm_dev.bdev = bdev;
td->dm_dev.dax_dev = fs_dax_get_by_bdev(bdev); td->dm_dev.dax_dev = fs_dax_get_by_bdev(bdev, &part_off);
return 0; return 0;
} }
@ -1027,74 +1027,6 @@ static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
return ret; return ret;
} }
static bool dm_dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
int blocksize, sector_t start, sector_t len)
{
struct mapped_device *md = dax_get_private(dax_dev);
struct dm_table *map;
bool ret = false;
int srcu_idx;
map = dm_get_live_table(md, &srcu_idx);
if (!map)
goto out;
ret = dm_table_supports_dax(map, device_not_dax_capable, &blocksize);
out:
dm_put_live_table(md, srcu_idx);
return ret;
}
static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
struct mapped_device *md = dax_get_private(dax_dev);
sector_t sector = pgoff * PAGE_SECTORS;
struct dm_target *ti;
long ret = 0;
int srcu_idx;
ti = dm_dax_get_live_target(md, sector, &srcu_idx);
if (!ti)
goto out;
if (!ti->type->dax_copy_from_iter) {
ret = copy_from_iter(addr, bytes, i);
goto out;
}
ret = ti->type->dax_copy_from_iter(ti, pgoff, addr, bytes, i);
out:
dm_put_live_table(md, srcu_idx);
return ret;
}
static size_t dm_dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
struct mapped_device *md = dax_get_private(dax_dev);
sector_t sector = pgoff * PAGE_SECTORS;
struct dm_target *ti;
long ret = 0;
int srcu_idx;
ti = dm_dax_get_live_target(md, sector, &srcu_idx);
if (!ti)
goto out;
if (!ti->type->dax_copy_to_iter) {
ret = copy_to_iter(addr, bytes, i);
goto out;
}
ret = ti->type->dax_copy_to_iter(ti, pgoff, addr, bytes, i);
out:
dm_put_live_table(md, srcu_idx);
return ret;
}
static int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, static int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
size_t nr_pages) size_t nr_pages)
{ {
@ -1683,6 +1615,7 @@ static void cleanup_mapped_device(struct mapped_device *md)
bioset_exit(&md->io_bs); bioset_exit(&md->io_bs);
if (md->dax_dev) { if (md->dax_dev) {
dax_remove_host(md->disk);
kill_dax(md->dax_dev); kill_dax(md->dax_dev);
put_dax(md->dax_dev); put_dax(md->dax_dev);
md->dax_dev = NULL; md->dax_dev = NULL;
@ -1784,10 +1717,15 @@ static struct mapped_device *alloc_dev(int minor)
md->disk->private_data = md; md->disk->private_data = md;
sprintf(md->disk->disk_name, "dm-%d", minor); sprintf(md->disk->disk_name, "dm-%d", minor);
if (IS_ENABLED(CONFIG_DAX_DRIVER)) { if (IS_ENABLED(CONFIG_FS_DAX)) {
md->dax_dev = alloc_dax(md, md->disk->disk_name, md->dax_dev = alloc_dax(md, &dm_dax_ops);
&dm_dax_ops, 0); if (IS_ERR(md->dax_dev)) {
if (IS_ERR(md->dax_dev)) md->dax_dev = NULL;
goto bad;
}
set_dax_nocache(md->dax_dev);
set_dax_nomc(md->dax_dev);
if (dax_add_host(md->dax_dev, md->disk))
goto bad; goto bad;
} }
@ -3041,9 +2979,6 @@ static const struct block_device_operations dm_rq_blk_dops = {
static const struct dax_operations dm_dax_ops = { static const struct dax_operations dm_dax_ops = {
.direct_access = dm_dax_direct_access, .direct_access = dm_dax_direct_access,
.dax_supported = dm_dax_supported,
.copy_from_iter = dm_dax_copy_from_iter,
.copy_to_iter = dm_dax_copy_to_iter,
.zero_page_range = dm_dax_zero_page_range, .zero_page_range = dm_dax_zero_page_range,
}; };


@@ -73,10 +73,6 @@ bool dm_table_bio_based(struct dm_table *t);
 bool dm_table_request_based(struct dm_table *t);
 void dm_table_free_md_mempools(struct dm_table *t);
 struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
-bool dm_table_supports_dax(struct dm_table *t, iterate_devices_callout_fn fn,
-			   int *blocksize);
-int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev,
-			   sector_t start, sector_t len, void *data);
 
 void dm_lock_md_type(struct mapped_device *md);
 void dm_unlock_md_type(struct mapped_device *md);


@@ -22,7 +22,7 @@ if LIBNVDIMM
 config BLK_DEV_PMEM
 	tristate "PMEM: Persistent memory block device support"
 	default LIBNVDIMM
-	select DAX_DRIVER
+	select DAX
 	select ND_BTT if BTT
 	select ND_PFN if NVDIMM_PFN
 	help


@ -301,29 +301,8 @@ static long pmem_dax_direct_access(struct dax_device *dax_dev,
return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn); return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn);
} }
/*
* Use the 'no check' versions of copy_from_iter_flushcache() and
* copy_mc_to_iter() to bypass HARDENED_USERCOPY overhead. Bounds
* checking, both file offset and device offset, is handled by
* dax_iomap_actor()
*/
static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
return _copy_from_iter_flushcache(addr, bytes, i);
}
static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
return _copy_mc_to_iter(addr, bytes, i);
}
static const struct dax_operations pmem_dax_ops = { static const struct dax_operations pmem_dax_ops = {
.direct_access = pmem_dax_direct_access, .direct_access = pmem_dax_direct_access,
.dax_supported = generic_fsdax_supported,
.copy_from_iter = pmem_copy_from_iter,
.copy_to_iter = pmem_copy_to_iter,
.zero_page_range = pmem_dax_zero_page_range, .zero_page_range = pmem_dax_zero_page_range,
}; };
@ -379,6 +358,7 @@ static void pmem_release_disk(void *__pmem)
{ {
struct pmem_device *pmem = __pmem; struct pmem_device *pmem = __pmem;
dax_remove_host(pmem->disk);
kill_dax(pmem->dax_dev); kill_dax(pmem->dax_dev);
put_dax(pmem->dax_dev); put_dax(pmem->dax_dev);
del_gendisk(pmem->disk); del_gendisk(pmem->disk);
@ -402,7 +382,6 @@ static int pmem_attach_disk(struct device *dev,
struct gendisk *disk; struct gendisk *disk;
void *addr; void *addr;
int rc; int rc;
unsigned long flags = 0UL;
pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL); pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
if (!pmem) if (!pmem)
@ -495,19 +474,24 @@ static int pmem_attach_disk(struct device *dev,
nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_range); nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_range);
disk->bb = &pmem->bb; disk->bb = &pmem->bb;
if (is_nvdimm_sync(nd_region)) dax_dev = alloc_dax(pmem, &pmem_dax_ops);
flags = DAXDEV_F_SYNC;
dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops, flags);
if (IS_ERR(dax_dev)) { if (IS_ERR(dax_dev)) {
rc = PTR_ERR(dax_dev); rc = PTR_ERR(dax_dev);
goto out; goto out;
} }
set_dax_nocache(dax_dev);
set_dax_nomc(dax_dev);
if (is_nvdimm_sync(nd_region))
set_dax_synchronous(dax_dev);
rc = dax_add_host(dax_dev, disk);
if (rc)
goto out_cleanup_dax;
dax_write_cache(dax_dev, nvdimm_has_cache(nd_region)); dax_write_cache(dax_dev, nvdimm_has_cache(nd_region));
pmem->dax_dev = dax_dev; pmem->dax_dev = dax_dev;
rc = device_add_disk(dev, disk, pmem_attribute_groups); rc = device_add_disk(dev, disk, pmem_attribute_groups);
if (rc) if (rc)
goto out_cleanup_dax; goto out_remove_host;
if (devm_add_action_or_reset(dev, pmem_release_disk, pmem)) if (devm_add_action_or_reset(dev, pmem_release_disk, pmem))
return -ENOMEM; return -ENOMEM;
@ -519,6 +503,8 @@ static int pmem_attach_disk(struct device *dev,
dev_warn(dev, "'badblocks' notification disabled\n"); dev_warn(dev, "'badblocks' notification disabled\n");
return 0; return 0;
out_remove_host:
dax_remove_host(pmem->disk);
out_cleanup_dax: out_cleanup_dax:
kill_dax(pmem->dax_dev); kill_dax(pmem->dax_dev);
put_dax(pmem->dax_dev); put_dax(pmem->dax_dev);


@@ -219,7 +219,7 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
 	error = gen_pool_add_owner(p2pdma->pool, (unsigned long)addr,
 			pci_bus_address(pdev, bar) + offset,
 			range_len(&pgmap->range), dev_to_node(&pdev->dev),
-			pgmap->ref);
+			&pgmap->ref);
 	if (error)
 		goto pages_free;


@@ -5,7 +5,7 @@ comment "S/390 block device drivers"
 config DCSSBLK
 	def_tristate m
 	select FS_DAX_LIMITED
-	select DAX_DRIVER
+	select DAX
 	prompt "DCSSBLK support"
 	depends on S390 && BLOCK
 	help


@@ -44,18 +44,6 @@ static const struct block_device_operations dcssblk_devops = {
 	.release	= dcssblk_release,
 };
-static size_t dcssblk_dax_copy_from_iter(struct dax_device *dax_dev,
-		pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i)
-{
-	return copy_from_iter(addr, bytes, i);
-}
-static size_t dcssblk_dax_copy_to_iter(struct dax_device *dax_dev,
-		pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i)
-{
-	return copy_to_iter(addr, bytes, i);
-}
 static int dcssblk_dax_zero_page_range(struct dax_device *dax_dev,
 		pgoff_t pgoff, size_t nr_pages)
 {
@@ -72,9 +60,6 @@ static int dcssblk_dax_zero_page_range(struct dax_device *dax_dev,
 static const struct dax_operations dcssblk_dax_ops = {
 	.direct_access = dcssblk_dax_direct_access,
-	.dax_supported = generic_fsdax_supported,
-	.copy_from_iter = dcssblk_dax_copy_from_iter,
-	.copy_to_iter = dcssblk_dax_copy_to_iter,
 	.zero_page_range = dcssblk_dax_zero_page_range,
 };
@@ -687,18 +672,21 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
 	if (rc)
 		goto put_dev;
-	dev_info->dax_dev = alloc_dax(dev_info, dev_info->gd->disk_name,
-			&dcssblk_dax_ops, DAXDEV_F_SYNC);
+	dev_info->dax_dev = alloc_dax(dev_info, &dcssblk_dax_ops);
 	if (IS_ERR(dev_info->dax_dev)) {
 		rc = PTR_ERR(dev_info->dax_dev);
 		dev_info->dax_dev = NULL;
 		goto put_dev;
 	}
+	set_dax_synchronous(dev_info->dax_dev);
+	rc = dax_add_host(dev_info->dax_dev, dev_info->gd);
+	if (rc)
+		goto out_dax;
 	get_device(&dev_info->dev);
 	rc = device_add_disk(&dev_info->dev, dev_info->gd, NULL);
 	if (rc)
-		goto out_dax;
+		goto out_dax_host;
 	switch (dev_info->segment_type) {
 		case SEG_TYPE_SR:
@@ -714,6 +702,8 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
 	rc = count;
 	goto out;
+out_dax_host:
+	dax_remove_host(dev_info->gd);
 out_dax:
 	put_device(&dev_info->dev);
 	kill_dax(dev_info->dax_dev);
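
The dcssblk conversion above shows the new provider registration flow end to end. A minimal sketch under the assumptions visible in this diff (reduced ops table, explicit sync flag, explicit gendisk association); "example_" names are illustrative only:

static int example_register_dax(struct example_dev *edev)
{
	int rc;

	/* alloc_dax() no longer takes a host name or DAXDEV_F_* flags */
	edev->dax_dev = alloc_dax(edev, &example_dax_ops);
	if (IS_ERR(edev->dax_dev))
		return PTR_ERR(edev->dax_dev);

	/* synchronous-fault capability is now set explicitly */
	set_dax_synchronous(edev->dax_dev);

	/* associate the dax_device with the driver's gendisk */
	rc = dax_add_host(edev->dax_dev, edev->disk);
	if (rc) {
		kill_dax(edev->dax_dev);
		put_dax(edev->dax_dev);
	}
	return rc;
}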


@@ -15,11 +15,11 @@ config VALIDATE_FS_PARSER
 	  Enable this to perform validation of the parameter description for a
 	  filesystem when it is registered.
-if BLOCK
 config FS_IOMAP
 	bool
+if BLOCK
 source "fs/ext2/Kconfig"
 source "fs/ext4/Kconfig"
 source "fs/jbd2/Kconfig"
@@ -42,6 +42,8 @@ source "fs/nilfs2/Kconfig"
 source "fs/f2fs/Kconfig"
 source "fs/zonefs/Kconfig"
+endif # BLOCK
 config FS_DAX
 	bool "File system based Direct Access (DAX) support"
 	depends on MMU
@@ -89,8 +91,6 @@ config FS_DAX_PMD
 config FS_DAX_LIMITED
 	bool
-endif # BLOCK
 # Posix ACL utility routines
 #
 # Note: Posix ACLs can be implemented without these helpers. Never use

fs/dax.c

@@ -709,26 +709,26 @@ int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
 	return __dax_invalidate_entry(mapping, index, false);
 }
-static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_dev,
-		sector_t sector, struct page *to, unsigned long vaddr)
+static pgoff_t dax_iomap_pgoff(const struct iomap *iomap, loff_t pos)
 {
+	return PHYS_PFN(iomap->addr + (pos & PAGE_MASK) - iomap->offset);
+}
+static int copy_cow_page_dax(struct vm_fault *vmf, const struct iomap_iter *iter)
+{
+	pgoff_t pgoff = dax_iomap_pgoff(&iter->iomap, iter->pos);
 	void *vto, *kaddr;
-	pgoff_t pgoff;
 	long rc;
 	int id;
-	rc = bdev_dax_pgoff(bdev, sector, PAGE_SIZE, &pgoff);
-	if (rc)
-		return rc;
 	id = dax_read_lock();
-	rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
+	rc = dax_direct_access(iter->iomap.dax_dev, pgoff, 1, &kaddr, NULL);
 	if (rc < 0) {
 		dax_read_unlock(id);
 		return rc;
 	}
-	vto = kmap_atomic(to);
-	copy_user_page(vto, (void __force *)kaddr, vaddr, to);
+	vto = kmap_atomic(vmf->cow_page);
+	copy_user_page(vto, kaddr, vmf->address, vmf->cow_page);
 	kunmap_atomic(vto);
 	dax_read_unlock(id);
 	return 0;
@@ -1005,22 +1005,13 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 }
 EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
-static sector_t dax_iomap_sector(const struct iomap *iomap, loff_t pos)
-{
-	return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9;
-}
 static int dax_iomap_pfn(const struct iomap *iomap, loff_t pos, size_t size,
 		pfn_t *pfnp)
 {
-	const sector_t sector = dax_iomap_sector(iomap, pos);
-	pgoff_t pgoff;
+	pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
 	int id, rc;
 	long length;
-	rc = bdev_dax_pgoff(iomap->bdev, sector, size, &pgoff);
-	if (rc)
-		return rc;
 	id = dax_read_lock();
 	length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
 			NULL, pfnp);
@@ -1126,50 +1117,94 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
 }
 #endif /* CONFIG_FS_DAX_PMD */
-s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap)
+static int dax_memzero(struct dax_device *dax_dev, pgoff_t pgoff,
+		unsigned int offset, size_t size)
 {
-	sector_t sector = iomap_sector(iomap, pos & PAGE_MASK);
-	pgoff_t pgoff;
-	long rc, id;
 	void *kaddr;
-	bool page_aligned = false;
-	unsigned offset = offset_in_page(pos);
-	unsigned size = min_t(u64, PAGE_SIZE - offset, length);
+	long ret;
-	if (IS_ALIGNED(sector << SECTOR_SHIFT, PAGE_SIZE) &&
-	    (size == PAGE_SIZE))
-		page_aligned = true;
-	rc = bdev_dax_pgoff(iomap->bdev, sector, PAGE_SIZE, &pgoff);
-	if (rc)
-		return rc;
-	id = dax_read_lock();
-	if (page_aligned)
-		rc = dax_zero_page_range(iomap->dax_dev, pgoff, 1);
-	else
-		rc = dax_direct_access(iomap->dax_dev, pgoff, 1, &kaddr, NULL);
-	if (rc < 0) {
-		dax_read_unlock(id);
-		return rc;
-	}
-	if (!page_aligned) {
+	ret = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
+	if (ret > 0) {
 		memset(kaddr + offset, 0, size);
-		dax_flush(iomap->dax_dev, kaddr + offset, size);
+		dax_flush(dax_dev, kaddr + offset, size);
 	}
-	dax_read_unlock(id);
-	return size;
+	return ret;
 }
+static s64 dax_zero_iter(struct iomap_iter *iter, bool *did_zero)
+{
+	const struct iomap *iomap = &iter->iomap;
+	const struct iomap *srcmap = iomap_iter_srcmap(iter);
+	loff_t pos = iter->pos;
+	u64 length = iomap_length(iter);
+	s64 written = 0;
+	/* already zeroed? we're done. */
+	if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
+		return length;
+	do {
+		unsigned offset = offset_in_page(pos);
+		unsigned size = min_t(u64, PAGE_SIZE - offset, length);
+		pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
+		long rc;
+		int id;
+		id = dax_read_lock();
+		if (IS_ALIGNED(pos, PAGE_SIZE) && size == PAGE_SIZE)
+			rc = dax_zero_page_range(iomap->dax_dev, pgoff, 1);
+		else
+			rc = dax_memzero(iomap->dax_dev, pgoff, offset, size);
+		dax_read_unlock(id);
+		if (rc < 0)
+			return rc;
+		pos += size;
+		length -= size;
+		written += size;
+		if (did_zero)
+			*did_zero = true;
+	} while (length > 0);
+	return written;
+}
+int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
+		const struct iomap_ops *ops)
+{
+	struct iomap_iter iter = {
+		.inode		= inode,
+		.pos		= pos,
+		.len		= len,
+		.flags		= IOMAP_DAX | IOMAP_ZERO,
+	};
+	int ret;
+	while ((ret = iomap_iter(&iter, ops)) > 0)
+		iter.processed = dax_zero_iter(&iter, did_zero);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dax_zero_range);
+int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
+		const struct iomap_ops *ops)
+{
+	unsigned int blocksize = i_blocksize(inode);
+	unsigned int off = pos & (blocksize - 1);
+	/* Block boundary? Nothing to do */
+	if (!off)
+		return 0;
+	return dax_zero_range(inode, pos, blocksize - off, did_zero, ops);
+}
+EXPORT_SYMBOL_GPL(dax_truncate_page);
 static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
 		struct iov_iter *iter)
 {
 	const struct iomap *iomap = &iomi->iomap;
 	loff_t length = iomap_length(iomi);
 	loff_t pos = iomi->pos;
-	struct block_device *bdev = iomap->bdev;
 	struct dax_device *dax_dev = iomap->dax_dev;
 	loff_t end = pos + length, done = 0;
 	ssize_t ret = 0;
@@ -1203,9 +1238,8 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
 	while (pos < end) {
 		unsigned offset = pos & (PAGE_SIZE - 1);
 		const size_t size = ALIGN(length + offset, PAGE_SIZE);
-		const sector_t sector = dax_iomap_sector(iomap, pos);
+		pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
 		ssize_t map_len;
-		pgoff_t pgoff;
 		void *kaddr;
 		if (fatal_signal_pending(current)) {
@@ -1213,10 +1247,6 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
 			break;
 		}
-		ret = bdev_dax_pgoff(bdev, sector, size, &pgoff);
-		if (ret)
-			break;
 		map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
 				&kaddr, NULL);
 		if (map_len < 0) {
@@ -1230,11 +1260,6 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
 		if (map_len > end - pos)
 			map_len = end - pos;
-		/*
-		 * The userspace address for the memory copy has already been
-		 * validated via access_ok() in either vfs_read() or
-		 * vfs_write(), depending on which operation we are doing.
-		 */
 		if (iov_iter_rw(iter) == WRITE)
 			xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr,
 					map_len, iter);
@@ -1274,6 +1299,7 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 		.inode		= iocb->ki_filp->f_mapping->host,
 		.pos		= iocb->ki_pos,
 		.len		= iov_iter_count(iter),
+		.flags		= IOMAP_DAX,
 	};
 	loff_t done = 0;
 	int ret;
@@ -1332,19 +1358,16 @@ static vm_fault_t dax_fault_synchronous_pfnp(pfn_t *pfnp, pfn_t pfn)
 static vm_fault_t dax_fault_cow_page(struct vm_fault *vmf,
 		const struct iomap_iter *iter)
 {
-	sector_t sector = dax_iomap_sector(&iter->iomap, iter->pos);
-	unsigned long vaddr = vmf->address;
 	vm_fault_t ret;
 	int error = 0;
 	switch (iter->iomap.type) {
 	case IOMAP_HOLE:
 	case IOMAP_UNWRITTEN:
-		clear_user_highpage(vmf->cow_page, vaddr);
+		clear_user_highpage(vmf->cow_page, vmf->address);
 		break;
 	case IOMAP_MAPPED:
-		error = copy_cow_page_dax(iter->iomap.bdev, iter->iomap.dax_dev,
-					  sector, vmf->cow_page, vaddr);
+		error = copy_cow_page_dax(vmf, iter);
 		break;
 	default:
 		WARN_ON_ONCE(1);
@@ -1430,7 +1453,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
 		.inode		= mapping->host,
 		.pos		= (loff_t)vmf->pgoff << PAGE_SHIFT,
 		.len		= PAGE_SIZE,
-		.flags		= IOMAP_FAULT,
+		.flags		= IOMAP_DAX | IOMAP_FAULT,
 	};
 	vm_fault_t ret = 0;
 	void *entry;
@@ -1539,7 +1562,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 	struct iomap_iter iter = {
 		.inode		= mapping->host,
 		.len		= PMD_SIZE,
-		.flags		= IOMAP_FAULT,
+		.flags		= IOMAP_DAX | IOMAP_FAULT,
 	};
 	vm_fault_t ret = VM_FAULT_FALLBACK;
 	pgoff_t max_pgoff;
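
The new dax_zero_range()/dax_truncate_page() exports above replace the dax_iomap_zero() hook that the buffered iomap zeroing path used to call. A hedged sketch of how a filesystem is expected to dispatch between the two paths (the XFS helpers later in this series do exactly this); "example_iomap_ops" stands in for the filesystem's own iomap_ops:

static int example_zero_range(struct inode *inode, loff_t pos, loff_t len,
		bool *did_zero)
{
	/* DAX inodes zero through the dax_device, others through the page cache */
	if (IS_DAX(inode))
		return dax_zero_range(inode, pos, len, did_zero,
				&example_iomap_ops);
	return iomap_zero_range(inode, pos, len, did_zero,
			&example_iomap_ops);
}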


@@ -192,6 +192,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
 	/* primary device by default */
 	map->m_bdev = sb->s_bdev;
 	map->m_daxdev = EROFS_SB(sb)->dax_dev;
+	map->m_dax_part_off = EROFS_SB(sb)->dax_part_off;
 	if (map->m_deviceid) {
 		down_read(&devs->rwsem);
@@ -202,6 +203,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
 		}
 		map->m_bdev = dif->bdev;
 		map->m_daxdev = dif->dax_dev;
+		map->m_dax_part_off = dif->dax_part_off;
 		up_read(&devs->rwsem);
 	} else if (devs->extra_devices) {
 		down_read(&devs->rwsem);
@@ -218,6 +220,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
 				map->m_pa -= startoff;
 				map->m_bdev = dif->bdev;
 				map->m_daxdev = dif->dax_dev;
+				map->m_dax_part_off = dif->dax_part_off;
 				break;
 			}
 		}
@@ -248,9 +251,13 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 	if (ret)
 		return ret;
-	iomap->bdev = mdev.m_bdev;
-	iomap->dax_dev = mdev.m_daxdev;
 	iomap->offset = map.m_la;
+	if (flags & IOMAP_DAX) {
+		iomap->dax_dev = mdev.m_daxdev;
+		iomap->offset += mdev.m_dax_part_off;
+	} else {
+		iomap->bdev = mdev.m_bdev;
+	}
 	iomap->length = map.m_llen;
 	iomap->flags = 0;
 	iomap->private = NULL;


@@ -51,6 +51,7 @@ struct erofs_device_info {
 	char *path;
 	struct block_device *bdev;
 	struct dax_device *dax_dev;
+	u64 dax_part_off;
 	u32 blocks;
 	u32 mapped_blkaddr;
@@ -115,6 +116,7 @@ struct erofs_sb_info {
 #endif	/* CONFIG_EROFS_FS_ZIP */
 	struct erofs_dev_context *devs;
 	struct dax_device *dax_dev;
+	u64 dax_part_off;
 	u64 total_blocks;
 	u32 primarydevice_blocks;
@@ -467,6 +469,7 @@ static inline int z_erofs_map_blocks_iter(struct inode *inode,
 struct erofs_map_dev {
 	struct block_device *m_bdev;
 	struct dax_device *m_daxdev;
+	u64 m_dax_part_off;
 	erofs_off_t m_pa;
 	unsigned int m_deviceid;


@@ -267,7 +267,7 @@ static int erofs_init_devices(struct super_block *sb,
 			break;
 		}
 		dif->bdev = bdev;
-		dif->dax_dev = fs_dax_get_by_bdev(bdev);
+		dif->dax_dev = fs_dax_get_by_bdev(bdev, &dif->dax_part_off);
 		dif->blocks = le32_to_cpu(dis->blocks);
 		dif->mapped_blkaddr = le32_to_cpu(dis->mapped_blkaddr);
 		sbi->total_blocks += dif->blocks;
@@ -597,7 +597,7 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
 	sb->s_fs_info = sbi;
 	sbi->opt = ctx->opt;
-	sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
+	sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->dax_part_off);
 	sbi->devs = ctx->devs;
 	ctx->devs = NULL;
@@ -605,10 +605,13 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
 	if (err)
 		return err;
-	if (test_opt(&sbi->opt, DAX_ALWAYS) &&
-	    !dax_supported(sbi->dax_dev, sb->s_bdev, EROFS_BLKSIZ, 0, bdev_nr_sectors(sb->s_bdev))) {
-		errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
-		clear_opt(&sbi->opt, DAX_ALWAYS);
+	if (test_opt(&sbi->opt, DAX_ALWAYS)) {
+		BUILD_BUG_ON(EROFS_BLKSIZ != PAGE_SIZE);
+		if (!sbi->dax_dev) {
+			errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
+			clear_opt(&sbi->opt, DAX_ALWAYS);
+		}
 	}
 	sb->s_flags |= SB_RDONLY | SB_NOATIME;
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
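
As the erofs (and later ext2/ext4/xfs) hunks show, fs_dax_get_by_bdev() now returns the partition start offset through an out parameter and returns NULL when the device cannot be used for DAX, which is what replaces the old dax_supported() probe. A minimal sketch of the lookup pattern, with illustrative "example_sb_info" fields:

static int example_setup_dax(struct super_block *sb, struct example_sb_info *sbi)
{
	sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off);
	if (!sbi->s_daxdev)
		return -EOPNOTSUPP;	/* caller falls back to non-DAX I/O */
	return 0;
}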


@@ -118,6 +118,7 @@ struct ext2_sb_info {
 	spinlock_t s_lock;
 	struct mb_cache *s_ea_block_cache;
 	struct dax_device *s_daxdev;
+	u64 s_dax_part_off;
 };
 static inline spinlock_t *


@@ -36,6 +36,7 @@
 #include <linux/iomap.h>
 #include <linux/namei.h>
 #include <linux/uio.h>
+#include <linux/dax.h>
 #include "ext2.h"
 #include "acl.h"
 #include "xattr.h"
@@ -816,9 +817,11 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 		return ret;
 	iomap->flags = 0;
-	iomap->bdev = inode->i_sb->s_bdev;
 	iomap->offset = (u64)first_block << blkbits;
-	iomap->dax_dev = sbi->s_daxdev;
+	if (flags & IOMAP_DAX)
+		iomap->dax_dev = sbi->s_daxdev;
+	else
+		iomap->bdev = inode->i_sb->s_bdev;
 	if (ret == 0) {
 		iomap->type = IOMAP_HOLE;
@@ -827,6 +830,8 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 	} else {
 		iomap->type = IOMAP_MAPPED;
 		iomap->addr = (u64)bno << blkbits;
+		if (flags & IOMAP_DAX)
+			iomap->addr += sbi->s_dax_part_off;
 		iomap->length = (u64)ret << blkbits;
 		iomap->flags |= IOMAP_F_MERGED;
 	}
@@ -1297,9 +1302,9 @@ static int ext2_setsize(struct inode *inode, loff_t newsize)
 	inode_dio_wait(inode);
 	if (IS_DAX(inode)) {
-		error = iomap_zero_range(inode, newsize,
+		error = dax_zero_range(inode, newsize,
 				PAGE_ALIGN(newsize) - newsize, NULL,
 				&ext2_iomap_ops);
 	} else if (test_opt(inode->i_sb, NOBH))
 		error = nobh_truncate_page(inode->i_mapping,
 				newsize, ext2_get_block);


@@ -802,7 +802,6 @@ static unsigned long descriptor_loc(struct super_block *sb,
 static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 {
-	struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
 	struct buffer_head * bh;
 	struct ext2_sb_info * sbi;
 	struct ext2_super_block * es;
@@ -822,17 +821,17 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
 	if (!sbi)
-		goto failed;
+		return -ENOMEM;
 	sbi->s_blockgroup_lock =
 		kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
 	if (!sbi->s_blockgroup_lock) {
 		kfree(sbi);
-		goto failed;
+		return -ENOMEM;
 	}
 	sb->s_fs_info = sbi;
 	sbi->s_sb_block = sb_block;
-	sbi->s_daxdev = dax_dev;
+	sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off);
 	spin_lock_init(&sbi->s_lock);
 	ret = -EINVAL;
@@ -946,11 +945,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 	blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
 	if (test_opt(sb, DAX)) {
-		if (!dax_supported(dax_dev, sb->s_bdev, blocksize, 0,
-				bdev_nr_sectors(sb->s_bdev))) {
+		if (!sbi->s_daxdev) {
 			ext2_msg(sb, KERN_ERR,
 				"DAX unsupported by block device. Turning off DAX.");
 			clear_opt(sbi->s_mount_opt, DAX);
+		} else if (blocksize != PAGE_SIZE) {
+			ext2_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n");
+			clear_opt(sbi->s_mount_opt, DAX);
 		}
 	}
@@ -1199,11 +1200,10 @@ failed_mount_group_desc:
 failed_mount:
 	brelse(bh);
 failed_sbi:
+	fs_put_dax(sbi->s_daxdev);
 	sb->s_fs_info = NULL;
 	kfree(sbi->s_blockgroup_lock);
 	kfree(sbi);
-failed:
-	fs_put_dax(dax_dev);
 	return ret;
 }


@@ -1699,6 +1699,7 @@ struct ext4_sb_info {
 	 */
 	struct percpu_rw_semaphore s_writepages_rwsem;
 	struct dax_device *s_daxdev;
+	u64 s_dax_part_off;
 #ifdef CONFIG_EXT4_DEBUG
 	unsigned long s_simulate_fail;
 #endif


@@ -41,6 +41,7 @@
 #include <linux/bitops.h>
 #include <linux/iomap.h>
 #include <linux/iversion.h>
+#include <linux/dax.h>
 #include "ext4_jbd2.h"
 #include "xattr.h"
@@ -3253,7 +3254,7 @@ static bool ext4_inode_datasync_dirty(struct inode *inode)
 static void ext4_set_iomap(struct inode *inode, struct iomap *iomap,
 			   struct ext4_map_blocks *map, loff_t offset,
-			   loff_t length)
+			   loff_t length, unsigned int flags)
 {
 	u8 blkbits = inode->i_blkbits;
@@ -3270,8 +3271,10 @@ static void ext4_set_iomap(struct inode *inode, struct iomap *iomap,
 	if (map->m_flags & EXT4_MAP_NEW)
 		iomap->flags |= IOMAP_F_NEW;
-	iomap->bdev = inode->i_sb->s_bdev;
-	iomap->dax_dev = EXT4_SB(inode->i_sb)->s_daxdev;
+	if (flags & IOMAP_DAX)
+		iomap->dax_dev = EXT4_SB(inode->i_sb)->s_daxdev;
+	else
+		iomap->bdev = inode->i_sb->s_bdev;
 	iomap->offset = (u64) map->m_lblk << blkbits;
 	iomap->length = (u64) map->m_len << blkbits;
@@ -3291,9 +3294,13 @@ static void ext4_set_iomap(struct inode *inode, struct iomap *iomap,
 	if (map->m_flags & EXT4_MAP_UNWRITTEN) {
 		iomap->type = IOMAP_UNWRITTEN;
 		iomap->addr = (u64) map->m_pblk << blkbits;
+		if (flags & IOMAP_DAX)
+			iomap->addr += EXT4_SB(inode->i_sb)->s_dax_part_off;
 	} else if (map->m_flags & EXT4_MAP_MAPPED) {
 		iomap->type = IOMAP_MAPPED;
 		iomap->addr = (u64) map->m_pblk << blkbits;
+		if (flags & IOMAP_DAX)
+			iomap->addr += EXT4_SB(inode->i_sb)->s_dax_part_off;
 	} else {
 		iomap->type = IOMAP_HOLE;
 		iomap->addr = IOMAP_NULL_ADDR;
@@ -3330,8 +3337,8 @@ retry:
 	 * DAX and direct I/O are the only two operations that are currently
 	 * supported with IOMAP_WRITE.
 	 */
-	WARN_ON(!IS_DAX(inode) && !(flags & IOMAP_DIRECT));
-	if (IS_DAX(inode))
+	WARN_ON(!(flags & (IOMAP_DAX | IOMAP_DIRECT)));
+	if (flags & IOMAP_DAX)
 		m_flags = EXT4_GET_BLOCKS_CREATE_ZERO;
 	/*
 	 * We use i_size instead of i_disksize here because delalloc writeback
@@ -3402,7 +3409,7 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 	if (ret < 0)
 		return ret;
 out:
-	ext4_set_iomap(inode, iomap, &map, offset, length);
+	ext4_set_iomap(inode, iomap, &map, offset, length, flags);
 	return 0;
 }
@@ -3522,7 +3529,7 @@ static int ext4_iomap_begin_report(struct inode *inode, loff_t offset,
 		delalloc = ext4_iomap_is_delalloc(inode, &map);
 set_iomap:
-	ext4_set_iomap(inode, iomap, &map, offset, length);
+	ext4_set_iomap(inode, iomap, &map, offset, length, flags);
 	if (delalloc && iomap->type == IOMAP_HOLE)
 		iomap->type = IOMAP_DELALLOC;
@@ -3762,8 +3769,8 @@ static int ext4_block_zero_page_range(handle_t *handle,
 		length = max;
 	if (IS_DAX(inode)) {
-		return iomap_zero_range(inode, from, length, NULL,
+		return dax_zero_range(inode, from, length, NULL,
 					&ext4_iomap_ops);
 	}
 	return __ext4_block_zero_page_range(handle, mapping, from, length);
 }


@@ -4338,7 +4338,7 @@ static struct ext4_sb_info *ext4_alloc_sbi(struct super_block *sb)
 	if (!sbi)
 		return NULL;
-	sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev);
+	sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off);
 	sbi->s_blockgroup_lock =
 		kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
@@ -4756,9 +4756,12 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
 		goto failed_mount;
 	}
-	if (dax_supported(sbi->s_daxdev, sb->s_bdev, blocksize, 0,
-			bdev_nr_sectors(sb->s_bdev)))
-		set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
+	if (sbi->s_daxdev) {
+		if (blocksize == PAGE_SIZE)
+			set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
+		else
+			ext4_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n");
+	}
 	if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
 		if (ext4_has_feature_inline_data(sb)) {


@@ -45,7 +45,7 @@ config FUSE_DAX
 	select INTERVAL_TREE
 	depends on VIRTIO_FS
 	depends on FS_DAX
-	depends on DAX_DRIVER
+	depends on DAX
 	help
 	  This allows bypassing guest page cache and allows mapping host page
 	  cache directly in guest address space.


@@ -765,20 +765,6 @@ static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
 	return nr_pages > max_nr_pages ? max_nr_pages : nr_pages;
 }
-static size_t virtio_fs_copy_from_iter(struct dax_device *dax_dev,
-				       pgoff_t pgoff, void *addr,
-				       size_t bytes, struct iov_iter *i)
-{
-	return copy_from_iter(addr, bytes, i);
-}
-static size_t virtio_fs_copy_to_iter(struct dax_device *dax_dev,
-				     pgoff_t pgoff, void *addr,
-				     size_t bytes, struct iov_iter *i)
-{
-	return copy_to_iter(addr, bytes, i);
-}
 static int virtio_fs_zero_page_range(struct dax_device *dax_dev,
 				     pgoff_t pgoff, size_t nr_pages)
 {
@@ -795,8 +781,6 @@ static int virtio_fs_zero_page_range(struct dax_device *dax_dev,
 static const struct dax_operations virtio_fs_dax_ops = {
 	.direct_access = virtio_fs_direct_access,
-	.copy_from_iter = virtio_fs_copy_from_iter,
-	.copy_to_iter = virtio_fs_copy_to_iter,
 	.zero_page_range = virtio_fs_zero_page_range,
 };
@@ -862,7 +846,7 @@ static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
 	dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n",
 		__func__, fs->window_kaddr, cache_reg.addr, cache_reg.len);
-	fs->dax_dev = alloc_dax(fs, NULL, &virtio_fs_dax_ops, 0);
+	fs->dax_dev = alloc_dax(fs, &virtio_fs_dax_ops);
 	if (IS_ERR(fs->dax_dev))
 		return PTR_ERR(fs->dax_dev);


@@ -9,9 +9,9 @@ ccflags-y += -I $(srctree)/$(src)	# needed for trace events
 obj-$(CONFIG_FS_IOMAP)		+= iomap.o
 iomap-y				+= trace.o \
-					buffered-io.o \
+					iter.o
+iomap-$(CONFIG_BLOCK)		+= buffered-io.o \
 					direct-io.o \
 					fiemap.o \
-					iter.o \
 					seek.o
 iomap-$(CONFIG_SWAP)		+= swapfile.o


@@ -897,7 +897,6 @@ EXPORT_SYMBOL_GPL(iomap_file_unshare);
 static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
 {
-	struct iomap *iomap = &iter->iomap;
 	const struct iomap *srcmap = iomap_iter_srcmap(iter);
 	loff_t pos = iter->pos;
 	loff_t length = iomap_length(iter);
@@ -913,14 +912,6 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
 		size_t offset;
 		size_t bytes = min_t(u64, SIZE_MAX, length);
-		if (IS_DAX(iter->inode)) {
-			s64 tmp = dax_iomap_zero(pos, bytes, iomap);
-			if (tmp < 0)
-				return tmp;
-			bytes = tmp;
-			goto good;
-		}
 		status = iomap_write_begin(iter, pos, bytes, &folio);
 		if (status)
 			return status;
@@ -933,7 +924,6 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
 		folio_mark_accessed(folio);
 		bytes = iomap_write_end(iter, pos, bytes, bytes, folio);
-good:
 		if (WARN_ON_ONCE(bytes == 0))
 			return -EIO;


@@ -4551,7 +4551,7 @@ xfs_bmapi_convert_delalloc(
 	 * the extent.  Just return the real extent at this offset.
 	 */
 	if (!isnullstartblock(bma.got.br_startblock)) {
-		xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
+		xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags);
 		*seq = READ_ONCE(ifp->if_seq);
 		goto out_trans_cancel;
 	}
@@ -4598,7 +4598,7 @@ xfs_bmapi_convert_delalloc(
 	XFS_STATS_INC(mp, xs_xstrat_quick);
 	ASSERT(!isnullstartblock(bma.got.br_startblock));
-	xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
+	xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags);
 	*seq = READ_ONCE(ifp->if_seq);
 	if (whichfork == XFS_COW_FORK)


@@ -359,7 +359,7 @@ retry:
 	    isnullstartblock(imap.br_startblock))
 		goto allocate_blocks;
-	xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0);
+	xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0);
 	trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap);
 	return 0;
 allocate_blocks:


@@ -1001,7 +1001,7 @@ xfs_free_file_space(
 	/*
 	 * Now that we've unmap all full blocks we'll have to zero out any
-	 * partial block at the beginning and/or end.  iomap_zero_range is smart
+	 * partial block at the beginning and/or end.  xfs_zero_range is smart
 	 * enough to skip any holes, including those we just created, but we
 	 * must take care not to zero beyond EOF and enlarge i_size.
 	 */
@@ -1009,15 +1009,14 @@ xfs_free_file_space(
 		return 0;
 	if (offset + len > XFS_ISIZE(ip))
 		len = XFS_ISIZE(ip) - offset;
-	error = iomap_zero_range(VFS_I(ip), offset, len, NULL,
-			&xfs_buffered_write_iomap_ops);
+	error = xfs_zero_range(ip, offset, len, NULL);
 	if (error)
 		return error;
 	/*
 	 * If we zeroed right up to EOF and EOF straddles a page boundary we
 	 * must make sure that the post-EOF area is also zeroed because the
-	 * page could be mmap'd and iomap_zero_range doesn't do that for us.
+	 * page could be mmap'd and xfs_zero_range doesn't do that for us.
 	 * Writeback of the eof page will do this, albeit clumsily.
 	 */
 	if (offset + len >= XFS_ISIZE(ip) && offset_in_page(offset + len) > 0) {


@@ -1892,6 +1892,7 @@ xfs_free_buftarg(
 	list_lru_destroy(&btp->bt_lru);
 	blkdev_issue_flush(btp->bt_bdev);
+	fs_put_dax(btp->bt_daxdev);
 	kmem_free(btp);
 }
@@ -1932,11 +1933,10 @@ xfs_setsize_buftarg_early(
 	return xfs_setsize_buftarg(btp, bdev_logical_block_size(bdev));
 }
-xfs_buftarg_t *
+struct xfs_buftarg *
 xfs_alloc_buftarg(
 	struct xfs_mount	*mp,
-	struct block_device	*bdev,
-	struct dax_device	*dax_dev)
+	struct block_device	*bdev)
 {
 	xfs_buftarg_t		*btp;
@@ -1945,7 +1945,7 @@ xfs_alloc_buftarg(
 	btp->bt_mount = mp;
 	btp->bt_dev = bdev->bd_dev;
 	btp->bt_bdev = bdev;
-	btp->bt_daxdev = dax_dev;
+	btp->bt_daxdev = fs_dax_get_by_bdev(bdev, &btp->bt_dax_part_off);
 	/*
 	 * Buffer IO error rate limiting. Limit it to no more than 10 messages


@@ -89,6 +89,7 @@ typedef struct xfs_buftarg {
 	dev_t			bt_dev;
 	struct block_device	*bt_bdev;
 	struct dax_device	*bt_daxdev;
+	u64			bt_dax_part_off;
 	struct xfs_mount	*bt_mount;
 	unsigned int		bt_meta_sectorsize;
 	size_t			bt_meta_sectormask;
@@ -338,8 +339,8 @@ xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
 /*
  * Handling of buftargs.
  */
-extern struct xfs_buftarg *xfs_alloc_buftarg(struct xfs_mount *,
-		struct block_device *, struct dax_device *);
+struct xfs_buftarg *xfs_alloc_buftarg(struct xfs_mount *mp,
+		struct block_device *bdev);
 extern void xfs_free_buftarg(struct xfs_buftarg *);
 extern void xfs_buftarg_wait(struct xfs_buftarg *);
 extern void xfs_buftarg_drain(struct xfs_buftarg *);


@@ -437,8 +437,7 @@ restart:
 		}
 		trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize);
-		error = iomap_zero_range(inode, isize, iocb->ki_pos - isize,
-				NULL, &xfs_buffered_write_iomap_ops);
+		error = xfs_zero_range(ip, isize, iocb->ki_pos - isize, NULL);
 		if (error)
 			return error;
 	} else


@@ -28,7 +28,6 @@
 #include "xfs_dquot.h"
 #include "xfs_reflink.h"
 #define XFS_ALLOC_ALIGN(mp, off) \
 	(((off) >> mp->m_allocsize_log) << mp->m_allocsize_log)
@@ -54,7 +53,8 @@ xfs_bmbt_to_iomap(
 	struct xfs_inode	*ip,
 	struct iomap		*iomap,
 	struct xfs_bmbt_irec	*imap,
-	u16			flags)
+	unsigned int		mapping_flags,
+	u16			iomap_flags)
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
@@ -71,16 +71,22 @@ xfs_bmbt_to_iomap(
 		iomap->type = IOMAP_DELALLOC;
 	} else {
 		iomap->addr = BBTOB(xfs_fsb_to_db(ip, imap->br_startblock));
+		if (mapping_flags & IOMAP_DAX)
+			iomap->addr += target->bt_dax_part_off;
 		if (imap->br_state == XFS_EXT_UNWRITTEN)
 			iomap->type = IOMAP_UNWRITTEN;
 		else
 			iomap->type = IOMAP_MAPPED;
 	}
 	iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
 	iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
-	iomap->bdev = target->bt_bdev;
-	iomap->dax_dev = target->bt_daxdev;
-	iomap->flags = flags;
+	if (mapping_flags & IOMAP_DAX)
+		iomap->dax_dev = target->bt_daxdev;
+	else
+		iomap->bdev = target->bt_bdev;
+	iomap->flags = iomap_flags;
 	if (xfs_ipincount(ip) &&
 	    (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
@@ -188,6 +194,7 @@ xfs_iomap_write_direct(
 	struct xfs_inode	*ip,
 	xfs_fileoff_t		offset_fsb,
 	xfs_fileoff_t		count_fsb,
+	unsigned int		flags,
 	struct xfs_bmbt_irec	*imap)
 {
 	struct xfs_mount	*mp = ip->i_mount;
@@ -229,7 +236,7 @@ xfs_iomap_write_direct(
 	 * the reserve block pool for bmbt block allocation if there is no space
 	 * left but we need to do unwritten extent conversion.
 	 */
-	if (IS_DAX(VFS_I(ip))) {
+	if (flags & IOMAP_DAX) {
 		bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO;
 		if (imap->br_state == XFS_EXT_UNWRITTEN) {
 			force = true;
@@ -620,7 +627,7 @@ imap_needs_alloc(
 	    imap->br_startblock == DELAYSTARTBLOCK)
 		return true;
 	/* we convert unwritten extents before copying the data for DAX */
-	if (IS_DAX(inode) && imap->br_state == XFS_EXT_UNWRITTEN)
+	if ((flags & IOMAP_DAX) && imap->br_state == XFS_EXT_UNWRITTEN)
 		return true;
 	return false;
 }
@@ -800,7 +807,7 @@ xfs_direct_write_iomap_begin(
 	xfs_iunlock(ip, lockmode);
 	trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
-	return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags);
+	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags);
 allocate_blocks:
 	error = -EAGAIN;
@@ -826,23 +833,24 @@ allocate_blocks:
 	xfs_iunlock(ip, lockmode);
 	error = xfs_iomap_write_direct(ip, offset_fsb, end_fsb - offset_fsb,
-			&imap);
+			flags, &imap);
 	if (error)
 		return error;
 	trace_xfs_iomap_alloc(ip, offset, length, XFS_DATA_FORK, &imap);
-	return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags | IOMAP_F_NEW);
+	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags,
+				 iomap_flags | IOMAP_F_NEW);
 out_found_cow:
 	xfs_iunlock(ip, lockmode);
 	length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount);
 	trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap);
 	if (imap.br_startblock != HOLESTARTBLOCK) {
-		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, 0);
+		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0);
 		if (error)
 			return error;
 	}
-	return xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);
+	return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED);
 out_unlock:
 	if (lockmode)
@@ -1052,23 +1060,24 @@ retry:
 	 */
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap);
-	return xfs_bmbt_to_iomap(ip, iomap, &imap, IOMAP_F_NEW);
+	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW);
 found_imap:
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	return xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
+	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0);
 found_cow:
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	if (imap.br_startoff <= offset_fsb) {
-		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, 0);
+		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0);
 		if (error)
 			return error;
-		return xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);
+		return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
+					 IOMAP_F_SHARED);
 	}
 	xfs_trim_extent(&cmap, offset_fsb, imap.br_startoff - offset_fsb);
-	return xfs_bmbt_to_iomap(ip, iomap, &cmap, 0);
+	return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0);
 out_unlock:
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -1177,7 +1186,8 @@ xfs_read_iomap_begin(
 	if (error)
 		return error;
 	trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
-	return xfs_bmbt_to_iomap(ip, iomap, &imap, shared ? IOMAP_F_SHARED : 0);
+	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags,
+				 shared ? IOMAP_F_SHARED : 0);
 }
 const struct iomap_ops xfs_read_iomap_ops = {
@@ -1236,7 +1246,8 @@ xfs_seek_iomap_begin(
 		if (data_fsb < cow_fsb + cmap.br_blockcount)
 			end_fsb = min(end_fsb, data_fsb);
 		xfs_trim_extent(&cmap, offset_fsb, end_fsb);
-		error = xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);
+		error = xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
+					  IOMAP_F_SHARED);
 		/*
 		 * This is a COW extent, so we must probe the page cache
 		 * because there could be dirty page cache being backed
@@ -1258,7 +1269,7 @@ xfs_seek_iomap_begin(
 	imap.br_state = XFS_EXT_NORM;
 done:
 	xfs_trim_extent(&imap, offset_fsb, end_fsb);
-	error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
+	error = xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0);
 out_unlock:
 	xfs_iunlock(ip, lockmode);
 	return error;
@@ -1305,9 +1316,40 @@ out_unlock:
 	if (error)
 		return error;
 	ASSERT(nimaps);
-	return xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
+	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0);
 }
 const struct iomap_ops xfs_xattr_iomap_ops = {
 	.iomap_begin		= xfs_xattr_iomap_begin,
 };
+int
+xfs_zero_range(
+	struct xfs_inode	*ip,
+	loff_t			pos,
+	loff_t			len,
+	bool			*did_zero)
+{
+	struct inode		*inode = VFS_I(ip);
+	if (IS_DAX(inode))
+		return dax_zero_range(inode, pos, len, did_zero,
+				      &xfs_direct_write_iomap_ops);
+	return iomap_zero_range(inode, pos, len, did_zero,
+				&xfs_buffered_write_iomap_ops);
+}
+int
+xfs_truncate_page(
+	struct xfs_inode	*ip,
+	loff_t			pos,
+	bool			*did_zero)
+{
+	struct inode		*inode = VFS_I(ip);
+	if (IS_DAX(inode))
+		return dax_truncate_page(inode, pos, did_zero,
+					 &xfs_direct_write_iomap_ops);
+	return iomap_truncate_page(inode, pos, did_zero,
				   &xfs_buffered_write_iomap_ops);
+}


@@ -12,13 +12,19 @@ struct xfs_inode;
 struct xfs_bmbt_irec;
 int xfs_iomap_write_direct(struct xfs_inode *ip, xfs_fileoff_t offset_fsb,
-		xfs_fileoff_t count_fsb, struct xfs_bmbt_irec *imap);
+		xfs_fileoff_t count_fsb, unsigned int flags,
+		struct xfs_bmbt_irec *imap);
 int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
 xfs_fileoff_t xfs_iomap_eof_align_last_fsb(struct xfs_inode *ip,
 		xfs_fileoff_t end_fsb);
-int xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
-		struct xfs_bmbt_irec *, u16);
+int xfs_bmbt_to_iomap(struct xfs_inode *ip, struct iomap *iomap,
+		struct xfs_bmbt_irec *imap, unsigned int mapping_flags,
+		u16 iomap_flags);
+int xfs_zero_range(struct xfs_inode *ip, loff_t pos, loff_t len,
+		bool *did_zero);
+int xfs_truncate_page(struct xfs_inode *ip, loff_t pos, bool *did_zero);
 static inline xfs_filblks_t
 xfs_aligned_fsb_count(


@@ -890,8 +890,8 @@ xfs_setattr_size(
 	 */
 	if (newsize > oldsize) {
 		trace_xfs_zero_eof(ip, oldsize, newsize - oldsize);
-		error = iomap_zero_range(inode, oldsize, newsize - oldsize,
-				&did_zeroing, &xfs_buffered_write_iomap_ops);
+		error = xfs_zero_range(ip, oldsize, newsize - oldsize,
+				&did_zeroing);
 	} else {
 		/*
 		 * iomap won't detect a dirty page over an unwritten block (or a
@@ -903,8 +903,7 @@
 				newsize);
 		if (error)
 			return error;
-		error = iomap_truncate_page(inode, newsize, &did_zeroing,
-				&xfs_buffered_write_iomap_ops);
+		error = xfs_truncate_page(ip, newsize, &did_zeroing);
 	}
 	if (error)


@@ -155,7 +155,7 @@ xfs_fs_map_blocks(
 		xfs_iunlock(ip, lock_flags);
 		error = xfs_iomap_write_direct(ip, offset_fsb,
-				end_fsb - offset_fsb, &imap);
+				end_fsb - offset_fsb, 0, &imap);
 		if (error)
 			goto out_unlock;
@@ -173,7 +173,7 @@
 	}
 	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-	error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
+	error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0, 0);
 	*device_generation = mp->m_generation;
 	return error;
 out_unlock:


@@ -1272,8 +1272,7 @@ xfs_reflink_zero_posteof(
 		return 0;
 	trace_xfs_zero_eof(ip, isize, pos - isize);
-	return iomap_zero_range(VFS_I(ip), isize, pos - isize, NULL,
-			&xfs_buffered_write_iomap_ops);
+	return xfs_zero_range(ip, isize, pos - isize, NULL);
 }
 /*


@@ -331,13 +331,34 @@ xfs_set_inode_alloc(
 	return xfs_is_inode32(mp) ? maxagi : agcount;
 }
-static bool
-xfs_buftarg_is_dax(
-	struct super_block	*sb,
-	struct xfs_buftarg	*bt)
+static int
+xfs_setup_dax_always(
+	struct xfs_mount	*mp)
 {
-	return dax_supported(bt->bt_daxdev, bt->bt_bdev, sb->s_blocksize, 0,
-			bdev_nr_sectors(bt->bt_bdev));
+	if (!mp->m_ddev_targp->bt_daxdev &&
+	    (!mp->m_rtdev_targp || !mp->m_rtdev_targp->bt_daxdev)) {
+		xfs_alert(mp,
+			"DAX unsupported by block device. Turning off DAX.");
+		goto disable_dax;
+	}
+	if (mp->m_super->s_blocksize != PAGE_SIZE) {
+		xfs_alert(mp,
+			"DAX not supported for blocksize. Turning off DAX.");
+		goto disable_dax;
+	}
+	if (xfs_has_reflink(mp)) {
+		xfs_alert(mp, "DAX and reflink cannot be used together!");
+		return -EINVAL;
+	}
+	xfs_warn(mp, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
+	return 0;
+disable_dax:
+	xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER);
+	return 0;
 }
 STATIC int
@@ -370,26 +391,19 @@ STATIC void
 xfs_close_devices(
 	struct xfs_mount	*mp)
 {
-	struct dax_device *dax_ddev = mp->m_ddev_targp->bt_daxdev;
 	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
 		struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
-		struct dax_device *dax_logdev = mp->m_logdev_targp->bt_daxdev;
 		xfs_free_buftarg(mp->m_logdev_targp);
 		xfs_blkdev_put(logdev);
-		fs_put_dax(dax_logdev);
 	}
 	if (mp->m_rtdev_targp) {
 		struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
-		struct dax_device *dax_rtdev = mp->m_rtdev_targp->bt_daxdev;
 		xfs_free_buftarg(mp->m_rtdev_targp);
 		xfs_blkdev_put(rtdev);
-		fs_put_dax(dax_rtdev);
 	}
 	xfs_free_buftarg(mp->m_ddev_targp);
-	fs_put_dax(dax_ddev);
 }
 /*
@@ -407,8 +421,6 @@ xfs_open_devices(
 	struct xfs_mount	*mp)
 {
 	struct block_device	*ddev = mp->m_super->s_bdev;
-	struct dax_device	*dax_ddev = fs_dax_get_by_bdev(ddev);
-	struct dax_device	*dax_logdev = NULL, *dax_rtdev = NULL;
 	struct block_device	*logdev = NULL, *rtdev = NULL;
 	int			error;
@@ -418,8 +430,7 @@
 	if (mp->m_logname) {
 		error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
 		if (error)
-			goto out;
-		dax_logdev = fs_dax_get_by_bdev(logdev);
+			return error;
 	}
 	if (mp->m_rtname) {
@@ -433,25 +444,24 @@
 			error = -EINVAL;
 			goto out_close_rtdev;
 		}
-		dax_rtdev = fs_dax_get_by_bdev(rtdev);
 	}
 	/*
 	 * Setup xfs_mount buffer target pointers
 	 */
 	error = -ENOMEM;
-	mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, dax_ddev);
+	mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev);
 	if (!mp->m_ddev_targp)
 		goto out_close_rtdev;
 	if (rtdev) {
-		mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, dax_rtdev);
+		mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev);
 		if (!mp->m_rtdev_targp)
 			goto out_free_ddev_targ;
 	}
 	if (logdev && logdev != ddev) {
-		mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, dax_logdev);
+		mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev);
 		if (!mp->m_logdev_targp)
 			goto out_free_rtdev_targ;
 	} else {
@@ -467,14 +477,9 @@
 	xfs_free_buftarg(mp->m_ddev_targp);
 out_close_rtdev:
 	xfs_blkdev_put(rtdev);
-	fs_put_dax(dax_rtdev);
 out_close_logdev:
-	if (logdev && logdev != ddev) {
+	if (logdev && logdev != ddev)
 		xfs_blkdev_put(logdev);
-		fs_put_dax(dax_logdev);
-	}
-out:
-	fs_put_dax(dax_ddev);
 	return error;
 }
@@ -1593,26 +1598,9 @@ xfs_fs_fill_super(
 		sb->s_flags |= SB_I_VERSION;
 	if (xfs_has_dax_always(mp)) {
-		bool rtdev_is_dax = false, datadev_is_dax;
-		xfs_warn(mp,
-		"DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
-		datadev_is_dax = xfs_buftarg_is_dax(sb, mp->m_ddev_targp);
-		if (mp->m_rtdev_targp)
-			rtdev_is_dax = xfs_buftarg_is_dax(sb,
-						mp->m_rtdev_targp);
-		if (!rtdev_is_dax && !datadev_is_dax) {
-			xfs_alert(mp,
-			"DAX unsupported by block device. Turning off DAX.");
-			xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER);
-		}
-		if (xfs_has_reflink(mp)) {
-			xfs_alert(mp,
-				"DAX and reflink cannot be used together!");
-			error = -EINVAL;
+		error = xfs_setup_dax_always(mp);
+		if (error)
 			goto out_filestream_unmount;
-		}
 	}
 	if (xfs_has_discard(mp)) {


@ -6,14 +6,14 @@
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/radix-tree.h> #include <linux/radix-tree.h>
/* Flag for synchronous flush */
#define DAXDEV_F_SYNC (1UL << 0)
typedef unsigned long dax_entry_t; typedef unsigned long dax_entry_t;
struct iomap_ops;
struct iomap;
struct dax_device; struct dax_device;
struct gendisk;
struct iomap_ops;
struct iomap_iter;
struct iomap;
struct dax_operations { struct dax_operations {
/* /*
* direct_access: translate a device-relative * direct_access: translate a device-relative
@ -28,33 +28,18 @@ struct dax_operations {
*/ */
bool (*dax_supported)(struct dax_device *, struct block_device *, int, bool (*dax_supported)(struct dax_device *, struct block_device *, int,
sector_t, sector_t); sector_t, sector_t);
/* copy_from_iter: required operation for fs-dax direct-i/o */
size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t,
struct iov_iter *);
/* copy_to_iter: required operation for fs-dax direct-i/o */
size_t (*copy_to_iter)(struct dax_device *, pgoff_t, void *, size_t,
struct iov_iter *);
/* zero_page_range: required operation. Zero page range */ /* zero_page_range: required operation. Zero page range */
int (*zero_page_range)(struct dax_device *, pgoff_t, size_t); int (*zero_page_range)(struct dax_device *, pgoff_t, size_t);
}; };
#if IS_ENABLED(CONFIG_DAX) #if IS_ENABLED(CONFIG_DAX)
struct dax_device *alloc_dax(void *private, const char *host, struct dax_device *alloc_dax(void *private, const struct dax_operations *ops);
const struct dax_operations *ops, unsigned long flags);
void put_dax(struct dax_device *dax_dev); void put_dax(struct dax_device *dax_dev);
void kill_dax(struct dax_device *dax_dev); void kill_dax(struct dax_device *dax_dev);
void dax_write_cache(struct dax_device *dax_dev, bool wc); void dax_write_cache(struct dax_device *dax_dev, bool wc);
bool dax_write_cache_enabled(struct dax_device *dax_dev); bool dax_write_cache_enabled(struct dax_device *dax_dev);
bool __dax_synchronous(struct dax_device *dax_dev); bool dax_synchronous(struct dax_device *dax_dev);
-static inline bool dax_synchronous(struct dax_device *dax_dev)
-{
-	return __dax_synchronous(dax_dev);
-}
-void __set_dax_synchronous(struct dax_device *dax_dev);
-static inline void set_dax_synchronous(struct dax_device *dax_dev)
-{
-	__set_dax_synchronous(dax_dev);
-}
+void set_dax_synchronous(struct dax_device *dax_dev);
 
 /*
  * Check if given mapping is supported by the file / underlying device.
  */
@@ -68,8 +53,8 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
 	return dax_synchronous(dax_dev);
 }
 #else
-static inline struct dax_device *alloc_dax(void *private, const char *host,
-		const struct dax_operations *ops, unsigned long flags)
+static inline struct dax_device *alloc_dax(void *private,
+		const struct dax_operations *ops)
 {
 	/*
 	 * Callers should check IS_ENABLED(CONFIG_DAX) to know if this
@@ -104,22 +89,38 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
 }
 #endif
 
+void set_dax_nocache(struct dax_device *dax_dev);
+void set_dax_nomc(struct dax_device *dax_dev);
+
 struct writeback_control;
-int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff);
-#if IS_ENABLED(CONFIG_FS_DAX)
-bool generic_fsdax_supported(struct dax_device *dax_dev,
-		struct block_device *bdev, int blocksize, sector_t start,
-		sector_t sectors);
-
-bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
-		int blocksize, sector_t start, sector_t len);
-
+#if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX)
+int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk);
+void dax_remove_host(struct gendisk *disk);
+struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev,
+		u64 *start_off);
 static inline void fs_put_dax(struct dax_device *dax_dev)
 {
 	put_dax(dax_dev);
 }
+#else
+static inline int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk)
+{
+	return 0;
+}
+static inline void dax_remove_host(struct gendisk *disk)
+{
+}
+static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev,
+		u64 *start_off)
+{
+	return NULL;
+}
+static inline void fs_put_dax(struct dax_device *dax_dev)
+{
+}
+#endif /* CONFIG_BLOCK && CONFIG_FS_DAX */
 
-struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev);
+#if IS_ENABLED(CONFIG_FS_DAX)
 int dax_writeback_mapping_range(struct address_space *mapping,
 		struct dax_device *dax_dev, struct writeback_control *wbc);
@@ -128,24 +129,6 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t st
 dax_entry_t dax_lock_page(struct page *page);
 void dax_unlock_page(struct page *page, dax_entry_t cookie);
 #else
-#define generic_fsdax_supported NULL
-
-static inline bool dax_supported(struct dax_device *dax_dev,
-		struct block_device *bdev, int blocksize, sector_t start,
-		sector_t len)
-{
-	return false;
-}
-
-static inline void fs_put_dax(struct dax_device *dax_dev)
-{
-}
-
-static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
-{
-	return NULL;
-}
-
 static inline struct page *dax_layout_busy_page(struct address_space *mapping)
 {
 	return NULL;
@@ -174,6 +157,11 @@ static inline void dax_unlock_page(struct page *page, dax_entry_t cookie)
 }
 #endif
 
+int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
+		const struct iomap_ops *ops);
+int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
+		const struct iomap_ops *ops);
+
 #if IS_ENABLED(CONFIG_DAX)
 int dax_read_lock(void);
 void dax_read_unlock(int id);
@@ -208,7 +196,6 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
 int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
 		pgoff_t index);
-s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap);
 static inline bool dax_mapping(struct address_space *mapping)
 {
 	return mapping->host && IS_DAX(mapping->host);
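
(Illustration only, not part of the patch.) With the fs_dax_get_by_bdev() signature above, a filesystem resolves its dax_device once and records the partition start offset itself instead of passing a block_device around. A minimal sketch with hypothetical example_* names:

#include <linux/blkdev.h>
#include <linux/dax.h>

struct example_fs {
	struct dax_device *dax_dev;
	u64 dax_part_off;	/* byte offset of this partition on the DAX device */
};

static int example_fs_setup_dax(struct example_fs *fs, struct block_device *bdev)
{
	fs->dax_dev = fs_dax_get_by_bdev(bdev, &fs->dax_part_off);
	if (!fs->dax_dev)
		return -EOPNOTSUPP;	/* fall back to non-DAX I/O */
	return 0;
}

The filesystem then folds dax_part_off into the offsets it derives from its iomaps before calling dax_direct_access(), and drops the reference with fs_put_dax() at unmount.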

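The new dax_zero_range()/dax_truncate_page() declarations above take an iomap_ops table directly. A hedged sketch of a truncate-path caller (illustrative only, not taken from the series):

static int example_dax_zero_tail(struct inode *inode, loff_t newsize,
		const struct iomap_ops *ops)
{
	bool did_zero = false;

	/* zero any partially used tail block from newsize onward */
	return dax_truncate_page(inode, newsize, &did_zero, ops);
}
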

@@ -147,8 +147,6 @@ typedef int (*dm_busy_fn) (struct dm_target *ti);
  */
 typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff,
 		long nr_pages, void **kaddr, pfn_t *pfn);
-typedef size_t (*dm_dax_copy_iter_fn)(struct dm_target *ti, pgoff_t pgoff,
-		void *addr, size_t bytes, struct iov_iter *i);
 typedef int (*dm_dax_zero_page_range_fn)(struct dm_target *ti, pgoff_t pgoff,
 		size_t nr_pages);
@@ -200,8 +198,6 @@ struct target_type {
 	dm_iterate_devices_fn iterate_devices;
 	dm_io_hints_fn io_hints;
 	dm_dax_direct_access_fn direct_access;
-	dm_dax_copy_iter_fn dax_copy_from_iter;
-	dm_dax_copy_iter_fn dax_copy_to_iter;
 	dm_dax_zero_page_range_fn dax_zero_page_range;
 
 	/* For internal device-mapper use. */

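For illustration (not from the patch): with dm_dax_copy_iter_fn gone, a DAX-capable target now wires up only ->direct_access and ->dax_zero_page_range. The example_* callbacks below are hypothetical and assumed to be defined elsewhere:

static struct target_type example_dax_target = {
	.name			= "example-dax",
	.version		= {1, 0, 0},
	.module			= THIS_MODULE,
	.ctr			= example_ctr,
	.dtr			= example_dtr,
	.map			= example_map,
	.iterate_devices	= example_iterate_devices,
	.direct_access		= example_dax_direct_access,
	.dax_zero_page_range	= example_dax_zero_page_range,
	/* no .dax_copy_from_iter / .dax_copy_to_iter members anymore */
};
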

@@ -141,6 +141,11 @@ struct iomap_page_ops {
 #define IOMAP_NOWAIT		(1 << 5) /* do not block */
 #define IOMAP_OVERWRITE_ONLY	(1 << 6) /* only pure overwrites allowed */
 #define IOMAP_UNSHARE		(1 << 7) /* unshare_file_range */
+#ifdef CONFIG_FS_DAX
+#define IOMAP_DAX		(1 << 8) /* DAX mapping */
+#else
+#define IOMAP_DAX		0
+#endif /* CONFIG_FS_DAX */
 
 struct iomap_ops {
 	/*

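Illustrative only: because IOMAP_DAX is defined to 0 when CONFIG_FS_DAX is disabled, ->iomap_begin implementations and generic helpers can test the flag unconditionally and the compiler discards the dead branch. A trivial sketch:

static bool example_is_dax_operation(unsigned int flags)
{
	return (flags & IOMAP_DAX) != 0;
}
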

@@ -72,16 +72,6 @@ struct dev_pagemap_ops {
 	 */
 	void (*page_free)(struct page *page);
 
-	/*
-	 * Transition the refcount in struct dev_pagemap to the dead state.
-	 */
-	void (*kill)(struct dev_pagemap *pgmap);
-
-	/*
-	 * Wait for refcount in struct dev_pagemap to be idle and reap it.
-	 */
-	void (*cleanup)(struct dev_pagemap *pgmap);
-
 	/*
 	 * Used for private (un-addressable) device memory only. Must migrate
 	 * the page back to a CPU accessible page.
@@ -95,8 +85,7 @@ struct dev_pagemap_ops {
  * struct dev_pagemap - metadata for ZONE_DEVICE mappings
  * @altmap: pre-allocated/reserved memory for vmemmap allocations
  * @ref: reference count that pins the devm_memremap_pages() mapping
- * @internal_ref: internal reference if @ref is not provided by the caller
- * @done: completion for @internal_ref
+ * @done: completion for @ref
  * @type: memory type: see MEMORY_* in memory_hotplug.h
  * @flags: PGMAP_* flags to specify defailed behavior
  * @ops: method table
@@ -109,8 +98,7 @@ struct dev_pagemap_ops {
  */
 struct dev_pagemap {
 	struct vmem_altmap altmap;
-	struct percpu_ref *ref;
-	struct percpu_ref internal_ref;
+	struct percpu_ref ref;
 	struct completion done;
 	enum memory_type type;
 	unsigned int flags;
@@ -191,7 +179,7 @@ static inline unsigned long memremap_compat_align(void)
 static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
 {
 	if (pgmap)
-		percpu_ref_put(pgmap->ref);
+		percpu_ref_put(&pgmap->ref);
 }
 
 #endif /* _LINUX_MEMREMAP_H_ */

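A minimal sketch of the provider side after this change (hypothetical example_* names, not from the patch): callers no longer supply their own percpu_ref or ->kill()/->cleanup() ops; they fill in the range and type and let devm_memremap_pages() manage the embedded pgmap->ref:

#include <linux/device.h>
#include <linux/err.h>
#include <linux/ioport.h>
#include <linux/memremap.h>

static int example_map_device_memory(struct device *dev, struct resource *res,
		struct dev_pagemap *pgmap)
{
	void *addr;

	pgmap->type = MEMORY_DEVICE_GENERIC;
	pgmap->range = (struct range) {
		.start	= res->start,
		.end	= res->end,
	};
	pgmap->nr_range = 1;
	/* no pgmap->ref and no ->kill()/->cleanup() methods to provide */

	addr = devm_memremap_pages(dev, pgmap);
	return PTR_ERR_OR_ZERO(addr);
}
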

@@ -203,7 +203,7 @@ bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
 /*
  * Note, users like pmem that depend on the stricter semantics of
- * copy_from_iter_flushcache() than copy_from_iter_nocache() must check for
+ * _copy_from_iter_flushcache() than _copy_from_iter_nocache() must check for
  * IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) before assuming that the
  * destination is flushed from the cache on return.
  */
@@ -218,24 +218,6 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
 #define _copy_mc_to_iter _copy_to_iter
 #endif
 
-static __always_inline __must_check
-size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
-{
-	if (unlikely(!check_copy_size(addr, bytes, false)))
-		return 0;
-	else
-		return _copy_from_iter_flushcache(addr, bytes, i);
-}
-
-static __always_inline __must_check
-size_t copy_mc_to_iter(void *addr, size_t bytes, struct iov_iter *i)
-{
-	if (unlikely(!check_copy_size(addr, bytes, true)))
-		return 0;
-	else
-		return _copy_mc_to_iter(addr, bytes, i);
-}
-
 size_t iov_iter_zero(size_t bytes, struct iov_iter *);
 unsigned long iov_iter_alignment(const struct iov_iter *i);
 unsigned long iov_iter_gap_alignment(const struct iov_iter *i);

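For illustration (not part of the patch): DAX/pmem code now calls the surviving underscore-prefixed helper directly, since the check_copy_size() wrapper above is gone. The function and variable names here are hypothetical:

#include <linux/uio.h>

static size_t example_copy_to_pmem(void *pmem_addr, size_t bytes,
		struct iov_iter *iter)
{
	/*
	 * Per the comment above, callers that rely on the destination being
	 * flushed from the CPU cache on return must also check
	 * IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE).
	 */
	return _copy_from_iter_flushcache(pmem_addr, bytes, iter);
}
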

@@ -112,30 +112,6 @@ static unsigned long pfn_next(unsigned long pfn)
 #define for_each_device_pfn(pfn, map, i) \
 	for (pfn = pfn_first(map, i); pfn < pfn_end(map, i); pfn = pfn_next(pfn))
 
-static void dev_pagemap_kill(struct dev_pagemap *pgmap)
-{
-	if (pgmap->ops && pgmap->ops->kill)
-		pgmap->ops->kill(pgmap);
-	else
-		percpu_ref_kill(pgmap->ref);
-}
-
-static void dev_pagemap_cleanup(struct dev_pagemap *pgmap)
-{
-	if (pgmap->ops && pgmap->ops->cleanup) {
-		pgmap->ops->cleanup(pgmap);
-	} else {
-		wait_for_completion(&pgmap->done);
-		percpu_ref_exit(pgmap->ref);
-	}
-	/*
-	 * Undo the pgmap ref assignment for the internal case as the
-	 * caller may re-enable the same pgmap.
-	 */
-	if (pgmap->ref == &pgmap->internal_ref)
-		pgmap->ref = NULL;
-}
-
 static void pageunmap_range(struct dev_pagemap *pgmap, int range_id)
 {
 	struct range *range = &pgmap->ranges[range_id];
@@ -167,11 +143,12 @@ void memunmap_pages(struct dev_pagemap *pgmap)
 	unsigned long pfn;
 	int i;
 
-	dev_pagemap_kill(pgmap);
+	percpu_ref_kill(&pgmap->ref);
 	for (i = 0; i < pgmap->nr_range; i++)
 		for_each_device_pfn(pfn, pgmap, i)
 			put_page(pfn_to_page(pfn));
-	dev_pagemap_cleanup(pgmap);
+	wait_for_completion(&pgmap->done);
+	percpu_ref_exit(&pgmap->ref);
 
 	for (i = 0; i < pgmap->nr_range; i++)
 		pageunmap_range(pgmap, i);
@@ -188,8 +165,7 @@ static void devm_memremap_pages_release(void *data)
 static void dev_pagemap_percpu_release(struct percpu_ref *ref)
 {
-	struct dev_pagemap *pgmap =
-		container_of(ref, struct dev_pagemap, internal_ref);
+	struct dev_pagemap *pgmap = container_of(ref, struct dev_pagemap, ref);
 
 	complete(&pgmap->done);
 }
@@ -295,8 +271,8 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
 	memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
 				PHYS_PFN(range->start),
 				PHYS_PFN(range_len(range)), pgmap);
-	percpu_ref_get_many(pgmap->ref, pfn_end(pgmap, range_id)
-			- pfn_first(pgmap, range_id));
+	percpu_ref_get_many(&pgmap->ref,
+		pfn_end(pgmap, range_id) - pfn_first(pgmap, range_id));
 	return 0;
 
 err_add_memory:
@@ -362,22 +338,11 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
 		break;
 	}
 
-	if (!pgmap->ref) {
-		if (pgmap->ops && (pgmap->ops->kill || pgmap->ops->cleanup))
-			return ERR_PTR(-EINVAL);
-
-		init_completion(&pgmap->done);
-		error = percpu_ref_init(&pgmap->internal_ref,
-				dev_pagemap_percpu_release, 0, GFP_KERNEL);
-		if (error)
-			return ERR_PTR(error);
-		pgmap->ref = &pgmap->internal_ref;
-	} else {
-		if (!pgmap->ops || !pgmap->ops->kill || !pgmap->ops->cleanup) {
-			WARN(1, "Missing reference count teardown definition\n");
-			return ERR_PTR(-EINVAL);
-		}
-	}
+	init_completion(&pgmap->done);
+	error = percpu_ref_init(&pgmap->ref, dev_pagemap_percpu_release, 0,
+			GFP_KERNEL);
+	if (error)
+		return ERR_PTR(error);
 
 	devmap_managed_enable_get(pgmap);
@@ -486,7 +451,7 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
 	/* fall back to slow path lookup */
 	rcu_read_lock();
 	pgmap = xa_load(&pgmap_array, PHYS_PFN(phys));
-	if (pgmap && !percpu_ref_tryget_live(pgmap->ref))
+	if (pgmap && !percpu_ref_tryget_live(&pgmap->ref))
 		pgmap = NULL;
 	rcu_read_unlock();

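Illustrative only (names are hypothetical): lookups still pair get_dev_pagemap() with put_dev_pagemap(), which now take and drop a count on the embedded pgmap->ref shown above:

static bool example_pfn_is_device_memory(unsigned long pfn)
{
	struct dev_pagemap *pgmap;

	pgmap = get_dev_pagemap(pfn, NULL);
	if (!pgmap)
		return false;
	put_dev_pagemap(pgmap);
	return true;
}
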

@@ -35,8 +35,6 @@ obj-$(CONFIG_DAX) += dax.o
 endif
 obj-$(CONFIG_DEV_DAX) += device_dax.o
 obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
-obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem_core.o
-obj-$(CONFIG_DEV_DAX_PMEM_COMPAT) += dax_pmem_compat.o
 
 nfit-y := $(ACPI_SRC)/core.o
 nfit-y += $(ACPI_SRC)/intel.o
@@ -67,12 +65,8 @@ device_dax-y += dax-dev.o
 device_dax-y += device_dax_test.o
 device_dax-y += config_check.o
 
-dax_pmem-y := $(DAX_SRC)/pmem/pmem.o
+dax_pmem-y := $(DAX_SRC)/pmem.o
 dax_pmem-y += dax_pmem_test.o
-dax_pmem_core-y := $(DAX_SRC)/pmem/core.o
-dax_pmem_core-y += dax_pmem_core_test.o
-dax_pmem_compat-y := $(DAX_SRC)/pmem/compat.o
-dax_pmem_compat-y += dax_pmem_compat_test.o
 dax_pmem-y += config_check.o
 
 libnvdimm-y := $(NVDIMM_SRC)/core.o


@@ -1,8 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright(c) 2019 Intel Corporation. All rights reserved.
-
-#include <linux/module.h>
-#include <linux/printk.h>
-#include "watermark.h"
-
-nfit_test_watermark(dax_pmem_compat);


@@ -1,8 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright(c) 2019 Intel Corporation. All rights reserved.
-
-#include <linux/module.h>
-#include <linux/printk.h>
-#include "watermark.h"
-
-nfit_test_watermark(dax_pmem_core);


@@ -100,25 +100,17 @@ static void nfit_test_kill(void *_pgmap)
 {
 	struct dev_pagemap *pgmap = _pgmap;
 
-	WARN_ON(!pgmap || !pgmap->ref);
-
-	if (pgmap->ops && pgmap->ops->kill)
-		pgmap->ops->kill(pgmap);
-	else
-		percpu_ref_kill(pgmap->ref);
-
-	if (pgmap->ops && pgmap->ops->cleanup) {
-		pgmap->ops->cleanup(pgmap);
-	} else {
-		wait_for_completion(&pgmap->done);
-		percpu_ref_exit(pgmap->ref);
-	}
+	WARN_ON(!pgmap);
+
+	percpu_ref_kill(&pgmap->ref);
+	wait_for_completion(&pgmap->done);
+	percpu_ref_exit(&pgmap->ref);
 }
 
 static void dev_pagemap_percpu_release(struct percpu_ref *ref)
 {
-	struct dev_pagemap *pgmap =
-		container_of(ref, struct dev_pagemap, internal_ref);
+	struct dev_pagemap *pgmap = container_of(ref, struct dev_pagemap, ref);
 
 	complete(&pgmap->done);
 }
@@ -132,22 +124,11 @@ void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 	if (!nfit_res)
 		return devm_memremap_pages(dev, pgmap);
 
-	if (!pgmap->ref) {
-		if (pgmap->ops && (pgmap->ops->kill || pgmap->ops->cleanup))
-			return ERR_PTR(-EINVAL);
-
-		init_completion(&pgmap->done);
-		error = percpu_ref_init(&pgmap->internal_ref,
-				dev_pagemap_percpu_release, 0, GFP_KERNEL);
-		if (error)
-			return ERR_PTR(error);
-		pgmap->ref = &pgmap->internal_ref;
-	} else {
-		if (!pgmap->ops || !pgmap->ops->kill || !pgmap->ops->cleanup) {
-			WARN(1, "Missing reference count teardown definition\n");
-			return ERR_PTR(-EINVAL);
-		}
-	}
+	init_completion(&pgmap->done);
+	error = percpu_ref_init(&pgmap->ref, dev_pagemap_percpu_release, 0,
+			GFP_KERNEL);
+	if (error)
+		return ERR_PTR(error);
 
 	error = devm_add_action_or_reset(dev, nfit_test_kill, pgmap);
 	if (error)


@@ -1054,10 +1054,6 @@ static __init int ndtest_init(void)
 	libnvdimm_test();
 	device_dax_test();
 	dax_pmem_test();
-	dax_pmem_core_test();
-#ifdef CONFIG_DEV_DAX_PMEM_COMPAT
-	dax_pmem_compat_test();
-#endif
 
 	nfit_test_setup(ndtest_resource_lookup, NULL);


@@ -3300,10 +3300,6 @@ static __init int nfit_test_init(void)
 	acpi_nfit_test();
 	device_dax_test();
 	dax_pmem_test();
-	dax_pmem_core_test();
-#ifdef CONFIG_DEV_DAX_PMEM_COMPAT
-	dax_pmem_compat_test();
-#endif
 
 	nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm);