linux/fs/afs/write.c
Linus Torvalds 16df6e07d6 vfs-6.8.netfs
-----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCZabMrQAKCRCRxhvAZXjc
 ovnUAQDgCOonb1tjtTvC8s8IMDUEoaVYZI91KVfsZQSJYN1sdQD+KfJmX1BhJnWG
 l0cEffGfnWGXMZkZqDgLPHUIPzFrmws=
 =1b3j
 -----END PGP SIGNATURE-----

Merge tag 'vfs-6.8.netfs' of gitolite.kernel.org:pub/scm/linux/kernel/git/vfs/vfs

Pull netfs updates from Christian Brauner:
 "This extends the netfs helper library that network filesystems can use
  to replace their own implementations. Both afs and 9p are ported. cifs
  is ready as well but the patches are way bigger and will be routed
  separately once this is merged. That will remove lots of code as well.

  The overal goal is to get high-level I/O and knowledge of the page
  cache and ouf of the filesystem drivers. This includes knowledge about
  the existence of pages and folios

  The pull request converts afs and 9p. This removes about 800 lines of
  code from afs and 300 from 9p. For 9p it is now possible to do writes
  in larger than a page chunks. Additionally, multipage folio support
  can be turned on for 9p. Separate patches exist for cifs removing
  another 2000+ lines. I've included detailed information in the
  individual pulls I took.

  Summary:

   - Add NFS-style (and Ceph-style) locking around DIO vs buffered I/O
     calls to prevent these from happening at the same time.

   - Support for direct and unbuffered I/O.

   - Support for write-through caching in the page cache.

   - O_*SYNC and RWF_*SYNC writes use write-through rather than writing
     to the page cache and then flushing afterwards.

   - Support for write-streaming.

   - Support for write grouping.

   - Skip reads for which the server could only return zeros or EOF.

   - The fscache module is now part of the netfs library and the
     corresponding maintainer entry is updated.

   - Some helpers from the fscache subsystem are renamed to mark them as
     belonging to the netfs library.

   - Follow-up fixes for the netfs library.

   - Follow-up fixes for the 9p conversion"

* tag 'vfs-6.8.netfs' of gitolite.kernel.org:pub/scm/linux/kernel/git/vfs/vfs: (50 commits)
  netfs: Fix wrong #ifdef hiding wait
  cachefiles: Fix signed/unsigned mixup
  netfs: Fix the loop that unmarks folios after writing to the cache
  netfs: Fix interaction between write-streaming and cachefiles culling
  netfs: Count DIO writes
  netfs: Mark netfs_unbuffered_write_iter_locked() static
  netfs: Fix proc/fs/fscache symlink to point to "netfs" not "../netfs"
  netfs: Rearrange netfs_io_subrequest to put request pointer first
  9p: Use length of data written to the server in preference to error
  9p: Do a couple of cleanups
  9p: Fix initialisation of netfs_inode for 9p
  cachefiles: Fix __cachefiles_prepare_write()
  9p: Use netfslib read/write_iter
  afs: Use the netfs write helpers
  netfs: Export the netfs_sreq tracepoint
  netfs: Optimise away reads above the point at which there can be no data
  netfs: Implement a write-through caching option
  netfs: Provide a launder_folio implementation
  netfs: Provide a writepages implementation
  netfs, cachefiles: Pass upper bound length to allow expansion
  ...
2024-01-19 09:10:23 -08:00

285 lines
7.2 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/* handling of writes to regular files and writing back to the server
*
* Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*/
#include <linux/backing-dev.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include <linux/netfs.h>
#include <trace/events/netfs.h>
#include "internal.h"
/*
* completion of write to server
*/
static void afs_pages_written_back(struct afs_vnode *vnode, loff_t start, unsigned int len)
{
_enter("{%llx:%llu},{%x @%llx}",
vnode->fid.vid, vnode->fid.vnode, len, start);
afs_prune_wb_keys(vnode);
_leave("");
}
/*
* Find a key to use for the writeback. We cached the keys used to author the
* writes on the vnode. *_wbk will contain the last writeback key used or NULL
* and we need to start from there if it's set.
*/
static int afs_get_writeback_key(struct afs_vnode *vnode,
struct afs_wb_key **_wbk)
{
struct afs_wb_key *wbk = NULL;
struct list_head *p;
int ret = -ENOKEY, ret2;
spin_lock(&vnode->wb_lock);
if (*_wbk)
p = (*_wbk)->vnode_link.next;
else
p = vnode->wb_keys.next;
while (p != &vnode->wb_keys) {
wbk = list_entry(p, struct afs_wb_key, vnode_link);
_debug("wbk %u", key_serial(wbk->key));
ret2 = key_validate(wbk->key);
if (ret2 == 0) {
refcount_inc(&wbk->usage);
_debug("USE WB KEY %u", key_serial(wbk->key));
break;
}
wbk = NULL;
if (ret == -ENOKEY)
ret = ret2;
p = p->next;
}
spin_unlock(&vnode->wb_lock);
if (*_wbk)
afs_put_wb_key(*_wbk);
*_wbk = wbk;
return 0;
}
static void afs_store_data_success(struct afs_operation *op)
{
struct afs_vnode *vnode = op->file[0].vnode;
op->ctime = op->file[0].scb.status.mtime_client;
afs_vnode_commit_status(op, &op->file[0]);
if (!afs_op_error(op)) {
if (!op->store.laundering)
afs_pages_written_back(vnode, op->store.pos, op->store.size);
afs_stat_v(vnode, n_stores);
atomic_long_add(op->store.size, &afs_v2net(vnode)->n_store_bytes);
}
}
static const struct afs_operation_ops afs_store_data_operation = {
.issue_afs_rpc = afs_fs_store_data,
.issue_yfs_rpc = yfs_fs_store_data,
.success = afs_store_data_success,
};
/*
* write to a file
*/
static int afs_store_data(struct afs_vnode *vnode, struct iov_iter *iter, loff_t pos,
bool laundering)
{
struct afs_operation *op;
struct afs_wb_key *wbk = NULL;
loff_t size = iov_iter_count(iter);
int ret = -ENOKEY;
_enter("%s{%llx:%llu.%u},%llx,%llx",
vnode->volume->name,
vnode->fid.vid,
vnode->fid.vnode,
vnode->fid.unique,
size, pos);
ret = afs_get_writeback_key(vnode, &wbk);
if (ret) {
_leave(" = %d [no keys]", ret);
return ret;
}
op = afs_alloc_operation(wbk->key, vnode->volume);
if (IS_ERR(op)) {
afs_put_wb_key(wbk);
return -ENOMEM;
}
afs_op_set_vnode(op, 0, vnode);
op->file[0].dv_delta = 1;
op->file[0].modification = true;
op->store.pos = pos;
op->store.size = size;
op->store.laundering = laundering;
op->flags |= AFS_OPERATION_UNINTR;
op->ops = &afs_store_data_operation;
try_next_key:
afs_begin_vnode_operation(op);
op->store.write_iter = iter;
op->store.i_size = max(pos + size, vnode->netfs.remote_i_size);
op->mtime = inode_get_mtime(&vnode->netfs.inode);
afs_wait_for_operation(op);
switch (afs_op_error(op)) {
case -EACCES:
case -EPERM:
case -ENOKEY:
case -EKEYEXPIRED:
case -EKEYREJECTED:
case -EKEYREVOKED:
_debug("next");
ret = afs_get_writeback_key(vnode, &wbk);
if (ret == 0) {
key_put(op->key);
op->key = key_get(wbk->key);
goto try_next_key;
}
break;
}
afs_put_wb_key(wbk);
_leave(" = %d", afs_op_error(op));
return afs_put_operation(op);
}
static void afs_upload_to_server(struct netfs_io_subrequest *subreq)
{
struct afs_vnode *vnode = AFS_FS_I(subreq->rreq->inode);
ssize_t ret;
_enter("%x[%x],%zx",
subreq->rreq->debug_id, subreq->debug_index, subreq->io_iter.count);
trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
ret = afs_store_data(vnode, &subreq->io_iter, subreq->start,
subreq->rreq->origin == NETFS_LAUNDER_WRITE);
netfs_write_subrequest_terminated(subreq, ret < 0 ? ret : subreq->len,
false);
}
static void afs_upload_to_server_worker(struct work_struct *work)
{
struct netfs_io_subrequest *subreq =
container_of(work, struct netfs_io_subrequest, work);
afs_upload_to_server(subreq);
}
/*
* Set up write requests for a writeback slice. We need to add a write request
* for each write we want to make.
*/
void afs_create_write_requests(struct netfs_io_request *wreq, loff_t start, size_t len)
{
struct netfs_io_subrequest *subreq;
_enter("%x,%llx-%llx", wreq->debug_id, start, start + len);
subreq = netfs_create_write_request(wreq, NETFS_UPLOAD_TO_SERVER,
start, len, afs_upload_to_server_worker);
if (subreq)
netfs_queue_write_request(subreq);
}
/*
* write some of the pending data back to the server
*/
int afs_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
struct afs_vnode *vnode = AFS_FS_I(mapping->host);
int ret;
/* We have to be careful as we can end up racing with setattr()
* truncating the pagecache since the caller doesn't take a lock here
* to prevent it.
*/
if (wbc->sync_mode == WB_SYNC_ALL)
down_read(&vnode->validate_lock);
else if (!down_read_trylock(&vnode->validate_lock))
return 0;
ret = netfs_writepages(mapping, wbc);
up_read(&vnode->validate_lock);
return ret;
}
/*
* flush any dirty pages for this process, and check for write errors.
* - the return status from this call provides a reliable indication of
* whether any write errors occurred for this process.
*/
int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
struct afs_file *af = file->private_data;
int ret;
_enter("{%llx:%llu},{n=%pD},%d",
vnode->fid.vid, vnode->fid.vnode, file,
datasync);
ret = afs_validate(vnode, af->key);
if (ret < 0)
return ret;
return file_write_and_wait_range(file, start, end);
}
/*
* notification that a previously read-only page is about to become writable
* - if it returns an error, the caller will deliver a bus error signal
*/
vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
{
struct file *file = vmf->vma->vm_file;
if (afs_validate(AFS_FS_I(file_inode(file)), afs_file_key(file)) < 0)
return VM_FAULT_SIGBUS;
return netfs_page_mkwrite(vmf, NULL);
}
/*
* Prune the keys cached for writeback. The caller must hold vnode->wb_lock.
*/
void afs_prune_wb_keys(struct afs_vnode *vnode)
{
LIST_HEAD(graveyard);
struct afs_wb_key *wbk, *tmp;
/* Discard unused keys */
spin_lock(&vnode->wb_lock);
if (!mapping_tagged(&vnode->netfs.inode.i_data, PAGECACHE_TAG_WRITEBACK) &&
!mapping_tagged(&vnode->netfs.inode.i_data, PAGECACHE_TAG_DIRTY)) {
list_for_each_entry_safe(wbk, tmp, &vnode->wb_keys, vnode_link) {
if (refcount_read(&wbk->usage) == 1)
list_move(&wbk->vnode_link, &graveyard);
}
}
spin_unlock(&vnode->wb_lock);
while (!list_empty(&graveyard)) {
wbk = list_entry(graveyard.next, struct afs_wb_key, vnode_link);
list_del(&wbk->vnode_link);
afs_put_wb_key(wbk);
}
}