forked from Minki/linux
Changes in gfs2:
- An iopen glock locking scheme rework that speeds up deletes of inodes accessed from multiple nodes. - Various bug fixes and debugging improvements. - Convert gfs2-glocks.txt to ReST. -----BEGIN PGP SIGNATURE----- iQJIBAABCAAyFiEEJZs3krPW0xkhLMTc1b+f6wMTZToFAl7eYjMUHGFncnVlbmJh QHJlZGhhdC5jb20ACgkQ1b+f6wMTZTr/9g//cJ6jgiD/+qzh0VzougVksVZIduAl RMB+kldOjBS2ORbyYM87Jm1tdyakgZuFO91XlSwChWRdC3Y2mqdaJIEE/kATqfY9 7Frlw++SyFKLvIf04kDYGk2hXX+umXXYFfrIiKb0tzDSGkPRaARUb3RM4TRvlSDP /U0JlYA/4aXMUge+VpYsbpSGeqHNfEzmcmCyPXGmZYyh1MZ/RocMZFYEsP9NH82J l07fxowUd10LJPEmBajzjD2NmEvjdvF4gBCOfJVNIfOzCj0CwXL3vmtu1SUNOKr+ em266EWZF89eOcvfdtE6xF0w81oCAK43wYRjIODSgI9JCLXGiOYmlWZVwZoqu5iy 2GQDhj/taq3ObuVqjR5n6GYuqMoJ+D0LSD13qccDALq/Bdy4lq9TMLSdDbkrVIM/ 8BVn0nI5MzUlTV3mq6uxhU0HqtYDwUEiHWURWw6bYRug5OvQy3nbg/XZptYlnD87 XQccE4ErjlgSHLiYx1YckFz/GG6ytrRAKl9bGMkZ0u2+XmDsH+iJJgLcaXCPUP9h /hhYagKI55UBDer7we4tppbu+gnJrg3PXlgImf53iMc7ia0KQHd+TfSIFkGPuydI aEKKhIQzje23JayMbPRnwPbNlw9zU1loPi7hPS3rCpDY+w8oawpFyIieEOcxJyEt pYkOt4BQi9LvpGc= =PsCY -----END PGP SIGNATURE----- Merge tag 'gfs2-for-5.8' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2 Pull gfs2 updates from Andreas Gruenbacher: - An iopen glock locking scheme rework that speeds up deletes of inodes accessed from multiple nodes - Various bug fixes and debugging improvements - Convert gfs2-glocks.txt to ReST * tag 'gfs2-for-5.8' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2: gfs2: fix use-after-free on transaction ail lists gfs2: new slab for transactions gfs2: initialize transaction tr_ailX_lists earlier gfs2: Smarter iopen glock waiting gfs2: Wake up when setting GLF_DEMOTE gfs2: Check inode generation number in delete_work_func gfs2: Move inode generation number check into gfs2_inode_lookup gfs2: Minor gfs2_lookup_by_inum cleanup gfs2: Try harder to delete inodes locally gfs2: Give up the iopen glock on contention gfs2: Turn gl_delete into a delayed work gfs2: Keep track of deleted inode generations in LVBs 
gfs2: Allow ASPACE glocks to also have an lvb gfs2: instrumentation wrt log_flush stuck gfs2: introduce new gfs2_glock_assert_withdraw gfs2: print mapping->nrpages in glock dump for address space glocks gfs2: Only do glock put in gfs2_create_inode for free inodes gfs2: Allow lock_nolock mount to specify jid=X gfs2: Don't ignore inode write errors during inode_go_sync docs: filesystems: convert gfs2-glocks.txt to ReST
This commit is contained in:
commit
ca687877e0
@ -1,5 +1,8 @@
|
||||
Glock internal locking rules
|
||||
------------------------------
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
============================
|
||||
Glock internal locking rules
|
||||
============================
|
||||
|
||||
This documents the basic principles of the glock state machine
|
||||
internals. Each glock (struct gfs2_glock in fs/gfs2/incore.h)
|
||||
@ -24,24 +27,28 @@ There are three lock states that users of the glock layer can request,
|
||||
namely shared (SH), deferred (DF) and exclusive (EX). Those translate
|
||||
to the following DLM lock modes:
|
||||
|
||||
Glock mode | DLM lock mode
|
||||
------------------------------
|
||||
UN | IV/NL Unlocked (no DLM lock associated with glock) or NL
|
||||
SH | PR (Protected read)
|
||||
DF | CW (Concurrent write)
|
||||
EX | EX (Exclusive)
|
||||
========== ====== =====================================================
|
||||
Glock mode DLM lock mode
|
||||
========== ====== =====================================================
|
||||
UN IV/NL Unlocked (no DLM lock associated with glock) or NL
|
||||
SH PR (Protected read)
|
||||
DF CW (Concurrent write)
|
||||
EX EX (Exclusive)
|
||||
========== ====== =====================================================
|
||||
|
||||
Thus DF is basically a shared mode which is incompatible with the "normal"
|
||||
shared lock mode, SH. In GFS2 the DF mode is used exclusively for direct I/O
|
||||
operations. The glocks are basically a lock plus some routines which deal
|
||||
with cache management. The following rules apply for the cache:
|
||||
|
||||
Glock mode | Cache data | Cache Metadata | Dirty Data | Dirty Metadata
|
||||
--------------------------------------------------------------------------
|
||||
UN | No | No | No | No
|
||||
SH | Yes | Yes | No | No
|
||||
DF | No | Yes | No | No
|
||||
EX | Yes | Yes | Yes | Yes
|
||||
========== ========== ============== ========== ==============
|
||||
Glock mode Cache data Cache Metadata Dirty Data Dirty Metadata
|
||||
========== ========== ============== ========== ==============
|
||||
UN No No No No
|
||||
SH Yes Yes No No
|
||||
DF No Yes No No
|
||||
EX Yes Yes Yes Yes
|
||||
========== ========== ============== ========== ==============
|
||||
|
||||
These rules are implemented using the various glock operations which
|
||||
are defined for each type of glock. Not all types of glocks use
|
||||
@ -49,21 +56,23 @@ all the modes. Only inode glocks use the DF mode for example.
|
||||
|
||||
Table of glock operations and per type constants:
|
||||
|
||||
Field | Purpose
|
||||
----------------------------------------------------------------------------
|
||||
go_xmote_th | Called before remote state change (e.g. to sync dirty data)
|
||||
go_xmote_bh | Called after remote state change (e.g. to refill cache)
|
||||
go_inval | Called if remote state change requires invalidating the cache
|
||||
go_demote_ok | Returns boolean value of whether its ok to demote a glock
|
||||
| (e.g. checks timeout, and that there is no cached data)
|
||||
go_lock | Called for the first local holder of a lock
|
||||
go_unlock | Called on the final local unlock of a lock
|
||||
go_dump | Called to print content of object for debugfs file, or on
|
||||
| error to dump glock to the log.
|
||||
go_type | The type of the glock, LM_TYPE_.....
|
||||
go_callback | Called if the DLM sends a callback to drop this lock
|
||||
go_flags | GLOF_ASPACE is set, if the glock has an address space
|
||||
| associated with it
|
||||
============= =============================================================
|
||||
Field Purpose
|
||||
============= =============================================================
|
||||
go_xmote_th Called before remote state change (e.g. to sync dirty data)
|
||||
go_xmote_bh Called after remote state change (e.g. to refill cache)
|
||||
go_inval Called if remote state change requires invalidating the cache
|
||||
go_demote_ok Returns boolean value of whether it's OK to demote a glock
|
||||
(e.g. checks timeout, and that there is no cached data)
|
||||
go_lock Called for the first local holder of a lock
|
||||
go_unlock Called on the final local unlock of a lock
|
||||
go_dump Called to print content of object for debugfs file, or on
|
||||
error to dump glock to the log.
|
||||
go_type The type of the glock, ``LM_TYPE_*``
|
||||
go_callback Called if the DLM sends a callback to drop this lock
|
||||
go_flags GLOF_ASPACE is set, if the glock has an address space
|
||||
associated with it
|
||||
============= =============================================================
|
||||
|
||||
The minimum hold time for each lock is the time after a remote lock
|
||||
grant for which we ignore remote demote requests. This is in order to
|
||||
@ -82,21 +91,25 @@ rather than via the glock.
|
||||
|
||||
Locking rules for glock operations:
|
||||
|
||||
Operation | GLF_LOCK bit lock held | gl_lockref.lock spinlock held
|
||||
-------------------------------------------------------------------------
|
||||
go_xmote_th | Yes | No
|
||||
go_xmote_bh | Yes | No
|
||||
go_inval | Yes | No
|
||||
go_demote_ok | Sometimes | Yes
|
||||
go_lock | Yes | No
|
||||
go_unlock | Yes | No
|
||||
go_dump | Sometimes | Yes
|
||||
go_callback | Sometimes (N/A) | Yes
|
||||
============= ====================== =============================
|
||||
Operation GLF_LOCK bit lock held gl_lockref.lock spinlock held
|
||||
============= ====================== =============================
|
||||
go_xmote_th Yes No
|
||||
go_xmote_bh Yes No
|
||||
go_inval Yes No
|
||||
go_demote_ok Sometimes Yes
|
||||
go_lock Yes No
|
||||
go_unlock Yes No
|
||||
go_dump Sometimes Yes
|
||||
go_callback Sometimes (N/A) Yes
|
||||
============= ====================== =============================
|
||||
|
||||
N.B. Operations must not drop either the bit lock or the spinlock
|
||||
if its held on entry. go_dump and do_demote_ok must never block.
|
||||
Note that go_dump will only be called if the glock's state
|
||||
indicates that it is caching uptodate data.
|
||||
.. Note::
|
||||
|
||||
Operations must not drop either the bit lock or the spinlock
|
||||
if it's held on entry. go_dump and go_demote_ok must never block.
|
||||
Note that go_dump will only be called if the glock's state
|
||||
indicates that it is caching uptodate data.
|
||||
|
||||
Glock locking order within GFS2:
|
||||
|
||||
@ -104,7 +117,7 @@ Glock locking order within GFS2:
|
||||
2. Rename glock (for rename only)
|
||||
3. Inode glock(s)
|
||||
(Parents before children, inodes at "same level" with same parent in
|
||||
lock number order)
|
||||
lock number order)
|
||||
4. Rgrp glock(s) (for (de)allocation operations)
|
||||
5. Transaction glock (via gfs2_trans_begin) for non-read operations
|
||||
6. i_rw_mutex (if required)
|
||||
@ -117,8 +130,8 @@ determine the lifetime of the inode in question. Locking of inodes
|
||||
is on a per-inode basis. Locking of rgrps is on a per rgrp basis.
|
||||
In general we prefer to lock local locks prior to cluster locks.
|
||||
|
||||
Glock Statistics
|
||||
------------------
|
||||
Glock Statistics
|
||||
----------------
|
||||
|
||||
The stats are divided into two sets: those relating to the
|
||||
super block and those relating to an individual glock. The
|
||||
@ -173,8 +186,8 @@ we'd like to get a better idea of these timings:
|
||||
1. To be able to better set the glock "min hold time"
|
||||
2. To spot performance issues more easily
|
||||
3. To improve the algorithm for selecting resource groups for
|
||||
allocation (to base it on lock wait time, rather than blindly
|
||||
using a "try lock")
|
||||
allocation (to base it on lock wait time, rather than blindly
|
||||
using a "try lock")
|
||||
|
||||
Due to the smoothing action of the updates, a step change in
|
||||
some input quantity being sampled will only fully be taken
|
||||
@ -195,10 +208,13 @@ as possible. There are always inaccuracies in any
|
||||
measuring system, but I hope this is as accurate as we
|
||||
can reasonably make it.
|
||||
|
||||
Per sb stats can be found here:
|
||||
/sys/kernel/debug/gfs2/<fsname>/sbstats
|
||||
Per glock stats can be found here:
|
||||
/sys/kernel/debug/gfs2/<fsname>/glstats
|
||||
Per sb stats can be found here::
|
||||
|
||||
/sys/kernel/debug/gfs2/<fsname>/sbstats
|
||||
|
||||
Per glock stats can be found here::
|
||||
|
||||
/sys/kernel/debug/gfs2/<fsname>/glstats
|
||||
|
||||
Assuming that debugfs is mounted on /sys/kernel/debug and also
|
||||
that <fsname> is replaced with the name of the gfs2 filesystem
|
||||
@ -206,14 +222,16 @@ in question.
|
||||
|
||||
The abbreviations used in the output are as follows:
|
||||
|
||||
srtt - Smoothed round trip time for non-blocking dlm requests
|
||||
srttvar - Variance estimate for srtt
|
||||
srttb - Smoothed round trip time for (potentially) blocking dlm requests
|
||||
srttvarb - Variance estimate for srttb
|
||||
sirt - Smoothed inter-request time (for dlm requests)
|
||||
sirtvar - Variance estimate for sirt
|
||||
dlm - Number of dlm requests made (dcnt in glstats file)
|
||||
queue - Number of glock requests queued (qcnt in glstats file)
|
||||
========= ================================================================
|
||||
srtt Smoothed round trip time for non blocking dlm requests
|
||||
srttvar Variance estimate for srtt
|
||||
srttb Smoothed round trip time for (potentially) blocking dlm requests
|
||||
srttvarb Variance estimate for srttb
|
||||
sirt Smoothed inter request time (for dlm requests)
|
||||
sirtvar Variance estimate for sirt
|
||||
dlm Number of dlm requests made (dcnt in glstats file)
|
||||
queue Number of glock requests queued (qcnt in glstats file)
|
||||
========= ================================================================
|
||||
|
||||
The sbstats file contains a set of these stats for each glock type (so 8 lines
|
||||
for each type) and for each cpu (one column per cpu). The glstats file contains
|
||||
@ -224,9 +242,12 @@ The gfs2_glock_lock_time tracepoint prints out the current values of the stats
|
||||
for the glock in question, along with some additional information on each dlm
|
||||
reply that is received:
|
||||
|
||||
status - The status of the dlm request
|
||||
flags - The dlm request flags
|
||||
tdiff - The time taken by this specific request
|
||||
====== =======================================
|
||||
status The status of the dlm request
|
||||
flags The dlm request flags
|
||||
tdiff The time taken by this specific request
|
||||
====== =======================================
|
||||
|
||||
(remaining fields as per above list)
|
||||
|
||||
|
@ -88,6 +88,7 @@ Documentation for filesystem implementations.
|
||||
f2fs
|
||||
gfs2
|
||||
gfs2-uevents
|
||||
gfs2-glocks
|
||||
hfs
|
||||
hfsplus
|
||||
hpfs
|
||||
|
@ -7251,7 +7251,7 @@ L: cluster-devel@redhat.com
|
||||
S: Supported
|
||||
W: http://sources.redhat.com/cluster/
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2.git
|
||||
F: Documentation/filesystems/gfs2*.txt
|
||||
F: Documentation/filesystems/gfs2*
|
||||
F: fs/gfs2/
|
||||
F: include/uapi/linux/gfs2_ondisk.h
|
||||
|
||||
|
@ -134,7 +134,9 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
|
||||
struct gfs2_sbd *sdp = sb->s_fs_info;
|
||||
struct inode *inode;
|
||||
|
||||
inode = gfs2_lookup_by_inum(sdp, inum->no_addr, &inum->no_formal_ino,
|
||||
if (!inum->no_formal_ino)
|
||||
return ERR_PTR(-ESTALE);
|
||||
inode = gfs2_lookup_by_inum(sdp, inum->no_addr, inum->no_formal_ino,
|
||||
GFS2_BLKST_DINODE);
|
||||
if (IS_ERR(inode))
|
||||
return ERR_CAST(inode);
|
||||
|
208
fs/gfs2/glock.c
208
fs/gfs2/glock.c
@ -125,12 +125,11 @@ static void gfs2_glock_dealloc(struct rcu_head *rcu)
|
||||
{
|
||||
struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);
|
||||
|
||||
if (gl->gl_ops->go_flags & GLOF_ASPACE) {
|
||||
kfree(gl->gl_lksb.sb_lvbptr);
|
||||
if (gl->gl_ops->go_flags & GLOF_ASPACE)
|
||||
kmem_cache_free(gfs2_glock_aspace_cachep, gl);
|
||||
} else {
|
||||
kfree(gl->gl_lksb.sb_lvbptr);
|
||||
else
|
||||
kmem_cache_free(gfs2_glock_cachep, gl);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -164,7 +163,7 @@ void gfs2_glock_free(struct gfs2_glock *gl)
|
||||
{
|
||||
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
|
||||
|
||||
BUG_ON(atomic_read(&gl->gl_revokes));
|
||||
gfs2_glock_assert_withdraw(gl, atomic_read(&gl->gl_revokes) == 0);
|
||||
rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms);
|
||||
smp_mb();
|
||||
wake_up_glock(gl);
|
||||
@ -465,6 +464,15 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state)
|
||||
gl->gl_tchange = jiffies;
|
||||
}
|
||||
|
||||
/**
 * gfs2_set_demote - flag a glock for demotion and wake async glock waiters
 * @gl: The glock to flag
 *
 * Sets GLF_DEMOTE in @gl's flags and then wakes everything sleeping on the
 * super block's sd_async_glock_wait wait queue.
 */
static void gfs2_set_demote(struct gfs2_glock *gl)
|
||||
{
|
||||
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
|
||||
|
||||
set_bit(GLF_DEMOTE, &gl->gl_flags);
|
||||
/*
 * Full barrier between setting the flag and the wake-up.
 * NOTE(review): presumably so that woken waiters observe GLF_DEMOTE;
 * confirm against the waiter side.
 */
smp_mb();
|
||||
wake_up(&sdp->sd_async_glock_wait);
|
||||
}
|
||||
|
||||
static void gfs2_demote_wake(struct gfs2_glock *gl)
|
||||
{
|
||||
gl->gl_demote_state = LM_ST_EXCLUSIVE;
|
||||
@ -626,7 +634,8 @@ __acquires(&gl->gl_lockref.lock)
|
||||
*/
|
||||
if ((atomic_read(&gl->gl_ail_count) != 0) &&
|
||||
(!cmpxchg(&sdp->sd_log_error, 0, -EIO))) {
|
||||
gfs2_assert_warn(sdp, !atomic_read(&gl->gl_ail_count));
|
||||
gfs2_glock_assert_warn(gl,
|
||||
!atomic_read(&gl->gl_ail_count));
|
||||
gfs2_dump_glock(NULL, gl, true);
|
||||
}
|
||||
glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA);
|
||||
@ -756,20 +765,127 @@ out_unlock:
|
||||
return;
|
||||
}
|
||||
|
||||
/**
 * gfs2_inode_remember_delete - record a deleted inode generation in the LVB
 * @gl: The glock whose lock value block (gl_lksb.sb_lvbptr) is updated
 * @generation: The generation number to record
 *
 * The LVB is interpreted as a struct gfs2_inode_lvb. If its magic field is
 * still zero it is initialised to GFS2_MAGIC first. The generation is stored
 * (big-endian) only when the magic matches GFS2_MAGIC; an LVB carrying any
 * other magic value is left untouched.
 */
void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation)
|
||||
{
|
||||
struct gfs2_inode_lvb *ri = (void *)gl->gl_lksb.sb_lvbptr;
|
||||
|
||||
/* A zero magic is treated as "not yet initialised". */
if (ri->ri_magic == 0)
|
||||
ri->ri_magic = cpu_to_be32(GFS2_MAGIC);
|
||||
if (ri->ri_magic == cpu_to_be32(GFS2_MAGIC))
|
||||
ri->ri_generation_deleted = cpu_to_be64(generation);
|
||||
}
|
||||
|
||||
/**
 * gfs2_inode_already_deleted - check the LVB for a recorded delete
 * @gl: The glock whose lock value block (gl_lksb.sb_lvbptr) is consulted
 * @generation: The generation number to test
 *
 * Returns: false if the LVB does not carry GFS2_MAGIC; otherwise true when
 * @generation is less than or equal to the generation recorded in
 * ri_generation_deleted.
 */
bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation)
|
||||
{
|
||||
struct gfs2_inode_lvb *ri = (void *)gl->gl_lksb.sb_lvbptr;
|
||||
|
||||
/* Without a valid magic the LVB contents cannot be trusted. */
if (ri->ri_magic != cpu_to_be32(GFS2_MAGIC))
|
||||
return false;
|
||||
return generation <= be64_to_cpu(ri->ri_generation_deleted);
|
||||
}
|
||||
|
||||
/**
 * gfs2_glock_poke - briefly request a glock and release it again
 * @gl: The glock to poke
 *
 * Requests @gl in LM_ST_SHARED with LM_FLAG_TRY_1CB | LM_FLAG_ANY | GL_SKIP
 * and, if that succeeds, dequeues the holder straight away. A failed request
 * is silently ignored.
 */
static void gfs2_glock_poke(struct gfs2_glock *gl)
|
||||
{
|
||||
int flags = LM_FLAG_TRY_1CB | LM_FLAG_ANY | GL_SKIP;
|
||||
struct gfs2_holder gh;
|
||||
int error;
|
||||
|
||||
error = gfs2_glock_nq_init(gl, LM_ST_SHARED, flags, &gh);
|
||||
/* Only a successfully queued holder needs to be dequeued. */
if (!error)
|
||||
gfs2_glock_dq(&gh);
|
||||
}
|
||||
|
||||
/**
 * gfs2_try_evict - try to get the inode attached to a glock evicted
 * @gl: The glock whose gl_object is the inode to evict
 *
 * Grabs a reference to the inode found in @gl->gl_object (under
 * gl_lockref.lock), marks it GIF_DEFERRED_DELETE, prunes its dentry aliases
 * and drops the reference again. Afterwards gl_object is re-read: if an inode
 * is still attached, the flag is cleared and the inode's own glock is poked.
 *
 * Returns: true if gl_object is NULL after the attempt; false if no inode
 * could be grabbed in the first place or if one is still attached.
 */
static bool gfs2_try_evict(struct gfs2_glock *gl)
|
||||
{
|
||||
struct gfs2_inode *ip;
|
||||
bool evicted = false;
|
||||
|
||||
/*
|
||||
* If there is contention on the iopen glock and we have an inode, try
|
||||
* to grab and release the inode so that it can be evicted. This will
|
||||
* allow the remote node to go ahead and delete the inode without us
|
||||
* having to do it, which will avoid rgrp glock thrashing.
|
||||
*
|
||||
* The remote node is likely still holding the corresponding inode
|
||||
* glock, so it will run before we get to verify that the delete has
|
||||
* happened below.
|
||||
*/
|
||||
spin_lock(&gl->gl_lockref.lock);
|
||||
ip = gl->gl_object;
|
||||
/* A failed igrab() is treated the same as having no inode at all. */
if (ip && !igrab(&ip->i_inode))
|
||||
ip = NULL;
|
||||
spin_unlock(&gl->gl_lockref.lock);
|
||||
if (ip) {
|
||||
struct gfs2_glock *inode_gl = NULL;
|
||||
|
||||
/* Remember the generation number on the glock before the inode may go away. */
gl->gl_no_formal_ino = ip->i_no_formal_ino;
|
||||
set_bit(GIF_DEFERRED_DELETE, &ip->i_flags);
|
||||
d_prune_aliases(&ip->i_inode);
|
||||
iput(&ip->i_inode);
|
||||
|
||||
/* If the inode was evicted, gl->gl_object will now be NULL. */
|
||||
spin_lock(&gl->gl_lockref.lock);
|
||||
ip = gl->gl_object;
|
||||
if (ip) {
|
||||
/* Still attached: take a reference on the inode glock for the poke below. */
inode_gl = ip->i_gl;
|
||||
lockref_get(&inode_gl->gl_lockref);
|
||||
clear_bit(GIF_DEFERRED_DELETE, &ip->i_flags);
|
||||
}
|
||||
spin_unlock(&gl->gl_lockref.lock);
|
||||
if (inode_gl) {
|
||||
gfs2_glock_poke(inode_gl);
|
||||
gfs2_glock_put(inode_gl);
|
||||
}
|
||||
evicted = !ip;
|
||||
}
|
||||
return evicted;
|
||||
}
|
||||
|
||||
static void delete_work_func(struct work_struct *work)
|
||||
{
|
||||
struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete);
|
||||
struct delayed_work *dwork = to_delayed_work(work);
|
||||
struct gfs2_glock *gl = container_of(dwork, struct gfs2_glock, gl_delete);
|
||||
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
|
||||
struct inode *inode;
|
||||
u64 no_addr = gl->gl_name.ln_number;
|
||||
|
||||
spin_lock(&gl->gl_lockref.lock);
|
||||
clear_bit(GLF_PENDING_DELETE, &gl->gl_flags);
|
||||
spin_unlock(&gl->gl_lockref.lock);
|
||||
|
||||
/* If someone's using this glock to create a new dinode, the block must
|
||||
have been freed by another node, then re-used, in which case our
|
||||
iopen callback is too late after the fact. Ignore it. */
|
||||
if (test_bit(GLF_INODE_CREATING, &gl->gl_flags))
|
||||
goto out;
|
||||
|
||||
inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
|
||||
if (test_bit(GLF_DEMOTE, &gl->gl_flags)) {
|
||||
/*
|
||||
* If we can evict the inode, give the remote node trying to
|
||||
* delete the inode some time before verifying that the delete
|
||||
* has happened. Otherwise, if we cause contention on the inode glock
|
||||
* immediately, the remote node will think that we still have
|
||||
* the inode in use, and so it will give up waiting.
|
||||
*
|
||||
* If we can't evict the inode, signal to the remote node that
|
||||
* the inode is still in use. We'll later try to delete the
|
||||
* inode locally in gfs2_evict_inode.
|
||||
*
|
||||
* FIXME: We only need to verify that the remote node has
|
||||
* deleted the inode because nodes before this remote delete
|
||||
* rework won't cooperate. At a later time, when we no longer
|
||||
* care about compatibility with such nodes, we can skip this
|
||||
* step entirely.
|
||||
*/
|
||||
if (gfs2_try_evict(gl)) {
|
||||
if (gfs2_queue_delete_work(gl, 5 * HZ))
|
||||
return;
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
|
||||
inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino,
|
||||
GFS2_BLKST_UNLINKED);
|
||||
if (!IS_ERR_OR_NULL(inode)) {
|
||||
d_prune_aliases(inode);
|
||||
iput(inode);
|
||||
@ -800,7 +916,7 @@ static void glock_work_func(struct work_struct *work)
|
||||
|
||||
if (!delay) {
|
||||
clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags);
|
||||
set_bit(GLF_DEMOTE, &gl->gl_flags);
|
||||
gfs2_set_demote(gl);
|
||||
}
|
||||
}
|
||||
run_queue(gl, 0);
|
||||
@ -931,7 +1047,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
|
||||
gl->gl_object = NULL;
|
||||
gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
|
||||
INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
|
||||
INIT_WORK(&gl->gl_delete, delete_work_func);
|
||||
INIT_DELAYED_WORK(&gl->gl_delete, delete_work_func);
|
||||
|
||||
mapping = gfs2_glock2aspace(gl);
|
||||
if (mapping) {
|
||||
@ -1145,9 +1261,10 @@ wait_for_dlm:
|
||||
static void handle_callback(struct gfs2_glock *gl, unsigned int state,
|
||||
unsigned long delay, bool remote)
|
||||
{
|
||||
int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE;
|
||||
|
||||
set_bit(bit, &gl->gl_flags);
|
||||
if (delay)
|
||||
set_bit(GLF_PENDING_DEMOTE, &gl->gl_flags);
|
||||
else
|
||||
gfs2_set_demote(gl);
|
||||
if (gl->gl_demote_state == LM_ST_EXCLUSIVE) {
|
||||
gl->gl_demote_state = state;
|
||||
gl->gl_demote_time = jiffies;
|
||||
@ -1754,6 +1871,44 @@ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
|
||||
rhashtable_walk_exit(&iter);
|
||||
}
|
||||
|
||||
/**
 * gfs2_queue_delete_work - queue a glock's delayed delete work
 * @gl: The glock whose gl_delete work is queued
 * @delay: Delay passed on to queue_delayed_work()
 *
 * Queues @gl->gl_delete on gfs2_delete_workqueue while holding
 * gl_lockref.lock and, when the work was newly queued, sets
 * GLF_PENDING_DELETE.
 *
 * Returns: the result of queue_delayed_work().
 */
bool gfs2_queue_delete_work(struct gfs2_glock *gl, unsigned long delay)
|
||||
{
|
||||
bool queued;
|
||||
|
||||
spin_lock(&gl->gl_lockref.lock);
|
||||
queued = queue_delayed_work(gfs2_delete_workqueue,
|
||||
&gl->gl_delete, delay);
|
||||
/* The flag is only set when this call actually queued the work. */
if (queued)
|
||||
set_bit(GLF_PENDING_DELETE, &gl->gl_flags);
|
||||
spin_unlock(&gl->gl_lockref.lock);
|
||||
return queued;
|
||||
}
|
||||
|
||||
/**
 * gfs2_cancel_delete_work - cancel a glock's delayed delete work
 * @gl: The glock whose gl_delete work is cancelled
 *
 * Calls cancel_delayed_work_sync() on @gl->gl_delete. Only when that reports
 * a cancelled work item is GLF_PENDING_DELETE cleared and a glock reference
 * dropped.
 * NOTE(review): the put presumably balances a reference taken when the work
 * was queued; confirm against the queueing callers.
 */
void gfs2_cancel_delete_work(struct gfs2_glock *gl)
|
||||
{
|
||||
if (cancel_delayed_work_sync(&gl->gl_delete)) {
|
||||
clear_bit(GLF_PENDING_DELETE, &gl->gl_flags);
|
||||
gfs2_glock_put(gl);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * gfs2_delete_work_queued - test whether GLF_PENDING_DELETE is set
 * @gl: The glock to test
 *
 * Returns: true if GLF_PENDING_DELETE is set in @gl's flags.
 */
bool gfs2_delete_work_queued(const struct gfs2_glock *gl)
|
||||
{
|
||||
return test_bit(GLF_PENDING_DELETE, &gl->gl_flags);
|
||||
}
|
||||
|
||||
/**
 * flush_delete_work - flush one glock's delayed delete work
 * @gl: The glock to process
 *
 * Flushes @gl->gl_delete and then queues the glock's regular work with a
 * delay of 0. Used as the per-glock examiner for gfs2_flush_delete_work().
 */
static void flush_delete_work(struct gfs2_glock *gl)
|
||||
{
|
||||
flush_delayed_work(&gl->gl_delete);
|
||||
gfs2_glock_queue_work(gl, 0);
|
||||
}
|
||||
|
||||
/**
 * gfs2_flush_delete_work - flush delete work for the glocks of a super block
 * @sdp: The super block passed to the glock hash walk
 *
 * Runs flush_delete_work() via glock_hash_walk() for @sdp, then flushes
 * gfs2_delete_workqueue as a whole.
 */
void gfs2_flush_delete_work(struct gfs2_sbd *sdp)
|
||||
{
|
||||
glock_hash_walk(flush_delete_work, sdp);
|
||||
flush_workqueue(gfs2_delete_workqueue);
|
||||
}
|
||||
|
||||
/**
|
||||
* thaw_glock - thaw out a glock which has an unprocessed reply waiting
|
||||
* @gl: The glock to thaw
|
||||
@ -1836,7 +1991,7 @@ void gfs2_glock_finish_truncate(struct gfs2_inode *ip)
|
||||
int ret;
|
||||
|
||||
ret = gfs2_truncatei_resume(ip);
|
||||
gfs2_assert_withdraw(gl->gl_name.ln_sbd, ret == 0);
|
||||
gfs2_glock_assert_withdraw(gl, ret == 0);
|
||||
|
||||
spin_lock(&gl->gl_lockref.lock);
|
||||
clear_bit(GLF_LOCK, &gl->gl_flags);
|
||||
@ -1978,7 +2133,13 @@ void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid)
|
||||
char gflags_buf[32];
|
||||
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
|
||||
char fs_id_buf[sizeof(sdp->sd_fsname) + 7];
|
||||
unsigned long nrpages = 0;
|
||||
|
||||
if (gl->gl_ops->go_flags & GLOF_ASPACE) {
|
||||
struct address_space *mapping = gfs2_glock2aspace(gl);
|
||||
|
||||
nrpages = mapping->nrpages;
|
||||
}
|
||||
memset(fs_id_buf, 0, sizeof(fs_id_buf));
|
||||
if (fsid && sdp) /* safety precaution */
|
||||
sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
|
||||
@ -1987,15 +2148,16 @@ void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid)
|
||||
if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
|
||||
dtime = 0;
|
||||
gfs2_print_dbg(seq, "%sG: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d "
|
||||
"v:%d r:%d m:%ld\n", fs_id_buf, state2str(gl->gl_state),
|
||||
gl->gl_name.ln_type,
|
||||
(unsigned long long)gl->gl_name.ln_number,
|
||||
gflags2str(gflags_buf, gl),
|
||||
state2str(gl->gl_target),
|
||||
state2str(gl->gl_demote_state), dtime,
|
||||
atomic_read(&gl->gl_ail_count),
|
||||
atomic_read(&gl->gl_revokes),
|
||||
(int)gl->gl_lockref.count, gl->gl_hold_time);
|
||||
"v:%d r:%d m:%ld p:%lu\n",
|
||||
fs_id_buf, state2str(gl->gl_state),
|
||||
gl->gl_name.ln_type,
|
||||
(unsigned long long)gl->gl_name.ln_number,
|
||||
gflags2str(gflags_buf, gl),
|
||||
state2str(gl->gl_target),
|
||||
state2str(gl->gl_demote_state), dtime,
|
||||
atomic_read(&gl->gl_ail_count),
|
||||
atomic_read(&gl->gl_revokes),
|
||||
(int)gl->gl_lockref.count, gl->gl_hold_time, nrpages);
|
||||
|
||||
list_for_each_entry(gh, &gl->gl_holders, gh_list)
|
||||
dump_holder(seq, gh, fs_id_buf);
|
||||
|
@ -205,6 +205,15 @@ extern void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl,
|
||||
#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { \
|
||||
gfs2_dump_glock(NULL, gl, true); \
|
||||
BUG(); } } while(0)
|
||||
/*
 * gfs2_glock_assert_warn - glock-aware variant of gfs2_assert_warn
 *
 * When @x is false, dumps @gl via gfs2_dump_glock() and then invokes
 * gfs2_assert_warn() on the glock's super block. Note that @x is evaluated
 * twice on the failure path (once here, once inside gfs2_assert_warn), so it
 * should be free of side effects.
 */
#define gfs2_glock_assert_warn(gl, x) do { if (unlikely(!(x))) { \
|
||||
gfs2_dump_glock(NULL, gl, true); \
|
||||
gfs2_assert_warn((gl)->gl_name.ln_sbd, (x)); } } \
|
||||
while (0)
|
||||
/*
 * gfs2_glock_assert_withdraw - glock-aware variant of gfs2_assert_withdraw
 *
 * When @x is false, dumps @gl via gfs2_dump_glock() and then invokes
 * gfs2_assert_withdraw() on the glock's super block. Note that @x is
 * evaluated twice on the failure path (once here, once inside
 * gfs2_assert_withdraw), so it should be free of side effects.
 */
#define gfs2_glock_assert_withdraw(gl, x) do { if (unlikely(!(x))) { \
|
||||
gfs2_dump_glock(NULL, gl, true); \
|
||||
gfs2_assert_withdraw((gl)->gl_name.ln_sbd, (x)); } } \
|
||||
while (0)
|
||||
|
||||
extern __printf(2, 3)
|
||||
void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...);
|
||||
|
||||
@ -235,6 +244,10 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
|
||||
|
||||
extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
|
||||
extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
|
||||
extern bool gfs2_queue_delete_work(struct gfs2_glock *gl, unsigned long delay);
|
||||
extern void gfs2_cancel_delete_work(struct gfs2_glock *gl);
|
||||
extern bool gfs2_delete_work_queued(const struct gfs2_glock *gl);
|
||||
extern void gfs2_flush_delete_work(struct gfs2_sbd *sdp);
|
||||
extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
|
||||
extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
|
||||
extern void gfs2_glock_thaw(struct gfs2_sbd *sdp);
|
||||
@ -306,4 +319,7 @@ static inline void glock_clear_object(struct gfs2_glock *gl, void *object)
|
||||
spin_unlock(&gl->gl_lockref.lock);
|
||||
}
|
||||
|
||||
extern void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation);
|
||||
extern bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation);
|
||||
|
||||
#endif /* __GLOCK_DOT_H__ */
|
||||
|
@ -91,6 +91,8 @@ static int gfs2_ail_empty_gl(struct gfs2_glock *gl)
|
||||
memset(&tr, 0, sizeof(tr));
|
||||
INIT_LIST_HEAD(&tr.tr_buf);
|
||||
INIT_LIST_HEAD(&tr.tr_databuf);
|
||||
INIT_LIST_HEAD(&tr.tr_ail1_list);
|
||||
INIT_LIST_HEAD(&tr.tr_ail2_list);
|
||||
tr.tr_revokes = atomic_read(&gl->gl_ail_count);
|
||||
|
||||
if (!tr.tr_revokes) {
|
||||
@ -268,7 +270,7 @@ static int inode_go_sync(struct gfs2_glock *gl)
|
||||
struct gfs2_inode *ip = gfs2_glock2inode(gl);
|
||||
int isreg = ip && S_ISREG(ip->i_inode.i_mode);
|
||||
struct address_space *metamapping = gfs2_glock2aspace(gl);
|
||||
int error = 0;
|
||||
int error = 0, ret;
|
||||
|
||||
if (isreg) {
|
||||
if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
|
||||
@ -289,8 +291,10 @@ static int inode_go_sync(struct gfs2_glock *gl)
|
||||
error = filemap_fdatawait(mapping);
|
||||
mapping_set_error(mapping, error);
|
||||
}
|
||||
error = filemap_fdatawait(metamapping);
|
||||
mapping_set_error(metamapping, error);
|
||||
ret = filemap_fdatawait(metamapping);
|
||||
mapping_set_error(metamapping, ret);
|
||||
if (!error)
|
||||
error = ret;
|
||||
gfs2_ail_empty_gl(gl);
|
||||
/*
|
||||
* Writeback of the data mapping may cause the dirty flag to be set
|
||||
@ -608,11 +612,17 @@ static void iopen_go_callback(struct gfs2_glock *gl, bool remote)
|
||||
if (gl->gl_demote_state == LM_ST_UNLOCKED &&
|
||||
gl->gl_state == LM_ST_SHARED && ip) {
|
||||
gl->gl_lockref.count++;
|
||||
if (queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
|
||||
if (!queue_delayed_work(gfs2_delete_workqueue,
|
||||
&gl->gl_delete, 0))
|
||||
gl->gl_lockref.count--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * iopen_go_demote_ok - go_demote_ok operation for iopen glocks
 * @gl: The glock
 *
 * Returns: non-zero unless delete work is flagged as pending on @gl
 * (as reported by gfs2_delete_work_queued()).
 */
static int iopen_go_demote_ok(const struct gfs2_glock *gl)
|
||||
{
|
||||
return !gfs2_delete_work_queued(gl);
|
||||
}
|
||||
|
||||
/**
|
||||
* inode_go_free - wake up anyone waiting for dlm's unlock ast to free it
|
||||
* @gl: glock being freed
|
||||
@ -692,7 +702,7 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
|
||||
.go_lock = inode_go_lock,
|
||||
.go_dump = inode_go_dump,
|
||||
.go_type = LM_TYPE_INODE,
|
||||
.go_flags = GLOF_ASPACE | GLOF_LRU,
|
||||
.go_flags = GLOF_ASPACE | GLOF_LRU | GLOF_LVB,
|
||||
.go_free = inode_go_free,
|
||||
};
|
||||
|
||||
@ -716,6 +726,7 @@ const struct gfs2_glock_operations gfs2_freeze_glops = {
|
||||
const struct gfs2_glock_operations gfs2_iopen_glops = {
|
||||
.go_type = LM_TYPE_IOPEN,
|
||||
.go_callback = iopen_go_callback,
|
||||
.go_demote_ok = iopen_go_demote_ok,
|
||||
.go_flags = GLOF_LRU | GLOF_NONDISK,
|
||||
};
|
||||
|
||||
|
@ -345,6 +345,7 @@ enum {
|
||||
GLF_OBJECT = 14, /* Used only for tracing */
|
||||
GLF_BLOCKING = 15,
|
||||
GLF_INODE_CREATING = 16, /* Inode creation occurring */
|
||||
GLF_PENDING_DELETE = 17,
|
||||
GLF_FREEING = 18, /* Wait for glock to be freed */
|
||||
};
|
||||
|
||||
@ -378,8 +379,11 @@ struct gfs2_glock {
|
||||
atomic_t gl_revokes;
|
||||
struct delayed_work gl_work;
|
||||
union {
|
||||
/* For inode and iopen glocks only */
|
||||
struct work_struct gl_delete;
|
||||
/* For iopen glocks only */
|
||||
struct {
|
||||
struct delayed_work gl_delete;
|
||||
u64 gl_no_formal_ino;
|
||||
};
|
||||
/* For rgrp glocks only */
|
||||
struct {
|
||||
loff_t start;
|
||||
@ -398,6 +402,7 @@ enum {
|
||||
GIF_ORDERED = 4,
|
||||
GIF_FREE_VFS_INODE = 5,
|
||||
GIF_GLOP_PENDING = 6,
|
||||
GIF_DEFERRED_DELETE = 7,
|
||||
};
|
||||
|
||||
struct gfs2_inode {
|
||||
|
@ -115,6 +115,10 @@ static void gfs2_set_iop(struct inode *inode)
|
||||
* placeholder because it doesn't otherwise make sense), the on-disk block type
|
||||
* is verified to be @blktype.
|
||||
*
|
||||
* When @no_formal_ino is non-zero, this function will return ERR_PTR(-ESTALE)
|
||||
* if it detects that @no_formal_ino doesn't match the actual inode generation
|
||||
* number. However, it doesn't always know unless @type is DT_UNKNOWN.
|
||||
*
|
||||
* Returns: A VFS inode, or an error
|
||||
*/
|
||||
|
||||
@ -158,6 +162,11 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
|
||||
if (error)
|
||||
goto fail;
|
||||
|
||||
error = -ESTALE;
|
||||
if (no_formal_ino &&
|
||||
gfs2_inode_already_deleted(ip->i_gl, no_formal_ino))
|
||||
goto fail;
|
||||
|
||||
if (blktype != GFS2_BLKST_FREE) {
|
||||
error = gfs2_check_blk_type(sdp, no_addr,
|
||||
blktype);
|
||||
@ -171,6 +180,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
|
||||
error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
|
||||
if (unlikely(error))
|
||||
goto fail;
|
||||
gfs2_cancel_delete_work(ip->i_iopen_gh.gh_gl);
|
||||
glock_set_object(ip->i_iopen_gh.gh_gl, ip);
|
||||
gfs2_glock_put(io_gl);
|
||||
io_gl = NULL;
|
||||
@ -189,13 +199,23 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
|
||||
inode->i_mode = DT2IF(type);
|
||||
}
|
||||
|
||||
gfs2_set_iop(inode);
|
||||
if (gfs2_holder_initialized(&i_gh))
|
||||
gfs2_glock_dq_uninit(&i_gh);
|
||||
|
||||
unlock_new_inode(inode);
|
||||
gfs2_set_iop(inode);
|
||||
}
|
||||
|
||||
if (gfs2_holder_initialized(&i_gh))
|
||||
gfs2_glock_dq_uninit(&i_gh);
|
||||
if (no_formal_ino && ip->i_no_formal_ino &&
|
||||
no_formal_ino != ip->i_no_formal_ino) {
|
||||
if (inode->i_state & I_NEW)
|
||||
goto fail;
|
||||
iput(inode);
|
||||
return ERR_PTR(-ESTALE);
|
||||
}
|
||||
|
||||
if (inode->i_state & I_NEW)
|
||||
unlock_new_inode(inode);
|
||||
|
||||
return inode;
|
||||
|
||||
fail:
|
||||
@ -207,23 +227,26 @@ fail:
|
||||
return ERR_PTR(error);
|
||||
}
|
||||
|
||||
/**
|
||||
* gfs2_lookup_by_inum - look up an inode by inode number
|
||||
* @sdp: The super block
|
||||
* @no_addr: The inode number
|
||||
* @no_formal_ino: The inode generation number (0 for any)
|
||||
* @blktype: Requested block type (see gfs2_inode_lookup)
|
||||
*/
|
||||
struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
|
||||
u64 *no_formal_ino, unsigned int blktype)
|
||||
u64 no_formal_ino, unsigned int blktype)
|
||||
{
|
||||
struct super_block *sb = sdp->sd_vfs;
|
||||
struct inode *inode;
|
||||
int error;
|
||||
|
||||
inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0, blktype);
|
||||
inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, no_formal_ino,
|
||||
blktype);
|
||||
if (IS_ERR(inode))
|
||||
return inode;
|
||||
|
||||
/* Two extra checks for NFS only */
|
||||
if (no_formal_ino) {
|
||||
error = -ESTALE;
|
||||
if (GFS2_I(inode)->i_no_formal_ino != *no_formal_ino)
|
||||
goto fail_iput;
|
||||
|
||||
error = -EIO;
|
||||
if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM)
|
||||
goto fail_iput;
|
||||
@ -725,6 +748,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
|
||||
if (error)
|
||||
goto fail_gunlock2;
|
||||
|
||||
gfs2_cancel_delete_work(ip->i_iopen_gh.gh_gl);
|
||||
glock_set_object(ip->i_iopen_gh.gh_gl, ip);
|
||||
gfs2_set_iop(inode);
|
||||
insert_inode_hash(inode);
|
||||
@ -781,7 +805,8 @@ fail_gunlock2:
|
||||
fail_free_inode:
|
||||
if (ip->i_gl) {
|
||||
glock_clear_object(ip->i_gl, ip);
|
||||
gfs2_glock_put(ip->i_gl);
|
||||
if (free_vfs_inode) /* else evict will do the put for us */
|
||||
gfs2_glock_put(ip->i_gl);
|
||||
}
|
||||
gfs2_rs_delete(ip, NULL);
|
||||
gfs2_qa_put(ip);
|
||||
|
@ -92,7 +92,7 @@ extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
|
||||
u64 no_addr, u64 no_formal_ino,
|
||||
unsigned int blktype);
|
||||
extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
|
||||
u64 *no_formal_ino,
|
||||
u64 no_formal_ino,
|
||||
unsigned int blktype);
|
||||
|
||||
extern int gfs2_inode_refresh(struct gfs2_inode *ip);
|
||||
|
@ -30,6 +30,7 @@
|
||||
#include "util.h"
|
||||
#include "dir.h"
|
||||
#include "trace_gfs2.h"
|
||||
#include "trans.h"
|
||||
|
||||
static void gfs2_log_shutdown(struct gfs2_sbd *sdp);
|
||||
|
||||
@ -145,9 +146,6 @@ static void dump_ail_list(struct gfs2_sbd *sdp)
|
||||
struct gfs2_bufdata *bd;
|
||||
struct buffer_head *bh;
|
||||
|
||||
fs_err(sdp, "Error: In gfs2_ail1_flush for ten minutes! t=%d\n",
|
||||
current->journal_info ? 1 : 0);
|
||||
|
||||
list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) {
|
||||
list_for_each_entry_reverse(bd, &tr->tr_ail1_list,
|
||||
bd_ail_st_list) {
|
||||
@ -197,6 +195,8 @@ void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc)
|
||||
restart:
|
||||
ret = 0;
|
||||
if (time_after(jiffies, flush_start + (HZ * 600))) {
|
||||
fs_err(sdp, "Error: In %s for ten minutes! t=%d\n",
|
||||
__func__, current->journal_info ? 1 : 0);
|
||||
dump_ail_list(sdp);
|
||||
goto out;
|
||||
}
|
||||
@ -379,7 +379,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
|
||||
list_del(&tr->tr_list);
|
||||
gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list));
|
||||
gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list));
|
||||
kfree(tr);
|
||||
gfs2_trans_free(sdp, tr);
|
||||
}
|
||||
|
||||
spin_unlock(&sdp->sd_ail_lock);
|
||||
@ -864,18 +864,40 @@ static void ail_drain(struct gfs2_sbd *sdp)
|
||||
gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail1_list);
|
||||
gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list);
|
||||
list_del(&tr->tr_list);
|
||||
kfree(tr);
|
||||
gfs2_trans_free(sdp, tr);
|
||||
}
|
||||
while (!list_empty(&sdp->sd_ail2_list)) {
|
||||
tr = list_first_entry(&sdp->sd_ail2_list, struct gfs2_trans,
|
||||
tr_list);
|
||||
gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list);
|
||||
list_del(&tr->tr_list);
|
||||
kfree(tr);
|
||||
gfs2_trans_free(sdp, tr);
|
||||
}
|
||||
spin_unlock(&sdp->sd_ail_lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* empty_ail1_list - try to start IO and empty the ail1 list
|
||||
* @sdp: Pointer to GFS2 superblock
*
* Loops calling gfs2_ail1_start() and gfs2_ail1_wait() until
* gfs2_ail1_empty() returns non-zero. If more than HZ * 600 jiffies have
* passed since entry, an error is logged, dump_ail_list() is called and the
* function returns without the list having been reported empty.
|
||||
*/
|
||||
static void empty_ail1_list(struct gfs2_sbd *sdp)
|
||||
{
|
||||
/* Entry time; the loop below is bounded relative to this. */
unsigned long start = jiffies;
|
||||
|
||||
for (;;) {
|
||||
/* Give up after HZ * 600 jiffies instead of looping indefinitely. */
if (time_after(jiffies, start + (HZ * 600))) {
|
||||
fs_err(sdp, "Error: In %s for 10 minutes! t=%d\n",
|
||||
__func__, current->journal_info ? 1 : 0);
|
||||
dump_ail_list(sdp);
|
||||
return;
|
||||
}
|
||||
gfs2_ail1_start(sdp);
|
||||
gfs2_ail1_wait(sdp);
|
||||
/* A non-zero result ends the loop. */
if (gfs2_ail1_empty(sdp, 0))
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* gfs2_log_flush - flush incore transaction(s)
|
||||
* @sdp: the filesystem
|
||||
@ -912,8 +934,6 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
|
||||
tr = sdp->sd_log_tr;
|
||||
if (tr) {
|
||||
sdp->sd_log_tr = NULL;
|
||||
INIT_LIST_HEAD(&tr->tr_ail1_list);
|
||||
INIT_LIST_HEAD(&tr->tr_ail2_list);
|
||||
tr->tr_first = sdp->sd_log_flush_head;
|
||||
if (unlikely (state == SFS_FROZEN))
|
||||
if (gfs2_assert_withdraw_delayed(sdp,
|
||||
@ -965,12 +985,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
|
||||
|
||||
if (!(flags & GFS2_LOG_HEAD_FLUSH_NORMAL)) {
|
||||
if (!sdp->sd_log_idle) {
|
||||
for (;;) {
|
||||
gfs2_ail1_start(sdp);
|
||||
gfs2_ail1_wait(sdp);
|
||||
if (gfs2_ail1_empty(sdp, 0))
|
||||
break;
|
||||
}
|
||||
empty_ail1_list(sdp);
|
||||
if (gfs2_withdrawn(sdp))
|
||||
goto out;
|
||||
atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
|
||||
@ -994,7 +1009,7 @@ out:
|
||||
trace_gfs2_log_flush(sdp, 0, flags);
|
||||
up_write(&sdp->sd_log_flush_lock);
|
||||
|
||||
kfree(tr);
|
||||
gfs2_trans_free(sdp, tr);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1003,8 +1018,10 @@ out:
|
||||
* @new: New transaction to be merged
|
||||
*/
|
||||
|
||||
static void gfs2_merge_trans(struct gfs2_trans *old, struct gfs2_trans *new)
|
||||
static void gfs2_merge_trans(struct gfs2_sbd *sdp, struct gfs2_trans *new)
|
||||
{
|
||||
struct gfs2_trans *old = sdp->sd_log_tr;
|
||||
|
||||
WARN_ON_ONCE(!test_bit(TR_ATTACHED, &old->tr_flags));
|
||||
|
||||
old->tr_num_buf_new += new->tr_num_buf_new;
|
||||
@ -1016,6 +1033,11 @@ static void gfs2_merge_trans(struct gfs2_trans *old, struct gfs2_trans *new)
|
||||
|
||||
list_splice_tail_init(&new->tr_databuf, &old->tr_databuf);
|
||||
list_splice_tail_init(&new->tr_buf, &old->tr_buf);
|
||||
|
||||
spin_lock(&sdp->sd_ail_lock);
|
||||
list_splice_tail_init(&new->tr_ail1_list, &old->tr_ail1_list);
|
||||
list_splice_tail_init(&new->tr_ail2_list, &old->tr_ail2_list);
|
||||
spin_unlock(&sdp->sd_ail_lock);
|
||||
}
|
||||
|
||||
static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
|
||||
@ -1027,7 +1049,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
|
||||
gfs2_log_lock(sdp);
|
||||
|
||||
if (sdp->sd_log_tr) {
|
||||
gfs2_merge_trans(sdp->sd_log_tr, tr);
|
||||
gfs2_merge_trans(sdp, tr);
|
||||
} else if (tr->tr_num_buf_new || tr->tr_num_databuf_new) {
|
||||
gfs2_assert_withdraw(sdp, test_bit(TR_ALLOCED, &tr->tr_flags));
|
||||
sdp->sd_log_tr = tr;
|
||||
|
@ -143,6 +143,12 @@ static int __init init_gfs2_fs(void)
|
||||
if (!gfs2_qadata_cachep)
|
||||
goto fail_cachep7;
|
||||
|
||||
gfs2_trans_cachep = kmem_cache_create("gfs2_trans",
|
||||
sizeof(struct gfs2_trans),
|
||||
0, 0, NULL);
|
||||
if (!gfs2_trans_cachep)
|
||||
goto fail_cachep8;
|
||||
|
||||
error = register_shrinker(&gfs2_qd_shrinker);
|
||||
if (error)
|
||||
goto fail_shrinker;
|
||||
@ -194,6 +200,8 @@ fail_fs2:
|
||||
fail_fs1:
|
||||
unregister_shrinker(&gfs2_qd_shrinker);
|
||||
fail_shrinker:
|
||||
kmem_cache_destroy(gfs2_trans_cachep);
|
||||
fail_cachep8:
|
||||
kmem_cache_destroy(gfs2_qadata_cachep);
|
||||
fail_cachep7:
|
||||
kmem_cache_destroy(gfs2_quotad_cachep);
|
||||
@ -236,6 +244,7 @@ static void __exit exit_gfs2_fs(void)
|
||||
rcu_barrier();
|
||||
|
||||
mempool_destroy(gfs2_page_pool);
|
||||
kmem_cache_destroy(gfs2_trans_cachep);
|
||||
kmem_cache_destroy(gfs2_qadata_cachep);
|
||||
kmem_cache_destroy(gfs2_quotad_cachep);
|
||||
kmem_cache_destroy(gfs2_rgrpd_cachep);
|
||||
|
@ -880,7 +880,7 @@ fail:
|
||||
}
|
||||
|
||||
static const match_table_t nolock_tokens = {
|
||||
{ Opt_jid, "jid=%d\n", },
|
||||
{ Opt_jid, "jid=%d", },
|
||||
{ Opt_err, NULL },
|
||||
};
|
||||
|
||||
|
@ -1835,7 +1835,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
|
||||
*/
|
||||
ip = gl->gl_object;
|
||||
|
||||
if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
|
||||
if (ip || !gfs2_queue_delete_work(gl, 0))
|
||||
gfs2_glock_put(gl);
|
||||
else
|
||||
found++;
|
||||
|
@ -626,7 +626,7 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
|
||||
}
|
||||
}
|
||||
|
||||
flush_workqueue(gfs2_delete_workqueue);
|
||||
gfs2_flush_delete_work(sdp);
|
||||
if (!log_write_allowed && current == sdp->sd_quotad_process)
|
||||
fs_warn(sdp, "The quotad daemon is withdrawing.\n");
|
||||
else if (sdp->sd_quotad_process)
|
||||
@ -1054,7 +1054,7 @@ static int gfs2_drop_inode(struct inode *inode)
|
||||
struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
|
||||
|
||||
gfs2_glock_hold(gl);
|
||||
if (queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
|
||||
if (!gfs2_queue_delete_work(gl, 0))
|
||||
gfs2_glock_queue_put(gl);
|
||||
return false;
|
||||
}
|
||||
@ -1258,6 +1258,55 @@ static void gfs2_glock_put_eventually(struct gfs2_glock *gl)
|
||||
gfs2_glock_put(gl);
|
||||
}
|
||||
|
||||
/*
 * gfs2_upgrade_iopen_glock - re-queue the inode's iopen holder as exclusive
 * @inode: The inode whose iopen holder (ip->i_iopen_gh) is re-requested
 *
 * Calls gfs2_glock_dq_wait() on the current holder, then requests
 * LM_ST_EXCLUSIVE: first with LM_FLAG_TRY_1CB and, only if that returns
 * GLR_TRYFAILED, a second time with GL_ASYNC followed by a wait of up to
 * 5 * HZ jiffies.
 *
 * Returns: true if the first request returned 0, or if HIF_HOLDER is set on
 * the holder after the wait; false otherwise. On the post-wait failure path
 * the holder is dequeued with gfs2_glock_dq() before returning.
 */
static bool gfs2_upgrade_iopen_glock(struct inode *inode)
|
||||
{
|
||||
struct gfs2_inode *ip = GFS2_I(inode);
|
||||
struct gfs2_sbd *sdp = GFS2_SB(inode);
|
||||
struct gfs2_holder *gh = &ip->i_iopen_gh;
|
||||
long timeout = 5 * HZ;
|
||||
int error;
|
||||
|
||||
/* Set GL_NOCACHE on the existing holder, then drop it and wait. */
gh->gh_flags |= GL_NOCACHE;
|
||||
gfs2_glock_dq_wait(gh);
|
||||
|
||||
/*
|
||||
* If there are no other lock holders, we'll get the lock immediately.
|
||||
* Otherwise, the other nodes holding the lock will be notified about
|
||||
* our locking request. If they don't have the inode open, they'll
|
||||
* evict the cached inode and release the lock. Otherwise, if they
|
||||
* poke the inode glock, we'll take this as an indication that they
|
||||
* still need the iopen glock and that they'll take care of deleting
|
||||
* the inode when they're done. As a last resort, if another node
|
||||
* keeps holding the iopen glock without showing any activity on the
|
||||
* inode glock, we'll eventually time out.
|
||||
*
|
||||
* Note that we're passing the LM_FLAG_TRY_1CB flag to the first
|
||||
* locking request as an optimization to notify lock holders as soon as
|
||||
* possible. Without that flag, they'd be notified implicitly by the
|
||||
* second locking request.
|
||||
*/
|
||||
|
||||
/* First attempt: any result other than GLR_TRYFAILED is final. */
gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, gh);
|
||||
error = gfs2_glock_nq(gh);
|
||||
if (error != GLR_TRYFAILED)
|
||||
return !error;
|
||||
|
||||
/* Second attempt: queue with GL_ASYNC and wait below. */
gfs2_holder_reinit(LM_ST_EXCLUSIVE, GL_ASYNC | GL_NOCACHE, gh);
|
||||
error = gfs2_glock_nq(gh);
|
||||
if (error)
|
||||
return false;
|
||||
|
||||
/*
 * Wait until HIF_WAIT is clear on our holder or GLF_DEMOTE is set on the
 * inode glock, bounded by the timeout. The value stored back into
 * timeout is not examined afterwards; the outcome is decided solely by
 * the HIF_HOLDER test that follows.
 */
timeout = wait_event_interruptible_timeout(sdp->sd_async_glock_wait,
|
||||
!test_bit(HIF_WAIT, &gh->gh_iflags) ||
|
||||
test_bit(GLF_DEMOTE, &ip->i_gl->gl_flags),
|
||||
timeout);
|
||||
/* HIF_HOLDER not set: dequeue the request and report failure. */
if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) {
|
||||
gfs2_glock_dq(gh);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* gfs2_evict_inode - Remove an inode from cache
|
||||
* @inode: The inode to evict
|
||||
@ -1299,9 +1348,12 @@ static void gfs2_evict_inode(struct inode *inode)
|
||||
if (test_bit(GIF_ALLOC_FAILED, &ip->i_flags)) {
|
||||
BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));
|
||||
gfs2_holder_mark_uninitialized(&gh);
|
||||
goto alloc_failed;
|
||||
goto out_delete;
|
||||
}
|
||||
|
||||
if (test_bit(GIF_DEFERRED_DELETE, &ip->i_flags))
|
||||
goto out;
|
||||
|
||||
/* Deletes should never happen under memory pressure anymore. */
|
||||
if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
|
||||
goto out;
|
||||
@ -1315,6 +1367,8 @@ static void gfs2_evict_inode(struct inode *inode)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (gfs2_inode_already_deleted(ip->i_gl, ip->i_no_formal_ino))
|
||||
goto out_truncate;
|
||||
error = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED);
|
||||
if (error)
|
||||
goto out_truncate;
|
||||
@ -1331,16 +1385,13 @@ static void gfs2_evict_inode(struct inode *inode)
|
||||
if (inode->i_nlink)
|
||||
goto out_truncate;
|
||||
|
||||
alloc_failed:
|
||||
out_delete:
|
||||
if (gfs2_holder_initialized(&ip->i_iopen_gh) &&
|
||||
test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
|
||||
ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
|
||||
gfs2_glock_dq_wait(&ip->i_iopen_gh);
|
||||
gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE,
|
||||
&ip->i_iopen_gh);
|
||||
error = gfs2_glock_nq(&ip->i_iopen_gh);
|
||||
if (error)
|
||||
if (!gfs2_upgrade_iopen_glock(inode)) {
|
||||
gfs2_holder_uninit(&ip->i_iopen_gh);
|
||||
goto out_truncate;
|
||||
}
|
||||
}
|
||||
|
||||
if (S_ISDIR(inode->i_mode) &&
|
||||
@ -1368,6 +1419,7 @@ alloc_failed:
|
||||
that subsequent inode creates don't see an old gl_object. */
|
||||
glock_clear_object(ip->i_gl, ip);
|
||||
error = gfs2_dinode_dealloc(ip);
|
||||
gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino);
|
||||
goto out_unlock;
|
||||
|
||||
out_truncate:
|
||||
|
@ -37,7 +37,7 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
|
||||
if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
|
||||
return -EROFS;
|
||||
|
||||
tr = kzalloc(sizeof(struct gfs2_trans), GFP_NOFS);
|
||||
tr = kmem_cache_zalloc(gfs2_trans_cachep, GFP_NOFS);
|
||||
if (!tr)
|
||||
return -ENOMEM;
|
||||
|
||||
@ -52,6 +52,8 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
|
||||
tr->tr_reserved += gfs2_struct2blk(sdp, revokes);
|
||||
INIT_LIST_HEAD(&tr->tr_databuf);
|
||||
INIT_LIST_HEAD(&tr->tr_buf);
|
||||
INIT_LIST_HEAD(&tr->tr_ail1_list);
|
||||
INIT_LIST_HEAD(&tr->tr_ail2_list);
|
||||
|
||||
sb_start_intwrite(sdp->sd_vfs);
|
||||
|
||||
@ -65,7 +67,7 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
|
||||
|
||||
fail:
|
||||
sb_end_intwrite(sdp->sd_vfs);
|
||||
kfree(tr);
|
||||
kmem_cache_free(gfs2_trans_cachep, tr);
|
||||
|
||||
return error;
|
||||
}
|
||||
@ -93,7 +95,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
|
||||
if (!test_bit(TR_TOUCHED, &tr->tr_flags)) {
|
||||
gfs2_log_release(sdp, tr->tr_reserved);
|
||||
if (alloced) {
|
||||
kfree(tr);
|
||||
gfs2_trans_free(sdp, tr);
|
||||
sb_end_intwrite(sdp->sd_vfs);
|
||||
}
|
||||
return;
|
||||
@ -109,7 +111,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
|
||||
|
||||
gfs2_log_commit(sdp, tr);
|
||||
if (alloced && !test_bit(TR_ATTACHED, &tr->tr_flags))
|
||||
kfree(tr);
|
||||
gfs2_trans_free(sdp, tr);
|
||||
up_read(&sdp->sd_log_flush_lock);
|
||||
|
||||
if (sdp->sd_vfs->s_flags & SB_SYNCHRONOUS)
|
||||
@ -276,3 +278,14 @@ void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len)
|
||||
gfs2_log_unlock(sdp);
|
||||
}
|
||||
|
||||
/**
 * gfs2_trans_free - return a transaction to gfs2_trans_cachep
 * @sdp: The superblock, passed to gfs2_assert_warn()
 * @tr: The transaction to free; NULL is accepted and ignored
 *
 * Issues a gfs2_assert_warn() for each of tr_ail1_list, tr_ail2_list,
 * tr_databuf and tr_buf that is not empty, then frees @tr with
 * kmem_cache_free() regardless of the outcome of those checks.
 */
void gfs2_trans_free(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
|
||||
{
|
||||
/* Nothing to do for a NULL transaction, so callers need not check. */
if (tr == NULL)
|
||||
return;
|
||||
|
||||
/* All four lists are expected to be empty by the time tr is freed. */
gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list));
|
||||
gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list));
|
||||
gfs2_assert_warn(sdp, list_empty(&tr->tr_databuf));
|
||||
gfs2_assert_warn(sdp, list_empty(&tr->tr_buf));
|
||||
kmem_cache_free(gfs2_trans_cachep, tr);
|
||||
}
|
||||
|
@ -42,5 +42,6 @@ extern void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh);
|
||||
extern void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh);
|
||||
extern void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
|
||||
extern void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len);
|
||||
extern void gfs2_trans_free(struct gfs2_sbd *sdp, struct gfs2_trans *tr);
|
||||
|
||||
#endif /* __TRANS_DOT_H__ */
|
||||
|
@ -32,6 +32,7 @@ struct kmem_cache *gfs2_bufdata_cachep __read_mostly;
|
||||
struct kmem_cache *gfs2_rgrpd_cachep __read_mostly;
|
||||
struct kmem_cache *gfs2_quotad_cachep __read_mostly;
|
||||
struct kmem_cache *gfs2_qadata_cachep __read_mostly;
|
||||
struct kmem_cache *gfs2_trans_cachep __read_mostly;
|
||||
mempool_t *gfs2_page_pool __read_mostly;
|
||||
|
||||
void gfs2_assert_i(struct gfs2_sbd *sdp)
|
||||
|
@ -172,6 +172,7 @@ extern struct kmem_cache *gfs2_bufdata_cachep;
|
||||
extern struct kmem_cache *gfs2_rgrpd_cachep;
|
||||
extern struct kmem_cache *gfs2_quotad_cachep;
|
||||
extern struct kmem_cache *gfs2_qadata_cachep;
|
||||
extern struct kmem_cache *gfs2_trans_cachep;
|
||||
extern mempool_t *gfs2_page_pool;
|
||||
extern struct workqueue_struct *gfs2_control_wq;
|
||||
|
||||
|
@ -171,6 +171,12 @@ struct gfs2_rindex {
|
||||
#define GFS2_RGF_NOALLOC 0x00000008
|
||||
#define GFS2_RGF_TRIMMED 0x00000010
|
||||
|
||||
/*
 * Lock value block layout for inodes; all fields are big-endian.
 * NOTE(review): presumably carried in the LVB of the inode glock and used to
 * remember the generation of a deleted inode (see the
 * gfs2_inode_already_deleted() / gfs2_inode_remember_delete() call sites) —
 * confirm against glock.c.
 */
struct gfs2_inode_lvb {
|
||||
/* Magic number identifying the LVB contents; expected value not shown here. */
__be32 ri_magic;
|
||||
/* Padding; by position it keeps the following 64-bit field 8-byte aligned. */
__be32 __pad;
|
||||
/* Inode generation number recorded on delete — TODO confirm exact semantics. */
__be64 ri_generation_deleted;
|
||||
};
|
||||
|
||||
struct gfs2_rgrp_lvb {
|
||||
__be32 rl_magic;
|
||||
__be32 rl_flags;
|
||||
|
Loading…
Reference in New Issue
Block a user