Index: src/share/man/man9/vnode.9 =================================================================== RCS file: /home/chs/netbsd/cvs/src/share/man/man9/vnode.9,v retrieving revision 1.80 diff -u -p -r1.80 vnode.9 --- src/share/man/man9/vnode.9 28 May 2017 16:39:41 -0000 1.80 +++ src/share/man/man9/vnode.9 14 Jun 2017 16:51:43 -0000 @@ -27,7 +27,7 @@ .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. .\" -.Dd May 28, 2017 +.Dd June 14, 2017 .Dt VNODE 9 .Os .Sh NAME @@ -35,10 +35,12 @@ .Nm vref , .Nm vrele , .Nm vrele_async , +.Nm vrele_async_cleaner , .Nm vput , .Nm vhold , .Nm holdrele , .Nm vcache_get , +.Nm vcache_get_cleaner , .Nm vcache_new , .Nm vcache_rekey_enter , .Nm vcache_rekey_exit , @@ -68,6 +70,8 @@ .Ft void .Fn vrele_async "struct vnode *vp" .Ft void +.Fn vrele_async_cleaner "struct vnode *vp" +.Ft void .Fn vput "struct vnode *vp" .Ft void .Fn vhold "struct vnode *vp" @@ -76,6 +80,8 @@ .Ft int .Fn vcache_get "struct mount *mp" "const void *key" "size_t key_len" "struct vnode **vpp" .Ft int +.Fn vcache_get_cleaner "struct mount *mp" "const void *key" "size_t key_len" "struct vnode **vpp" +.Ft int .Fn vcache_new "struct mount *mp" "struct vnode *dvp" "struct vattr *vap" "kauth_cred_t cred" "struct vnode **vpp" .Ft int .Fn vcache_rekey_enter "struct mount *mp" "struct vnode *vp" "const void *old_key" "size_t old_key_len" "const void *new_key" "size_t new_key_len" @@ -513,6 +519,11 @@ are zero, the vnode is cached. .It Fn vrele_async "vp" Will asynchronously release the vnode in different context than the caller, sometime after the call. +.It Fn vrele_async_cleaner "vp" +Equivalent to +.Fn vrele_async +for references which were taken with +.Fn vcache_get_cleaner . .It Fn vput "vp" Legacy convenience routine for unlocking and releasing .Fa vp . 
@@ -557,6 +568,24 @@ uniquely identify the file in the file s .Pp If a vnode is successfully retrieved zero is returned, otherwise an appropriate error code is returned. +.It Fn vcache_get_cleaner "mp" "key" "key_len" "vpp" +Equivalent to +.Fn vcache_get , +but this version must be used if the call is made in the context of cleaning the vnode for reuse, which is within the file system's implementation of +.Fn VOP_FSYNC +when +.Em FSYNC_RECLAIM +is specified, or +.Fn VOP_PUTPAGES +when +.Em PGO_RECLAIM +is specified. +A reference taken with this interface must be released with +.Fn vrele_async_cleaner , +and the reference must be released before another reference can be taken on the same vnode with this interface. +File systems using this interface must also set +.Em IMNT_CLEANING +in mnt_iflag. .It Fn vcache_new "mp" "dvp" "vap" "cred" "vpp" Allocate a new vnode with a new file. The new vnode is returned referenced in the address specified by Index: src/sys/sys/fstypes.h =================================================================== RCS file: /home/chs/netbsd/cvs/src/sys/sys/fstypes.h,v retrieving revision 1.35 diff -u -p -r1.35 fstypes.h --- src/sys/sys/fstypes.h 1 Mar 2017 10:44:47 -0000 1.35 +++ src/sys/sys/fstypes.h 14 Jun 2017 16:11:02 -0000 @@ -217,6 +217,7 @@ typedef struct fhandle fhandle_t; #define IMNT_UNMOUNT 0x00000002 /* unmount in progress */ #define IMNT_WANTRDWR 0x00000004 /* upgrade to read/write requested */ #define IMNT_WANTRDONLY 0x00000008 /* upgrade to readonly requested */ +#define IMNT_CLEANING 0x00000010 /* fs takes a new ref while cleaning */ #define IMNT_DTYPE 0x00000040 /* returns d_type fields */ #define IMNT_HAS_TRANS 0x00000080 /* supports transactions */ #define IMNT_MPSAFE 0x00000100 /* file system code MP safe */ @@ -271,6 +272,7 @@ typedef struct fhandle fhandle_t; "\11IMNT_MPSAFE" \ "\10IMNT_HAS_TRANS" \ "\07IMNT_DTYPE" \ + "\05IMNT_CLEANING" \ "\04IMNT_WANTRDONLY" \ "\03IMNT_WANTRDWR" \ "\02IMNT_UNMOUNT" \ Index: 
src/sys/sys/vnode.h =================================================================== RCS file: /home/chs/netbsd/cvs/src/sys/sys/vnode.h,v retrieving revision 1.278 diff -u -p -r1.278 vnode.h --- src/sys/sys/vnode.h 4 Jun 2017 08:02:26 -0000 1.278 +++ src/sys/sys/vnode.h 14 Jun 2017 16:02:25 -0000 @@ -186,6 +186,7 @@ typedef struct vnode vnode_t; /* * The second set are locked by vp->v_interlock. */ +#define VI_CLEANERREF 0x00000001 /* cleaner has a ref */ #define VI_TEXT 0x00000100 /* vnode is a pure text prototype */ #define VI_EXECMAP 0x00000200 /* might have PROT_EXEC mappings */ #define VI_WRMAP 0x00000400 /* might have PROT_WRITE u. mappings */ @@ -511,6 +512,7 @@ void vput(struct vnode *); bool vrecycle(struct vnode *); void vrele(struct vnode *); void vrele_async(struct vnode *); +void vrele_async_cleaner(struct vnode *); void vrele_flush(struct mount *); int vtruncbuf(struct vnode *, daddr_t, bool, int); void vwakeup(struct buf *); @@ -518,6 +520,7 @@ int vdead_check(struct vnode *, int); void vrevoke(struct vnode *); void vremfree(struct vnode *); int vcache_get(struct mount *, const void *, size_t, struct vnode **); +int vcache_get_cleaner(struct mount *, const void *, size_t, struct vnode **); int vcache_new(struct mount *, struct vnode *, struct vattr *, kauth_cred_t, struct vnode **); int vcache_rekey_enter(struct mount *, struct vnode *, Index: src/sys/sys/vnode_impl.h =================================================================== RCS file: /home/chs/netbsd/cvs/src/sys/sys/vnode_impl.h,v retrieving revision 1.15 diff -u -p -r1.15 vnode_impl.h --- src/sys/sys/vnode_impl.h 4 Jun 2017 08:02:26 -0000 1.15 +++ src/sys/sys/vnode_impl.h 14 Jun 2017 16:44:42 -0000 @@ -42,6 +42,7 @@ enum vnode_state { VS_LOADING, /* Intermediate, initialising the fs node. */ VS_LOADED, /* Stable, valid fs node attached. */ VS_BLOCKED, /* Intermediate, active, no new references allowed. */ + VS_CLEANING, /* Intermediate, cleaning the vnode. 
*/ VS_RECLAIMING, /* Intermediate, detaching the fs node. */ VS_RECLAIMED /* Stable, no fs node attached. */ }; Index: src/sys/kern/vfs_subr.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/sys/kern/vfs_subr.c,v retrieving revision 1.468 diff -u -p -r1.468 vfs_subr.c --- src/sys/kern/vfs_subr.c 4 Jun 2017 07:58:29 -0000 1.468 +++ src/sys/kern/vfs_subr.c 7 Jun 2017 19:02:56 -0000 @@ -1065,6 +1065,8 @@ vstate_name(enum vnode_state state) return "LOADED"; case VS_BLOCKED: return "BLOCKED"; + case VS_CLEANING: + return "CLEANING"; case VS_RECLAIMING: return "RECLAIMING"; case VS_RECLAIMED: Index: src/sys/kern/vfs_vnode.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/sys/kern/vfs_vnode.c,v retrieving revision 1.96 diff -u -p -r1.96 vfs_vnode.c --- src/sys/kern/vfs_vnode.c 4 Jun 2017 08:05:42 -0000 1.96 +++ src/sys/kern/vfs_vnode.c 14 Jun 2017 16:53:38 -0000 @@ -103,6 +103,7 @@ * - LOADED Vnode has associated underlying file system and is * ready to use. * - BLOCKED Vnode is active but cannot get new references. + * - CLEANING Vnode is being cleaned in preparation for reclaiming. * - RECLAIMING Vnode is disassociating from the underlying file * system. * - RECLAIMED Vnode has disassociated from underlying file system @@ -112,7 +113,11 @@ * LOADING -> LOADED * Vnode has been initialised in vcache_get() or * vcache_new() and is ready to use. - * LOADED -> RECLAIMING + * LOADED -> CLEANING + * Vnode starts being cleaned before being disassociated + * from the underlying file system in vcache_reclaim(). + * LOADED -> RECLAIMING (if the fs does not need separate CLEANING) + * CLEANING -> RECLAIMING * Vnode starts disassociation from underlying file * system in vcache_reclaim(). 
* RECLAIMING -> RECLAIMED @@ -217,6 +222,7 @@ static void vcache_free(vnode_impl_t *) static void vcache_init(void); static void vcache_reinit(void); static void vcache_reclaim(vnode_t *); +static bool vcache_rele_cleaner(vnode_t *); static void vrelel(vnode_t *, int); static void vdrain_thread(void *); static void vnpanic(vnode_t *, const char *, ...) @@ -229,6 +235,27 @@ extern struct vfsops dead_vfsops; /* Vnode state operations and diagnostics. */ +static void +vstate_wait_stable(vnode_t *vp) +{ + vnode_impl_t *vip = VNODE_TO_VIMPL(vp); + + while (vip->vi_state != VS_LOADED && vip->vi_state != VS_RECLAIMED) + cv_wait(&vp->v_cv, vp->v_interlock); +} + +static void +vstate_change(vnode_t *vp, enum vnode_state from, enum vnode_state to) +{ + vnode_impl_t *vip = VNODE_TO_VIMPL(vp); + + vip->vi_state = to; + if (from == VS_LOADING) + cv_broadcast(&vcache_cv); + if (to == VS_LOADED || to == VS_RECLAIMED) + cv_broadcast(&vp->v_cv); +} + #if defined(DIAGNOSTIC) #define VSTATE_VALID(state) \ @@ -280,8 +307,7 @@ vstate_assert_wait_stable(vnode_t *vp, c vnpanic(vp, "state is %s at %s:%d", vstate_name(vip->vi_state), func, line); - while (vip->vi_state != VS_LOADED && vip->vi_state != VS_RECLAIMED) - cv_wait(&vp->v_cv, vp->v_interlock); + vstate_wait_stable(vp); if (! 
VSTATE_VALID(vip->vi_state)) vnpanic(vp, "state is %s at %s:%d", @@ -312,11 +338,7 @@ vstate_assert_change(vnode_t *vp, enum v vstate_name(from), vstate_name(to), vp->v_usecount, func, line); - vip->vi_state = to; - if (from == VS_LOADING) - cv_broadcast(&vcache_cv); - if (to == VS_LOADED || to == VS_RECLAIMED) - cv_broadcast(&vp->v_cv); + vstate_change(vp, from, to); } #else /* defined(DIAGNOSTIC) */ @@ -332,28 +354,6 @@ _vstate_assert(vnode_t *vp, enum vnode_s { } - -static void -vstate_wait_stable(vnode_t *vp) -{ - vnode_impl_t *vip = VNODE_TO_VIMPL(vp); - - while (vip->vi_state != VS_LOADED && vip->vi_state != VS_RECLAIMED) - cv_wait(&vp->v_cv, vp->v_interlock); -} - -static void -vstate_change(vnode_t *vp, enum vnode_state from, enum vnode_state to) -{ - vnode_impl_t *vip = VNODE_TO_VIMPL(vp); - - vip->vi_state = to; - if (from == VS_LOADING) - cv_broadcast(&vcache_cv); - if (to == VS_LOADED || to == VS_RECLAIMED) - cv_broadcast(&vp->v_cv); -} - #endif /* defined(DIAGNOSTIC) */ void @@ -840,6 +840,29 @@ vrele_async(vnode_t *vp) } /* + * Asynchronous vnode release, vnode is released in different context. + */ +void +vrele_async_cleaner(vnode_t *vp) +{ + + mutex_enter(vp->v_interlock); + + /* + * If this vnode has a cleaner ref, release that. + */ + if (vcache_rele_cleaner(vp)) { + mutex_exit(vp->v_interlock); + return; + } + + /* + * Otherwise do a normal async rele. + */ + vrelel(vp, VRELEL_ASYNC_RELE); +} + +/* * Vnode reference, where a reference is already held by some other * object (for example, a file structure). 
*/ @@ -1241,12 +1264,38 @@ vcache_vget(vnode_t *vp) return 0; } +static void +vcache_ref_cleaner(vnode_t *vp) +{ + + KASSERT(mutex_owned(vp->v_interlock)); + KASSERT((vp->v_mount->mnt_iflag & IMNT_CLEANING) != 0); + KASSERTMSG((vp->v_iflag & VI_CLEANERREF) == 0, "vp %p", vp); + vp->v_iflag |= VI_CLEANERREF; +} + + +static bool +vcache_rele_cleaner(vnode_t *vp) +{ + + KASSERT(mutex_owned(vp->v_interlock)); + KASSERT((vp->v_mount->mnt_iflag & IMNT_CLEANING) != 0); + if ((vp->v_iflag & VI_CLEANERREF) != 0) { + vp->v_iflag &= ~VI_CLEANERREF; + return true; + } + return false; +} + /* * Get a vnode / fs node pair by key and return it referenced through vpp. + * If requested and the vnode is being cleaned, take a cleaner reference + * rather than waiting for the reclamation process to finish. */ -int -vcache_get(struct mount *mp, const void *key, size_t key_len, - struct vnode **vpp) +static int +do_vcache_get(struct mount *mp, const void *key, size_t key_len, + struct vnode **vpp, bool cleaner) { int error; uint32_t hash; @@ -1269,6 +1318,7 @@ again: /* If found, take a reference or retry. */ if (__predict_true(vip != NULL)) { + /* * If the vnode is loading we cannot take the v_interlock * here as it might change during load (see uvm_obj_setlock()). @@ -1284,6 +1334,23 @@ again: } vp = VIMPL_TO_VNODE(vip); mutex_enter(vp->v_interlock); + + /* + * Take a cleaner reference if the vnode is being cleaned + * and a cleaner reference was requested. + */ + if (__predict_false(vip->vi_state == VS_CLEANING && + cleaner)) { + vcache_ref_cleaner(vp); + mutex_exit(vp->v_interlock); + mutex_exit(&vcache_lock); + *vpp = VIMPL_TO_VNODE(vip); + return 0; + } + + /* + * Otherwise take a normal reference. + */ mutex_exit(&vcache_lock); error = vcache_vget(vp); if (error == ENOENT) @@ -1350,6 +1417,25 @@ again: } /* + * Get a vnode / fs node pair by key and return it referenced through vpp. 
+ */ +int +vcache_get(struct mount *mp, const void *key, size_t key_len, + struct vnode **vpp) +{ + + return do_vcache_get(mp, key, key_len, vpp, false); +} + +int +vcache_get_cleaner(struct mount *mp, const void *key, size_t key_len, + struct vnode **vpp) +{ + + return do_vcache_get(mp, key, key_len, vpp, true); +} + +/* * Create a new vnode / fs node pair and return it referenced through vpp. */ int @@ -1528,7 +1614,7 @@ vcache_reclaim(vnode_t *vp) uint32_t hash; uint8_t temp_buf[64], *temp_key; size_t temp_key_len; - bool recycle, active; + bool recycle, active, usecleaning; int error; KASSERT((vp->v_vflag & VV_LOCKSWORK) == 0 || @@ -1538,11 +1624,14 @@ vcache_reclaim(vnode_t *vp) active = (vp->v_usecount > 1); temp_key_len = vip->vi_key.vk_key_len; + usecleaning = (vp->v_mount->mnt_iflag & IMNT_CLEANING) != 0; + /* * Prevent the vnode from being recycled or brought into use - * while we clean it out. + * while we clean it out. Use VS_CLEANING if the fs needs it, + * otherwise go straight to VS_RECLAIMING. */ - VSTATE_CHANGE(vp, VS_LOADED, VS_RECLAIMING); + VSTATE_CHANGE(vp, VS_LOADED, usecleaning ? VS_CLEANING : VS_RECLAIMING); if (vp->v_iflag & VI_EXECMAP) { atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages); atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages); @@ -1581,6 +1670,17 @@ vcache_reclaim(vnode_t *vp) } /* + * Cleaning is done, move on to reclaiming. + * Change state to VS_RECLAIMING if we weren't already there. + */ + if (usecleaning) { + KASSERT((vp->v_iflag & VI_CLEANERREF) == 0); + mutex_enter(vp->v_interlock); + VSTATE_CHANGE(vp, VS_CLEANING, VS_RECLAIMING); + mutex_exit(vp->v_interlock); + } + + /* * Disassociate the underlying file system from the vnode. * VOP_INACTIVE leaves the vnode locked; VOP_RECLAIM unlocks * the vnode, and may destroy the vnode so that VOP_UNLOCK