<!-- received="Tue Jul 18 20:02:20 2000 EET DST" -->
<!-- sent="Tue, 18 Jul 2000 18:00:37 +0100 (BST)" -->
<!-- name="Tigran Aivazian" -->
<!-- email="tigran@veritas.com" -->
<!-- subject="[patch-2.4.0-test5-pre1] nullfs and forced umount" -->
<!-- id="" -->
<!-- inreplyto="" -->
<title>Linux-kernel mailing list archive 2000-29,: [patch-2.4.0-test5-pre1] nullfs and forced umount</title>
<body bgcolor="#FFFFFF"><font face="Arial,Helvetica">
<h1>[patch-2.4.0-test5-pre1] nullfs and forced umount</h1>
<b>Tigran Aivazian</b> (<a href="mailto:tigran@veritas.com"><i>tigran@veritas.com</i></a>)<br>
<i>Tue, 18 Jul 2000 18:00:37 +0100 (BST)</i>
<p>
<ul>
<li> <b>Messages sorted by:</b> <a href="date.html#331">[ date ]</a><a href="index.html#331">[ thread ]</a><a href="subject.html#331">[ subject ]</a><a href="author.html#331">[ author ]</a>
<!-- next="start" -->
<li> <b>Next message:</b> <a href="0332.html">I Lee Hetherington: "Re: 2.4.0-test[34]: nasty make race with CONFIG_MODVERSIONS=y"</a>
<li> <b>Previous message:</b> <a href="0330.html">Andre Hedrick: "driver for SCSI card"</a>
<!-- nextthread="start" -->
<li> <b>Next in thread:</b> <a href="0336.html">Manfred Spraul: "Re: [patch-2.4.0-test5-pre1] nullfs and forced umount"</a>
<li> <b>Reply:</b> <a href="0336.html">Manfred Spraul: "Re: [patch-2.4.0-test5-pre1] nullfs and forced umount"</a>
<li> <b>Reply:</b> <a href="0358.html">Manfred Spraul: "Re: [patch-2.4.0-test5-pre1] nullfs and forced umount"</a>
<!-- reply="end" -->
</ul>
<hr>
<!-- body="start" -->
Hi guys,<br>
<p>
Attached is a small patch implementing the nullfs filesystem in order to<br>
support generic forced umount. Please test it and send me comments, fixes,<br>
etc.<br>
<p>
There are known problems with the patch:<br>
<p>
a) access to tsk-&gt;files-&gt;fd[fd] inside disable_fd() is not SMP safe. I<br>
will probably need to take tsk-&gt;files_lock. Still thinking about it.<br>
<p>
b) the mnt_count accounting works for 1-1 sb&lt;-&gt;mnt case but seems to break<br>
randomly when there are multiple mounted instances of a filesystem and<br>
some of them are forcibly umounted.<br>
<p>
Of course I will not send this to Linus until these problems (and anything<br>
else you find) are fixed, but it is better to release early, so I hope to<br>
have your feedback.<br>
<p>
Regards,<br>
Tigran<br>
<p>
diff -urN -X dontdiff linux/fs/Makefile nullfs/fs/Makefile<br>
--- linux/fs/Makefile	Tue Jul 11 19:26:50 2000<br>
+++ nullfs/fs/Makefile	Fri Jul 14 23:24:55 2000<br>
@@ -19,9 +19,9 @@<br>
 ALL_SUB_DIRS = coda minix ext2 fat msdos vfat proc isofs nfs umsdos ntfs \<br>
 		hpfs sysv smbfs ncpfs ufs efs affs romfs autofs hfs lockd \<br>
 		nfsd nls devpts devfs adfs partitions qnx4 udf bfs cramfs \<br>
-		openpromfs autofs4 ramfs jffs<br>
+		openpromfs autofs4 ramfs jffs nullfs<br>
 <br>
-SUB_DIRS :=<br>
+SUB_DIRS := nullfs<br>
 <br>
 ifeq ($(CONFIG_QUOTA),y)<br>
 O_OBJS += dquot.o<br>
diff -urN -X dontdiff linux/fs/bad_inode.c nullfs/fs/bad_inode.c<br>
--- linux/fs/bad_inode.c	Thu Apr 27 09:01:30 2000<br>
+++ nullfs/fs/bad_inode.c	Fri Jul 14 23:25:12 2000<br>
@@ -29,7 +29,7 @@<br>
 <br>
 #define EIO_ERROR ((void *) (return_EIO))<br>
 <br>
-static struct file_operations bad_file_ops =<br>
+struct file_operations bad_file_ops =<br>
 {<br>
 	llseek:		EIO_ERROR,<br>
 	read:		EIO_ERROR,<br>
diff -urN -X dontdiff linux/fs/nullfs/Makefile nullfs/fs/nullfs/Makefile<br>
--- linux/fs/nullfs/Makefile	Thu Jan  1 01:00:00 1970<br>
+++ nullfs/fs/nullfs/Makefile	Fri Jul 14 23:25:39 2000<br>
@@ -0,0 +1,9 @@<br>
+#<br>
+# Makefile for NULLFS filesystem.<br>
+#<br>
+<br>
+O_TARGET := nullfs.o<br>
+O_OBJS   := inode.o<br>
+M_OBJS   := $(O_TARGET)<br>
+<br>
+include $(TOPDIR)/Rules.make<br>
diff -urN -X dontdiff linux/fs/nullfs/inode.c nullfs/fs/nullfs/inode.c<br>
--- linux/fs/nullfs/inode.c	Thu Jan  1 01:00:00 1970<br>
+++ nullfs/fs/nullfs/inode.c	Tue Jul 18 17:28:04 2000<br>
@@ -0,0 +1,307 @@<br>
+/*<br>
+ *  nullfs - the Null Filesystem<br>
+ *<br>
+ *  Author - Tigran Aivazian &lt;<a href="mailto:tigran@veritas.com">tigran@veritas.com</a>&gt;<br>
+ *<br>
+ *  This file is released under the GPL.<br>
+ *<br>
+ *  The nullfs filesystem is used by forced umount ('umount -f' command)<br>
+ *  to receive the inodes that keep the filesystem being umounted busy.<br>
+ */<br>
+<br>
+#include &lt;linux/module.h&gt;<br>
+#include &lt;linux/fs.h&gt;<br>
+#include &lt;linux/mm.h&gt;<br>
+#include &lt;linux/init.h&gt;<br>
+#include &lt;linux/smp_lock.h&gt;<br>
+<br>
+#define NULLFS_MAGIC	0xABCD0001<br>
+<br>
+static struct super_block *nullfs_read_super(struct super_block *,void *,int);<br>
+<br>
+static DECLARE_FSTYPE(nullfs_fs_type, "nullfs", <br>
+			nullfs_read_super, FS_NOMOUNT|FS_SINGLE);<br>
+<br>
+static struct vfsmount *nullfs_mnt = NULL;<br>
+static struct super_block *nullfs_sb = NULL; /* nullfs_mnt-&gt;mnt_sb */<br>
+<br>
+/* Kernel module interface */<br>
+static int __init init_nullfs_fs(void)<br>
+{<br>
+	int err = register_filesystem(&amp;nullfs_fs_type);<br>
+<br>
+	if (!err) {<br>
+		nullfs_mnt = kern_mount(&amp;nullfs_fs_type);<br>
+		if (IS_ERR(nullfs_mnt)) {<br>
+			err = PTR_ERR(nullfs_mnt);<br>
+			unregister_filesystem(&amp;nullfs_fs_type);<br>
+		} else {<br>
+			nullfs_sb = nullfs_mnt-&gt;mnt_sb;<br>
+			err = 0;<br>
+		}<br>
+	}<br>
+	return err;<br>
+}<br>
+<br>
+static void __exit exit_nullfs_fs(void)<br>
+{<br>
+	unregister_filesystem(&amp;nullfs_fs_type);<br>
+	kern_umount(nullfs_mnt);<br>
+}<br>
+<br>
+module_init(init_nullfs_fs);<br>
+module_exit(exit_nullfs_fs);<br>
+<br>
+/* internal helper function */<br>
+static struct inode *nullfs_get_inode(struct super_block *sb, int mode)<br>
+{<br>
+	struct inode *inode = get_empty_inode();<br>
+<br>
+	if (inode) {<br>
+		make_bad_inode(inode);<br>
+		inode-&gt;i_sb = sb;<br>
+		inode-&gt;i_dev = sb-&gt;s_dev;<br>
+		inode-&gt;i_mode = mode;<br>
+		inode-&gt;i_nlink = 1;<br>
+		inode-&gt;i_size = 0;<br>
+		inode-&gt;i_blocks = 0;<br>
+	}<br>
+	return inode;<br>
+}<br>
+<br>
+/* VFS -&gt;statfs() method */<br>
+static int nullfs_statfs(struct super_block *sb, struct statfs *buf)<br>
+{<br>
+	buf-&gt;f_type = NULLFS_MAGIC;<br>
+	buf-&gt;f_bsize = BLOCK_SIZE;<br>
+	buf-&gt;f_namelen = 0;<br>
+	return 0;<br>
+}<br>
+<br>
+/* Note that we don't need -&gt;read_inode() here */<br>
+static struct super_operations nullfs_ops = {<br>
+	statfs:		nullfs_statfs,<br>
+};<br>
+<br>
+/* VFS -&gt;read_super() method */<br>
+static struct super_block *nullfs_read_super(struct super_block * sb, <br>
+		void * data, int silent)<br>
+{<br>
+	struct inode * root = nullfs_get_inode(sb, S_IFDIR|S_IRUSR|S_IWUSR);<br>
+<br>
+	if (!root)<br>
+		return NULL;<br>
+	sb-&gt;s_blocksize = 1024;<br>
+	sb-&gt;s_blocksize_bits = 10;<br>
+	sb-&gt;s_magic = NULLFS_MAGIC;<br>
+	sb-&gt;s_op = &amp;nullfs_ops;<br>
+	sb-&gt;s_root = d_alloc(NULL, &amp;(const struct qstr){ "null:", 5, 0});<br>
+	if (!sb-&gt;s_root) {<br>
+		iput(root);<br>
+		return NULL;<br>
+	}<br>
+	sb-&gt;s_root-&gt;d_sb = sb;<br>
+	sb-&gt;s_root-&gt;d_parent = sb-&gt;s_root;<br>
+	d_instantiate(sb-&gt;s_root, root);<br>
+	return sb;<br>
+}<br>
+<br>
+/* Moves the inode into nullfs and marks it bad; called on every open file. <br>
+ * The 'root' argument is passed so that we don't down root-&gt;i_sem twice. <br>
+ */<br>
+static void disable_fd(struct task_struct *tsk, int fd, struct dentry *root)<br>
+{<br>
+	struct files_struct *files;<br>
+	struct file *file;<br>
+	struct inode *inode;<br>
+	mode_t saved_mode;<br>
+	struct file_operations *fop;<br>
+<br>
+	files = tsk-&gt;files;<br>
+	file = files-&gt;fd[fd];<br>
+	inode = file-&gt;f_dentry-&gt;d_inode;<br>
+	fop = file-&gt;f_op;<br>
+<br>
+	/* serialize with operations on this inode */<br>
+	if (inode != root-&gt;d_inode) /* root is down'd by the caller */<br>
+		down(&amp;inode-&gt;i_sem);<br>
+<br>
+	lock_kernel();<br>
+	locks_remove_posix(file, files);<br>
+	locks_remove_flock(file);<br>
+	unlock_kernel();<br>
+<br>
+	/* flush and release fs-specific resources */<br>
+	if (fop-&gt;flush)<br>
+		fop-&gt;flush(file);<br>
+	if (fop-&gt;release)<br>
+		fop-&gt;release(inode, file);<br>
+<br>
+	/* get rid of pages in the cache */<br>
+	vmtruncate(inode, 0);<br>
+<br>
+	/* rehash this inode into nullfs */<br>
+	remove_inode_hash(inode);<br>
+	inode-&gt;i_sb = nullfs_sb;<br>
+	insert_inode_hash(inode);<br>
+<br>
+	/* make subsequent io on this inode return EIO */<br>
+	saved_mode = inode-&gt;i_mode;<br>
+	make_bad_inode(inode);<br>
+	inode-&gt;i_mode = saved_mode;<br>
+	inode-&gt;i_dev = nullfs_sb-&gt;s_dev;<br>
+	xchg(&amp;file-&gt;f_op, &amp;bad_file_ops);<br>
+<br>
+	/* switch file-&gt;f_vfsmnt to nullfs mntpoint */<br>
+	mntput(file-&gt;f_vfsmnt);<br>
+	file-&gt;f_vfsmnt = mntget(nullfs_mnt);<br>
+<br>
+	/* ok, now we can decrement root's d_count */<br>
+	dput(root);<br>
+<br>
+	if (inode != root-&gt;d_inode) /* root is down'd by the caller */<br>
+		up(&amp;inode-&gt;i_sem);<br>
+<br>
+	printk(KERN_WARNING "VFS: disabled fd=%d for %s/%d (d_count=%d)\n", <br>
+			fd, tsk-&gt;comm, tsk-&gt;pid, atomic_read(&amp;root-&gt;d_count));<br>
+}<br>
+<br>
+static void disable_pwd(struct task_struct *tsk)<br>
+{<br>
+	struct inode *inode;<br>
+	struct dentry *dentry;<br>
+<br>
+	inode = nullfs_get_inode(nullfs_sb, S_IFDIR|0755);<br>
+	if (!inode) {<br>
+		printk(KERN_ERR "disable_pwd(): can't allocate inode\n");<br>
+		return;<br>
+	}<br>
+	dentry = d_alloc(nullfs_sb-&gt;s_root, <br>
+			&amp;(const struct qstr){"dead_pwd", 8, 0});<br>
+	if (!dentry) {<br>
+		iput(inode);<br>
+		printk(KERN_ERR "disable_pwd(): can't allocate dentry\n");<br>
+		return;<br>
+	}<br>
+	d_instantiate(dentry, inode);<br>
+	dget(dentry); /* pin the dentry in core */<br>
+	set_fs_pwd(tsk-&gt;fs, nullfs_mnt, dentry);<br>
+	printk(KERN_ERR "VFS: disabled pwd for %s/%d\n", tsk-&gt;comm, tsk-&gt;pid);<br>
+}<br>
+<br>
+static void disable_root(struct task_struct *tsk)<br>
+{<br>
+	struct inode *inode;<br>
+	struct dentry *dentry;<br>
+<br>
+	inode = nullfs_get_inode(nullfs_sb, S_IFDIR|0755);<br>
+	if (!inode) {<br>
+		printk(KERN_ERR "disable_root(): can't allocate inode\n");<br>
+		return;<br>
+	}<br>
+	dentry = d_alloc(nullfs_sb-&gt;s_root, <br>
+			&amp;(const struct qstr){"dead_root", 9, 0});<br>
+	if (!dentry) {<br>
+		iput(inode);<br>
+		printk(KERN_ERR "disable_root(): can't allocate dentry\n");<br>
+		return;<br>
+	}<br>
+	d_instantiate(dentry, inode);<br>
+	dget(dentry); /* pin the dentry in core */<br>
+	set_fs_root(tsk-&gt;fs, nullfs_mnt, dentry);<br>
+	printk(KERN_INFO "VFS: disabled root for %s/%d\n", <br>
+			tsk-&gt;comm, tsk-&gt;pid);<br>
+}<br>
+<br>
+void disable_filesystem(struct vfsmount *mnt)<br>
+{<br>
+	struct task_struct *tsk;<br>
+	struct dentry *root;<br>
+	struct mm_struct *mm;<br>
+	struct vm_area_struct *vma;<br>
+	struct file *file;<br>
+	struct inode *inode;<br>
+	struct super_block *sb;<br>
+<br>
+	sb = mnt-&gt;mnt_sb;<br>
+	root = sb-&gt;s_root;<br>
+	down(&amp;root-&gt;d_inode-&gt;i_sem);<br>
+repeat:<br>
+	file_list_lock();<br>
+	read_lock(&amp;tasklist_lock);<br>
+	for_each_task(tsk) {<br>
+		int fd, j = 0;<br>
+		struct files_struct *files = tsk-&gt;files;<br>
+<br>
+		mm = tsk-&gt;mm;<br>
+		if (!files || !mm)<br>
+			continue;<br>
+<br>
+		if (tsk-&gt;fs-&gt;pwdmnt == mnt) {<br>
+			read_unlock(&amp;tasklist_lock);<br>
+			file_list_unlock();<br>
+			disable_pwd(tsk); /* may block */<br>
+			goto repeat;<br>
+		}<br>
+<br>
+		if (tsk-&gt;fs-&gt;rootmnt == mnt) {<br>
+			read_unlock(&amp;tasklist_lock);<br>
+			file_list_unlock();<br>
+			disable_root(tsk); /* may block */<br>
+			goto repeat;<br>
+		}<br>
+<br>
+		/* check if any process has open files here */<br>
+		while (1) {<br>
+			unsigned long set;<br>
+<br>
+			fd = j * __NFDBITS;<br>
+			if (fd &gt;= files-&gt;max_fdset || fd &gt;= files-&gt;max_fds)<br>
+				break;<br>
+			set = files-&gt;open_fds-&gt;fds_bits[j++];<br>
+			while (set) {<br>
+				if (set &amp; 1) {<br>
+					file = files-&gt;fd[fd];<br>
+					inode = file-&gt;f_dentry-&gt;d_inode;<br>
+					if (inode &amp;&amp; (inode-&gt;i_sb == sb) &amp;&amp;<br>
+						!is_bad_inode(inode)) {<br>
+						read_unlock(&amp;tasklist_lock);<br>
+						file_list_unlock();<br>
+<br>
+						/* may block */<br>
+						disable_fd(tsk, fd, root);<br>
+						goto repeat;<br>
+					}<br>
+				}<br>
+				fd++;<br>
+				set &gt;&gt;= 1;<br>
+			}<br>
+		}<br>
+<br>
+		/* now check for mmap'd files and unmap them */<br>
+		vmlist_modify_lock(mm);<br>
+		for (vma = mm-&gt;mmap; vma; vma=vma-&gt;vm_next) {<br>
+			file = vma-&gt;vm_file;<br>
+			if (!file)<br>
+				continue;<br>
+			inode = file-&gt;f_dentry-&gt;d_inode;<br>
+			if (!inode || !inode-&gt;i_sb)<br>
+				continue;<br>
+			if (inode-&gt;i_sb == sb) {<br>
+				vmlist_modify_unlock(mm);<br>
+				read_unlock(&amp;tasklist_lock);<br>
+				file_list_unlock();<br>
+				down(&amp;mm-&gt;mmap_sem);<br>
+				do_munmap(mm, vma-&gt;vm_start, <br>
+						vma-&gt;vm_end - vma-&gt;vm_start);<br>
+				up(&amp;mm-&gt;mmap_sem);<br>
+				goto repeat;<br>
+			}<br>
+		}<br>
+		vmlist_modify_unlock(mm);<br>
+	}<br>
+<br>
+	read_unlock(&amp;tasklist_lock);<br>
+	file_list_unlock();<br>
+	up(&amp;root-&gt;d_inode-&gt;i_sem);<br>
+}<br>
diff -urN -X dontdiff linux/fs/super.c nullfs/fs/super.c<br>
--- linux/fs/super.c	Tue Jul 11 19:26:50 2000<br>
+++ nullfs/fs/super.c	Tue Jul 18 17:29:34 2000<br>
@@ -979,6 +979,9 @@<br>
 		return retval;<br>
 	}<br>
 <br>
+	if (flags&amp;MNT_FORCE)<br>
+		disable_filesystem(mnt);<br>
+<br>
 	spin_lock(&amp;dcache_lock);<br>
 	if (atomic_read(&amp;mnt-&gt;mnt_count) &gt; 2) {<br>
 		spin_unlock(&amp;dcache_lock);<br>
diff -urN -X dontdiff linux/include/linux/fs.h nullfs/include/linux/fs.h<br>
--- linux/include/linux/fs.h	Tue Jul 11 19:26:51 2000<br>
+++ nullfs/include/linux/fs.h	Fri Jul 14 23:36:01 2000<br>
@@ -913,6 +913,7 @@<br>
 /* Invalid inode operations -- fs/bad_inode.c */<br>
 extern void make_bad_inode(struct inode *);<br>
 extern int is_bad_inode(struct inode *);<br>
+extern struct file_operations bad_file_ops;<br>
 <br>
 extern struct file_operations read_fifo_fops;<br>
 extern struct file_operations write_fifo_fops;<br>
@@ -1174,6 +1175,8 @@<br>
 extern kdev_t ROOT_DEV;<br>
 extern char root_device_name[];<br>
 <br>
+/* fs/nullfs/inode.c - used by forced umount */<br>
+extern void disable_filesystem(struct vfsmount *);<br>
 <br>
 extern void show_buffers(void);<br>
 extern void mount_root(void);<br>
<p>
<p>
-<br>
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in<br>
the body of a message to majordomo@vger.rutgers.edu<br>
Please read the FAQ at <a href="http://www.tux.org/lkml/">http://www.tux.org/lkml/</a><br>
<!-- body="end" -->
<hr>
<p>
<ul>
<!-- next="start" -->
<li> <b>Next message:</b> <a href="0332.html">I Lee Hetherington: "Re: 2.4.0-test[34]: nasty make race with CONFIG_MODVERSIONS=y"</a>
<li> <b>Previous message:</b> <a href="0330.html">Andre Hedrick: "driver for SCSI card"</a>
<!-- nextthread="start" -->
<li> <b>Next in thread:</b> <a href="0336.html">Manfred Spraul: "Re: [patch-2.4.0-test5-pre1] nullfs and forced umount"</a>
<li> <b>Reply:</b> <a href="0336.html">Manfred Spraul: "Re: [patch-2.4.0-test5-pre1] nullfs and forced umount"</a>
<li> <b>Reply:</b> <a href="0358.html">Manfred Spraul: "Re: [patch-2.4.0-test5-pre1] nullfs and forced umount"</a>
<!-- reply="end" -->
</ul>
</font></body>
