Merge with master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6.git

author: David Woodhouse <dwmw2@shinybook.infradead.org> 2005-07-02 13:39:09 +0100
committer: David Woodhouse <dwmw2@shinybook.infradead.org> 2005-07-02 13:39:09 +0100
commit: d2f6409584e2c62ffad81690562330ff3bf4a458 (patch)
tree: 3bdfb97d0b51be2f7f414f2107e97603c1206abb /fs
parent: e1b09eba2686eca94a3a188042b518df6044a3c1 (diff)
parent: 4a89a04f1ee21a7c1f4413f1ad7dcfac50ff9b63 (diff)
248 files changed, 8350 insertions, 3993 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 6a4ad4bb7a54..062177956239 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -50,6 +50,23 @@ config EXT2_FS_SECURITY
 	  If you are not using a security module that requires using
 	  extended attributes for file security labels, say N.
 
+config EXT2_FS_XIP
+	bool "Ext2 execute in place support"
+	depends on EXT2_FS
+	help
+	  Execute in place can be used on memory-backed block devices. If you
+	  enable this option, you can select to mount block devices which are
+	  capable of this feature without using the page cache.
+
+	  If you do not use a block device that is capable of using this,
+	  or if unsure, say N.
+
+config FS_XIP
+# execute in place
+	bool
+	depends on EXT2_FS_XIP
+	default y
+
 config EXT3_FS
 	tristate "Ext3 journalling file system support"
 	help
@@ -717,6 +734,12 @@ config PROC_KCORE
 	bool "/proc/kcore support" if !ARM
 	depends on PROC_FS && MMU
 
+config PROC_VMCORE
+        bool "/proc/vmcore support (EXPERIMENTAL)"
+        depends on PROC_FS && EMBEDDED && EXPERIMENTAL && CRASH_DUMP
+        help
+        Exports the dump image of crashed kernel in ELF format.
+
 config SYSFS
 	bool "sysfs file system support" if EMBEDDED
 	default y
@@ -741,56 +764,6 @@ config SYSFS
 
 	Designers of embedded systems may wish to say N here to conserve space.
 
-config DEVFS_FS
-	bool "/dev file system support (OBSOLETE)"
-	depends on EXPERIMENTAL
-	help
-	  This is support for devfs, a virtual file system (like /proc) which
-	  provides the file system interface to device drivers, normally found
-	  in /dev. Devfs does not depend on major and minor number
-	  allocations. Device drivers register entries in /dev which then
-	  appear automatically, which means that the system administrator does
-	  not have to create character and block special device files in the
-	  /dev directory using the mknod command (or MAKEDEV script) anymore.
-
-	  This is work in progress. If you want to use this, you *must* read
-	  the material in <file:Documentation/filesystems/devfs/>, especially
-	  the file README there.
-
-	  Note that devfs no longer manages /dev/pts!  If you are using UNIX98
-	  ptys, you will also need to mount the /dev/pts filesystem (devpts).
-
-	  Note that devfs has been obsoleted by udev,
-	  <http://www.kernel.org/pub/linux/utils/kernel/hotplug/>.
-	  It has been stripped down to a bare minimum and is only provided for
-	  legacy installations that use its naming scheme which is
-	  unfortunately different from the names normal Linux installations
-	  use.
-
-	  If unsure, say N.
-
-config DEVFS_MOUNT
-	bool "Automatically mount at boot"
-	depends on DEVFS_FS
-	help
-	  This option appears if you have CONFIG_DEVFS_FS enabled. Setting
-	  this to 'Y' will make the kernel automatically mount devfs onto /dev
-	  when the system is booted, before the init thread is started.
-	  You can override this with the "devfs=nomount" boot option.
-
-	  If unsure, say N.
-
-config DEVFS_DEBUG
-	bool "Debug devfs"
-	depends on DEVFS_FS
-	help
-	  If you say Y here, then the /dev file system code will generate
-	  debugging messages. See the file
-	  <file:Documentation/filesystems/devfs/boot-options> for more
-	  details.
-
-	  If unsure, say N.
-
 config DEVPTS_FS_XATTR
 	bool "/dev/pts Extended Attributes"
 	depends on UNIX98_PTYS
@@ -1318,6 +1291,7 @@ config NFS_FS
 	depends on INET
 	select LOCKD
 	select SUNRPC
+	select NFS_ACL_SUPPORT if NFS_V3_ACL
 	help
 	  If you are connected to some other (usually local) Unix computer
 	  (using SLIP, PLIP, PPP or Ethernet) and want to mount files residing
@@ -1360,6 +1334,16 @@ config NFS_V3
 
 	  If unsure, say Y.
 
+config NFS_V3_ACL
+	bool "Provide client support for the NFSv3 ACL protocol extension"
+	depends on NFS_V3
+	help
+	  Implement the NFSv3 ACL protocol extension for manipulating POSIX
+	  Access Control Lists.  The server should also be compiled with
+	  the NFSv3 ACL protocol extension; see the CONFIG_NFSD_V3_ACL option.
+
+	  If unsure, say N.
+
 config NFS_V4
 	bool "Provide NFSv4 client support (EXPERIMENTAL)"
 	depends on NFS_FS && EXPERIMENTAL
@@ -1403,6 +1387,7 @@ config NFSD
 	select LOCKD
 	select SUNRPC
 	select EXPORTFS
+	select NFS_ACL_SUPPORT if NFSD_V3_ACL || NFSD_V2_ACL
 	help
 	  If you want your Linux box to act as an NFS *server*, so that other
 	  computers on your local network which support NFS can access certain
@@ -1426,6 +1411,10 @@ config NFSD
 	  To compile the NFS server support as a module, choose M here: the
 	  module will be called nfsd.  If unsure, say N.
 
+config NFSD_V2_ACL
+	bool
+	depends on NFSD
+
 config NFSD_V3
 	bool "Provide NFSv3 server support"
 	depends on NFSD
@@ -1433,10 +1422,22 @@ config NFSD_V3
 	  If you would like to include the NFSv3 server as well as the NFSv2
 	  server, say Y here.  If unsure, say Y.
 
+config NFSD_V3_ACL
+	bool "Provide server support for the NFSv3 ACL protocol extension"
+	depends on NFSD_V3
+	select NFSD_V2_ACL
+	help
+	  Implement the NFSv3 ACL protocol extension for manipulating POSIX
+	  Access Control Lists on exported file systems. NFS clients should
+	  be compiled with the NFSv3 ACL protocol extension; see the
+	  CONFIG_NFS_V3_ACL option.  If unsure, say N.
+
 config NFSD_V4
 	bool "Provide NFSv4 server support (EXPERIMENTAL)"
 	depends on NFSD_V3 && EXPERIMENTAL
 	select NFSD_TCP
+	select CRYPTO_MD5
+	select CRYPTO
 	help
 	  If you would like to include the NFSv4 server as well as the NFSv2
 	  and NFSv3 servers, say Y here.  This feature is experimental, and
@@ -1477,6 +1478,15 @@ config LOCKD_V4
 config EXPORTFS
 	tristate
 
+config NFS_ACL_SUPPORT
+	tristate
+	select FS_POSIX_ACL
+
+config NFS_COMMON
+	bool
+	depends on NFSD || NFS_FS
+	default y
+
 config SUNRPC
 	tristate
 
diff --git a/fs/Makefile b/fs/Makefile
index 443f2bc56ccf..20edcf28bfd2 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -10,6 +10,7 @@ obj-y :=	open.o read_write.o file_table.o buffer.o  bio.o super.o \
 		ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
 		attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
 		seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
+		ioprio.o
 
 obj-$(CONFIG_EPOLL)		+= eventpoll.o
 obj-$(CONFIG_COMPAT)		+= compat.o
@@ -31,6 +32,7 @@ obj-$(CONFIG_BINFMT_FLAT)	+= binfmt_flat.o
 
 obj-$(CONFIG_FS_MBCACHE)	+= mbcache.o
 obj-$(CONFIG_FS_POSIX_ACL)	+= posix_acl.o xattr_acl.o
+obj-$(CONFIG_NFS_COMMON)	+= nfs_common/
 
 obj-$(CONFIG_QUOTA)		+= dquot.o
 obj-$(CONFIG_QFMT_V1)		+= quota_v1.o
diff --git a/fs/afs/kafsasyncd.c b/fs/afs/kafsasyncd.c
index 6fc88ae8ad94..7ac07d0d47b9 100644
--- a/fs/afs/kafsasyncd.c
+++ b/fs/afs/kafsasyncd.c
@@ -116,7 +116,7 @@ static int kafsasyncd(void *arg)
 		remove_wait_queue(&kafsasyncd_sleepq, &myself);
 		set_current_state(TASK_RUNNING);
 
-		try_to_freeze(PF_FREEZE);
+		try_to_freeze();
 
 		/* discard pending signals */
 		afs_discard_my_signals();
diff --git a/fs/afs/kafstimod.c b/fs/afs/kafstimod.c
index 86e710dd057e..65bc05ab8182 100644
--- a/fs/afs/kafstimod.c
+++ b/fs/afs/kafstimod.c
@@ -91,7 +91,7 @@ static int kafstimod(void *arg)
 			complete_and_exit(&kafstimod_dead, 0);
 		}
 
-		try_to_freeze(PF_FREEZE);
+		try_to_freeze();
 
 		/* discard pending signals */
 		afs_discard_my_signals();
diff --git a/fs/aio.c b/fs/aio.c
index 7afa222f6802..06d7d4390fe7 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -58,6 +58,7 @@ static DEFINE_SPINLOCK(fput_lock);
 static LIST_HEAD(fput_head);
 
 static void aio_kick_handler(void *);
+static void aio_queue_work(struct kioctx *);
 
 /* aio_setup
  *	Creates the slab caches used by the aio routines, panic on
@@ -747,6 +748,14 @@ out:
 		 * has already been kicked */
 		if (kiocbIsKicked(iocb)) {
 			__queue_kicked_iocb(iocb);
+
+			/*
+			 * __queue_kicked_iocb will always return 1 here, because
+			 * iocb->ki_run_list is empty at this point so it should
+			 * be safe to unconditionally queue the context into the
+			 * work queue.
+			 */
+			aio_queue_work(ctx);
 		}
 	}
 	return ret;
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index c7b2b8890188..9c09641ce907 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -185,6 +185,19 @@ int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify);
 int autofs4_wait_release(struct autofs_sb_info *,autofs_wqt_t,int);
 void autofs4_catatonic_mode(struct autofs_sb_info *);
 
+static inline int autofs4_follow_mount(struct vfsmount **mnt, struct dentry **dentry)
+{
+	int res = 0;
+
+	while (d_mountpoint(*dentry)) {
+		int followed = follow_down(mnt, dentry);
+		if (!followed)
+			break;
+		res = 1;
+	}
+	return res;
+}
+
 static inline int simple_positive(struct dentry *dentry)
 {
 	return dentry->d_inode && !d_unhashed(dentry);
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 500425e24fba..feb6ac427d05 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -56,12 +56,9 @@ static int autofs4_check_mount(struct vfsmount *mnt, struct dentry *dentry)
 	mntget(mnt);
 	dget(dentry);
 
-	if (!follow_down(&mnt, &dentry))
+	if (!autofs4_follow_mount(&mnt, &dentry))
 		goto done;
 
-	while (d_mountpoint(dentry) && follow_down(&mnt, &dentry))
-		;
-
 	/* This is an autofs submount, we can't expire it */
 	if (is_autofs4_dentry(dentry))
 		goto done;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 3765c047f157..2a771ec66956 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -205,7 +205,11 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
 		struct vfsmount *fp_mnt = mntget(mnt);
 		struct dentry *fp_dentry = dget(dentry);
 
-		while (follow_down(&fp_mnt, &fp_dentry) && d_mountpoint(fp_dentry));
+		if (!autofs4_follow_mount(&fp_mnt, &fp_dentry)) {
+			dput(fp_dentry);
+			mntput(fp_mnt);
+			return -ENOENT;
+		}
 
 		fp = dentry_open(fp_dentry, fp_mnt, file->f_flags);
 		status = PTR_ERR(fp);
@@ -302,7 +306,14 @@ static int try_to_fill_dentry(struct dentry *dentry,
 		
 		DPRINTK("expire done status=%d", status);
 		
-		return 0;
+		/*
+		 * If the directory still exists the mount request must
+		 * continue otherwise it can't be followed at the right
+		 * time during the walk.
+		 */
+		status = d_invalidate(dentry);
+		if (status != -EBUSY)
+			return 0;
 	}
 
 	DPRINTK("dentry=%p %.*s ino=%p",
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 5a40d36e5a51..fa2348dcd671 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -191,6 +191,13 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 	}
 
 	if ( !wq ) {
+		/* Can't wait for an expire if there's no mount */
+		if (notify == NFY_NONE && !d_mountpoint(dentry)) {
+			kfree(name);
+			up(&sbi->wq_sem);
+			return -ENOENT;
+		}
+
 		/* Create a new wait queue */
 		wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL);
 		if ( !wq ) {
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 009b8920c1ff..dd9baabaf016 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -316,6 +316,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	current->mm->brk = ex.a_bss +
 		(current->mm->start_brk = N_BSSADDR(ex));
 	current->mm->free_area_cache = current->mm->mmap_base;
+	current->mm->cached_hole_size = 0;
 
 	set_mm_counter(current->mm, rss, 0);
 	current->mm->mmap = NULL;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index f8f6b6b76179..7976a238f0a3 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -775,6 +775,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	   change some of these later */
 	set_mm_counter(current->mm, rss, 0);
 	current->mm->free_area_cache = current->mm->mmap_base;
+	current->mm->cached_hole_size = 0;
 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
 				 executable_stack);
 	if (retval < 0) {
diff --git a/fs/block_dev.c b/fs/block_dev.c
index c0cbd1bc1a02..e0df94c37b7e 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -777,8 +777,7 @@ static ssize_t blkdev_file_aio_write(struct kiocb *iocb, const char __user *buf,
 	return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
 }
 
-static int block_ioctl(struct inode *inode, struct file *file, unsigned cmd,
-			unsigned long arg)
+static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 {
 	return blkdev_ioctl(file->f_mapping->host, file, cmd, arg);
 }
@@ -803,7 +802,7 @@ struct file_operations def_blk_fops = {
   	.aio_write	= blkdev_file_aio_write, 
 	.mmap		= generic_file_mmap,
 	.fsync		= block_fsync,
-	.ioctl		= block_ioctl,
+	.unlocked_ioctl	= block_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= compat_blkdev_ioctl,
 #endif
diff --git a/fs/buffer.c b/fs/buffer.c
index 7e9e409feaa7..561e63a14966 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -278,7 +278,7 @@ EXPORT_SYMBOL(thaw_bdev);
  */
 static void do_sync(unsigned long wait)
 {
-	wakeup_bdflush(0);
+	wakeup_pdflush(0);
 	sync_inodes(0);		/* All mappings, inodes and their blockdevs */
 	DQUOT_SYNC(NULL);
 	sync_supers();		/* Write the superblocks */
@@ -331,7 +331,7 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
 	return ret;
 }
 
-asmlinkage long sys_fsync(unsigned int fd)
+static long do_fsync(unsigned int fd, int datasync)
 {
 	struct file * file;
 	struct address_space *mapping;
@@ -342,14 +342,14 @@ asmlinkage long sys_fsync(unsigned int fd)
 	if (!file)
 		goto out;
 
-	mapping = file->f_mapping;
-
 	ret = -EINVAL;
 	if (!file->f_op || !file->f_op->fsync) {
 		/* Why?  We can still call filemap_fdatawrite */
 		goto out_putf;
 	}
 
+	mapping = file->f_mapping;
+
 	current->flags |= PF_SYNCWRITE;
 	ret = filemap_fdatawrite(mapping);
 
@@ -358,7 +358,7 @@ asmlinkage long sys_fsync(unsigned int fd)
 	 * which could cause livelocks in fsync_buffers_list
 	 */
 	down(&mapping->host->i_sem);
-	err = file->f_op->fsync(file, file->f_dentry, 0);
+	err = file->f_op->fsync(file, file->f_dentry, datasync);
 	if (!ret)
 		ret = err;
 	up(&mapping->host->i_sem);
@@ -373,39 +373,14 @@ out:
 	return ret;
 }
 
-asmlinkage long sys_fdatasync(unsigned int fd)
+asmlinkage long sys_fsync(unsigned int fd)
 {
-	struct file * file;
-	struct address_space *mapping;
-	int ret, err;
-
-	ret = -EBADF;
-	file = fget(fd);
-	if (!file)
-		goto out;
-
-	ret = -EINVAL;
-	if (!file->f_op || !file->f_op->fsync)
-		goto out_putf;
-
-	mapping = file->f_mapping;
-
-	current->flags |= PF_SYNCWRITE;
-	ret = filemap_fdatawrite(mapping);
-	down(&mapping->host->i_sem);
-	err = file->f_op->fsync(file, file->f_dentry, 1);
-	if (!ret)
-		ret = err;
-	up(&mapping->host->i_sem);
-	err = filemap_fdatawait(mapping);
-	if (!ret)
-		ret = err;
-	current->flags &= ~PF_SYNCWRITE;
+	return do_fsync(fd, 0);
+}
 
-out_putf:
-	fput(file);
-out:
-	return ret;
+asmlinkage long sys_fdatasync(unsigned int fd)
+{
+	return do_fsync(fd, 1);
 }
 
 /*
@@ -522,13 +497,13 @@ static void free_more_memory(void)
 	struct zone **zones;
 	pg_data_t *pgdat;
 
-	wakeup_bdflush(1024);
+	wakeup_pdflush(1024);
 	yield();
 
 	for_each_pgdat(pgdat) {
 		zones = pgdat->node_zonelists[GFP_NOFS&GFP_ZONEMASK].zones;
 		if (*zones)
-			try_to_free_pages(zones, GFP_NOFS, 0);
+			try_to_free_pages(zones, GFP_NOFS);
 	}
 }
 
@@ -1951,7 +1926,6 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
 			if (err)
 				break;
 			if (buffer_new(bh)) {
-				clear_buffer_new(bh);
 				unmap_underlying_metadata(bh->b_bdev,
 							bh->b_blocknr);
 				if (PageUptodate(page)) {
@@ -1993,9 +1967,14 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
 		if (!buffer_uptodate(*wait_bh))
 			err = -EIO;
 	}
-	if (!err)
-		return err;
-
+	if (!err) {
+		bh = head;
+		do {
+			if (buffer_new(bh))
+				clear_buffer_new(bh);
+		} while ((bh = bh->b_this_page) != head);
+		return 0;
+	}
 	/* Error case: */
 	/*
 	 * Zero out any newly allocated blocks to avoid exposing stale
diff --git a/fs/char_dev.c b/fs/char_dev.c
index c1e3537909fc..a69a5d8a406f 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -56,10 +56,21 @@ int get_chrdev_list(char *page)
 
 	down(&chrdevs_lock);
 	for (i = 0; i < ARRAY_SIZE(chrdevs) ; i++) {
-		for (cd = chrdevs[i]; cd; cd = cd->next)
+		for (cd = chrdevs[i]; cd; cd = cd->next) {
+			/*
+			 * if the current name, plus the 5 extra characters
+			 * in the device line for this entry
+			 * would run us off the page, we're done
+			 */
+			if ((len+strlen(cd->name) + 5) >= PAGE_SIZE)
+				goto page_full;
+
+
 			len += sprintf(page+len, "%3d %s\n",
 				       cd->major, cd->name);
+		}
 	}
+page_full:
 	up(&chrdevs_lock);
 
 	return len;
@@ -139,7 +150,7 @@ __unregister_chrdev_region(unsigned major, unsigned baseminor, int minorct)
 	struct char_device_struct *cd = NULL, **cp;
 	int i = major_to_index(major);
 
-	up(&chrdevs_lock);
+	down(&chrdevs_lock);
 	for (cp = &chrdevs[i]; *cp; cp = &(*cp)->next)
 		if ((*cp)->major == major &&
 		    (*cp)->baseminor == baseminor &&
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index ef001a9313e6..3d1cce3653b8 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -61,7 +61,7 @@ unsigned long coda_timeout = 30; /* .. secs, then signals will dequeue */
 
 
 struct venus_comm coda_comms[MAX_CODADEVS];
-static struct class_simple *coda_psdev_class;
+static struct class *coda_psdev_class;
 
 /*
  * Device operations
@@ -363,14 +363,14 @@ static int init_coda_psdev(void)
 		     CODA_PSDEV_MAJOR);
               return -EIO;
 	}
-	coda_psdev_class = class_simple_create(THIS_MODULE, "coda");
+	coda_psdev_class = class_create(THIS_MODULE, "coda");
 	if (IS_ERR(coda_psdev_class)) {
 		err = PTR_ERR(coda_psdev_class);
 		goto out_chrdev;
 	}		
 	devfs_mk_dir ("coda");
 	for (i = 0; i < MAX_CODADEVS; i++) {
-		class_simple_device_add(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR,i), 
+		class_device_create(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR,i),
 				NULL, "cfs%d", i);
 		err = devfs_mk_cdev(MKDEV(CODA_PSDEV_MAJOR, i),
 				S_IFCHR|S_IRUSR|S_IWUSR, "coda/%d", i);
@@ -382,8 +382,8 @@ static int init_coda_psdev(void)
 
 out_class:
 	for (i = 0; i < MAX_CODADEVS; i++) 
-		class_simple_device_remove(MKDEV(CODA_PSDEV_MAJOR, i));
-	class_simple_destroy(coda_psdev_class);
+		class_device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i));
+	class_destroy(coda_psdev_class);
 out_chrdev:
 	unregister_chrdev(CODA_PSDEV_MAJOR, "coda");
 out:
@@ -425,10 +425,10 @@ static int __init init_coda(void)
 	return 0;
 out:
 	for (i = 0; i < MAX_CODADEVS; i++) {
-		class_simple_device_remove(MKDEV(CODA_PSDEV_MAJOR, i));
+		class_device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i));
 		devfs_remove("coda/%d", i);
 	}
-	class_simple_destroy(coda_psdev_class);
+	class_destroy(coda_psdev_class);
 	devfs_remove("coda");
 	unregister_chrdev(CODA_PSDEV_MAJOR, "coda");
 	coda_sysctl_clean();
@@ -447,10 +447,10 @@ static void __exit exit_coda(void)
                 printk("coda: failed to unregister filesystem\n");
         }
 	for (i = 0; i < MAX_CODADEVS; i++) {
-		class_simple_device_remove(MKDEV(CODA_PSDEV_MAJOR, i));
+		class_device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i));
 		devfs_remove("coda/%d", i);
 	}
-	class_simple_destroy(coda_psdev_class);
+	class_destroy(coda_psdev_class);
 	devfs_remove("coda");
 	unregister_chrdev(CODA_PSDEV_MAJOR, "coda");
 	coda_sysctl_clean();
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 548556ff2506..efc97d9b7860 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -45,44 +45,15 @@ struct file_operations debugfs_file_operations = {
 	.open =		default_open,
 };
 
-#define simple_type(type, format, temptype, strtolfn)				\
-static ssize_t read_file_##type(struct file *file, char __user *user_buf,	\
-				size_t count, loff_t *ppos)			\
-{										\
-	char buf[32];								\
-	type *val = file->private_data;						\
-										\
-	snprintf(buf, sizeof(buf), format "\n", *val);				\
-	return simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf));\
-}										\
-static ssize_t write_file_##type(struct file *file, const char __user *user_buf,\
-				 size_t count, loff_t *ppos)			\
-{										\
-	char *endp;								\
-	char buf[32];								\
-	int buf_size;								\
-	type *val = file->private_data;						\
-	temptype tmp;								\
-										\
-	memset(buf, 0x00, sizeof(buf));						\
-	buf_size = min(count, (sizeof(buf)-1));					\
-	if (copy_from_user(buf, user_buf, buf_size))				\
-		return -EFAULT;							\
-										\
-	tmp = strtolfn(buf, &endp, 0);						\
-	if ((endp == buf) || ((type)tmp != tmp))				\
-		return -EINVAL;							\
-	*val = tmp;								\
-	return count;								\
-}										\
-static struct file_operations fops_##type = {					\
-	.read =		read_file_##type,					\
-	.write =	write_file_##type,					\
-	.open =		default_open,						\
-};
-simple_type(u8, "%c", unsigned long, simple_strtoul);
-simple_type(u16, "%hi", unsigned long, simple_strtoul);
-simple_type(u32, "%i", unsigned long, simple_strtoul);
+static void debugfs_u8_set(void *data, u64 val)
+{
+	*(u8 *)data = val;
+}
+static u64 debugfs_u8_get(void *data)
+{
+	return *(u8 *)data;
+}
+DEFINE_SIMPLE_ATTRIBUTE(fops_u8, debugfs_u8_get, debugfs_u8_set, "%llu\n");
 
 /**
  * debugfs_create_u8 - create a file in the debugfs filesystem that is used to read and write a unsigned 8 bit value.
@@ -116,6 +87,16 @@ struct dentry *debugfs_create_u8(const char *name, mode_t mode,
 }
 EXPORT_SYMBOL_GPL(debugfs_create_u8);
 
+static void debugfs_u16_set(void *data, u64 val)
+{
+	*(u16 *)data = val;
+}
+static u64 debugfs_u16_get(void *data)
+{
+	return *(u16 *)data;
+}
+DEFINE_SIMPLE_ATTRIBUTE(fops_u16, debugfs_u16_get, debugfs_u16_set, "%llu\n");
+
 /**
  * debugfs_create_u16 - create a file in the debugfs filesystem that is used to read and write a unsigned 8 bit value.
  *
@@ -148,6 +129,16 @@ struct dentry *debugfs_create_u16(const char *name, mode_t mode,
 }
 EXPORT_SYMBOL_GPL(debugfs_create_u16);
 
+static void debugfs_u32_set(void *data, u64 val)
+{
+	*(u32 *)data = val;
+}
+static u64 debugfs_u32_get(void *data)
+{
+	return *(u32 *)data;
+}
+DEFINE_SIMPLE_ATTRIBUTE(fops_u32, debugfs_u32_get, debugfs_u32_set, "%llu\n");
+
 /**
  * debugfs_create_u32 - create a file in the debugfs filesystem that is used to read and write a unsigned 8 bit value.
  *
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index b529786699e7..a86ac4aeaedb 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -110,16 +110,6 @@ static int debug_fill_super(struct super_block *sb, void *data, int silent)
 	return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files);
 }
 
-static struct dentry * get_dentry(struct dentry *parent, const char *name)
-{               
-	struct qstr qstr;
-
-	qstr.name = name;
-	qstr.len = strlen(name);
-	qstr.hash = full_name_hash(name,qstr.len);
-	return lookup_hash(&qstr,parent);
-}               
-
 static struct super_block *debug_get_sb(struct file_system_type *fs_type,
 				        int flags, const char *dev_name,
 					void *data)
@@ -157,7 +147,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
 
 	*dentry = NULL;
 	down(&parent->d_inode->i_sem);
-	*dentry = get_dentry (parent, name);
+	*dentry = lookup_one_len(name, parent, strlen(name));
 	if (!IS_ERR(dentry)) {
 		if ((mode & S_IFMT) == S_IFDIR)
 			error = debugfs_mkdir(parent->d_inode, *dentry, mode);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 1d55e7e67342..0d06097bc995 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -215,7 +215,7 @@ static struct page *dio_get_page(struct dio *dio)
 static void dio_complete(struct dio *dio, loff_t offset, ssize_t bytes)
 {
 	if (dio->end_io && dio->result)
-		dio->end_io(dio->inode, offset, bytes, dio->map_bh.b_private);
+		dio->end_io(dio->iocb, offset, bytes, dio->map_bh.b_private);
 	if (dio->lock_type == DIO_LOCKING)
 		up_read(&dio->inode->i_alloc_sem);
 }
diff --git a/fs/dquot.c b/fs/dquot.c
index 3995ce7907cc..b9732335bcdc 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -409,13 +409,10 @@ out_dqlock:
  * for this sb+type at all. */
 static void invalidate_dquots(struct super_block *sb, int type)
 {
-	struct dquot *dquot;
-	struct list_head *head;
+	struct dquot *dquot, *tmp;
 
 	spin_lock(&dq_list_lock);
-	for (head = inuse_list.next; head != &inuse_list;) {
-		dquot = list_entry(head, struct dquot, dq_inuse);
-		head = head->next;
+	list_for_each_entry_safe(dquot, tmp, &inuse_list, dq_inuse) {
 		if (dquot->dq_sb != sb)
 			continue;
 		if (dquot->dq_type != type)
@@ -1519,14 +1516,22 @@ out_path:
  * This function is used when filesystem needs to initialize quotas
  * during mount time.
  */
-int vfs_quota_on_mount(int type, int format_id, struct dentry *dentry)
+int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
+		int format_id, int type)
 {
+	struct dentry *dentry;
 	int error;
 
+	dentry = lookup_one_len(qf_name, sb->s_root, strlen(qf_name));
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
+
 	error = security_quota_on(dentry);
-	if (error)
-		return error;
-	return vfs_quota_on_inode(dentry->d_inode, type, format_id);
+	if (!error)
+		error = vfs_quota_on_inode(dentry->d_inode, type, format_id);
+
+	dput(dentry);
+	return error;
 }
 
 /* Generic routine for getting common part of quota structure */
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 9900e333655a..6ab1dd0ca904 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -101,57 +101,6 @@
 /* Maximum number of poll wake up nests we are allowing */
 #define EP_MAX_POLLWAKE_NESTS 4
 
-/* Macro to allocate a "struct epitem" from the slab cache */
-#define EPI_MEM_ALLOC()	(struct epitem *) kmem_cache_alloc(epi_cache, SLAB_KERNEL)
-
-/* Macro to free a "struct epitem" to the slab cache */
-#define EPI_MEM_FREE(p) kmem_cache_free(epi_cache, p)
-
-/* Macro to allocate a "struct eppoll_entry" from the slab cache */
-#define PWQ_MEM_ALLOC()	(struct eppoll_entry *) kmem_cache_alloc(pwq_cache, SLAB_KERNEL)
-
-/* Macro to free a "struct eppoll_entry" to the slab cache */
-#define PWQ_MEM_FREE(p) kmem_cache_free(pwq_cache, p)
-
-/* Fast test to see if the file is an evenpoll file */
-#define IS_FILE_EPOLL(f) ((f)->f_op == &eventpoll_fops)
-
-/* Setup the structure that is used as key for the rb-tree */
-#define EP_SET_FFD(p, f, d) do { (p)->file = (f); (p)->fd = (d); } while (0)
-
-/* Compare rb-tree keys */
-#define EP_CMP_FFD(p1, p2) ((p1)->file > (p2)->file ? +1: \
-			    ((p1)->file < (p2)->file ? -1: (p1)->fd - (p2)->fd))
-
-/* Special initialization for the rb-tree node to detect linkage */
-#define EP_RB_INITNODE(n) (n)->rb_parent = (n)
-
-/* Removes a node from the rb-tree and marks it for a fast is-linked check */
-#define EP_RB_ERASE(n, r) do { rb_erase(n, r); (n)->rb_parent = (n); } while (0)
-
-/* Fast check to verify that the item is linked to the main rb-tree */
-#define EP_RB_LINKED(n) ((n)->rb_parent != (n))
-
-/*
- * Remove the item from the list and perform its initialization.
- * This is useful for us because we can test if the item is linked
- * using "EP_IS_LINKED(p)".
- */
-#define EP_LIST_DEL(p) do { list_del(p); INIT_LIST_HEAD(p); } while (0)
-
-/* Tells us if the item is currently linked */
-#define EP_IS_LINKED(p) (!list_empty(p))
-
-/* Get the "struct epitem" from a wait queue pointer */
-#define EP_ITEM_FROM_WAIT(p) ((struct epitem *) container_of(p, struct eppoll_entry, wait)->base)
-
-/* Get the "struct epitem" from an epoll queue wrapper */
-#define EP_ITEM_FROM_EPQUEUE(p) (container_of(p, struct ep_pqueue, pt)->epi)
-
-/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
-#define EP_OP_HASH_EVENT(op) ((op) != EPOLL_CTL_DEL)
-
-
 struct epoll_filefd {
 	struct file *file;
 	int fd;
@@ -357,6 +306,82 @@ static struct dentry_operations eventpollfs_dentry_operations = {
 
 
 
+/* Fast test to see if the file is an evenpoll file */
+static inline int is_file_epoll(struct file *f)
+{
+	return f->f_op == &eventpoll_fops;
+}
+
+/* Setup the structure that is used as key for the rb-tree */
+static inline void ep_set_ffd(struct epoll_filefd *ffd,
+			      struct file *file, int fd)
+{
+	ffd->file = file;
+	ffd->fd = fd;
+}
+
+/* Compare rb-tree keys */
+static inline int ep_cmp_ffd(struct epoll_filefd *p1,
+			     struct epoll_filefd *p2)
+{
+	return (p1->file > p2->file ? +1:
+	        (p1->file < p2->file ? -1 : p1->fd - p2->fd));
+}
+
+/* Special initialization for the rb-tree node to detect linkage */
+static inline void ep_rb_initnode(struct rb_node *n)
+{
+	n->rb_parent = n;
+}
+
+/* Removes a node from the rb-tree and marks it for a fast is-linked check */
+static inline void ep_rb_erase(struct rb_node *n, struct rb_root *r)
+{
+	rb_erase(n, r);
+	n->rb_parent = n;
+}
+
+/* Fast check to verify that the item is linked to the main rb-tree */
+static inline int ep_rb_linked(struct rb_node *n)
+{
+	return n->rb_parent != n;
+}
+
+/*
+ * Remove the item from the list and perform its initialization.
+ * This is useful for us because we can test if the item is linked
+ * using "ep_is_linked(p)".
+ */
+static inline void ep_list_del(struct list_head *p)
+{
+	list_del(p);
+	INIT_LIST_HEAD(p);
+}
+
+/* Tells us if the item is currently linked */
+static inline int ep_is_linked(struct list_head *p)
+{
+	return !list_empty(p);
+}
+
+/* Get the "struct epitem" from a wait queue pointer */
+static inline struct epitem * ep_item_from_wait(wait_queue_t *p)
+{
+	return container_of(p, struct eppoll_entry, wait)->base;
+}
+
+/* Get the "struct epitem" from an epoll queue wrapper */
+static inline struct epitem * ep_item_from_epqueue(poll_table *p)
+{
+	return container_of(p, struct ep_pqueue, pt)->epi;
+}
+
+/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
+static inline int ep_op_hash_event(int op)
+{
+	return op != EPOLL_CTL_DEL;
+}
+
 /* Initialize the poll safe wake up structure */
 static void ep_poll_safewake_init(struct poll_safewake *psw)
 {
@@ -456,7 +481,7 @@ void eventpoll_release_file(struct file *file)
 		epi = list_entry(lsthead->next, struct epitem, fllink);
 
 		ep = epi->ep;
-		EP_LIST_DEL(&epi->fllink);
+		ep_list_del(&epi->fllink);
 		down_write(&ep->sem);
 		ep_remove(ep, epi);
 		up_write(&ep->sem);
@@ -534,7 +559,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
 		     current, epfd, op, fd, event));
 
 	error = -EFAULT;
-	if (EP_OP_HASH_EVENT(op) &&
+	if (ep_op_hash_event(op) &&
 	    copy_from_user(&epds, event, sizeof(struct epoll_event)))
 		goto eexit_1;
 
@@ -560,7 +585,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
 	 * adding an epoll file descriptor inside itself.
 	 */
 	error = -EINVAL;
-	if (file == tfile || !IS_FILE_EPOLL(file))
+	if (file == tfile || !is_file_epoll(file))
 		goto eexit_3;
 
 	/*
@@ -656,7 +681,7 @@ asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events,
 	 * the user passed to us _is_ an eventpoll file.
 	 */
 	error = -EINVAL;
-	if (!IS_FILE_EPOLL(file))
+	if (!is_file_epoll(file))
 		goto eexit_2;
 
 	/*
@@ -831,11 +856,11 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
 	struct epitem *epi, *epir = NULL;
 	struct epoll_filefd ffd;
 
-	EP_SET_FFD(&ffd, file, fd);
+	ep_set_ffd(&ffd, file, fd);
 	read_lock_irqsave(&ep->lock, flags);
 	for (rbp = ep->rbr.rb_node; rbp; ) {
 		epi = rb_entry(rbp, struct epitem, rbn);
-		kcmp = EP_CMP_FFD(&ffd, &epi->ffd);
+		kcmp = ep_cmp_ffd(&ffd, &epi->ffd);
 		if (kcmp > 0)
 			rbp = rbp->rb_right;
 		else if (kcmp < 0)
@@ -875,7 +900,7 @@ static void ep_release_epitem(struct epitem *epi)
 {
 
 	if (atomic_dec_and_test(&epi->usecnt))
-		EPI_MEM_FREE(epi);
+		kmem_cache_free(epi_cache, epi);
 }
 
 
@@ -886,10 +911,10 @@ static void ep_release_epitem(struct epitem *epi)
 static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
 				 poll_table *pt)
 {
-	struct epitem *epi = EP_ITEM_FROM_EPQUEUE(pt);
+	struct epitem *epi = ep_item_from_epqueue(pt);
 	struct eppoll_entry *pwq;
 
-	if (epi->nwait >= 0 && (pwq = PWQ_MEM_ALLOC())) {
+	if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, SLAB_KERNEL))) {
 		init_waitqueue_func_entry(&pwq->wait, ep_poll_callback);
 		pwq->whead = whead;
 		pwq->base = epi;
@@ -912,7 +937,7 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
 	while (*p) {
 		parent = *p;
 		epic = rb_entry(parent, struct epitem, rbn);
-		kcmp = EP_CMP_FFD(&epi->ffd, &epic->ffd);
+		kcmp = ep_cmp_ffd(&epi->ffd, &epic->ffd);
 		if (kcmp > 0)
 			p = &parent->rb_right;
 		else
@@ -932,17 +957,17 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 	struct ep_pqueue epq;
 
 	error = -ENOMEM;
-	if (!(epi = EPI_MEM_ALLOC()))
+	if (!(epi = kmem_cache_alloc(epi_cache, SLAB_KERNEL)))
 		goto eexit_1;
 
 	/* Item initialization follow here ... */
-	EP_RB_INITNODE(&epi->rbn);
+	ep_rb_initnode(&epi->rbn);
 	INIT_LIST_HEAD(&epi->rdllink);
 	INIT_LIST_HEAD(&epi->fllink);
 	INIT_LIST_HEAD(&epi->txlink);
 	INIT_LIST_HEAD(&epi->pwqlist);
 	epi->ep = ep;
-	EP_SET_FFD(&epi->ffd, tfile, fd);
+	ep_set_ffd(&epi->ffd, tfile, fd);
 	epi->event = *event;
 	atomic_set(&epi->usecnt, 1);
 	epi->nwait = 0;
@@ -978,7 +1003,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 	ep_rbtree_insert(ep, epi);
 
 	/* If the file is already "ready" we drop it inside the ready list */
-	if ((revents & event->events) && !EP_IS_LINKED(&epi->rdllink)) {
+	if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {
 		list_add_tail(&epi->rdllink, &ep->rdllist);
 
 		/* Notify waiting tasks that events are available */
@@ -1007,11 +1032,11 @@ eexit_2:
 	 * allocated wait queue.
 	 */
 	write_lock_irqsave(&ep->lock, flags);
-	if (EP_IS_LINKED(&epi->rdllink))
-		EP_LIST_DEL(&epi->rdllink);
+	if (ep_is_linked(&epi->rdllink))
+		ep_list_del(&epi->rdllink);
 	write_unlock_irqrestore(&ep->lock, flags);
 
-	EPI_MEM_FREE(epi);
+	kmem_cache_free(epi_cache, epi);
 eexit_1:
 	return error;
 }
@@ -1050,14 +1075,14 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
 	 * If the item is not linked to the hash it means that it's on its
 	 * way toward the removal. Do nothing in this case.
 	 */
-	if (EP_RB_LINKED(&epi->rbn)) {
+	if (ep_rb_linked(&epi->rbn)) {
 		/*
 		 * If the item is "hot" and it is not registered inside the ready
 		 * list, push it inside. If the item is not "hot" and it is currently
 		 * registered inside the ready list, unlink it.
 		 */
 		if (revents & event->events) {
-			if (!EP_IS_LINKED(&epi->rdllink)) {
+			if (!ep_is_linked(&epi->rdllink)) {
 				list_add_tail(&epi->rdllink, &ep->rdllist);
 
 				/* Notify waiting tasks that events are available */
@@ -1097,9 +1122,9 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
 		while (!list_empty(lsthead)) {
 			pwq = list_entry(lsthead->next, struct eppoll_entry, llink);
 
-			EP_LIST_DEL(&pwq->llink);
+			ep_list_del(&pwq->llink);
 			remove_wait_queue(pwq->whead, &pwq->wait);
-			PWQ_MEM_FREE(pwq);
+			kmem_cache_free(pwq_cache, pwq);
 		}
 	}
 }
@@ -1118,7 +1143,7 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *epi)
 	 * The check protect us from doing a double unlink ( crash ).
 	 */
 	error = -ENOENT;
-	if (!EP_RB_LINKED(&epi->rbn))
+	if (!ep_rb_linked(&epi->rbn))
 		goto eexit_1;
 
 	/*
@@ -1133,14 +1158,14 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *epi)
 	 * This operation togheter with the above check closes the door to
 	 * double unlinks.
 	 */
-	EP_RB_ERASE(&epi->rbn, &ep->rbr);
+	ep_rb_erase(&epi->rbn, &ep->rbr);
 
 	/*
 	 * If the item we are going to remove is inside the ready file descriptors
 	 * we want to remove it from this list to avoid stale events.
 	 */
-	if (EP_IS_LINKED(&epi->rdllink))
-		EP_LIST_DEL(&epi->rdllink);
+	if (ep_is_linked(&epi->rdllink))
+		ep_list_del(&epi->rdllink);
 
 	error = 0;
 eexit_1:
@@ -1174,8 +1199,8 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
 
 	/* Remove the current item from the list of epoll hooks */
 	spin_lock(&file->f_ep_lock);
-	if (EP_IS_LINKED(&epi->fllink))
-		EP_LIST_DEL(&epi->fllink);
+	if (ep_is_linked(&epi->fllink))
+		ep_list_del(&epi->fllink);
 	spin_unlock(&file->f_ep_lock);
 
 	/* We need to acquire the write IRQ lock before calling ep_unlink() */
@@ -1210,7 +1235,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
 {
 	int pwake = 0;
 	unsigned long flags;
-	struct epitem *epi = EP_ITEM_FROM_WAIT(wait);
+	struct epitem *epi = ep_item_from_wait(wait);
 	struct eventpoll *ep = epi->ep;
 
 	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n",
@@ -1228,7 +1253,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
 		goto is_disabled;
 
 	/* If this file is already in the ready list we exit soon */
-	if (EP_IS_LINKED(&epi->rdllink))
+	if (ep_is_linked(&epi->rdllink))
 		goto is_linked;
 
 	list_add_tail(&epi->rdllink, &ep->rdllist);
@@ -1307,7 +1332,7 @@ static int ep_collect_ready_items(struct eventpoll *ep, struct list_head *txlist
 		lnk = lnk->next;
 
 		/* If this file is already in the ready list we exit soon */
-		if (!EP_IS_LINKED(&epi->txlink)) {
+		if (!ep_is_linked(&epi->txlink)) {
 			/*
 			 * This is initialized in this way so that the default
 			 * behaviour of the reinjecting code will be to push back
@@ -1322,7 +1347,7 @@ static int ep_collect_ready_items(struct eventpoll *ep, struct list_head *txlist
 			/*
 			 * Unlink the item from the ready list.
 			 */
-			EP_LIST_DEL(&epi->rdllink);
+			ep_list_del(&epi->rdllink);
 		}
 	}
 
@@ -1401,7 +1426,7 @@ static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist)
 		epi = list_entry(txlist->next, struct epitem, txlink);
 
 		/* Unlink the current item from the transfer list */
-		EP_LIST_DEL(&epi->txlink);
+		ep_list_del(&epi->txlink);
 
 		/*
 		 * If the item is no more linked to the interest set, we don't
@@ -1410,8 +1435,8 @@ static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist)
 		 * item is set to have an Edge Triggered behaviour, we don't have
 		 * to push it back either.
 		 */
-		if (EP_RB_LINKED(&epi->rbn) && !(epi->event.events & EPOLLET) &&
-		    (epi->revents & epi->event.events) && !EP_IS_LINKED(&epi->rdllink)) {
+		if (ep_rb_linked(&epi->rbn) && !(epi->event.events & EPOLLET) &&
+		    (epi->revents & epi->event.events) && !ep_is_linked(&epi->rdllink)) {
 			list_add_tail(&epi->rdllink, &ep->rdllist);
 			ricnt++;
 		}
diff --git a/fs/exec.c b/fs/exec.c
index 3a4b35a14c0d..48871917d363 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -58,6 +58,9 @@
 
 int core_uses_pid;
 char core_pattern[65] = "core";
+int suid_dumpable = 0;
+
+EXPORT_SYMBOL(suid_dumpable);
 /* The maximal length of core_pattern is also specified in sysctl.c */
 
 static struct linux_binfmt *formats;
@@ -864,6 +867,9 @@ int flush_old_exec(struct linux_binprm * bprm)
 
 	if (current->euid == current->uid && current->egid == current->gid)
 		current->mm->dumpable = 1;
+	else
+		current->mm->dumpable = suid_dumpable;
+
 	name = bprm->filename;
 
 	/* Copies the binary name from after last slash */
@@ -884,7 +890,7 @@ int flush_old_exec(struct linux_binprm * bprm)
 	    permission(bprm->file->f_dentry->d_inode,MAY_READ, NULL) ||
 	    (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) {
 		suid_keys(current);
-		current->mm->dumpable = 0;
+		current->mm->dumpable = suid_dumpable;
 	}
 
 	/* An exec changes our domain. We are no longer part of the thread
@@ -1432,6 +1438,8 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 	struct inode * inode;
 	struct file * file;
 	int retval = 0;
+	int fsuid = current->fsuid;
+	int flag = 0;
 
 	binfmt = current->binfmt;
 	if (!binfmt || !binfmt->core_dump)
@@ -1441,6 +1449,16 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 		up_write(&mm->mmap_sem);
 		goto fail;
 	}
+
+	/*
+	 *	We cannot trust fsuid as being the "true" uid of the
+	 *	process nor do we know its entire history. We only know it
+	 *	was tainted so we dump it as root in mode 2.
+	 */
+	if (mm->dumpable == 2) {	/* Setuid core dump mode */
+		flag = O_EXCL;		/* Stop rewrite attacks */
+		current->fsuid = 0;	/* Dump root private */
+	}
 	mm->dumpable = 0;
 	init_completion(&mm->core_done);
 	spin_lock_irq(&current->sighand->siglock);
@@ -1466,7 +1484,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
  	lock_kernel();
 	format_corename(corename, core_pattern, signr);
 	unlock_kernel();
-	file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE, 0600);
+	file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 0600);
 	if (IS_ERR(file))
 		goto fail_unlock;
 	inode = file->f_dentry->d_inode;
@@ -1491,6 +1509,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 close_fail:
 	filp_close(file, NULL);
 fail_unlock:
+	current->fsuid = fsuid;
 	complete_all(&mm->core_done);
 fail:
 	return retval;
diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile
index ee240a14e70f..c5d02da73bc3 100644
--- a/fs/ext2/Makefile
+++ b/fs/ext2/Makefile
@@ -10,3 +10,4 @@ ext2-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
 ext2-$(CONFIG_EXT2_FS_XATTR)	 += xattr.o xattr_user.o xattr_trusted.o
 ext2-$(CONFIG_EXT2_FS_POSIX_ACL) += acl.o
 ext2-$(CONFIG_EXT2_FS_SECURITY)	 += xattr_security.o
+ext2-$(CONFIG_EXT2_FS_XIP)	 += xip.o
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 25f4a64fd6bc..213148c36ebe 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -396,12 +396,12 @@ static size_t
 ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size,
 			   const char *name, size_t name_len)
 {
-	const size_t size = sizeof(XATTR_NAME_ACL_ACCESS);
+	const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
 
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return 0;
 	if (list && size <= list_size)
-		memcpy(list, XATTR_NAME_ACL_ACCESS, size);
+		memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
 	return size;
 }
 
@@ -409,12 +409,12 @@ static size_t
 ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size,
 			    const char *name, size_t name_len)
 {
-	const size_t size = sizeof(XATTR_NAME_ACL_DEFAULT);
+	const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
 
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return 0;
 	if (list && size <= list_size)
-		memcpy(list, XATTR_NAME_ACL_DEFAULT, size);
+		memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
 	return size;
 }
 
@@ -506,14 +506,14 @@ ext2_xattr_set_acl_default(struct inode *inode, const char *name,
 }
 
 struct xattr_handler ext2_xattr_acl_access_handler = {
-	.prefix	= XATTR_NAME_ACL_ACCESS,
+	.prefix	= POSIX_ACL_XATTR_ACCESS,
 	.list	= ext2_xattr_list_acl_access,
 	.get	= ext2_xattr_get_acl_access,
 	.set	= ext2_xattr_set_acl_access,
 };
 
 struct xattr_handler ext2_xattr_acl_default_handler = {
-	.prefix	= XATTR_NAME_ACL_DEFAULT,
+	.prefix	= POSIX_ACL_XATTR_DEFAULT,
 	.list	= ext2_xattr_list_acl_default,
 	.get	= ext2_xattr_get_acl_default,
 	.set	= ext2_xattr_set_acl_default,
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index fed96ae81a7d..0bde85bafe38 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -4,7 +4,7 @@
   (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
 */
 
-#include <linux/xattr_acl.h>
+#include <linux/posix_acl_xattr.h>
 
 #define EXT2_ACL_VERSION	0x0001
 
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 8f0fd726c3f1..eed521d22cf0 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -147,9 +147,11 @@ extern struct file_operations ext2_dir_operations;
 /* file.c */
 extern struct inode_operations ext2_file_inode_operations;
 extern struct file_operations ext2_file_operations;
+extern struct file_operations ext2_xip_file_operations;
 
 /* inode.c */
 extern struct address_space_operations ext2_aops;
+extern struct address_space_operations ext2_aops_xip;
 extern struct address_space_operations ext2_nobh_aops;
 
 /* namei.c */
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index f5e86141ec54..a484412fc782 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -55,6 +55,20 @@ struct file_operations ext2_file_operations = {
 	.sendfile	= generic_file_sendfile,
 };
 
+#ifdef CONFIG_EXT2_FS_XIP
+struct file_operations ext2_xip_file_operations = {
+	.llseek		= generic_file_llseek,
+	.read		= xip_file_read,
+	.write		= xip_file_write,
+	.ioctl		= ext2_ioctl,
+	.mmap		= xip_file_mmap,
+	.open		= generic_file_open,
+	.release	= ext2_release_file,
+	.fsync		= ext2_sync_file,
+	.sendfile	= xip_file_sendfile,
+};
+#endif
+
 struct inode_operations ext2_file_inode_operations = {
 	.truncate	= ext2_truncate,
 #ifdef CONFIG_EXT2_FS_XATTR
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index a50d9db4b6e4..53dceb0c6593 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -33,6 +33,7 @@
 #include <linux/mpage.h>
 #include "ext2.h"
 #include "acl.h"
+#include "xip.h"
 
 MODULE_AUTHOR("Remy Card and others");
 MODULE_DESCRIPTION("Second Extended Filesystem");
@@ -594,6 +595,16 @@ out:
 	if (err)
 		goto cleanup;
 
+	if (ext2_use_xip(inode->i_sb)) {
+		/*
+		 * we need to clear the block
+		 */
+		err = ext2_clear_xip_target (inode,
+			le32_to_cpu(chain[depth-1].key));
+		if (err)
+			goto cleanup;
+	}
+
 	if (ext2_splice_branch(inode, iblock, chain, partial, left) < 0)
 		goto changed;
 
@@ -691,6 +702,11 @@ struct address_space_operations ext2_aops = {
 	.writepages		= ext2_writepages,
 };
 
+struct address_space_operations ext2_aops_xip = {
+	.bmap			= ext2_bmap,
+	.get_xip_page		= ext2_get_xip_page,
+};
+
 struct address_space_operations ext2_nobh_aops = {
 	.readpage		= ext2_readpage,
 	.readpages		= ext2_readpages,
@@ -910,7 +926,9 @@ void ext2_truncate (struct inode * inode)
 	iblock = (inode->i_size + blocksize-1)
 					>> EXT2_BLOCK_SIZE_BITS(inode->i_sb);
 
-	if (test_opt(inode->i_sb, NOBH))
+	if (mapping_is_xip(inode->i_mapping))
+		xip_truncate_page(inode->i_mapping, inode->i_size);
+	else if (test_opt(inode->i_sb, NOBH))
 		nobh_truncate_page(inode->i_mapping, inode->i_size);
 	else
 		block_truncate_page(inode->i_mapping,
@@ -1110,11 +1128,16 @@ void ext2_read_inode (struct inode * inode)
 
 	if (S_ISREG(inode->i_mode)) {
 		inode->i_op = &ext2_file_inode_operations;
-		inode->i_fop = &ext2_file_operations;
-		if (test_opt(inode->i_sb, NOBH))
+		if (ext2_use_xip(inode->i_sb)) {
+			inode->i_mapping->a_ops = &ext2_aops_xip;
+			inode->i_fop = &ext2_xip_file_operations;
+		} else if (test_opt(inode->i_sb, NOBH)) {
 			inode->i_mapping->a_ops = &ext2_nobh_aops;
-		else
+			inode->i_fop = &ext2_file_operations;
+		} else {
 			inode->i_mapping->a_ops = &ext2_aops;
+			inode->i_fop = &ext2_file_operations;
+		}
 	} else if (S_ISDIR(inode->i_mode)) {
 		inode->i_op = &ext2_dir_inode_operations;
 		inode->i_fop = &ext2_dir_operations;
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 3176b3d3ffa8..c5513953c825 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -34,6 +34,7 @@
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
+#include "xip.h"
 
 /*
  * Couple of helper functions - make the code slightly cleaner.
@@ -127,11 +128,16 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, st
 	int err = PTR_ERR(inode);
 	if (!IS_ERR(inode)) {
 		inode->i_op = &ext2_file_inode_operations;
-		inode->i_fop = &ext2_file_operations;
-		if (test_opt(inode->i_sb, NOBH))
+		if (ext2_use_xip(inode->i_sb)) {
+			inode->i_mapping->a_ops = &ext2_aops_xip;
+			inode->i_fop = &ext2_xip_file_operations;
+		} else if (test_opt(inode->i_sb, NOBH)) {
 			inode->i_mapping->a_ops = &ext2_nobh_aops;
-		else
+			inode->i_fop = &ext2_file_operations;
+		} else {
 			inode->i_mapping->a_ops = &ext2_aops;
+			inode->i_fop = &ext2_file_operations;
+		}
 		mark_inode_dirty(inode);
 		err = ext2_add_nondir(dentry, inode);
 	}
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 661c3d98d946..876e391f2871 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -31,6 +31,7 @@
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
+#include "xip.h"
 
 static void ext2_sync_super(struct super_block *sb,
 			    struct ext2_super_block *es);
@@ -257,7 +258,7 @@ enum {
 	Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
 	Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
 	Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_nobh,
-	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
+	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_xip,
 	Opt_ignore, Opt_err,
 };
 
@@ -286,6 +287,7 @@ static match_table_t tokens = {
 	{Opt_nouser_xattr, "nouser_xattr"},
 	{Opt_acl, "acl"},
 	{Opt_noacl, "noacl"},
+	{Opt_xip, "xip"},
 	{Opt_ignore, "grpquota"},
 	{Opt_ignore, "noquota"},
 	{Opt_ignore, "quota"},
@@ -397,6 +399,13 @@ static int parse_options (char * options,
 			printk("EXT2 (no)acl options not supported\n");
 			break;
 #endif
+		case Opt_xip:
+#ifdef CONFIG_EXT2_FS_XIP
+			set_opt (sbi->s_mount_opt, XIP);
+#else
+			printk("EXT2 xip option not supported\n");
+#endif
+			break;
 		case Opt_ignore:
 			break;
 		default:
@@ -640,6 +649,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 		((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ?
 		 MS_POSIXACL : 0);
 
+	ext2_xip_verify_sb(sb); /* see if bdev supports xip, unset
+				    EXT2_MOUNT_XIP if not */
+
 	if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV &&
 	    (EXT2_HAS_COMPAT_FEATURE(sb, ~0U) ||
 	     EXT2_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
@@ -668,6 +680,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 
 	blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
 
+	if ((ext2_use_xip(sb)) && ((blocksize != PAGE_SIZE) ||
+				  (sb->s_blocksize != blocksize))) {
+		if (!silent)
+			printk("XIP: Unsupported blocksize\n");
+		goto failed_mount;
+	}
+
 	/* If the blocksize doesn't match, re-read the thing.. */
 	if (sb->s_blocksize != blocksize) {
 		brelse(bh);
@@ -916,6 +935,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
 {
 	struct ext2_sb_info * sbi = EXT2_SB(sb);
 	struct ext2_super_block * es;
+	unsigned long old_mount_opt = sbi->s_mount_opt;
 
 	/*
 	 * Allow the "check" option to be passed as a remount option.
@@ -927,6 +947,11 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
 		((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
 
 	es = sbi->s_es;
+	if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) !=
+	    (old_mount_opt & EXT2_MOUNT_XIP)) &&
+	    invalidate_inodes(sb))
+		ext2_warning(sb, __FUNCTION__, "busy inodes while remounting "\
+			     "xip remain in cache (no functional problem)");
 	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
 		return 0;
 	if (*flags & MS_RDONLY) {
diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c
new file mode 100644
index 000000000000..d44431d1a338
--- /dev/null
+++ b/fs/ext2/xip.c
@@ -0,0 +1,80 @@
+/*
+ *  linux/fs/ext2/xip.c
+ *
+ * Copyright (C) 2005 IBM Corporation
+ * Author: Carsten Otte (cotte@de.ibm.com)
+ */
+
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/genhd.h>
+#include <linux/buffer_head.h>
+#include <linux/ext2_fs_sb.h>
+#include <linux/ext2_fs.h>
+#include "ext2.h"
+#include "xip.h"
+
+static inline int
+__inode_direct_access(struct inode *inode, sector_t sector, unsigned long *data) {
+	BUG_ON(!inode->i_sb->s_bdev->bd_disk->fops->direct_access);
+	return inode->i_sb->s_bdev->bd_disk->fops
+		->direct_access(inode->i_sb->s_bdev,sector,data);
+}
+
+int
+ext2_clear_xip_target(struct inode *inode, int block) {
+	sector_t sector = block*(PAGE_SIZE/512);
+	unsigned long data;
+	int rc;
+
+	rc = __inode_direct_access(inode, sector, &data);
+	if (rc)
+		return rc;
+	clear_page((void*)data);
+	return 0;
+}
+
+void ext2_xip_verify_sb(struct super_block *sb)
+{
+	struct ext2_sb_info *sbi = EXT2_SB(sb);
+
+	if ((sbi->s_mount_opt & EXT2_MOUNT_XIP)) {
+		if ((sb->s_bdev == NULL) ||
+			sb->s_bdev->bd_disk == NULL ||
+			sb->s_bdev->bd_disk->fops == NULL ||
+			sb->s_bdev->bd_disk->fops->direct_access == NULL) {
+			sbi->s_mount_opt &= (~EXT2_MOUNT_XIP);
+			ext2_warning(sb, __FUNCTION__,
+				"ignoring xip option - not supported by bdev");
+		}
+	}
+}
+
+struct page*
+ext2_get_xip_page(struct address_space *mapping, sector_t blockno,
+		   int create)
+{
+	int rc;
+	unsigned long data;
+	struct buffer_head tmp;
+
+	tmp.b_state = 0;
+	tmp.b_blocknr = 0;
+	rc = ext2_get_block(mapping->host, blockno/(PAGE_SIZE/512) , &tmp,
+				create);
+	if (rc)
+		return ERR_PTR(rc);
+	if (tmp.b_blocknr == 0) {
+		/* SPARSE block */
+		BUG_ON(create);
+		return ERR_PTR(-ENODATA);
+	}
+
+	rc = __inode_direct_access
+		(mapping->host,tmp.b_blocknr*(PAGE_SIZE/512) ,&data);
+	if (rc)
+		return ERR_PTR(rc);
+
+	SetPageUptodate(virt_to_page(data));
+	return virt_to_page(data);
+}
diff --git a/fs/ext2/xip.h b/fs/ext2/xip.h
new file mode 100644
index 000000000000..aa85331d6c56
--- /dev/null
+++ b/fs/ext2/xip.h
@@ -0,0 +1,25 @@
+/*
+ *  linux/fs/ext2/xip.h
+ *
+ * Copyright (C) 2005 IBM Corporation
+ * Author: Carsten Otte (cotte@de.ibm.com)
+ */
+
+#ifdef CONFIG_EXT2_FS_XIP
+extern void ext2_xip_verify_sb (struct super_block *);
+extern int ext2_clear_xip_target (struct inode *, int);
+
+static inline int ext2_use_xip (struct super_block *sb)
+{
+	struct ext2_sb_info *sbi = EXT2_SB(sb);
+	return (sbi->s_mount_opt & EXT2_MOUNT_XIP);
+}
+struct page* ext2_get_xip_page (struct address_space *, sector_t, int);
+#define mapping_is_xip(map) unlikely(map->a_ops->get_xip_page)
+#else
+#define mapping_is_xip(map)			0
+#define ext2_xip_verify_sb(sb)			do { } while (0)
+#define ext2_use_xip(sb)			0
+#define ext2_clear_xip_target(inode, chain)	0
+#define ext2_get_xip_page			NULL
+#endif
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 638c13a26c03..3ac38266fc9e 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -393,7 +393,8 @@ ext3_acl_chmod(struct inode *inode)
 		int retries = 0;
 
 	retry:
-		handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS);
+		handle = ext3_journal_start(inode,
+				EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
 		if (IS_ERR(handle)) {
 			error = PTR_ERR(handle);
 			ext3_std_error(inode->i_sb, error);
@@ -417,12 +418,12 @@ static size_t
 ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
 			   const char *name, size_t name_len)
 {
-	const size_t size = sizeof(XATTR_NAME_ACL_ACCESS);
+	const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
 
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return 0;
 	if (list && size <= list_len)
-		memcpy(list, XATTR_NAME_ACL_ACCESS, size);
+		memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
 	return size;
 }
 
@@ -430,12 +431,12 @@ static size_t
 ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
 			    const char *name, size_t name_len)
 {
-	const size_t size = sizeof(XATTR_NAME_ACL_DEFAULT);
+	const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
 
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return 0;
 	if (list && size <= list_len)
-		memcpy(list, XATTR_NAME_ACL_DEFAULT, size);
+		memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
 	return size;
 }
 
@@ -503,7 +504,7 @@ ext3_xattr_set_acl(struct inode *inode, int type, const void *value,
 		acl = NULL;
 
 retry:
-	handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS);
+	handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	error = ext3_set_acl(handle, inode, type, acl);
@@ -535,14 +536,14 @@ ext3_xattr_set_acl_default(struct inode *inode, const char *name,
 }
 
 struct xattr_handler ext3_xattr_acl_access_handler = {
-	.prefix	= XATTR_NAME_ACL_ACCESS,
+	.prefix	= POSIX_ACL_XATTR_ACCESS,
 	.list	= ext3_xattr_list_acl_access,
 	.get	= ext3_xattr_get_acl_access,
 	.set	= ext3_xattr_set_acl_access,
 };
 
 struct xattr_handler ext3_xattr_acl_default_handler = {
-	.prefix	= XATTR_NAME_ACL_DEFAULT,
+	.prefix	= POSIX_ACL_XATTR_DEFAULT,
 	.list	= ext3_xattr_list_acl_default,
 	.get	= ext3_xattr_get_acl_default,
 	.set	= ext3_xattr_set_acl_default,
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 98af0c0d0ba9..92d50b53a933 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -4,7 +4,7 @@
   (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
 */
 
-#include <linux/xattr_acl.h>
+#include <linux/posix_acl_xattr.h>
 
 #define EXT3_ACL_VERSION	0x0001
 
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index ccd632fcc6d8..e463dca008e4 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -749,24 +749,24 @@ fail_access:
  * 	to find a free region that is of my size and has not
  * 	been reserved.
  *
- *	on succeed, it returns the reservation window to be appended to.
- *	failed, return NULL.
  */
-static struct ext3_reserve_window_node *find_next_reservable_window(
+static int find_next_reservable_window(
 				struct ext3_reserve_window_node *search_head,
-				unsigned long size, int *start_block,
+				struct ext3_reserve_window_node *my_rsv,
+				struct super_block * sb, int start_block,
 				int last_block)
 {
 	struct rb_node *next;
 	struct ext3_reserve_window_node *rsv, *prev;
 	int cur;
+	int size = my_rsv->rsv_goal_size;
 
 	/* TODO: make the start of the reservation window byte-aligned */
 	/* cur = *start_block & ~7;*/
-	cur = *start_block;
+	cur = start_block;
 	rsv = search_head;
 	if (!rsv)
-		return NULL;
+		return -1;
 
 	while (1) {
 		if (cur <= rsv->rsv_end)
@@ -782,11 +782,11 @@ static struct ext3_reserve_window_node *find_next_reservable_window(
 		 * space with expected-size (or more)...
 		 */
 		if (cur > last_block)
-			return NULL;		/* fail */
+			return -1;		/* fail */
 
 		prev = rsv;
 		next = rb_next(&rsv->rsv_node);
-		rsv = list_entry(next, struct ext3_reserve_window_node, rsv_node);
+		rsv = list_entry(next,struct ext3_reserve_window_node,rsv_node);
 
 		/*
 		 * Reached the last reservation, we can just append to the
@@ -813,8 +813,25 @@ static struct ext3_reserve_window_node *find_next_reservable_window(
 	 * return the reservation window that we could append to.
 	 * succeed.
 	 */
-	*start_block = cur;
-	return prev;
+
+	if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window)))
+		rsv_window_remove(sb, my_rsv);
+
+	/*
+	 * Let's book the whole avaliable window for now.  We will check the
+	 * disk bitmap later and then, if there are free blocks then we adjust
+	 * the window size if it's larger than requested.
+	 * Otherwise, we will remove this node from the tree next time
+	 * call find_next_reservable_window.
+	 */
+	my_rsv->rsv_start = cur;
+	my_rsv->rsv_end = cur + size - 1;
+	my_rsv->rsv_alloc_hit = 0;
+
+	if (prev != my_rsv)
+		ext3_rsv_window_add(sb, my_rsv);
+
+	return 0;
 }
 
 /**
@@ -852,6 +869,7 @@ static struct ext3_reserve_window_node *find_next_reservable_window(
  *	@sb: the super block
  *	@group: the group we are trying to allocate in
  *	@bitmap_bh: the block group block bitmap
+ *
  */
 static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
 		int goal, struct super_block *sb,
@@ -860,10 +878,10 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
 	struct ext3_reserve_window_node *search_head;
 	int group_first_block, group_end_block, start_block;
 	int first_free_block;
-	int reservable_space_start;
-	struct ext3_reserve_window_node *prev_rsv;
 	struct rb_root *fs_rsv_root = &EXT3_SB(sb)->s_rsv_window_root;
 	unsigned long size;
+	int ret;
+	spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
 
 	group_first_block = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) +
 				group * EXT3_BLOCKS_PER_GROUP(sb);
@@ -875,6 +893,7 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
 		start_block = goal + group_first_block;
 
 	size = my_rsv->rsv_goal_size;
+
 	if (!rsv_is_empty(&my_rsv->rsv_window)) {
 		/*
 		 * if the old reservation is cross group boundary
@@ -908,6 +927,8 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
 			my_rsv->rsv_goal_size= size;
 		}
 	}
+
+	spin_lock(rsv_lock);
 	/*
 	 * shift the search start to the window near the goal block
 	 */
@@ -921,11 +942,16 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
 	 * need to check the bitmap after we found a reservable window.
 	 */
 retry:
-	prev_rsv = find_next_reservable_window(search_head, size,
-						&start_block, group_end_block);
-	if (prev_rsv == NULL)
-		goto failed;
-	reservable_space_start = start_block;
+	ret = find_next_reservable_window(search_head, my_rsv, sb,
+						start_block, group_end_block);
+
+	if (ret == -1) {
+		if (!rsv_is_empty(&my_rsv->rsv_window))
+			rsv_window_remove(sb, my_rsv);
+		spin_unlock(rsv_lock);
+		return -1;
+	}
+
 	/*
 	 * On success, find_next_reservable_window() returns the
 	 * reservation window where there is a reservable space after it.
@@ -937,8 +963,9 @@ retry:
 	 * block. Search start from the start block of the reservable space
 	 * we just found.
 	 */
+	spin_unlock(rsv_lock);
 	first_free_block = bitmap_search_next_usable_block(
-			reservable_space_start - group_first_block,
+			my_rsv->rsv_start - group_first_block,
 			bitmap_bh, group_end_block - group_first_block + 1);
 
 	if (first_free_block < 0) {
@@ -946,54 +973,29 @@ retry:
 		 * no free block left on the bitmap, no point
 		 * to reserve the space. return failed.
 		 */
-		goto failed;
+		spin_lock(rsv_lock);
+		if (!rsv_is_empty(&my_rsv->rsv_window))
+			rsv_window_remove(sb, my_rsv);
+		spin_unlock(rsv_lock);
+		return -1;		/* failed */
 	}
+
 	start_block = first_free_block + group_first_block;
 	/*
 	 * check if the first free block is within the
-	 * free space we just found
+	 * free space we just reserved
 	 */
-	if ((start_block >= reservable_space_start) &&
-	  (start_block < reservable_space_start + size))
-		goto found_rsv_window;
+	if (start_block >= my_rsv->rsv_start && start_block < my_rsv->rsv_end)
+		return 0;		/* success */
 	/*
 	 * if the first free bit we found is out of the reservable space
-	 * this means there is no free block on the reservable space
-	 * we should continue search for next reservable space,
+	 * continue search for next reservable space,
 	 * start from where the free block is,
 	 * we also shift the list head to where we stopped last time
 	 */
-	search_head = prev_rsv;
+	search_head = my_rsv;
+	spin_lock(rsv_lock);
 	goto retry;
-
-found_rsv_window:
-	/*
-	 * great! the reservable space contains some free blocks.
-	 * if the search returns that we should add the new
-	 * window just next to where the old window, we don't
- 	 * need to remove the old window first then add it to the
-	 * same place, just update the new start and new end.
-	 */
-	if (my_rsv != prev_rsv)  {
-		if (!rsv_is_empty(&my_rsv->rsv_window))
-			rsv_window_remove(sb, my_rsv);
-	}
-	my_rsv->rsv_start = reservable_space_start;
-	my_rsv->rsv_end = my_rsv->rsv_start + size - 1;
-	my_rsv->rsv_alloc_hit = 0;
-	if (my_rsv != prev_rsv)  {
-		ext3_rsv_window_add(sb, my_rsv);
-	}
-	return 0;		/* succeed */
-failed:
-	/*
-	 * failed to find a new reservation window in the current
-	 * group, remove the current(stale) reservation window
-	 * if there is any
-	 */
-	if (!rsv_is_empty(&my_rsv->rsv_window))
-		rsv_window_remove(sb, my_rsv);
-	return -1;		/* failed */
 }
 
 /*
@@ -1023,7 +1025,6 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
 			int goal, struct ext3_reserve_window_node * my_rsv,
 			int *errp)
 {
-	spinlock_t *rsv_lock;
 	unsigned long group_first_block;
 	int ret = 0;
 	int fatal;
@@ -1052,7 +1053,6 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
 		ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal, NULL);
 		goto out;
 	}
-	rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
 	/*
 	 * goal is a group relative block number (if there is a goal)
 	 * 0 < goal < EXT3_BLOCKS_PER_GROUP(sb)
@@ -1078,30 +1078,21 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
 	 * then we could go to allocate from the reservation window directly.
 	 */
 	while (1) {
-		struct ext3_reserve_window rsv_copy;
-
-		rsv_copy._rsv_start = my_rsv->rsv_start;
-		rsv_copy._rsv_end = my_rsv->rsv_end;
-
-		if (rsv_is_empty(&rsv_copy) || (ret < 0) ||
-			!goal_in_my_reservation(&rsv_copy, goal, group, sb)) {
-			spin_lock(rsv_lock);
+		if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) ||
+			!goal_in_my_reservation(&my_rsv->rsv_window, goal, group, sb)) {
 			ret = alloc_new_reservation(my_rsv, goal, sb,
 							group, bitmap_bh);
-			rsv_copy._rsv_start = my_rsv->rsv_start;
-			rsv_copy._rsv_end = my_rsv->rsv_end;
-			spin_unlock(rsv_lock);
 			if (ret < 0)
 				break;			/* failed */
 
-			if (!goal_in_my_reservation(&rsv_copy, goal, group, sb))
+			if (!goal_in_my_reservation(&my_rsv->rsv_window, goal, group, sb))
 				goal = -1;
 		}
-		if ((rsv_copy._rsv_start >= group_first_block + EXT3_BLOCKS_PER_GROUP(sb))
-		    || (rsv_copy._rsv_end < group_first_block))
+		if ((my_rsv->rsv_start >= group_first_block + EXT3_BLOCKS_PER_GROUP(sb))
+		    || (my_rsv->rsv_end < group_first_block))
 			BUG();
 		ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal,
-					   &rsv_copy);
+					   &my_rsv->rsv_window);
 		if (ret >= 0) {
 			my_rsv->rsv_alloc_hit++;
 			break;				/* succeed */
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 5ad8cf0292df..98e78345ead9 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -36,7 +36,11 @@ static int ext3_release_file (struct inode * inode, struct file * filp)
 	/* if we are the last writer on the inode, drop the block reservation */
 	if ((filp->f_mode & FMODE_WRITE) &&
 			(atomic_read(&inode->i_writecount) == 1))
+	{
+		down(&EXT3_I(inode)->truncate_sem);
 		ext3_discard_reservation(inode);
+		up(&EXT3_I(inode)->truncate_sem);
+	}
 	if (is_dx(inode) && filp->private_data)
 		ext3_htree_free_dir_info(filp->private_data);
 
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 0d5fa73b18dc..0b2db4f618cb 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -128,7 +128,7 @@ static unsigned long blocks_for_truncate(struct inode *inode)
 	if (needed > EXT3_MAX_TRANS_DATA) 
 		needed = EXT3_MAX_TRANS_DATA;
 
-	return EXT3_DATA_TRANS_BLOCKS + needed;
+	return EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + needed;
 }
 
 /* 
@@ -2763,7 +2763,8 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
 
 		/* (user+group)*(old+new) structure, inode write (sb,
 		 * inode block, ? - but truncate inode update has it) */
-		handle = ext3_journal_start(inode, 4*EXT3_QUOTA_INIT_BLOCKS+3);
+		handle = ext3_journal_start(inode, 2*(EXT3_QUOTA_INIT_BLOCKS(inode->i_sb)+
+					EXT3_QUOTA_DEL_BLOCKS(inode->i_sb))+3);
 		if (IS_ERR(handle)) {
 			error = PTR_ERR(handle);
 			goto err_out;
@@ -2861,7 +2862,7 @@ static int ext3_writepage_trans_blocks(struct inode *inode)
 #ifdef CONFIG_QUOTA
 	/* We know that structure was already allocated during DQUOT_INIT so
 	 * we will be updating only the data blocks + inodes */
-	ret += 2*EXT3_QUOTA_TRANS_BLOCKS;
+	ret += 2*EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb);
 #endif
 
 	return ret;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 79742d824a0a..50378d8ff84b 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -932,8 +932,16 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
 	struct inode *dir = dentry->d_parent->d_inode;
 
 	sb = dir->i_sb;
-	if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err)))
-		return NULL;
+	/* NFS may look up ".." - look at dx_root directory block */
+	if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){
+		if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err)))
+			return NULL;
+	} else {
+		frame = frames;
+		frame->bh = NULL;			/* for dx_release() */
+		frame->at = (struct dx_entry *)frames;	/* hack for zero entry*/
+		dx_set_block(frame->at, 0);		/* dx_root block is 0 */
+	}
 	hash = hinfo.hash;
 	do {
 		block = dx_get_block(frame->at);
@@ -1637,9 +1645,9 @@ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
 	int err, retries = 0;
 
 retry:
-	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
+	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-					2*EXT3_QUOTA_INIT_BLOCKS);
+					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -1671,9 +1679,9 @@ static int ext3_mknod (struct inode * dir, struct dentry *dentry,
 		return -EINVAL;
 
 retry:
-	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
+	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 			 		EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-					2*EXT3_QUOTA_INIT_BLOCKS);
+					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -1707,9 +1715,9 @@ static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode)
 		return -EMLINK;
 
 retry:
-	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
+	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-					2*EXT3_QUOTA_INIT_BLOCKS);
+					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -1998,7 +2006,7 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
 	/* Initialize quotas before so that eventual writes go in
 	 * separate transaction */
 	DQUOT_INIT(dentry->d_inode);
-	handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
+	handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -2057,7 +2065,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
 	/* Initialize quotas before so that eventual writes go
 	 * in separate transaction */
 	DQUOT_INIT(dentry->d_inode);
-	handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
+	handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -2112,9 +2120,9 @@ static int ext3_symlink (struct inode * dir,
 		return -ENAMETOOLONG;
 
 retry:
-	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
+	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 			 		EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
-					2*EXT3_QUOTA_INIT_BLOCKS);
+					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -2166,7 +2174,7 @@ static int ext3_link (struct dentry * old_dentry,
 		return -EMLINK;
 
 retry:
-	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
+	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS);
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
@@ -2208,7 +2216,8 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
 	 * in separate transaction */
 	if (new_dentry->d_inode)
 		DQUOT_INIT(new_dentry->d_inode);
-	handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS +
+	handle = ext3_journal_start(old_dir, 2 *
+					EXT3_DATA_TRANS_BLOCKS(old_dir->i_sb) +
 			 		EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2);
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 981ccb233ef5..a6d1779d7de4 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -589,7 +589,7 @@ enum {
 	Opt_commit, Opt_journal_update, Opt_journal_inum,
 	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
 	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
-	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0,
+	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
 	Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
 };
 
@@ -634,10 +634,10 @@ static match_table_t tokens = {
 	{Opt_grpjquota, "grpjquota=%s"},
 	{Opt_jqfmt_vfsold, "jqfmt=vfsold"},
 	{Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
-	{Opt_ignore, "grpquota"},
-	{Opt_ignore, "noquota"},
-	{Opt_ignore, "quota"},
-	{Opt_ignore, "usrquota"},
+	{Opt_quota, "grpquota"},
+	{Opt_noquota, "noquota"},
+	{Opt_quota, "quota"},
+	{Opt_quota, "usrquota"},
 	{Opt_barrier, "barrier=%u"},
 	{Opt_err, NULL},
 	{Opt_resize, "resize"},
@@ -876,6 +876,7 @@ set_qf_name:
 				sbi->s_qf_names[qtype] = NULL;
 				return 0;
 			}
+			set_opt(sbi->s_mount_opt, QUOTA);
 			break;
 		case Opt_offusrjquota:
 			qtype = USRQUOTA;
@@ -898,6 +899,17 @@ clear_qf_name:
 		case Opt_jqfmt_vfsv0:
 			sbi->s_jquota_fmt = QFMT_VFS_V0;
 			break;
+		case Opt_quota:
+			set_opt(sbi->s_mount_opt, QUOTA);
+			break;
+		case Opt_noquota:
+			if (sb_any_quota_enabled(sb)) {
+				printk(KERN_ERR "EXT3-fs: Cannot change quota "
+					"options when quota turned on.\n");
+				return 0;
+			}
+			clear_opt(sbi->s_mount_opt, QUOTA);
+			break;
 #else
 		case Opt_usrjquota:
 		case Opt_grpjquota:
@@ -909,6 +921,9 @@ clear_qf_name:
 				"EXT3-fs: journalled quota options not "
 				"supported.\n");
 			break;
+		case Opt_quota:
+		case Opt_noquota:
+			break;
 #endif
 		case Opt_abort:
 			set_opt(sbi->s_mount_opt, ABORT);
@@ -929,7 +944,8 @@ clear_qf_name:
 					"for remount\n");
 				return 0;
 			}
-			match_int(&args[0], &option);
+			if (match_int(&args[0], &option) != 0)
+				return 0;
 			*n_blocks_count = option;
 			break;
 		case Opt_nobh:
@@ -2238,7 +2254,7 @@ static int ext3_dquot_initialize(struct inode *inode, int type)
 	int ret, err;
 
 	/* We may create quota structure so we need to reserve enough blocks */
-	handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS);
+	handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS(inode->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	ret = dquot_initialize(inode, type);
@@ -2254,7 +2270,7 @@ static int ext3_dquot_drop(struct inode *inode)
 	int ret, err;
 
 	/* We may delete quota structure so we need to reserve enough blocks */
-	handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS);
+	handle = ext3_journal_start(inode, 2*EXT3_QUOTA_DEL_BLOCKS(inode->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	ret = dquot_drop(inode);
@@ -2272,7 +2288,7 @@ static int ext3_write_dquot(struct dquot *dquot)
 
 	inode = dquot_to_inode(dquot);
 	handle = ext3_journal_start(inode,
-					EXT3_QUOTA_TRANS_BLOCKS);
+					EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	ret = dquot_commit(dquot);
@@ -2288,7 +2304,7 @@ static int ext3_acquire_dquot(struct dquot *dquot)
 	handle_t *handle;
 
 	handle = ext3_journal_start(dquot_to_inode(dquot),
-					EXT3_QUOTA_INIT_BLOCKS);
+					EXT3_QUOTA_INIT_BLOCKS(dquot->dq_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	ret = dquot_acquire(dquot);
@@ -2304,7 +2320,7 @@ static int ext3_release_dquot(struct dquot *dquot)
 	handle_t *handle;
 
 	handle = ext3_journal_start(dquot_to_inode(dquot),
-					EXT3_QUOTA_INIT_BLOCKS);
+					EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	ret = dquot_release(dquot);
@@ -2348,22 +2364,8 @@ static int ext3_write_info(struct super_block *sb, int type)
  */
 static int ext3_quota_on_mount(struct super_block *sb, int type)
 {
-	int err;
-	struct dentry *dentry;
-	struct qstr name = { .name = EXT3_SB(sb)->s_qf_names[type],
-			     .hash = 0,
-			     .len = strlen(EXT3_SB(sb)->s_qf_names[type])};
-
-	dentry = lookup_hash(&name, sb->s_root);
-	if (IS_ERR(dentry))
-		return PTR_ERR(dentry);
-	err = vfs_quota_on_mount(type, EXT3_SB(sb)->s_jquota_fmt, dentry);
-	/* Now invalidate and put the dentry - quota got its own reference
-	 * to inode and dentry has at least wrong hash so we had better
-	 * throw it away */
-	d_invalidate(dentry);
-	dput(dentry);
-	return err;
+	return vfs_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type],
+			EXT3_SB(sb)->s_jquota_fmt, type);
 }
 
 /*
@@ -2375,6 +2377,8 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
 	int err;
 	struct nameidata nd;
 
+	if (!test_opt(sb, QUOTA))
+		return -EINVAL;
 	/* Not journalling quota? */
 	if (!EXT3_SB(sb)->s_qf_names[USRQUOTA] &&
 	    !EXT3_SB(sb)->s_qf_names[GRPQUOTA])
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 4cbc6d0212d3..3f9dfa643b19 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -1044,7 +1044,7 @@ ext3_xattr_set(struct inode *inode, int name_index, const char *name,
 	int error, retries = 0;
 
 retry:
-	handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS);
+	handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
 	if (IS_ERR(handle)) {
 		error = PTR_ERR(handle);
 	} else {
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 7c52e465a619..77c24fcf712a 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -56,7 +56,7 @@ int __init fat_cache_init(void)
 	return 0;
 }
 
-void __exit fat_cache_destroy(void)
+void fat_cache_destroy(void)
 {
 	if (kmem_cache_destroy(fat_cache_cachep))
 		printk(KERN_INFO "fat_cache: not all structures were freed\n");
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 8ccee8415488..96ae85b67eba 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1327,16 +1327,25 @@ out_fail:
 EXPORT_SYMBOL(fat_fill_super);
 
 int __init fat_cache_init(void);
-void __exit fat_cache_destroy(void);
+void fat_cache_destroy(void);
 
 static int __init init_fat_fs(void)
 {
-	int ret;
+	int err;
 
-	ret = fat_cache_init();
-	if (ret < 0)
-		return ret;
-	return fat_init_inodecache();
+	err = fat_cache_init();
+	if (err)
+		return err;
+
+	err = fat_init_inodecache();
+	if (err)
+		goto failed;
+
+	return 0;
+
+failed:
+	fat_cache_destroy();
+	return err;
 }
 
 static void __exit exit_fat_fs(void)
diff --git a/fs/file_table.c b/fs/file_table.c
index 03d83cb686b1..fa7849fae134 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -63,42 +63,45 @@ static inline void file_free(struct file *f)
  */
 struct file *get_empty_filp(void)
 {
-static int old_max;
+	static int old_max;
 	struct file * f;
 
 	/*
 	 * Privileged users can go above max_files
 	 */
-	if (files_stat.nr_files < files_stat.max_files ||
-				capable(CAP_SYS_ADMIN)) {
-		f = kmem_cache_alloc(filp_cachep, GFP_KERNEL);
-		if (f) {
-			memset(f, 0, sizeof(*f));
-			if (security_file_alloc(f)) {
-				file_free(f);
-				goto fail;
-			}
-			eventpoll_init_file(f);
-			atomic_set(&f->f_count, 1);
-			f->f_uid = current->fsuid;
-			f->f_gid = current->fsgid;
-			rwlock_init(&f->f_owner.lock);
-			/* f->f_version: 0 */
-			INIT_LIST_HEAD(&f->f_list);
-			f->f_maxcount = INT_MAX;
-			return f;
-		}
-	}
-
+	if (files_stat.nr_files >= files_stat.max_files &&
+				!capable(CAP_SYS_ADMIN))
+		goto over;
+
+	f = kmem_cache_alloc(filp_cachep, GFP_KERNEL);
+	if (f == NULL)
+		goto fail;
+
+	memset(f, 0, sizeof(*f));
+	if (security_file_alloc(f))
+		goto fail_sec;
+
+	eventpoll_init_file(f);
+	atomic_set(&f->f_count, 1);
+	f->f_uid = current->fsuid;
+	f->f_gid = current->fsgid;
+	rwlock_init(&f->f_owner.lock);
+	/* f->f_version: 0 */
+	INIT_LIST_HEAD(&f->f_list);
+	f->f_maxcount = INT_MAX;
+	return f;
+
+over:
 	/* Ran out of filps - report that */
-	if (files_stat.max_files >= old_max) {
+	if (files_stat.nr_files > old_max) {
 		printk(KERN_INFO "VFS: file-max limit %d reached\n",
 					files_stat.max_files);
-		old_max = files_stat.max_files;
-	} else {
-		/* Big problems... */
-		printk(KERN_WARNING "VFS: filp allocation failed\n");
+		old_max = files_stat.nr_files;
 	}
+	goto fail;
+
+fail_sec:
+	file_free(f);
 fail:
 	return NULL;
 }
diff --git a/fs/freevxfs/vxfs.h b/fs/freevxfs/vxfs.h
index 8da0252642a4..583bd78086d8 100644
--- a/fs/freevxfs/vxfs.h
+++ b/fs/freevxfs/vxfs.h
@@ -37,7 +37,6 @@
  * superblocks of the Veritas Filesystem.
  */
 #include <linux/types.h>
-#include "vxfs_kcompat.h"
 
 
 /*
diff --git a/fs/freevxfs/vxfs_bmap.c b/fs/freevxfs/vxfs_bmap.c
index bc4b57da306a..d3f6b2835bc8 100644
--- a/fs/freevxfs/vxfs_bmap.c
+++ b/fs/freevxfs/vxfs_bmap.c
@@ -101,7 +101,7 @@ vxfs_bmap_ext4(struct inode *ip, long bn)
 	return 0;
 
 fail_size:
-	printk("vxfs: indirect extent to big!\n");
+	printk("vxfs: indirect extent too big!\n");
 fail_buf:
 	return 0;
 }
diff --git a/fs/freevxfs/vxfs_fshead.c b/fs/freevxfs/vxfs_fshead.c
index 05b19f70bf97..6dee109aeea4 100644
--- a/fs/freevxfs/vxfs_fshead.c
+++ b/fs/freevxfs/vxfs_fshead.c
@@ -78,17 +78,18 @@ vxfs_getfsh(struct inode *ip, int which)
 	struct buffer_head		*bp;
 
 	bp = vxfs_bread(ip, which);
-	if (buffer_mapped(bp)) {
+	if (bp) {
 		struct vxfs_fsh		*fhp;
 
-		if (!(fhp = kmalloc(sizeof(*fhp), SLAB_KERNEL)))
-			return NULL;
+		if (!(fhp = kmalloc(sizeof(*fhp), GFP_KERNEL)))
+			goto out;
 		memcpy(fhp, bp->b_data, sizeof(*fhp));
 
-		brelse(bp);
+		put_bh(bp);
 		return (fhp);
 	}
-
+out:
+	brelse(bp);
 	return NULL;
 }
 
diff --git a/fs/freevxfs/vxfs_kcompat.h b/fs/freevxfs/vxfs_kcompat.h
deleted file mode 100644
index 342a4cc860f4..000000000000
--- a/fs/freevxfs/vxfs_kcompat.h
+++ /dev/null
@@ -1,49 +0,0 @@
-#ifndef _VXFS_KCOMPAT_H
-#define _VXFS_KCOMPAT_H
-
-#include <linux/version.h>
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-
-#include <linux/blkdev.h>
-
-typedef long sector_t;
-
-/* From include/linux/fs.h (Linux 2.5.2-pre3)  */
-static inline struct buffer_head * sb_bread(struct super_block *sb, int block)
-{
-	return bread(sb->s_dev, block, sb->s_blocksize);
-}
-
-/* Dito.  */
-static inline void map_bh(struct buffer_head *bh, struct super_block *sb, int block)
-{
-	bh->b_state |= 1 << BH_Mapped;
-	bh->b_dev = sb->s_dev;
-	bh->b_blocknr = block;
-}
-
-/* From fs/block_dev.c (Linux 2.5.2-pre2)  */
-static inline int sb_set_blocksize(struct super_block *sb, int size)
-{
-	int bits;
-	if (set_blocksize(sb->s_dev, size) < 0)
-		return 0;
-	sb->s_blocksize = size;
-	for (bits = 9, size >>= 9; size >>= 1; bits++)
-		;
-	sb->s_blocksize_bits = bits;
-	return sb->s_blocksize;
-}
-
-/* Dito.  */
-static inline int sb_min_blocksize(struct super_block *sb, int size)
-{
-	int minsize = get_hardsect_size(sb->s_dev);
-	if (size < minsize)
-		size = minsize;
-	return sb_set_blocksize(sb, size);
-}
-
-#endif /* Kernel 2.4 */
-#endif /* _VXFS_KCOMPAT_H */
diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c
index 506ae251d2c0..554eb455722c 100644
--- a/fs/freevxfs/vxfs_lookup.c
+++ b/fs/freevxfs/vxfs_lookup.c
@@ -61,13 +61,13 @@ struct file_operations vxfs_dir_operations = {
 };
 
  
-static __inline__ u_long
+static inline u_long
 dir_pages(struct inode *inode)
 {
 	return (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 }
  
-static __inline__ u_long
+static inline u_long
 dir_blocks(struct inode *ip)
 {
 	u_long			bsize = ip->i_sb->s_blocksize;
@@ -79,7 +79,7 @@ dir_blocks(struct inode *ip)
  *
  * len <= VXFS_NAMELEN and de != NULL are guaranteed by caller.
  */
-static __inline__ int
+static inline int
 vxfs_match(int len, const char * const name, struct vxfs_direct *de)
 {
 	if (len != de->d_namelen)
@@ -89,7 +89,7 @@ vxfs_match(int len, const char * const name, struct vxfs_direct *de)
 	return !memcmp(name, de->d_name, len);
 }
 
-static __inline__ struct vxfs_direct *
+static inline struct vxfs_direct *
 vxfs_next_entry(struct vxfs_direct *de)
 {
 	return ((struct vxfs_direct *)((char*)de + de->d_reclen));
diff --git a/fs/freevxfs/vxfs_olt.c b/fs/freevxfs/vxfs_olt.c
index 7a204e31aad9..133476201d84 100644
--- a/fs/freevxfs/vxfs_olt.c
+++ b/fs/freevxfs/vxfs_olt.c
@@ -38,7 +38,7 @@
 #include "vxfs_olt.h"
 
 
-static __inline__ void
+static inline void
 vxfs_get_fshead(struct vxfs_oltfshead *fshp, struct vxfs_sb_info *infp)
 {
 	if (infp->vsi_fshino)
@@ -46,7 +46,7 @@ vxfs_get_fshead(struct vxfs_oltfshead *fshp, struct vxfs_sb_info *infp)
 	infp->vsi_fshino = fshp->olt_fsino[0];
 }
 
-static __inline__ void
+static inline void
 vxfs_get_ilist(struct vxfs_oltilist *ilistp, struct vxfs_sb_info *infp)
 {
 	if (infp->vsi_iext)
@@ -54,7 +54,7 @@ vxfs_get_ilist(struct vxfs_oltilist *ilistp, struct vxfs_sb_info *infp)
 	infp->vsi_iext = ilistp->olt_iext[0]; 
 }
 
-static __inline__ u_long
+static inline u_long
 vxfs_oblock(struct super_block *sbp, daddr_t block, u_long bsize)
 {
 	if (sbp->s_blocksize % bsize)
@@ -104,8 +104,8 @@ vxfs_read_olt(struct super_block *sbp, u_long bsize)
 		goto fail;
 	}
 
-	oaddr = (char *)bp->b_data + op->olt_size;
-	eaddr = (char *)bp->b_data + (infp->vsi_oltsize * sbp->s_blocksize);
+	oaddr = bp->b_data + op->olt_size;
+	eaddr = bp->b_data + (infp->vsi_oltsize * sbp->s_blocksize);
 
 	while (oaddr < eaddr) {
 		struct vxfs_oltcommon	*ocp =
diff --git a/fs/freevxfs/vxfs_subr.c b/fs/freevxfs/vxfs_subr.c
index 5e305612054a..50aae77651b2 100644
--- a/fs/freevxfs/vxfs_subr.c
+++ b/fs/freevxfs/vxfs_subr.c
@@ -36,7 +36,6 @@
 #include <linux/slab.h>
 #include <linux/pagemap.h>
 
-#include "vxfs_kcompat.h"
 #include "vxfs_extern.h"
 
 
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index 0ae2c7b8182a..27f66d3e8a04 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -155,12 +155,11 @@ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent)
 
 	sbp->s_flags |= MS_RDONLY;
 
-	infp = kmalloc(sizeof(*infp), GFP_KERNEL);
+	infp = kcalloc(1, sizeof(*infp), GFP_KERNEL);
 	if (!infp) {
 		printk(KERN_WARNING "vxfs: unable to allocate incore superblock\n");
 		return -ENOMEM;
 	}
-	memset(infp, 0, sizeof(*infp));
 
 	bsize = sb_min_blocksize(sbp, BLOCK_SIZE);
 	if (!bsize) {
@@ -196,7 +195,7 @@ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent)
 #endif
 
 	sbp->s_magic = rsbp->vs_magic;
-	sbp->s_fs_info = (void *)infp;
+	sbp->s_fs_info = infp;
 
 	infp->vsi_raw = rsbp;
 	infp->vsi_bp = bp;
@@ -263,7 +262,7 @@ vxfs_init(void)
 			sizeof(struct vxfs_inode_info), 0, 
 			SLAB_RECLAIM_ACCOUNT, NULL, NULL);
 	if (vxfs_inode_cachep)
-		return (register_filesystem(&vxfs_fs_type));
+		return register_filesystem(&vxfs_fs_type);
 	return -ENOMEM;
 }
 
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 8e050fa58218..e94ab398b717 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -485,32 +485,6 @@ static void set_sb_syncing(int val)
 	spin_unlock(&sb_lock);
 }
 
-/*
- * Find a superblock with inodes that need to be synced
- */
-static struct super_block *get_super_to_sync(void)
-{
-	struct super_block *sb;
-restart:
-	spin_lock(&sb_lock);
-	sb = sb_entry(super_blocks.prev);
-	for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) {
-		if (sb->s_syncing)
-			continue;
-		sb->s_syncing = 1;
-		sb->s_count++;
-		spin_unlock(&sb_lock);
-		down_read(&sb->s_umount);
-		if (!sb->s_root) {
-			drop_super(sb);
-			goto restart;
-		}
-		return sb;
-	}
-	spin_unlock(&sb_lock);
-	return NULL;
-}
-
 /**
  * sync_inodes - writes all inodes to disk
  * @wait: wait for completion
@@ -530,23 +504,39 @@ restart:
  * outstanding dirty inodes, the writeback goes block-at-a-time within the
  * filesystem's write_inode().  This is extremely slow.
  */
-void sync_inodes(int wait)
+static void __sync_inodes(int wait)
 {
 	struct super_block *sb;
 
-	set_sb_syncing(0);
-	while ((sb = get_super_to_sync()) != NULL) {
-		sync_inodes_sb(sb, 0);
-		sync_blockdev(sb->s_bdev);
-		drop_super(sb);
+	spin_lock(&sb_lock);
+restart:
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		if (sb->s_syncing)
+			continue;
+		sb->s_syncing = 1;
+		sb->s_count++;
+		spin_unlock(&sb_lock);
+		down_read(&sb->s_umount);
+		if (sb->s_root) {
+			sync_inodes_sb(sb, wait);
+			sync_blockdev(sb->s_bdev);
+		}
+		up_read(&sb->s_umount);
+		spin_lock(&sb_lock);
+		if (__put_super_and_need_restart(sb))
+			goto restart;
 	}
+	spin_unlock(&sb_lock);
+}
+
+void sync_inodes(int wait)
+{
+	set_sb_syncing(0);
+	__sync_inodes(0);
+
 	if (wait) {
 		set_sb_syncing(0);
-		while ((sb = get_super_to_sync()) != NULL) {
-			sync_inodes_sb(sb, 1);
-			sync_blockdev(sb->s_bdev);
-			drop_super(sb);
-		}
+		__sync_inodes(1);
 	}
 }
 
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 2af3338f891b..3a9b6d179cbd 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -122,6 +122,9 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 
 	start_addr = mm->free_area_cache;
 
+	if (len <= mm->cached_hole_size)
+		start_addr = TASK_UNMAPPED_BASE;
+
 full_search:
 	addr = ALIGN(start_addr, HPAGE_SIZE);
 
diff --git a/fs/inode.c b/fs/inode.c
index 801fe7f36280..1f9a3a2b89bc 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -500,7 +500,7 @@ repeat:
 			continue;
 		if (!test(inode, data))
 			continue;
-		if (inode->i_state & (I_FREEING|I_CLEAR)) {
+		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
 			__wait_on_freeing_inode(inode);
 			goto repeat;
 		}
@@ -525,7 +525,7 @@ repeat:
 			continue;
 		if (inode->i_sb != sb)
 			continue;
-		if (inode->i_state & (I_FREEING|I_CLEAR)) {
+		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
 			__wait_on_freeing_inode(inode);
 			goto repeat;
 		}
@@ -727,7 +727,7 @@ EXPORT_SYMBOL(iunique);
 struct inode *igrab(struct inode *inode)
 {
 	spin_lock(&inode_lock);
-	if (!(inode->i_state & I_FREEING))
+	if (!(inode->i_state & (I_FREEING|I_WILL_FREE)))
 		__iget(inode);
 	else
 		/*
@@ -1024,17 +1024,21 @@ static void generic_forget_inode(struct inode *inode)
 		if (!(inode->i_state & (I_DIRTY|I_LOCK)))
 			list_move(&inode->i_list, &inode_unused);
 		inodes_stat.nr_unused++;
-		spin_unlock(&inode_lock);
-		if (!sb || (sb->s_flags & MS_ACTIVE))
+		if (!sb || (sb->s_flags & MS_ACTIVE)) {
+			spin_unlock(&inode_lock);
 			return;
+		}
+		inode->i_state |= I_WILL_FREE;
+		spin_unlock(&inode_lock);
 		write_inode_now(inode, 1);
 		spin_lock(&inode_lock);
+		inode->i_state &= ~I_WILL_FREE;
 		inodes_stat.nr_unused--;
 		hlist_del_init(&inode->i_hash);
 	}
 	list_del_init(&inode->i_list);
 	list_del_init(&inode->i_sb_list);
-	inode->i_state|=I_FREEING;
+	inode->i_state |= I_FREEING;
 	inodes_stat.nr_inodes--;
 	spin_unlock(&inode_lock);
 	if (inode->i_data.nrpages)
diff --git a/fs/ioprio.c b/fs/ioprio.c
new file mode 100644
index 000000000000..663e420636d6
--- /dev/null
+++ b/fs/ioprio.c
@@ -0,0 +1,172 @@
+/*
+ * fs/ioprio.c
+ *
+ * Copyright (C) 2004 Jens Axboe <axboe@suse.de>
+ *
+ * Helper functions for setting/querying io priorities of processes. The
+ * system calls closely mimmick getpriority/setpriority, see the man page for
+ * those. The prio argument is a composite of prio class and prio data, where
+ * the data argument has meaning within that class. The standard scheduling
+ * classes have 8 distinct prio levels, with 0 being the highest prio and 7
+ * being the lowest.
+ *
+ * IOW, setting BE scheduling class with prio 2 is done ala:
+ *
+ * unsigned int prio = (IOPRIO_CLASS_BE << IOPRIO_CLASS_SHIFT) | 2;
+ *
+ * ioprio_set(PRIO_PROCESS, pid, prio);
+ *
+ * See also Documentation/block/ioprio.txt
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/ioprio.h>
+#include <linux/blkdev.h>
+
+static int set_task_ioprio(struct task_struct *task, int ioprio)
+{
+	struct io_context *ioc;
+
+	if (task->uid != current->euid &&
+	    task->uid != current->uid && !capable(CAP_SYS_NICE))
+		return -EPERM;
+
+	task_lock(task);
+
+	task->ioprio = ioprio;
+
+	ioc = task->io_context;
+	if (ioc && ioc->set_ioprio)
+		ioc->set_ioprio(ioc, ioprio);
+
+	task_unlock(task);
+	return 0;
+}
+
+asmlinkage int sys_ioprio_set(int which, int who, int ioprio)
+{
+	int class = IOPRIO_PRIO_CLASS(ioprio);
+	int data = IOPRIO_PRIO_DATA(ioprio);
+	struct task_struct *p, *g;
+	struct user_struct *user;
+	int ret;
+
+	switch (class) {
+		case IOPRIO_CLASS_RT:
+			if (!capable(CAP_SYS_ADMIN))
+				return -EPERM;
+			/* fall through, rt has prio field too */
+		case IOPRIO_CLASS_BE:
+			if (data >= IOPRIO_BE_NR || data < 0)
+				return -EINVAL;
+
+			break;
+		case IOPRIO_CLASS_IDLE:
+			break;
+		default:
+			return -EINVAL;
+	}
+
+	ret = -ESRCH;
+	read_lock_irq(&tasklist_lock);
+	switch (which) {
+		case IOPRIO_WHO_PROCESS:
+			if (!who)
+				p = current;
+			else
+				p = find_task_by_pid(who);
+			if (p)
+				ret = set_task_ioprio(p, ioprio);
+			break;
+		case IOPRIO_WHO_PGRP:
+			if (!who)
+				who = process_group(current);
+			do_each_task_pid(who, PIDTYPE_PGID, p) {
+				ret = set_task_ioprio(p, ioprio);
+				if (ret)
+					break;
+			} while_each_task_pid(who, PIDTYPE_PGID, p);
+			break;
+		case IOPRIO_WHO_USER:
+			if (!who)
+				user = current->user;
+			else
+				user = find_user(who);
+
+			if (!user)
+				break;
+
+			do_each_thread(g, p) {
+				if (p->uid != who)
+					continue;
+				ret = set_task_ioprio(p, ioprio);
+				if (ret)
+					break;
+			} while_each_thread(g, p);
+
+			if (who)
+				free_uid(user);
+			break;
+		default:
+			ret = -EINVAL;
+	}
+
+	read_unlock_irq(&tasklist_lock);
+	return ret;
+}
+
+asmlinkage int sys_ioprio_get(int which, int who)
+{
+	struct task_struct *g, *p;
+	struct user_struct *user;
+	int ret = -ESRCH;
+
+	read_lock_irq(&tasklist_lock);
+	switch (which) {
+		case IOPRIO_WHO_PROCESS:
+			if (!who)
+				p = current;
+			else
+				p = find_task_by_pid(who);
+			if (p)
+				ret = p->ioprio;
+			break;
+		case IOPRIO_WHO_PGRP:
+			if (!who)
+				who = process_group(current);
+			do_each_task_pid(who, PIDTYPE_PGID, p) {
+				if (ret == -ESRCH)
+					ret = p->ioprio;
+				else
+					ret = ioprio_best(ret, p->ioprio);
+			} while_each_task_pid(who, PIDTYPE_PGID, p);
+			break;
+		case IOPRIO_WHO_USER:
+			if (!who)
+				user = current->user;
+			else
+				user = find_user(who);
+
+			if (!user)
+				break;
+
+			do_each_thread(g, p) {
+				if (p->uid != user->uid)
+					continue;
+				if (ret == -ESRCH)
+					ret = p->ioprio;
+				else
+					ret = ioprio_best(ret, p->ioprio);
+			} while_each_thread(g, p);
+
+			if (who)
+				free_uid(user);
+			break;
+		default:
+			ret = -EINVAL;
+	}
+
+	read_unlock_irq(&tasklist_lock);
+	return ret;
+}
+
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index 6030956b894b..7901ac9f97ab 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -193,12 +193,17 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp,
 
 		/* Handle everything else.  Do name translation if there
 		   is no Rock Ridge NM field. */
-		if (sbi->s_unhide == 'n') {
-			/* Do not report hidden or associated files */
-			if (de->flags[-sbi->s_high_sierra] & 5) {
-				filp->f_pos += de_len;
-				continue;
-			}
+
+		/*
+		 * Do not report hidden files if so instructed, or associated
+		 * files unless instructed to do so
+		 */
+		if ((sbi->s_hide == 'y' &&
+				(de->flags[-sbi->s_high_sierra] & 1)) ||
+		      (sbi->s_showassoc =='n' &&
+				(de->flags[-sbi->s_high_sierra] & 4))) {
+			filp->f_pos += de_len;
+			continue;
 		}
 
 		map = 1;
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index abd7b12eeca7..1652de1b6cb9 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -28,11 +28,6 @@
 
 #define BEQUIET
 
-#ifdef LEAK_CHECK
-static int check_malloc;
-static int check_bread;
-#endif
-
 static int isofs_hashi(struct dentry *parent, struct qstr *qstr);
 static int isofs_hash(struct dentry *parent, struct qstr *qstr);
 static int isofs_dentry_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b);
@@ -55,11 +50,6 @@ static void isofs_put_super(struct super_block *sb)
 	}
 #endif
 
-#ifdef LEAK_CHECK
-	printk("Outstanding mallocs:%d, outstanding buffers: %d\n",
-	       check_malloc, check_bread);
-#endif
-
 	kfree(sbi);
 	sb->s_fs_info = NULL;
 	return;
@@ -73,7 +63,7 @@ static kmem_cache_t *isofs_inode_cachep;
 static struct inode *isofs_alloc_inode(struct super_block *sb)
 {
 	struct iso_inode_info *ei;
-	ei = (struct iso_inode_info *)kmem_cache_alloc(isofs_inode_cachep, SLAB_KERNEL);
+	ei = kmem_cache_alloc(isofs_inode_cachep, SLAB_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
@@ -84,9 +74,9 @@ static void isofs_destroy_inode(struct inode *inode)
 	kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags)
 {
-	struct iso_inode_info *ei = (struct iso_inode_info *) foo;
+	struct iso_inode_info *ei = foo;
 
 	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
 	    SLAB_CTOR_CONSTRUCTOR)
@@ -107,7 +97,8 @@ static int init_inodecache(void)
 static void destroy_inodecache(void)
 {
 	if (kmem_cache_destroy(isofs_inode_cachep))
-		printk(KERN_INFO "iso_inode_cache: not all structures were freed\n");
+		printk(KERN_INFO "iso_inode_cache: not all structures were "
+					"freed\n");
 }
 
 static int isofs_remount(struct super_block *sb, int *flags, char *data)
@@ -144,7 +135,7 @@ static struct dentry_operations isofs_dentry_ops[] = {
 	{
 		.d_hash		= isofs_hashi_ms,
 		.d_compare	= isofs_dentry_cmpi_ms,
-	}
+	},
 #endif
 };
 
@@ -153,7 +144,8 @@ struct iso9660_options{
 	char rock;
 	char joliet;
 	char cruft;
-	char unhide;
+	char hide;
+	char showassoc;
 	char nocompress;
 	unsigned char check;
 	unsigned int blocksize;
@@ -219,8 +211,8 @@ isofs_hashi_common(struct dentry *dentry, struct qstr *qstr, int ms)
 /*
  * Case insensitive compare of two isofs names.
  */
-static int
-isofs_dentry_cmpi_common(struct dentry *dentry,struct qstr *a,struct qstr *b,int ms)
+static int isofs_dentry_cmpi_common(struct dentry *dentry, struct qstr *a,
+				struct qstr *b, int ms)
 {
 	int alen, blen;
 
@@ -243,8 +235,8 @@ isofs_dentry_cmpi_common(struct dentry *dentry,struct qstr *a,struct qstr *b,int
 /*
  * Case sensitive compare of two isofs names.
  */
-static int
-isofs_dentry_cmp_common(struct dentry *dentry,struct qstr *a,struct qstr *b,int ms)
+static int isofs_dentry_cmp_common(struct dentry *dentry, struct qstr *a,
+					struct qstr *b, int ms)
 {
 	int alen, blen;
 
@@ -318,13 +310,15 @@ enum {
 	Opt_block, Opt_check_r, Opt_check_s, Opt_cruft, Opt_gid, Opt_ignore,
 	Opt_iocharset, Opt_map_a, Opt_map_n, Opt_map_o, Opt_mode, Opt_nojoliet,
 	Opt_norock, Opt_sb, Opt_session, Opt_uid, Opt_unhide, Opt_utf8, Opt_err,
-	Opt_nocompress,
+	Opt_nocompress, Opt_hide, Opt_showassoc,
 };
 
 static match_table_t tokens = {
 	{Opt_norock, "norock"},
 	{Opt_nojoliet, "nojoliet"},
 	{Opt_unhide, "unhide"},
+	{Opt_hide, "hide"},
+	{Opt_showassoc, "showassoc"},
 	{Opt_cruft, "cruft"},
 	{Opt_utf8, "utf8"},
 	{Opt_iocharset, "iocharset=%s"},
@@ -356,7 +350,7 @@ static match_table_t tokens = {
 	{Opt_err, NULL}
 };
 
-static int parse_options(char *options, struct iso9660_options * popt)
+static int parse_options(char *options, struct iso9660_options *popt)
 {
 	char *p;
 	int option;
@@ -365,7 +359,8 @@ static int parse_options(char *options, struct iso9660_options * popt)
 	popt->rock = 'y';
 	popt->joliet = 'y';
 	popt->cruft = 'n';
-	popt->unhide = 'n';
+	popt->hide = 'n';
+	popt->showassoc = 'n';
 	popt->check = 'u';		/* unset */
 	popt->nocompress = 0;
 	popt->blocksize = 1024;
@@ -398,8 +393,12 @@ static int parse_options(char *options, struct iso9660_options * popt)
 		case Opt_nojoliet:
 			popt->joliet = 'n';
 			break;
+		case Opt_hide:
+			popt->hide = 'y';
+			break;
 		case Opt_unhide:
-			popt->unhide = 'y';
+		case Opt_showassoc:
+			popt->showassoc = 'y';
 			break;
 		case Opt_cruft:
 			popt->cruft = 'y';
@@ -493,7 +492,7 @@ static int parse_options(char *options, struct iso9660_options * popt)
  */
 #define WE_OBEY_THE_WRITTEN_STANDARDS 1
 
-static unsigned int isofs_get_last_session(struct super_block *sb,s32 session )
+static unsigned int isofs_get_last_session(struct super_block *sb, s32 session)
 {
 	struct cdrom_multisession ms_info;
 	unsigned int vol_desc_start;
@@ -518,7 +517,8 @@ static unsigned int isofs_get_last_session(struct super_block *sb,s32 session )
 		printk(KERN_ERR "Invalid session number or type of track\n");
 	}
 	i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long) &ms_info);
-	if(session > 0) printk(KERN_ERR "Invalid session number\n");
+	if (session > 0)
+		printk(KERN_ERR "Invalid session number\n");
 #if 0
 	printk("isofs.inode: CDROMMULTISESSION: rc=%d\n",i);
 	if (i==0) {
@@ -557,13 +557,13 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
 	struct iso9660_options		opt;
 	struct isofs_sb_info	      * sbi;
 
-	sbi = kmalloc(sizeof(struct isofs_sb_info), GFP_KERNEL);
+	sbi = kmalloc(sizeof(*sbi), GFP_KERNEL);
 	if (!sbi)
 		return -ENOMEM;
 	s->s_fs_info = sbi;
-	memset(sbi, 0, sizeof(struct isofs_sb_info));
+	memset(sbi, 0, sizeof(*sbi));
 
-	if (!parse_options((char *) data, &opt))
+	if (!parse_options((char *)data, &opt))
 		goto out_freesbi;
 
 	/*
@@ -792,7 +792,8 @@ root_found:
 	sbi->s_rock = (opt.rock == 'y' ? 2 : 0);
 	sbi->s_rock_offset = -1; /* initial offset, will guess until SP is found*/
 	sbi->s_cruft = opt.cruft;
-	sbi->s_unhide = opt.unhide;
+	sbi->s_hide = opt.hide;
+	sbi->s_showassoc = opt.showassoc;
 	sbi->s_uid = opt.uid;
 	sbi->s_gid = opt.gid;
 	sbi->s_utf8 = opt.utf8;
@@ -1002,7 +1003,6 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s,
 		rv++;
 	}
 
-
 abort:
 	unlock_kernel();
 	return rv;
@@ -1014,7 +1014,7 @@ abort:
 static int isofs_get_block(struct inode *inode, sector_t iblock,
 		    struct buffer_head *bh_result, int create)
 {
-	if ( create ) {
+	if (create) {
 		printk("isofs_get_block: Kernel tries to allocate a block\n");
 		return -EROFS;
 	}
@@ -1061,19 +1061,17 @@ static struct address_space_operations isofs_aops = {
 
 static inline void test_and_set_uid(uid_t *p, uid_t value)
 {
-	if(value) {
+	if (value)
 		*p = value;
-	}
 }
 
 static inline void test_and_set_gid(gid_t *p, gid_t value)
 {
-        if(value) {
+        if (value)
                 *p = value;
-        }
 }
 
-static int isofs_read_level3_size(struct inode * inode)
+static int isofs_read_level3_size(struct inode *inode)
 {
 	unsigned long bufsize = ISOFS_BUFFER_SIZE(inode);
 	int high_sierra = ISOFS_SB(inode->i_sb)->s_high_sierra;
@@ -1136,7 +1134,7 @@ static int isofs_read_level3_size(struct inode * inode)
 				bh = sb_bread(inode->i_sb, block);
 				if (!bh)
 					goto out_noread;
-				memcpy((void *) tmpde + slop, bh->b_data, offset);
+				memcpy((void *)tmpde+slop, bh->b_data, offset);
 			}
 			de = tmpde;
 		}
@@ -1150,12 +1148,11 @@ static int isofs_read_level3_size(struct inode * inode)
 		more_entries = de->flags[-high_sierra] & 0x80;
 
 		i++;
-		if(i > 100)
+		if (i > 100)
 			goto out_toomany;
-	} while(more_entries);
+	} while (more_entries);
 out:
-	if (tmpde)
-		kfree(tmpde);
+	kfree(tmpde);
 	if (bh)
 		brelse(bh);
 	return 0;
@@ -1179,7 +1176,7 @@ out_toomany:
 	goto out;
 }
 
-static void isofs_read_inode(struct inode * inode)
+static void isofs_read_inode(struct inode *inode)
 {
 	struct super_block *sb = inode->i_sb;
 	struct isofs_sb_info *sbi = ISOFS_SB(sb);
@@ -1249,7 +1246,7 @@ static void isofs_read_inode(struct inode * inode)
 	ei->i_format_parm[2] = 0;
 
 	ei->i_section_size = isonum_733 (de->size);
-	if(de->flags[-high_sierra] & 0x80) {
+	if (de->flags[-high_sierra] & 0x80) {
 		if(isofs_read_level3_size(inode)) goto fail;
 	} else {
 		ei->i_next_section_block = 0;
@@ -1336,16 +1333,16 @@ static void isofs_read_inode(struct inode * inode)
 		/* XXX - parse_rock_ridge_inode() had already set i_rdev. */
 		init_special_inode(inode, inode->i_mode, inode->i_rdev);
 
- out:
+out:
 	if (tmpde)
 		kfree(tmpde);
 	if (bh)
 		brelse(bh);
 	return;
 
- out_badread:
+out_badread:
 	printk(KERN_WARNING "ISOFS: unable to read i-node block\n");
- fail:
+fail:
 	make_bad_inode(inode);
 	goto out;
 }
@@ -1394,11 +1391,8 @@ struct inode *isofs_iget(struct super_block *sb,
 
 	hashval = (block << sb->s_blocksize_bits) | offset;
 
-	inode = iget5_locked(sb,
-			     hashval,
-			     &isofs_iget5_test,
-			     &isofs_iget5_set,
-			     &data);
+	inode = iget5_locked(sb, hashval, &isofs_iget5_test,
+			     &isofs_iget5_set, &data);
 
 	if (inode && (inode->i_state & I_NEW)) {
 		sb->s_op->read_inode(inode);
@@ -1408,36 +1402,6 @@ struct inode *isofs_iget(struct super_block *sb,
 	return inode;
 }
 
-#ifdef LEAK_CHECK
-#undef malloc
-#undef free_s
-#undef sb_bread
-#undef brelse
-
-void * leak_check_malloc(unsigned int size){
-  void * tmp;
-  check_malloc++;
-  tmp = kmalloc(size, GFP_KERNEL);
-  return tmp;
-}
-
-void leak_check_free_s(void * obj, int size){
-  check_malloc--;
-  return kfree(obj);
-}
-
-struct buffer_head * leak_check_bread(struct super_block *sb, int block){
-  check_bread++;
-  return sb_bread(sb, block);
-}
-
-void leak_check_brelse(struct buffer_head * bh){
-  check_bread--;
-  return brelse(bh);
-}
-
-#endif
-
 static struct super_block *isofs_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data)
 {
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index 9ce7b51fb614..38c75151fc66 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -47,6 +47,8 @@ struct isofs_sb_info {
 	unsigned char s_nosuid;
 	unsigned char s_nodev;
 	unsigned char s_nocompress;
+	unsigned char s_hide;
+	unsigned char s_showassoc;
 
 	mode_t s_mode;
 	gid_t s_gid;
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 690edf37173c..e37e82b7cbf0 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -131,14 +131,16 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
 		}
 
 		/*
-		 * Skip hidden or associated files unless unhide is set 
+		 * Skip hidden or associated files unless hide or showassoc,
+		 * respectively, is set
 		 */
 		match = 0;
 		if (dlen > 0 &&
-		    (!(de->flags[-sbi->s_high_sierra] & 5)
-		     || sbi->s_unhide == 'y'))
-		{
-			match = (isofs_cmp(dentry,dpnt,dlen) == 0);
+			(sbi->s_hide =='n' ||
+				(!(de->flags[-sbi->s_high_sierra] & 1))) &&
+			(sbi->s_showassoc =='y' ||
+				(!(de->flags[-sbi->s_high_sierra] & 4)))) {
+			match = (isofs_cmp(dentry, dpnt, dlen) == 0);
 		}
 		if (match) {
 			isofs_normalize_block_and_offset(de,
@@ -146,11 +148,11 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
 							 &offset_saved);
                         *block_rv = block_saved;
                         *offset_rv = offset_saved;
-			if (bh) brelse(bh);
+			brelse(bh);
 			return 1;
 		}
 	}
-	if (bh) brelse(bh);
+	brelse(bh);
 	return 0;
 }
 
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 089e79c65585..4326cb47f8fa 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -13,352 +13,542 @@
 #include "isofs.h"
 #include "rock.h"
 
-/* These functions are designed to read the system areas of a directory record
+/*
+ * These functions are designed to read the system areas of a directory record
  * and extract relevant information.  There are different functions provided
  * depending upon what information we need at the time.  One function fills
  * out an inode structure, a second one extracts a filename, a third one
  * returns a symbolic link name, and a fourth one returns the extent number
- * for the file. */
-
-#define SIG(A,B) ((A) | ((B) << 8)) /* isonum_721() */
-
-
-/* This is a way of ensuring that we have something in the system
-   use fields that is compatible with Rock Ridge */
-#define CHECK_SP(FAIL)	       			\
-      if(rr->u.SP.magic[0] != 0xbe) FAIL;	\
-      if(rr->u.SP.magic[1] != 0xef) FAIL;       \
-      ISOFS_SB(inode->i_sb)->s_rock_offset=rr->u.SP.skip;
-/* We define a series of macros because each function must do exactly the
-   same thing in certain places.  We use the macros to ensure that everything
-   is done correctly */
-
-#define CONTINUE_DECLS \
-  int cont_extent = 0, cont_offset = 0, cont_size = 0;   \
-  void *buffer = NULL
-
-#define CHECK_CE	       			\
-      {cont_extent = isonum_733(rr->u.CE.extent); \
-      cont_offset = isonum_733(rr->u.CE.offset); \
-      cont_size = isonum_733(rr->u.CE.size);}
-
-#define SETUP_ROCK_RIDGE(DE,CHR,LEN)	      		      	\
-  {LEN= sizeof(struct iso_directory_record) + DE->name_len[0];	\
-  if(LEN & 1) LEN++;						\
-  CHR = ((unsigned char *) DE) + LEN;				\
-  LEN = *((unsigned char *) DE) - LEN;                          \
-  if (LEN<0) LEN=0;                                             \
-  if (ISOFS_SB(inode->i_sb)->s_rock_offset!=-1)                \
-  {                                                             \
-     LEN-=ISOFS_SB(inode->i_sb)->s_rock_offset;                \
-     CHR+=ISOFS_SB(inode->i_sb)->s_rock_offset;                \
-     if (LEN<0) LEN=0;                                          \
-  }                                                             \
-}                                     
-
-#define MAYBE_CONTINUE(LABEL,DEV) \
-  {if (buffer) { kfree(buffer); buffer = NULL; } \
-  if (cont_extent){ \
-    int block, offset, offset1; \
-    struct buffer_head * pbh; \
-    buffer = kmalloc(cont_size,GFP_KERNEL); \
-    if (!buffer) goto out; \
-    block = cont_extent; \
-    offset = cont_offset; \
-    offset1 = 0; \
-    pbh = sb_bread(DEV->i_sb, block); \
-    if(pbh){       \
-      if (offset > pbh->b_size || offset + cont_size > pbh->b_size){	\
-	brelse(pbh); \
-	goto out; \
-      } \
-      memcpy(buffer + offset1, pbh->b_data + offset, cont_size - offset1); \
-      brelse(pbh); \
-      chr = (unsigned char *) buffer; \
-      len = cont_size; \
-      cont_extent = 0; \
-      cont_size = 0; \
-      cont_offset = 0; \
-      goto LABEL; \
-    }    \
-    printk("Unable to read rock-ridge attributes\n");    \
-  }}
-
-/* return length of name field; 0: not found, -1: to be ignored */
-int get_rock_ridge_filename(struct iso_directory_record * de,
-			    char * retname, struct inode * inode)
+ * for the file.
+ */
+
+#define SIG(A,B) ((A) | ((B) << 8))	/* isonum_721() */
+
+struct rock_state {
+	void *buffer;
+	unsigned char *chr;
+	int len;
+	int cont_size;
+	int cont_extent;
+	int cont_offset;
+	struct inode *inode;
+};
+
+/*
+ * This is a way of ensuring that we have something in the system
+ * use fields that is compatible with Rock Ridge.  Return zero on success.
+ */
+
+static int check_sp(struct rock_ridge *rr, struct inode *inode)
 {
-  int len;
-  unsigned char * chr;
-  CONTINUE_DECLS;
-  int retnamlen = 0, truncate=0;
- 
-  if (!ISOFS_SB(inode->i_sb)->s_rock) return 0;
-  *retname = 0;
-
-  SETUP_ROCK_RIDGE(de, chr, len);
- repeat:
-  {
-    struct rock_ridge * rr;
-    int sig;
-    
-    while (len > 2){ /* There may be one byte for padding somewhere */
-      rr = (struct rock_ridge *) chr;
-      if (rr->len < 3) goto out; /* Something got screwed up here */
-      sig = isonum_721(chr);
-      chr += rr->len; 
-      len -= rr->len;
-      if (len < 0) goto out;	/* corrupted isofs */
-
-      switch(sig){
-      case SIG('R','R'):
-	if((rr->u.RR.flags[0] & RR_NM) == 0) goto out;
-	break;
-      case SIG('S','P'):
-	CHECK_SP(goto out);
-	break;
-      case SIG('C','E'):
-	CHECK_CE;
-	break;
-      case SIG('N','M'):
-	if (truncate) break;
-	if (rr->len < 5) break;
-        /*
-	 * If the flags are 2 or 4, this indicates '.' or '..'.
-	 * We don't want to do anything with this, because it
-	 * screws up the code that calls us.  We don't really
-	 * care anyways, since we can just use the non-RR
-	 * name.
-	 */
-	if (rr->u.NM.flags & 6) {
-	  break;
+	if (rr->u.SP.magic[0] != 0xbe)
+		return -1;
+	if (rr->u.SP.magic[1] != 0xef)
+		return -1;
+	ISOFS_SB(inode->i_sb)->s_rock_offset = rr->u.SP.skip;
+	return 0;
+}
+
+static void setup_rock_ridge(struct iso_directory_record *de,
+			struct inode *inode, struct rock_state *rs)
+{
+	rs->len = sizeof(struct iso_directory_record) + de->name_len[0];
+	if (rs->len & 1)
+		(rs->len)++;
+	rs->chr = (unsigned char *)de + rs->len;
+	rs->len = *((unsigned char *)de) - rs->len;
+	if (rs->len < 0)
+		rs->len = 0;
+
+	if (ISOFS_SB(inode->i_sb)->s_rock_offset != -1) {
+		rs->len -= ISOFS_SB(inode->i_sb)->s_rock_offset;
+		rs->chr += ISOFS_SB(inode->i_sb)->s_rock_offset;
+		if (rs->len < 0)
+			rs->len = 0;
+	}
+}
+
+static void init_rock_state(struct rock_state *rs, struct inode *inode)
+{
+	memset(rs, 0, sizeof(*rs));
+	rs->inode = inode;
+}
+
+/*
+ * Returns 0 if the caller should continue scanning, 1 if the scan must end
+ * and -ve on error.
+ */
+static int rock_continue(struct rock_state *rs)
+{
+	int ret = 1;
+	int blocksize = 1 << rs->inode->i_blkbits;
+	const int min_de_size = offsetof(struct rock_ridge, u);
+
+	kfree(rs->buffer);
+	rs->buffer = NULL;
+
+	if ((unsigned)rs->cont_offset > blocksize - min_de_size ||
+	    (unsigned)rs->cont_size > blocksize ||
+	    (unsigned)(rs->cont_offset + rs->cont_size) > blocksize) {
+		printk(KERN_NOTICE "rock: corrupted directory entry. "
+			"extent=%d, offset=%d, size=%d\n",
+			rs->cont_extent, rs->cont_offset, rs->cont_size);
+		ret = -EIO;
+		goto out;
 	}
 
-	if (rr->u.NM.flags & ~1) {
-	  printk("Unsupported NM flag settings (%d)\n",rr->u.NM.flags);
-	  break;
+	if (rs->cont_extent) {
+		struct buffer_head *bh;
+
+		rs->buffer = kmalloc(rs->cont_size, GFP_KERNEL);
+		if (!rs->buffer) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		ret = -EIO;
+		bh = sb_bread(rs->inode->i_sb, rs->cont_extent);
+		if (bh) {
+			memcpy(rs->buffer, bh->b_data + rs->cont_offset,
+					rs->cont_size);
+			put_bh(bh);
+			rs->chr = rs->buffer;
+			rs->len = rs->cont_size;
+			rs->cont_extent = 0;
+			rs->cont_size = 0;
+			rs->cont_offset = 0;
+			return 0;
+		}
+		printk("Unable to read rock-ridge attributes\n");
+	}
+out:
+	kfree(rs->buffer);
+	rs->buffer = NULL;
+	return ret;
+}
+
+/*
+ * We think there's a record of type `sig' at rs->chr.  Parse the signature
+ * and make sure that there's really room for a record of that type.
+ */
+static int rock_check_overflow(struct rock_state *rs, int sig)
+{
+	int len;
+
+	switch (sig) {
+	case SIG('S', 'P'):
+		len = sizeof(struct SU_SP_s);
+		break;
+	case SIG('C', 'E'):
+		len = sizeof(struct SU_CE_s);
+		break;
+	case SIG('E', 'R'):
+		len = sizeof(struct SU_ER_s);
+		break;
+	case SIG('R', 'R'):
+		len = sizeof(struct RR_RR_s);
+		break;
+	case SIG('P', 'X'):
+		len = sizeof(struct RR_PX_s);
+		break;
+	case SIG('P', 'N'):
+		len = sizeof(struct RR_PN_s);
+		break;
+	case SIG('S', 'L'):
+		len = sizeof(struct RR_SL_s);
+		break;
+	case SIG('N', 'M'):
+		len = sizeof(struct RR_NM_s);
+		break;
+	case SIG('C', 'L'):
+		len = sizeof(struct RR_CL_s);
+		break;
+	case SIG('P', 'L'):
+		len = sizeof(struct RR_PL_s);
+		break;
+	case SIG('T', 'F'):
+		len = sizeof(struct RR_TF_s);
+		break;
+	case SIG('Z', 'F'):
+		len = sizeof(struct RR_ZF_s);
+		break;
+	default:
+		len = 0;
+		break;
 	}
-	if((strlen(retname) + rr->len - 5) >= 254) {
-	  truncate = 1;
-	  break;
+	len += offsetof(struct rock_ridge, u);
+	if (len > rs->len) {
+		printk(KERN_NOTICE "rock: directory entry would overflow "
+				"storage\n");
+		printk(KERN_NOTICE "rock: sig=0x%02x, size=%d, remaining=%d\n",
+				sig, len, rs->len);
+		return -EIO;
+	}
+	return 0;
+}
+
+/*
+ * return length of name field; 0: not found, -1: to be ignored
+ */
+int get_rock_ridge_filename(struct iso_directory_record *de,
+			    char *retname, struct inode *inode)
+{
+	struct rock_state rs;
+	struct rock_ridge *rr;
+	int sig;
+	int retnamlen = 0;
+	int truncate = 0;
+	int ret = 0;
+
+	if (!ISOFS_SB(inode->i_sb)->s_rock)
+		return 0;
+	*retname = 0;
+
+	init_rock_state(&rs, inode);
+	setup_rock_ridge(de, inode, &rs);
+repeat:
+
+	while (rs.len > 2) { /* There may be one byte for padding somewhere */
+		rr = (struct rock_ridge *)rs.chr;
+		if (rr->len < 3)
+			goto out;	/* Something got screwed up here */
+		sig = isonum_721(rs.chr);
+		if (rock_check_overflow(&rs, sig))
+			goto eio;
+		rs.chr += rr->len;
+		rs.len -= rr->len;
+		if (rs.len < 0)
+			goto eio;	/* corrupted isofs */
+
+		switch (sig) {
+		case SIG('R', 'R'):
+			if ((rr->u.RR.flags[0] & RR_NM) == 0)
+				goto out;
+			break;
+		case SIG('S', 'P'):
+			if (check_sp(rr, inode))
+				goto out;
+			break;
+		case SIG('C', 'E'):
+			rs.cont_extent = isonum_733(rr->u.CE.extent);
+			rs.cont_offset = isonum_733(rr->u.CE.offset);
+			rs.cont_size = isonum_733(rr->u.CE.size);
+			break;
+		case SIG('N', 'M'):
+			if (truncate)
+				break;
+			if (rr->len < 5)
+				break;
+			/*
+			 * If the flags are 2 or 4, this indicates '.' or '..'.
+			 * We don't want to do anything with this, because it
+			 * screws up the code that calls us.  We don't really
+			 * care anyways, since we can just use the non-RR
+			 * name.
+			 */
+			if (rr->u.NM.flags & 6)
+				break;
+
+			if (rr->u.NM.flags & ~1) {
+				printk("Unsupported NM flag settings (%d)\n",
+					rr->u.NM.flags);
+				break;
+			}
+			if ((strlen(retname) + rr->len - 5) >= 254) {
+				truncate = 1;
+				break;
+			}
+			strncat(retname, rr->u.NM.name, rr->len - 5);
+			retnamlen += rr->len - 5;
+			break;
+		case SIG('R', 'E'):
+			kfree(rs.buffer);
+			return -1;
+		default:
+			break;
+		}
 	}
-	strncat(retname, rr->u.NM.name, rr->len - 5);
-	retnamlen += rr->len - 5;
-	break;
-      case SIG('R','E'):
-	if (buffer) kfree(buffer);
-	return -1;
-      default:
-	break;
-      }
-    }
-  }
-  MAYBE_CONTINUE(repeat,inode);
-  if (buffer) kfree(buffer);
-  return retnamlen; /* If 0, this file did not have a NM field */
- out:
-  if(buffer) kfree(buffer);
-  return 0;
+	ret = rock_continue(&rs);
+	if (ret == 0)
+		goto repeat;
+	if (ret == 1)
+		return retnamlen; /* If 0, this file did not have a NM field */
+out:
+	kfree(rs.buffer);
+	return ret;
+eio:
+	ret = -EIO;
+	goto out;
 }
 
 static int
 parse_rock_ridge_inode_internal(struct iso_directory_record *de,
 				struct inode *inode, int regard_xa)
 {
-  int len;
-  unsigned char * chr;
-  int symlink_len = 0;
-  CONTINUE_DECLS;
-
-  if (!ISOFS_SB(inode->i_sb)->s_rock) return 0;
-
-  SETUP_ROCK_RIDGE(de, chr, len);
-  if (regard_xa)
-   {
-     chr+=14;
-     len-=14;
-     if (len<0) len=0;
-   }
-   
- repeat:
-  {
-    int cnt, sig;
-    struct inode * reloc;
-    struct rock_ridge * rr;
-    int rootflag;
-    
-    while (len > 2){ /* There may be one byte for padding somewhere */
-      rr = (struct rock_ridge *) chr;
-      if (rr->len < 3) goto out; /* Something got screwed up here */
-      sig = isonum_721(chr);
-      chr += rr->len; 
-      len -= rr->len;
-      if (len < 0) goto out;	/* corrupted isofs */
-      
-      switch(sig){
+	int symlink_len = 0;
+	int cnt, sig;
+	struct inode *reloc;
+	struct rock_ridge *rr;
+	int rootflag;
+	struct rock_state rs;
+	int ret = 0;
+
+	if (!ISOFS_SB(inode->i_sb)->s_rock)
+		return 0;
+
+	init_rock_state(&rs, inode);
+	setup_rock_ridge(de, inode, &rs);
+	if (regard_xa) {
+		rs.chr += 14;
+		rs.len -= 14;
+		if (rs.len < 0)
+			rs.len = 0;
+	}
+
+repeat:
+	while (rs.len > 2) { /* There may be one byte for padding somewhere */
+		rr = (struct rock_ridge *)rs.chr;
+		if (rr->len < 3)
+			goto out;	/* Something got screwed up here */
+		sig = isonum_721(rs.chr);
+		if (rock_check_overflow(&rs, sig))
+			goto eio;
+		rs.chr += rr->len;
+		rs.len -= rr->len;
+		if (rs.len < 0)
+			goto eio;	/* corrupted isofs */
+
+		switch (sig) {
 #ifndef CONFIG_ZISOFS		/* No flag for SF or ZF */
-      case SIG('R','R'):
-	if((rr->u.RR.flags[0] & 
- 	    (RR_PX | RR_TF | RR_SL | RR_CL)) == 0) goto out;
-	break;
+		case SIG('R', 'R'):
+			if ((rr->u.RR.flags[0] &
+			     (RR_PX | RR_TF | RR_SL | RR_CL)) == 0)
+				goto out;
+			break;
 #endif
-      case SIG('S','P'):
-	CHECK_SP(goto out);
-	break;
-      case SIG('C','E'):
-	CHECK_CE;
-	break;
-      case SIG('E','R'):
-	ISOFS_SB(inode->i_sb)->s_rock = 1;
-	printk(KERN_DEBUG "ISO 9660 Extensions: ");
-	{ int p;
-	  for(p=0;p<rr->u.ER.len_id;p++) printk("%c",rr->u.ER.data[p]);
-	}
-	  printk("\n");
-	break;
-      case SIG('P','X'):
-	inode->i_mode  = isonum_733(rr->u.PX.mode);
-	inode->i_nlink = isonum_733(rr->u.PX.n_links);
-	inode->i_uid   = isonum_733(rr->u.PX.uid);
-	inode->i_gid   = isonum_733(rr->u.PX.gid);
-	break;
-      case SIG('P','N'):
-	{ int high, low;
-	  high = isonum_733(rr->u.PN.dev_high);
-	  low = isonum_733(rr->u.PN.dev_low);
-	  /*
-	   * The Rock Ridge standard specifies that if sizeof(dev_t) <= 4,
-	   * then the high field is unused, and the device number is completely
-	   * stored in the low field.  Some writers may ignore this subtlety,
-	   * and as a result we test to see if the entire device number is
-	   * stored in the low field, and use that.
-	   */
-	  if((low & ~0xff) && high == 0) {
-	    inode->i_rdev = MKDEV(low >> 8, low & 0xff);
-	  } else {
-	    inode->i_rdev = MKDEV(high, low);
-	  }
-	}
-	break;
-      case SIG('T','F'):
-	/* Some RRIP writers incorrectly place ctime in the TF_CREATE field.
-	   Try to handle this correctly for either case. */
-	cnt = 0; /* Rock ridge never appears on a High Sierra disk */
-	if(rr->u.TF.flags & TF_CREATE) { 
-	  inode->i_ctime.tv_sec = iso_date(rr->u.TF.times[cnt++].time, 0);
-	  inode->i_ctime.tv_nsec = 0;
-	}
-	if(rr->u.TF.flags & TF_MODIFY) {
-	  inode->i_mtime.tv_sec = iso_date(rr->u.TF.times[cnt++].time, 0);
-	  inode->i_mtime.tv_nsec = 0;
-	}
-	if(rr->u.TF.flags & TF_ACCESS) {
-	  inode->i_atime.tv_sec = iso_date(rr->u.TF.times[cnt++].time, 0);
-	  inode->i_atime.tv_nsec = 0;
-	}
-	if(rr->u.TF.flags & TF_ATTRIBUTES) { 
-	  inode->i_ctime.tv_sec = iso_date(rr->u.TF.times[cnt++].time, 0);
-	  inode->i_ctime.tv_nsec = 0;
-	} 
-	break;
-      case SIG('S','L'):
-	{int slen;
-	 struct SL_component * slp;
-	 struct SL_component * oldslp;
-	 slen = rr->len - 5;
-	 slp = &rr->u.SL.link;
-	 inode->i_size = symlink_len;
-	 while (slen > 1){
-	   rootflag = 0;
-	   switch(slp->flags &~1){
-	   case 0:
-	     inode->i_size += slp->len;
-	     break;
-	   case 2:
-	     inode->i_size += 1;
-	     break;
-	   case 4:
-	     inode->i_size += 2;
-	     break;
-	   case 8:
-	     rootflag = 1;
-	     inode->i_size += 1;
-	     break;
-	   default:
-	     printk("Symlink component flag not implemented\n");
-	   }
-	   slen -= slp->len + 2;
-	   oldslp = slp;
-	   slp = (struct SL_component *) (((char *) slp) + slp->len + 2);
-
-	   if(slen < 2) {
-	     if(    ((rr->u.SL.flags & 1) != 0) 
-		    && ((oldslp->flags & 1) == 0) ) inode->i_size += 1;
-	     break;
-	   }
-
-	   /*
-	    * If this component record isn't continued, then append a '/'.
-	    */
-	   if (!rootflag && (oldslp->flags & 1) == 0)
-		   inode->i_size += 1;
-	 }
-	}
-	symlink_len = inode->i_size;
-	break;
-      case SIG('R','E'):
-	printk(KERN_WARNING "Attempt to read inode for relocated directory\n");
-	goto out;
-      case SIG('C','L'):
-	ISOFS_I(inode)->i_first_extent = isonum_733(rr->u.CL.location);
-	reloc = isofs_iget(inode->i_sb, ISOFS_I(inode)->i_first_extent, 0);
-	if (!reloc)
-		goto out;
-	inode->i_mode = reloc->i_mode;
-	inode->i_nlink = reloc->i_nlink;
-	inode->i_uid = reloc->i_uid;
-	inode->i_gid = reloc->i_gid;
-	inode->i_rdev = reloc->i_rdev;
-	inode->i_size = reloc->i_size;
-	inode->i_blocks = reloc->i_blocks;
-	inode->i_atime = reloc->i_atime;
-	inode->i_ctime = reloc->i_ctime;
-	inode->i_mtime = reloc->i_mtime;
-	iput(reloc);
-	break;
+		case SIG('S', 'P'):
+			if (check_sp(rr, inode))
+				goto out;
+			break;
+		case SIG('C', 'E'):
+			rs.cont_extent = isonum_733(rr->u.CE.extent);
+			rs.cont_offset = isonum_733(rr->u.CE.offset);
+			rs.cont_size = isonum_733(rr->u.CE.size);
+			break;
+		case SIG('E', 'R'):
+			ISOFS_SB(inode->i_sb)->s_rock = 1;
+			printk(KERN_DEBUG "ISO 9660 Extensions: ");
+			{
+				int p;
+				for (p = 0; p < rr->u.ER.len_id; p++)
+					printk("%c", rr->u.ER.data[p]);
+			}
+			printk("\n");
+			break;
+		case SIG('P', 'X'):
+			inode->i_mode = isonum_733(rr->u.PX.mode);
+			inode->i_nlink = isonum_733(rr->u.PX.n_links);
+			inode->i_uid = isonum_733(rr->u.PX.uid);
+			inode->i_gid = isonum_733(rr->u.PX.gid);
+			break;
+		case SIG('P', 'N'):
+			{
+				int high, low;
+				high = isonum_733(rr->u.PN.dev_high);
+				low = isonum_733(rr->u.PN.dev_low);
+				/*
+				 * The Rock Ridge standard specifies that if
+				 * sizeof(dev_t) <= 4, then the high field is
+				 * unused, and the device number is completely
+				 * stored in the low field.  Some writers may
+				 * ignore this subtlety,
+				 * and as a result we test to see if the entire
+				 * device number is
+				 * stored in the low field, and use that.
+				 */
+				if ((low & ~0xff) && high == 0) {
+					inode->i_rdev =
+					    MKDEV(low >> 8, low & 0xff);
+				} else {
+					inode->i_rdev =
+					    MKDEV(high, low);
+				}
+			}
+			break;
+		case SIG('T', 'F'):
+			/*
+			 * Some RRIP writers incorrectly place ctime in the
+			 * TF_CREATE field. Try to handle this correctly for
+			 * either case.
+			 */
+			/* Rock ridge never appears on a High Sierra disk */
+			cnt = 0;
+			if (rr->u.TF.flags & TF_CREATE) {
+				inode->i_ctime.tv_sec =
+				    iso_date(rr->u.TF.times[cnt++].time,
+					     0);
+				inode->i_ctime.tv_nsec = 0;
+			}
+			if (rr->u.TF.flags & TF_MODIFY) {
+				inode->i_mtime.tv_sec =
+				    iso_date(rr->u.TF.times[cnt++].time,
+					     0);
+				inode->i_mtime.tv_nsec = 0;
+			}
+			if (rr->u.TF.flags & TF_ACCESS) {
+				inode->i_atime.tv_sec =
+				    iso_date(rr->u.TF.times[cnt++].time,
+					     0);
+				inode->i_atime.tv_nsec = 0;
+			}
+			if (rr->u.TF.flags & TF_ATTRIBUTES) {
+				inode->i_ctime.tv_sec =
+				    iso_date(rr->u.TF.times[cnt++].time,
+					     0);
+				inode->i_ctime.tv_nsec = 0;
+			}
+			break;
+		case SIG('S', 'L'):
+			{
+				int slen;
+				struct SL_component *slp;
+				struct SL_component *oldslp;
+				slen = rr->len - 5;
+				slp = &rr->u.SL.link;
+				inode->i_size = symlink_len;
+				while (slen > 1) {
+					rootflag = 0;
+					switch (slp->flags & ~1) {
+					case 0:
+						inode->i_size +=
+						    slp->len;
+						break;
+					case 2:
+						inode->i_size += 1;
+						break;
+					case 4:
+						inode->i_size += 2;
+						break;
+					case 8:
+						rootflag = 1;
+						inode->i_size += 1;
+						break;
+					default:
+						printk("Symlink component flag "
+							"not implemented\n");
+					}
+					slen -= slp->len + 2;
+					oldslp = slp;
+					slp = (struct SL_component *)
+						(((char *)slp) + slp->len + 2);
+
+					if (slen < 2) {
+						if (((rr->u.SL.
+						      flags & 1) != 0)
+						    &&
+						    ((oldslp->
+						      flags & 1) == 0))
+							inode->i_size +=
+							    1;
+						break;
+					}
+
+					/*
+					 * If this component record isn't
+					 * continued, then append a '/'.
+					 */
+					if (!rootflag
+					    && (oldslp->flags & 1) == 0)
+						inode->i_size += 1;
+				}
+			}
+			symlink_len = inode->i_size;
+			break;
+		case SIG('R', 'E'):
+			printk(KERN_WARNING "Attempt to read inode for "
+					"relocated directory\n");
+			goto out;
+		case SIG('C', 'L'):
+			ISOFS_I(inode)->i_first_extent =
+			    isonum_733(rr->u.CL.location);
+			reloc =
+			    isofs_iget(inode->i_sb,
+				       ISOFS_I(inode)->i_first_extent,
+				       0);
+			if (!reloc)
+				goto out;
+			inode->i_mode = reloc->i_mode;
+			inode->i_nlink = reloc->i_nlink;
+			inode->i_uid = reloc->i_uid;
+			inode->i_gid = reloc->i_gid;
+			inode->i_rdev = reloc->i_rdev;
+			inode->i_size = reloc->i_size;
+			inode->i_blocks = reloc->i_blocks;
+			inode->i_atime = reloc->i_atime;
+			inode->i_ctime = reloc->i_ctime;
+			inode->i_mtime = reloc->i_mtime;
+			iput(reloc);
+			break;
 #ifdef CONFIG_ZISOFS
-      case SIG('Z','F'):
-	      if ( !ISOFS_SB(inode->i_sb)->s_nocompress ) {
-		      int algo;
-		      algo = isonum_721(rr->u.ZF.algorithm);
-		      if ( algo == SIG('p','z') ) {
-			      int block_shift = isonum_711(&rr->u.ZF.parms[1]);
-			      if ( block_shift < PAGE_CACHE_SHIFT || block_shift > 17 ) {
-				      printk(KERN_WARNING "isofs: Can't handle ZF block size of 2^%d\n", block_shift);
-			      } else {
-				/* Note: we don't change i_blocks here */
-				      ISOFS_I(inode)->i_file_format = isofs_file_compressed;
-				/* Parameters to compression algorithm (header size, block size) */
-				      ISOFS_I(inode)->i_format_parm[0] = isonum_711(&rr->u.ZF.parms[0]);
-				      ISOFS_I(inode)->i_format_parm[1] = isonum_711(&rr->u.ZF.parms[1]);
-				      inode->i_size = isonum_733(rr->u.ZF.real_size);
-			      }
-		      } else {
-			      printk(KERN_WARNING "isofs: Unknown ZF compression algorithm: %c%c\n",
-				     rr->u.ZF.algorithm[0], rr->u.ZF.algorithm[1]);
-		      }
-	      }
-	      break;
+		case SIG('Z', 'F'): {
+			int algo;
+
+			if (ISOFS_SB(inode->i_sb)->s_nocompress)
+				break;
+			algo = isonum_721(rr->u.ZF.algorithm);
+			if (algo == SIG('p', 'z')) {
+				int block_shift =
+					isonum_711(&rr->u.ZF.parms[1]);
+				if (block_shift < PAGE_CACHE_SHIFT
+						|| block_shift > 17) {
+					printk(KERN_WARNING "isofs: "
+						"Can't handle ZF block "
+						"size of 2^%d\n",
+						block_shift);
+				} else {
+					/*
+					 * Note: we don't change
+					 * i_blocks here
+					 */
+					ISOFS_I(inode)->i_file_format =
+						isofs_file_compressed;
+					/*
+					 * Parameters to compression
+					 * algorithm (header size,
+					 * block size)
+					 */
+					ISOFS_I(inode)->i_format_parm[0] =
+						isonum_711(&rr->u.ZF.parms[0]);
+					ISOFS_I(inode)->i_format_parm[1] =
+						isonum_711(&rr->u.ZF.parms[1]);
+					inode->i_size =
+					    isonum_733(rr->u.ZF.
+						       real_size);
+				}
+			} else {
+				printk(KERN_WARNING
+				       "isofs: Unknown ZF compression "
+						"algorithm: %c%c\n",
+				       rr->u.ZF.algorithm[0],
+				       rr->u.ZF.algorithm[1]);
+			}
+			break;
+		}
 #endif
-      default:
-	break;
-      }
-    }
-  }
-  MAYBE_CONTINUE(repeat,inode);
- out:
-  if(buffer) kfree(buffer);
-  return 0;
+		default:
+			break;
+		}
+	}
+	ret = rock_continue(&rs);
+	if (ret == 0)
+		goto repeat;
+	if (ret == 1)
+		ret = 0;
+out:
+	kfree(rs.buffer);
+	return ret;
+eio:
+	ret = -EIO;
+	goto out;
 }
 
 static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit)
@@ -376,32 +566,32 @@ static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit)
 			if (slp->len > plimit - rpnt)
 				return NULL;
 			memcpy(rpnt, slp->text, slp->len);
-			rpnt+=slp->len;
+			rpnt += slp->len;
 			break;
 		case 2:
 			if (rpnt >= plimit)
 				return NULL;
-			*rpnt++='.';
+			*rpnt++ = '.';
 			break;
 		case 4:
 			if (2 > plimit - rpnt)
 				return NULL;
-			*rpnt++='.';
-			*rpnt++='.';
+			*rpnt++ = '.';
+			*rpnt++ = '.';
 			break;
 		case 8:
 			if (rpnt >= plimit)
 				return NULL;
 			rootflag = 1;
-			*rpnt++='/';
+			*rpnt++ = '/';
 			break;
 		default:
 			printk("Symlink component flag not implemented (%d)\n",
-			     slp->flags);
+			       slp->flags);
 		}
 		slen -= slp->len + 2;
 		oldslp = slp;
-		slp = (struct SL_component *) ((char *) slp + slp->len + 2);
+		slp = (struct SL_component *)((char *)slp + slp->len + 2);
 
 		if (slen < 2) {
 			/*
@@ -412,7 +602,7 @@ static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit)
 			    !(oldslp->flags & 1)) {
 				if (rpnt >= plimit)
 					return NULL;
-				*rpnt++='/';
+				*rpnt++ = '/';
 			}
 			break;
 		}
@@ -423,59 +613,61 @@ static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit)
 		if (!rootflag && !(oldslp->flags & 1)) {
 			if (rpnt >= plimit)
 				return NULL;
-			*rpnt++='/';
+			*rpnt++ = '/';
 		}
 	}
 	return rpnt;
 }
 
-int parse_rock_ridge_inode(struct iso_directory_record * de,
-			   struct inode * inode)
+int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode)
 {
-   int result=parse_rock_ridge_inode_internal(de,inode,0);
-   /* if rockridge flag was reset and we didn't look for attributes
-    * behind eventual XA attributes, have a look there */
-   if ((ISOFS_SB(inode->i_sb)->s_rock_offset==-1)
-       &&(ISOFS_SB(inode->i_sb)->s_rock==2))
-     {
-	result=parse_rock_ridge_inode_internal(de,inode,14);
-     }
-   return result;
-}
+	int result = parse_rock_ridge_inode_internal(de, inode, 0);
 
-/* readpage() for symlinks: reads symlink contents into the page and either
-   makes it uptodate and returns 0 or returns error (-EIO) */
+	/*
+	 * if rockridge flag was reset and we didn't look for attributes
+	 * behind eventual XA attributes, have a look there
+	 */
+	if ((ISOFS_SB(inode->i_sb)->s_rock_offset == -1)
+	    && (ISOFS_SB(inode->i_sb)->s_rock == 2)) {
+		result = parse_rock_ridge_inode_internal(de, inode, 14);
+	}
+	return result;
+}
 
+/*
+ * readpage() for symlinks: reads symlink contents into the page and either
+ * makes it uptodate and returns 0 or returns error (-EIO)
+ */
 static int rock_ridge_symlink_readpage(struct file *file, struct page *page)
 {
 	struct inode *inode = page->mapping->host;
-        struct iso_inode_info *ei = ISOFS_I(inode);
+	struct iso_inode_info *ei = ISOFS_I(inode);
 	char *link = kmap(page);
 	unsigned long bufsize = ISOFS_BUFFER_SIZE(inode);
 	struct buffer_head *bh;
 	char *rpnt = link;
 	unsigned char *pnt;
-	struct iso_directory_record *raw_inode;
-	CONTINUE_DECLS;
+	struct iso_directory_record *raw_de;
 	unsigned long block, offset;
 	int sig;
-	int len;
-	unsigned char *chr;
 	struct rock_ridge *rr;
+	struct rock_state rs;
+	int ret;
 
 	if (!ISOFS_SB(inode->i_sb)->s_rock)
 		goto error;
 
+	init_rock_state(&rs, inode);
 	block = ei->i_iget5_block;
 	lock_kernel();
 	bh = sb_bread(inode->i_sb, block);
 	if (!bh)
 		goto out_noread;
 
-        offset = ei->i_iget5_offset;
-	pnt = (unsigned char *) bh->b_data + offset;
+	offset = ei->i_iget5_offset;
+	pnt = (unsigned char *)bh->b_data + offset;
 
-	raw_inode = (struct iso_directory_record *) pnt;
+	raw_de = (struct iso_directory_record *)pnt;
 
 	/*
 	 * If we go past the end of the buffer, there is some sort of error.
@@ -483,20 +675,24 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page)
 	if (offset + *pnt > bufsize)
 		goto out_bad_span;
 
-	/* Now test for possible Rock Ridge extensions which will override
-	   some of these numbers in the inode structure. */
+	/*
+	 * Now test for possible Rock Ridge extensions which will override
+	 * some of these numbers in the inode structure.
+	 */
 
-	SETUP_ROCK_RIDGE(raw_inode, chr, len);
+	setup_rock_ridge(raw_de, inode, &rs);
 
-      repeat:
-	while (len > 2) { /* There may be one byte for padding somewhere */
-		rr = (struct rock_ridge *) chr;
+repeat:
+	while (rs.len > 2) { /* There may be one byte for padding somewhere */
+		rr = (struct rock_ridge *)rs.chr;
 		if (rr->len < 3)
 			goto out;	/* Something got screwed up here */
-		sig = isonum_721(chr);
-		chr += rr->len;
-		len -= rr->len;
-		if (len < 0)
+		sig = isonum_721(rs.chr);
+		if (rock_check_overflow(&rs, sig))
+			goto out;
+		rs.chr += rr->len;
+		rs.len -= rr->len;
+		if (rs.len < 0)
 			goto out;	/* corrupted isofs */
 
 		switch (sig) {
@@ -505,7 +701,8 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page)
 				goto out;
 			break;
 		case SIG('S', 'P'):
-			CHECK_SP(goto out);
+			if (check_sp(rr, inode))
+				goto out;
 			break;
 		case SIG('S', 'L'):
 			rpnt = get_symlink_chunk(rpnt, rr,
@@ -515,14 +712,18 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page)
 			break;
 		case SIG('C', 'E'):
 			/* This tells is if there is a continuation record */
-			CHECK_CE;
+			rs.cont_extent = isonum_733(rr->u.CE.extent);
+			rs.cont_offset = isonum_733(rr->u.CE.offset);
+			rs.cont_size = isonum_733(rr->u.CE.size);
 		default:
 			break;
 		}
 	}
-	MAYBE_CONTINUE(repeat, inode);
-	if (buffer)
-		kfree(buffer);
+	ret = rock_continue(&rs);
+	if (ret == 0)
+		goto repeat;
+	if (ret < 0)
+		goto fail;
 
 	if (rpnt == link)
 		goto fail;
@@ -535,19 +736,18 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page)
 	return 0;
 
 	/* error exit from macro */
-      out:
-	if (buffer)
-		kfree(buffer);
+out:
+	kfree(rs.buffer);
 	goto fail;
-      out_noread:
+out_noread:
 	printk("unable to read i-node block");
 	goto fail;
-      out_bad_span:
+out_bad_span:
 	printk("symlink spans iso9660 blocks\n");
-      fail:
+fail:
 	brelse(bh);
 	unlock_kernel();
-      error:
+error:
 	SetPageError(page);
 	kunmap(page);
 	unlock_page(page);
@@ -555,5 +755,5 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page)
 }
 
 struct address_space_operations isofs_symlink_aops = {
-	.readpage	= rock_ridge_symlink_readpage
+	.readpage = rock_ridge_symlink_readpage
 };
diff --git a/fs/isofs/rock.h b/fs/isofs/rock.h
index deaf5c8e8b4a..ed09e2b08637 100644
--- a/fs/isofs/rock.h
+++ b/fs/isofs/rock.h
@@ -1,85 +1,88 @@
-/* These structs are used by the system-use-sharing protocol, in which the
-   Rock Ridge extensions are embedded.  It is quite possible that other
-   extensions are present on the disk, and this is fine as long as they
-   all use SUSP */
-
-struct SU_SP{
-  unsigned char magic[2];
-  unsigned char skip;
-} __attribute__((packed));
-
-struct SU_CE{
-  char extent[8];
-  char offset[8];
-  char size[8];
+/*
+ * These structs are used by the system-use-sharing protocol, in which the
+ * Rock Ridge extensions are embedded.  It is quite possible that other
+ * extensions are present on the disk, and this is fine as long as they
+ * all use SUSP
+ */
+
+struct SU_SP_s {
+	unsigned char magic[2];
+	unsigned char skip;
+} __attribute__ ((packed));
+
+struct SU_CE_s {
+	char extent[8];
+	char offset[8];
+	char size[8];
 };
 
-struct SU_ER{
-  unsigned char len_id;
-  unsigned char len_des;
-  unsigned char len_src;
-  unsigned char ext_ver;
-  char data[0];
-} __attribute__((packed));
-
-struct RR_RR{
-  char flags[1];
-} __attribute__((packed));
-
-struct RR_PX{
-  char mode[8];
-  char n_links[8];
-  char uid[8];
-  char gid[8];
+struct SU_ER_s {
+	unsigned char len_id;
+	unsigned char len_des;
+	unsigned char len_src;
+	unsigned char ext_ver;
+	char data[0];
+} __attribute__ ((packed));
+
+struct RR_RR_s {
+	char flags[1];
+} __attribute__ ((packed));
+
+struct RR_PX_s {
+	char mode[8];
+	char n_links[8];
+	char uid[8];
+	char gid[8];
 };
 
-struct RR_PN{
-  char dev_high[8];
-  char dev_low[8];
+struct RR_PN_s {
+	char dev_high[8];
+	char dev_low[8];
 };
 
+struct SL_component {
+	unsigned char flags;
+	unsigned char len;
+	char text[0];
+} __attribute__ ((packed));
 
-struct SL_component{
-  unsigned char flags;
-  unsigned char len;
-  char text[0];
-} __attribute__((packed));
+struct RR_SL_s {
+	unsigned char flags;
+	struct SL_component link;
+} __attribute__ ((packed));
 
-struct RR_SL{
-  unsigned char flags;
-  struct SL_component link;
-} __attribute__((packed));
+struct RR_NM_s {
+	unsigned char flags;
+	char name[0];
+} __attribute__ ((packed));
 
-struct RR_NM{
-  unsigned char flags;
-  char name[0];
-} __attribute__((packed));
-
-struct RR_CL{
-  char location[8];
+struct RR_CL_s {
+	char location[8];
 };
 
-struct RR_PL{
-  char location[8];
+struct RR_PL_s {
+	char location[8];
 };
 
-struct stamp{
-  char time[7];
-} __attribute__((packed));
+struct stamp {
+	char time[7];
+} __attribute__ ((packed));
 
-struct RR_TF{
-  char flags;
-  struct stamp times[0];  /* Variable number of these beasts */
-} __attribute__((packed));
+struct RR_TF_s {
+	char flags;
+	struct stamp times[0];	/* Variable number of these beasts */
+} __attribute__ ((packed));
 
 /* Linux-specific extension for transparent decompression */
-struct RR_ZF{
-  char algorithm[2];
-  char parms[2];
-  char real_size[8];
+struct RR_ZF_s {
+	char algorithm[2];
+	char parms[2];
+	char real_size[8];
 };
 
-/* These are the bits and their meanings for flags in the TF structure. */
+/*
+ * These are the bits and their meanings for flags in the TF structure.
+ */
 #define TF_CREATE 1
 #define TF_MODIFY 2
 #define TF_ACCESS 4
@@ -89,31 +92,31 @@ struct RR_ZF{
 #define TF_EFFECTIVE 64
 #define TF_LONG_FORM 128
 
-struct rock_ridge{
-  char signature[2];
-  unsigned char len;
-  unsigned char version;
-  union{
-    struct SU_SP SP;
-    struct SU_CE CE;
-    struct SU_ER ER;
-    struct RR_RR RR;
-    struct RR_PX PX;
-    struct RR_PN PN;
-    struct RR_SL SL;
-    struct RR_NM NM;
-    struct RR_CL CL;
-    struct RR_PL PL;
-    struct RR_TF TF;
-    struct RR_ZF ZF;
-  } u;
+struct rock_ridge {
+	char signature[2];
+	unsigned char len;
+	unsigned char version;
+	union {
+		struct SU_SP_s SP;
+		struct SU_CE_s CE;
+		struct SU_ER_s ER;
+		struct RR_RR_s RR;
+		struct RR_PX_s PX;
+		struct RR_PN_s PN;
+		struct RR_SL_s SL;
+		struct RR_NM_s NM;
+		struct RR_CL_s CL;
+		struct RR_PL_s PL;
+		struct RR_TF_s TF;
+		struct RR_ZF_s ZF;
+	} u;
 };
 
-#define RR_PX 1   /* POSIX attributes */
-#define RR_PN 2   /* POSIX devices */
-#define RR_SL 4   /* Symbolic link */
-#define RR_NM 8   /* Alternate Name */
-#define RR_CL 16  /* Child link */
-#define RR_PL 32  /* Parent link */
-#define RR_RE 64  /* Relocation directory */
-#define RR_TF 128 /* Timestamps */
+#define RR_PX 1			/* POSIX attributes */
+#define RR_PN 2			/* POSIX devices */
+#define RR_SL 4			/* Symbolic link */
+#define RR_NM 8			/* Alternate Name */
+#define RR_CL 16		/* Child link */
+#define RR_PL 32		/* Parent link */
+#define RR_RE 64		/* Relocation directory */
+#define RR_TF 128		/* Timestamps */
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 1e6f2e2ad4a3..5e7b43949517 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -167,7 +167,7 @@ loop:
 	}
 
 	wake_up(&journal->j_wait_done_commit);
-	if (current->flags & PF_FREEZE) {
+	if (freezing(current)) {
 		/*
 		 * The simpler the better. Flushing journal isn't a
 		 * good idea, because that depends on threads that may
@@ -175,7 +175,7 @@ loop:
 		 */
 		jbd_debug(1, "Now suspending kjournald\n");
 		spin_unlock(&journal->j_state_lock);
-		refrigerator(PF_FREEZE);
+		refrigerator();
 		spin_lock(&journal->j_state_lock);
 	} else {
 		/*
diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c
index 8cc6893fc56c..fc589ddd0762 100644
--- a/fs/jffs/intrep.c
+++ b/fs/jffs/intrep.c
@@ -175,8 +175,64 @@ jffs_hexdump(struct mtd_info *mtd, loff_t pos, int size)
 	}
 }
 
+/* Print the contents of a node.  */
+static void
+jffs_print_node(struct jffs_node *n)
+{
+	D(printk("jffs_node: 0x%p\n", n));
+	D(printk("{\n"));
+	D(printk("        0x%08x, /* version  */\n", n->version));
+	D(printk("        0x%08x, /* data_offset  */\n", n->data_offset));
+	D(printk("        0x%08x, /* data_size  */\n", n->data_size));
+	D(printk("        0x%08x, /* removed_size  */\n", n->removed_size));
+	D(printk("        0x%08x, /* fm_offset  */\n", n->fm_offset));
+	D(printk("        0x%02x,       /* name_size  */\n", n->name_size));
+	D(printk("        0x%p, /* fm,  fm->offset: %u  */\n",
+		 n->fm, (n->fm ? n->fm->offset : 0)));
+	D(printk("        0x%p, /* version_prev  */\n", n->version_prev));
+	D(printk("        0x%p, /* version_next  */\n", n->version_next));
+	D(printk("        0x%p, /* range_prev  */\n", n->range_prev));
+	D(printk("        0x%p, /* range_next  */\n", n->range_next));
+	D(printk("}\n"));
+}
+
 #endif
 
+/* Print the contents of a raw inode.  */
+static void
+jffs_print_raw_inode(struct jffs_raw_inode *raw_inode)
+{
+	D(printk("jffs_raw_inode: inode number: %u\n", raw_inode->ino));
+	D(printk("{\n"));
+	D(printk("        0x%08x, /* magic  */\n", raw_inode->magic));
+	D(printk("        0x%08x, /* ino  */\n", raw_inode->ino));
+	D(printk("        0x%08x, /* pino  */\n", raw_inode->pino));
+	D(printk("        0x%08x, /* version  */\n", raw_inode->version));
+	D(printk("        0x%08x, /* mode  */\n", raw_inode->mode));
+	D(printk("        0x%04x,     /* uid  */\n", raw_inode->uid));
+	D(printk("        0x%04x,     /* gid  */\n", raw_inode->gid));
+	D(printk("        0x%08x, /* atime  */\n", raw_inode->atime));
+	D(printk("        0x%08x, /* mtime  */\n", raw_inode->mtime));
+	D(printk("        0x%08x, /* ctime  */\n", raw_inode->ctime));
+	D(printk("        0x%08x, /* offset  */\n", raw_inode->offset));
+	D(printk("        0x%08x, /* dsize  */\n", raw_inode->dsize));
+	D(printk("        0x%08x, /* rsize  */\n", raw_inode->rsize));
+	D(printk("        0x%02x,       /* nsize  */\n", raw_inode->nsize));
+	D(printk("        0x%02x,       /* nlink  */\n", raw_inode->nlink));
+	D(printk("        0x%02x,       /* spare  */\n",
+		 raw_inode->spare));
+	D(printk("        %u,          /* rename  */\n",
+		 raw_inode->rename));
+	D(printk("        %u,          /* deleted  */\n",
+		 raw_inode->deleted));
+	D(printk("        0x%02x,       /* accurate  */\n",
+		 raw_inode->accurate));
+	D(printk("        0x%08x, /* dchksum  */\n", raw_inode->dchksum));
+	D(printk("        0x%04x,     /* nchksum  */\n", raw_inode->nchksum));
+	D(printk("        0x%04x,     /* chksum  */\n", raw_inode->chksum));
+	D(printk("}\n"));
+}
+
 #define flash_safe_acquire(arg)
 #define flash_safe_release(arg)
 
@@ -2507,64 +2563,6 @@ jffs_update_file(struct jffs_file *f, struct jffs_node *node)
 	return 0;
 }
 
-/* Print the contents of a node.  */
-void
-jffs_print_node(struct jffs_node *n)
-{
-	D(printk("jffs_node: 0x%p\n", n));
-	D(printk("{\n"));
-	D(printk("        0x%08x, /* version  */\n", n->version));
-	D(printk("        0x%08x, /* data_offset  */\n", n->data_offset));
-	D(printk("        0x%08x, /* data_size  */\n", n->data_size));
-	D(printk("        0x%08x, /* removed_size  */\n", n->removed_size));
-	D(printk("        0x%08x, /* fm_offset  */\n", n->fm_offset));
-	D(printk("        0x%02x,       /* name_size  */\n", n->name_size));
-	D(printk("        0x%p, /* fm,  fm->offset: %u  */\n",
-		 n->fm, (n->fm ? n->fm->offset : 0)));
-	D(printk("        0x%p, /* version_prev  */\n", n->version_prev));
-	D(printk("        0x%p, /* version_next  */\n", n->version_next));
-	D(printk("        0x%p, /* range_prev  */\n", n->range_prev));
-	D(printk("        0x%p, /* range_next  */\n", n->range_next));
-	D(printk("}\n"));
-}
-
-
-/* Print the contents of a raw inode.  */
-void
-jffs_print_raw_inode(struct jffs_raw_inode *raw_inode)
-{
-	D(printk("jffs_raw_inode: inode number: %u\n", raw_inode->ino));
-	D(printk("{\n"));
-	D(printk("        0x%08x, /* magic  */\n", raw_inode->magic));
-	D(printk("        0x%08x, /* ino  */\n", raw_inode->ino));
-	D(printk("        0x%08x, /* pino  */\n", raw_inode->pino));
-	D(printk("        0x%08x, /* version  */\n", raw_inode->version));
-	D(printk("        0x%08x, /* mode  */\n", raw_inode->mode));
-	D(printk("        0x%04x,     /* uid  */\n", raw_inode->uid));
-	D(printk("        0x%04x,     /* gid  */\n", raw_inode->gid));
-	D(printk("        0x%08x, /* atime  */\n", raw_inode->atime));
-	D(printk("        0x%08x, /* mtime  */\n", raw_inode->mtime));
-	D(printk("        0x%08x, /* ctime  */\n", raw_inode->ctime));
-	D(printk("        0x%08x, /* offset  */\n", raw_inode->offset));
-	D(printk("        0x%08x, /* dsize  */\n", raw_inode->dsize));
-	D(printk("        0x%08x, /* rsize  */\n", raw_inode->rsize));
-	D(printk("        0x%02x,       /* nsize  */\n", raw_inode->nsize));
-	D(printk("        0x%02x,       /* nlink  */\n", raw_inode->nlink));
-	D(printk("        0x%02x,       /* spare  */\n",
-		 raw_inode->spare));
-	D(printk("        %u,          /* rename  */\n",
-		 raw_inode->rename));
-	D(printk("        %u,          /* deleted  */\n",
-		 raw_inode->deleted));
-	D(printk("        0x%02x,       /* accurate  */\n",
-		 raw_inode->accurate));
-	D(printk("        0x%08x, /* dchksum  */\n", raw_inode->dchksum));
-	D(printk("        0x%04x,     /* nchksum  */\n", raw_inode->nchksum));
-	D(printk("        0x%04x,     /* chksum  */\n", raw_inode->chksum));
-	D(printk("}\n"));
-}
-
-
 /* Print the contents of a file.  */
 #if 0
 int
diff --git a/fs/jffs/intrep.h b/fs/jffs/intrep.h
index 4ae97b17911c..5c7abe0e2695 100644
--- a/fs/jffs/intrep.h
+++ b/fs/jffs/intrep.h
@@ -49,8 +49,6 @@ int jffs_garbage_collect_thread(void *c);
 void jffs_garbage_collect_trigger(struct jffs_control *c);
 
 /* For debugging purposes.  */
-void jffs_print_node(struct jffs_node *n);
-void jffs_print_raw_inode(struct jffs_raw_inode *raw_inode);
 #if 0
 int jffs_print_file(struct jffs_file *f);
 #endif  /*  0  */
diff --git a/fs/jffs/jffs_fm.c b/fs/jffs/jffs_fm.c
index 0cab8da49d3c..053e3a98a276 100644
--- a/fs/jffs/jffs_fm.c
+++ b/fs/jffs/jffs_fm.c
@@ -31,6 +31,60 @@ static void jffs_free_fm(struct jffs_fm *n);
 extern kmem_cache_t     *fm_cache;
 extern kmem_cache_t     *node_cache;
 
+#if CONFIG_JFFS_FS_VERBOSE > 0
+void
+jffs_print_fmcontrol(struct jffs_fmcontrol *fmc)
+{
+	D(printk("struct jffs_fmcontrol: 0x%p\n", fmc));
+	D(printk("{\n"));
+	D(printk("        %u, /* flash_size  */\n", fmc->flash_size));
+	D(printk("        %u, /* used_size  */\n", fmc->used_size));
+	D(printk("        %u, /* dirty_size  */\n", fmc->dirty_size));
+	D(printk("        %u, /* free_size  */\n", fmc->free_size));
+	D(printk("        %u, /* sector_size  */\n", fmc->sector_size));
+	D(printk("        %u, /* min_free_size  */\n", fmc->min_free_size));
+	D(printk("        %u, /* max_chunk_size  */\n", fmc->max_chunk_size));
+	D(printk("        0x%p, /* mtd  */\n", fmc->mtd));
+	D(printk("        0x%p, /* head  */    "
+		 "(head->offset = 0x%08x)\n",
+		 fmc->head, (fmc->head ? fmc->head->offset : 0)));
+	D(printk("        0x%p, /* tail  */    "
+		 "(tail->offset + tail->size = 0x%08x)\n",
+		 fmc->tail,
+		 (fmc->tail ? fmc->tail->offset + fmc->tail->size : 0)));
+	D(printk("        0x%p, /* head_extra  */\n", fmc->head_extra));
+	D(printk("        0x%p, /* tail_extra  */\n", fmc->tail_extra));
+	D(printk("}\n"));
+}
+#endif  /*  CONFIG_JFFS_FS_VERBOSE > 0  */
+
+#if CONFIG_JFFS_FS_VERBOSE > 2
+static void
+jffs_print_fm(struct jffs_fm *fm)
+{
+	D(printk("struct jffs_fm: 0x%p\n", fm));
+	D(printk("{\n"));
+	D(printk("       0x%08x, /* offset  */\n", fm->offset));
+	D(printk("       %u, /* size  */\n", fm->size));
+	D(printk("       0x%p, /* prev  */\n", fm->prev));
+	D(printk("       0x%p, /* next  */\n", fm->next));
+	D(printk("       0x%p, /* nodes  */\n", fm->nodes));
+	D(printk("}\n"));
+}
+#endif  /*  CONFIG_JFFS_FS_VERBOSE > 2  */
+
+#if 0
+void
+jffs_print_node_ref(struct jffs_node_ref *ref)
+{
+	D(printk("struct jffs_node_ref: 0x%p\n", ref));
+	D(printk("{\n"));
+	D(printk("       0x%p, /* node  */\n", ref->node));
+	D(printk("       0x%p, /* next  */\n", ref->next));
+	D(printk("}\n"));
+}
+#endif  /*  0  */
+
 /* This function creates a new shiny flash memory control structure.  */
 struct jffs_fmcontrol *
 jffs_build_begin(struct jffs_control *c, int unit)
@@ -742,54 +796,3 @@ int jffs_get_node_inuse(void)
 {
 	return no_jffs_node;
 }
-
-void
-jffs_print_fmcontrol(struct jffs_fmcontrol *fmc)
-{
-	D(printk("struct jffs_fmcontrol: 0x%p\n", fmc));
-	D(printk("{\n"));
-	D(printk("        %u, /* flash_size  */\n", fmc->flash_size));
-	D(printk("        %u, /* used_size  */\n", fmc->used_size));
-	D(printk("        %u, /* dirty_size  */\n", fmc->dirty_size));
-	D(printk("        %u, /* free_size  */\n", fmc->free_size));
-	D(printk("        %u, /* sector_size  */\n", fmc->sector_size));
-	D(printk("        %u, /* min_free_size  */\n", fmc->min_free_size));
-	D(printk("        %u, /* max_chunk_size  */\n", fmc->max_chunk_size));
-	D(printk("        0x%p, /* mtd  */\n", fmc->mtd));
-	D(printk("        0x%p, /* head  */    "
-		 "(head->offset = 0x%08x)\n",
-		 fmc->head, (fmc->head ? fmc->head->offset : 0)));
-	D(printk("        0x%p, /* tail  */    "
-		 "(tail->offset + tail->size = 0x%08x)\n",
-		 fmc->tail,
-		 (fmc->tail ? fmc->tail->offset + fmc->tail->size : 0)));
-	D(printk("        0x%p, /* head_extra  */\n", fmc->head_extra));
-	D(printk("        0x%p, /* tail_extra  */\n", fmc->tail_extra));
-	D(printk("}\n"));
-}
-
-void
-jffs_print_fm(struct jffs_fm *fm)
-{
-	D(printk("struct jffs_fm: 0x%p\n", fm));
-	D(printk("{\n"));
-	D(printk("       0x%08x, /* offset  */\n", fm->offset));
-	D(printk("       %u, /* size  */\n", fm->size));
-	D(printk("       0x%p, /* prev  */\n", fm->prev));
-	D(printk("       0x%p, /* next  */\n", fm->next));
-	D(printk("       0x%p, /* nodes  */\n", fm->nodes));
-	D(printk("}\n"));
-}
-
-#if 0
-void
-jffs_print_node_ref(struct jffs_node_ref *ref)
-{
-	D(printk("struct jffs_node_ref: 0x%p\n", ref));
-	D(printk("{\n"));
-	D(printk("       0x%p, /* node  */\n", ref->node));
-	D(printk("       0x%p, /* next  */\n", ref->next));
-	D(printk("}\n"));
-}
-#endif  /*  0  */
-
diff --git a/fs/jffs/jffs_fm.h b/fs/jffs/jffs_fm.h
index bc291c431822..f64151e74122 100644
--- a/fs/jffs/jffs_fm.h
+++ b/fs/jffs/jffs_fm.h
@@ -139,8 +139,9 @@ int jffs_add_node(struct jffs_node *node);
 void jffs_fmfree_partly(struct jffs_fmcontrol *fmc, struct jffs_fm *fm,
 			__u32 size);
 
+#if CONFIG_JFFS_FS_VERBOSE > 0
 void jffs_print_fmcontrol(struct jffs_fmcontrol *fmc);
-void jffs_print_fm(struct jffs_fm *fm);
+#endif
 #if 0
 void jffs_print_node_ref(struct jffs_node_ref *ref);
 #endif  /*  0  */
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c
index 1be6de27dd81..638836b277d4 100644
--- a/fs/jffs2/background.c
+++ b/fs/jffs2/background.c
@@ -92,7 +92,7 @@ static int jffs2_garbage_collect_thread(void *_c)
 			schedule();
 		}
 
-		if (try_to_freeze(0))
+		if (try_to_freeze())
 			continue;
 
 		cond_resched();
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 8d2a9ab981d4..e892dab40c26 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -21,6 +21,7 @@
 #include <linux/sched.h>
 #include <linux/fs.h>
 #include <linux/quotaops.h>
+#include <linux/posix_acl_xattr.h>
 #include "jfs_incore.h"
 #include "jfs_xattr.h"
 #include "jfs_acl.h"
@@ -36,11 +37,11 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type)
 
 	switch(type) {
 		case ACL_TYPE_ACCESS:
-			ea_name = XATTR_NAME_ACL_ACCESS;
+			ea_name = POSIX_ACL_XATTR_ACCESS;
 			p_acl = &ji->i_acl;
 			break;
 		case ACL_TYPE_DEFAULT:
-			ea_name = XATTR_NAME_ACL_DEFAULT;
+			ea_name = POSIX_ACL_XATTR_DEFAULT;
 			p_acl = &ji->i_default_acl;
 			break;
 		default:
@@ -70,8 +71,7 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type)
 		if (!IS_ERR(acl))
 			*p_acl = posix_acl_dup(acl);
 	}
-	if (value)
-		kfree(value);
+	kfree(value);
 	return acl;
 }
 
@@ -89,11 +89,11 @@ static int jfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 
 	switch(type) {
 		case ACL_TYPE_ACCESS:
-			ea_name = XATTR_NAME_ACL_ACCESS;
+			ea_name = POSIX_ACL_XATTR_ACCESS;
 			p_acl = &ji->i_acl;
 			break;
 		case ACL_TYPE_DEFAULT:
-			ea_name = XATTR_NAME_ACL_DEFAULT;
+			ea_name = POSIX_ACL_XATTR_DEFAULT;
 			p_acl = &ji->i_default_acl;
 			if (!S_ISDIR(inode->i_mode))
 				return acl ? -EACCES : 0;
@@ -102,7 +102,7 @@ static int jfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 			return -EINVAL;
 	}
 	if (acl) {
-		size = xattr_acl_size(acl->a_count);
+		size = posix_acl_xattr_size(acl->a_count);
 		value = kmalloc(size, GFP_KERNEL);
 		if (!value)
 			return -ENOMEM;
@@ -112,8 +112,7 @@ static int jfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 	}
 	rc = __jfs_setxattr(inode, ea_name, value, size, 0);
 out:
-	if (value)
-		kfree(value);
+	kfree(value);
 
 	if (!rc) {
 		if (*p_acl && (*p_acl != JFS_ACL_NOT_CACHED))
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index a87b06fa8ff8..c2c19c9ed9a4 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -1,6 +1,6 @@
 /*
- *   Copyright (c) International Business Machines Corp., 2000-2002
- *   Portions Copyright (c) Christoph Hellwig, 2001-2002
+ *   Copyright (C) International Business Machines Corp., 2000-2002
+ *   Portions Copyright (C) Christoph Hellwig, 2001-2002
  *
  *   This program is free software;  you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
@@ -19,16 +19,13 @@
 
 #include <linux/fs.h>
 #include "jfs_incore.h"
+#include "jfs_inode.h"
 #include "jfs_dmap.h"
 #include "jfs_txnmgr.h"
 #include "jfs_xattr.h"
 #include "jfs_acl.h"
 #include "jfs_debug.h"
 
-
-extern int jfs_commit_inode(struct inode *, int);
-extern void jfs_truncate(struct inode *);
-
 int jfs_fsync(struct file *file, struct dentry *dentry, int datasync)
 {
 	struct inode *inode = dentry->d_inode;
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 24a689179af2..2137138c59b0 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -23,6 +23,7 @@
 #include <linux/pagemap.h>
 #include <linux/quotaops.h>
 #include "jfs_incore.h"
+#include "jfs_inode.h"
 #include "jfs_filsys.h"
 #include "jfs_imap.h"
 #include "jfs_extent.h"
@@ -30,14 +31,6 @@
 #include "jfs_debug.h"
 
 
-extern struct inode_operations jfs_dir_inode_operations;
-extern struct inode_operations jfs_file_inode_operations;
-extern struct inode_operations jfs_symlink_inode_operations;
-extern struct file_operations jfs_dir_operations;
-extern struct file_operations jfs_file_operations;
-struct address_space_operations jfs_aops;
-extern int freeZeroLink(struct inode *);
-
 void jfs_read_inode(struct inode *inode)
 {
 	if (diRead(inode)) { 
@@ -136,7 +129,7 @@ void jfs_delete_inode(struct inode *inode)
 	jfs_info("In jfs_delete_inode, inode = 0x%p", inode);
 
 	if (test_cflag(COMMIT_Freewmap, inode))
-		freeZeroLink(inode);
+		jfs_free_zero_link(inode);
 
 	diFree(inode);
 
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index d2ae430adecf..a3acd3eec059 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -20,8 +20,6 @@
 
 #ifdef CONFIG_JFS_POSIX_ACL
 
-#include <linux/xattr_acl.h>
-
 int jfs_permission(struct inode *, int, struct nameidata *);
 int jfs_init_acl(struct inode *, struct inode *);
 int jfs_setattr(struct dentry *, struct iattr *);
diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c
index 91a0a889ebc5..4caea6b43b92 100644
--- a/fs/jfs/jfs_debug.c
+++ b/fs/jfs/jfs_debug.c
@@ -58,8 +58,6 @@ void dump_mem(char *label, void *data, int length)
 
 static struct proc_dir_entry *base;
 #ifdef CONFIG_JFS_DEBUG
-extern read_proc_t jfs_txanchor_read;
-
 static int loglevel_read(char *page, char **start, off_t off,
 			 int count, int *eof, void *data)
 {
@@ -97,14 +95,6 @@ static int loglevel_write(struct file *file, const char __user *buffer,
 }
 #endif
 
-
-#ifdef CONFIG_JFS_STATISTICS
-extern read_proc_t jfs_lmstats_read;
-extern read_proc_t jfs_txstats_read;
-extern read_proc_t jfs_xtstat_read;
-extern read_proc_t jfs_mpstat_read;
-#endif
-
 static struct {
 	const char	*name;
 	read_proc_t	*read_fn;
diff --git a/fs/jfs/jfs_debug.h b/fs/jfs/jfs_debug.h
index a38079ae1e00..ddffbbd4d955 100644
--- a/fs/jfs/jfs_debug.h
+++ b/fs/jfs/jfs_debug.h
@@ -1,6 +1,6 @@
 /*
- *   Copyright (c) International Business Machines Corp., 2000-2002
- *   Portions Copyright (c) Christoph Hellwig, 2001-2002
+ *   Copyright (C) International Business Machines Corp., 2000-2002
+ *   Portions Copyright (C) Christoph Hellwig, 2001-2002
  *
  *   This program is free software;  you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
@@ -31,7 +31,9 @@
  * CONFIG_JFS_DEBUG or CONFIG_JFS_STATISTICS is defined
  */
 #if defined(CONFIG_PROC_FS) && (defined(CONFIG_JFS_DEBUG) || defined(CONFIG_JFS_STATISTICS))
-	#define PROC_FS_JFS
+#define PROC_FS_JFS
+extern void jfs_proc_init(void);
+extern void jfs_proc_clean(void);
 #endif
 
 /*
@@ -65,8 +67,8 @@
 
 extern int jfsloglevel;
 
-/* dump memory contents */
 extern void dump_mem(char *label, void *data, int length);
+extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *);
 
 /* information message: e.g., configuration, major event */
 #define jfs_info(fmt, arg...) do {			\
@@ -110,6 +112,11 @@ extern void dump_mem(char *label, void *data, int length);
  *	----------
  */
 #ifdef	CONFIG_JFS_STATISTICS
+extern int jfs_lmstats_read(char *, char **, off_t, int, int *, void *);
+extern int jfs_txstats_read(char *, char **, off_t, int, int *, void *);
+extern int jfs_mpstat_read(char *, char **, off_t, int, int *, void *);
+extern int jfs_xtstat_read(char *, char **, off_t, int, int *, void *);
+
 #define	INCREMENT(x)		((x)++)
 #define	DECREMENT(x)		((x)--)
 #define	HIGHWATERMARK(x,y)	((x) = max((x), (y)))
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 69007fd546ef..cced2fed9d0f 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -272,7 +272,6 @@ int dbMount(struct inode *ipbmap)
 int dbUnmount(struct inode *ipbmap, int mounterror)
 {
 	struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
-	int i;
 
 	if (!(mounterror || isReadOnly(ipbmap)))
 		dbSync(ipbmap);
@@ -282,14 +281,6 @@ int dbUnmount(struct inode *ipbmap, int mounterror)
 	 */
 	truncate_inode_pages(ipbmap->i_mapping, 0);
 
-	/*
-	 * Sanity Check
-	 */
-	for (i = 0; i < bmp->db_numag; i++)
-		if (atomic_read(&bmp->db_active[i]))
-			printk(KERN_ERR "dbUnmount: db_active[%d] = %d\n",
-			       i, atomic_read(&bmp->db_active[i]));
-
 	/* free the memory for the in-memory bmap. */
 	kfree(bmp);
 
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index ac41f72d6d50..8676aee3ae48 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -2931,6 +2931,9 @@ static void add_missing_indices(struct inode *inode, s64 bn)
 	ASSERT(p->header.flag & BT_LEAF);
 
 	tlck = txLock(tid, inode, mp, tlckDTREE | tlckENTRY);
+	if (BT_IS_ROOT(mp))
+		tlck->type |= tlckBTROOT;
+
 	dtlck = (struct dt_lock *) &tlck->lock;
 
 	stbl = DT_GETSTBL(p);
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c
index 1953acb79266..4879603daa1c 100644
--- a/fs/jfs/jfs_extent.c
+++ b/fs/jfs/jfs_extent.c
@@ -19,6 +19,7 @@
 #include <linux/fs.h>
 #include <linux/quotaops.h>
 #include "jfs_incore.h"
+#include "jfs_inode.h"
 #include "jfs_superblock.h"
 #include "jfs_dmap.h"
 #include "jfs_extent.h"
@@ -33,12 +34,6 @@ static int extBrealloc(struct inode *, s64, s64, s64 *, s64 *);
 #endif
 static s64 extRoundDown(s64 nb);
 
-/*
- * external references
- */
-extern int jfs_commit_inode(struct inode *, int);
-
-
 #define DPD(a)          (printk("(a): %d\n",(a)))
 #define DPC(a)          (printk("(a): %c\n",(a)))
 #define DPL1(a)					\
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 7acff2ce3c80..971af2977eff 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -47,6 +47,7 @@
 #include <linux/quotaops.h>
 
 #include "jfs_incore.h"
+#include "jfs_inode.h"
 #include "jfs_filsys.h"
 #include "jfs_dinode.h"
 #include "jfs_dmap.h"
@@ -69,11 +70,6 @@
 #define AG_UNLOCK(imap,agno)		up(&imap->im_aglock[agno])
 
 /*
- * external references
- */
-extern struct address_space_operations jfs_aops;
-
-/*
  * forward references
  */
 static int diAllocAG(struct inomap *, int, boolean_t, struct inode *);
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index 84f2459b2191..2af5efbfd06f 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -19,6 +19,7 @@
 #include <linux/fs.h>
 #include <linux/quotaops.h>
 #include "jfs_incore.h"
+#include "jfs_inode.h"
 #include "jfs_filsys.h"
 #include "jfs_imap.h"
 #include "jfs_dinode.h"
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index 3df91fbfe781..b54bac576cb3 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -1,5 +1,5 @@
 /*
- *   Copyright (c) International Business Machines Corp., 2000-2001
+ *   Copyright (C) International Business Machines Corp., 2000-2001
  *
  *   This program is free software;  you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
@@ -19,5 +19,22 @@
 #define _H_JFS_INODE
 
 extern struct inode *ialloc(struct inode *, umode_t);
+extern int jfs_fsync(struct file *, struct dentry *, int);
+extern void jfs_read_inode(struct inode *);
+extern int jfs_commit_inode(struct inode *, int);
+extern int jfs_write_inode(struct inode*, int);
+extern void jfs_delete_inode(struct inode *);
+extern void jfs_dirty_inode(struct inode *);
+extern void jfs_truncate(struct inode *);
+extern void jfs_truncate_nolock(struct inode *, loff_t);
+extern void jfs_free_zero_link(struct inode *);
+extern struct dentry *jfs_get_parent(struct dentry *dentry);
 
+extern struct address_space_operations jfs_aops;
+extern struct inode_operations jfs_dir_inode_operations;
+extern struct file_operations jfs_dir_operations;
+extern struct inode_operations jfs_file_inode_operations;
+extern struct file_operations jfs_file_operations;
+extern struct inode_operations jfs_symlink_inode_operations;
+extern struct dentry_operations jfs_ci_dentry_operations;
 #endif				/* _H_JFS_INODE */
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index dfa1200daa61..79d07624bfe1 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -71,6 +71,7 @@
 #include "jfs_incore.h"
 #include "jfs_filsys.h"
 #include "jfs_metapage.h"
+#include "jfs_superblock.h"
 #include "jfs_txnmgr.h"
 #include "jfs_debug.h"
 
@@ -167,14 +168,6 @@ static struct jfs_log *dummy_log = NULL;
 static DECLARE_MUTEX(jfs_log_sem);
 
 /*
- * external references
- */
-extern void txLazyUnlock(struct tblock * tblk);
-extern int jfs_stop_threads;
-extern struct completion jfsIOwait;
-extern int jfs_tlocks_low;
-
-/*
  * forward references
  */
 static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk,
@@ -1624,6 +1617,8 @@ void jfs_flush_journal(struct jfs_log *log, int wait)
 		}
 	}
 	assert(list_empty(&log->cqueue));
+
+#ifdef CONFIG_JFS_DEBUG
 	if (!list_empty(&log->synclist)) {
 		struct logsyncblk *lp;
 
@@ -1638,9 +1633,8 @@ void jfs_flush_journal(struct jfs_log *log, int wait)
 				dump_mem("orphan tblock", lp,
 					 sizeof(struct tblock));
 		}
-//		current->state = TASK_INTERRUPTIBLE;
-//		schedule();
 	}
+#endif
 	//assert(list_empty(&log->synclist));
 	clear_bit(log_FLUSH, &log->flag);
 }
@@ -2365,9 +2359,9 @@ int jfsIOWait(void *arg)
 			lbmStartIO(bp);
 			spin_lock_irq(&log_redrive_lock);
 		}
-		if (current->flags & PF_FREEZE) {
+		if (freezing(current)) {
 			spin_unlock_irq(&log_redrive_lock);
-			refrigerator(PF_FREEZE);
+			refrigerator();
 		} else {
 			add_wait_queue(&jfs_IO_thread_wait, &wq);
 			set_current_state(TASK_INTERRUPTIBLE);
diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h
index 51291fbc420c..747114cd38b8 100644
--- a/fs/jfs/jfs_logmgr.h
+++ b/fs/jfs/jfs_logmgr.h
@@ -507,6 +507,8 @@ extern int lmLogClose(struct super_block *sb);
 extern int lmLogShutdown(struct jfs_log * log);
 extern int lmLogInit(struct jfs_log * log);
 extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize);
+extern int lmGroupCommit(struct jfs_log *, struct tblock *);
+extern int jfsIOWait(void *);
 extern void jfs_flush_journal(struct jfs_log * log, int wait);
 extern void jfs_syncpt(struct jfs_log *log);
 
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 41bf078dce05..6c5485d16c39 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -198,7 +198,7 @@ static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
 	}
 }
 
-static inline struct metapage *alloc_metapage(int gfp_mask)
+static inline struct metapage *alloc_metapage(unsigned int gfp_mask)
 {
 	return mempool_alloc(metapage_mempool, gfp_mask);
 }
@@ -726,12 +726,12 @@ void force_metapage(struct metapage *mp)
 	page_cache_release(page);
 }
 
-extern void hold_metapage(struct metapage *mp)
+void hold_metapage(struct metapage *mp)
 {
 	lock_page(mp->page);
 }
 
-extern void put_metapage(struct metapage *mp)
+void put_metapage(struct metapage *mp)
 {
 	if (mp->count || mp->nohomeok) {
 		/* Someone else will release this */
diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h
index 991e9fb84c75..f0b7d3282b07 100644
--- a/fs/jfs/jfs_metapage.h
+++ b/fs/jfs/jfs_metapage.h
@@ -1,6 +1,6 @@
 /*
- *   Copyright (c) International Business Machines Corp., 2000-2002
- *   Portions Copyright (c) Christoph Hellwig, 2001-2002
+ *   Copyright (C) International Business Machines Corp., 2000-2002
+ *   Portions Copyright (C) Christoph Hellwig, 2001-2002
  *
  *   This program is free software;  you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
@@ -58,6 +58,8 @@ struct metapage {
 #define mark_metapage_dirty(mp) set_bit(META_dirty, &(mp)->flag)
 
 /* function prototypes */
+extern int metapage_init(void);
+extern void metapage_exit(void);
 extern struct metapage *__get_metapage(struct inode *inode,
 				  unsigned long lblock, unsigned int size,
 				  int absolute, unsigned long new);
diff --git a/fs/jfs/jfs_superblock.h b/fs/jfs/jfs_superblock.h
index ab0566f70cfa..fcf781bf31cb 100644
--- a/fs/jfs/jfs_superblock.h
+++ b/fs/jfs/jfs_superblock.h
@@ -109,5 +109,16 @@ struct jfs_superblock {
 extern int readSuper(struct super_block *, struct buffer_head **);
 extern int updateSuper(struct super_block *, uint);
 extern void jfs_error(struct super_block *, const char *, ...);
+extern int jfs_mount(struct super_block *);
+extern int jfs_mount_rw(struct super_block *, int);
+extern int jfs_umount(struct super_block *);
+extern int jfs_umount_rw(struct super_block *);
+
+extern int jfs_stop_threads;
+extern struct completion jfsIOwait;
+extern wait_queue_head_t jfs_IO_thread_wait;
+extern wait_queue_head_t jfs_commit_thread_wait;
+extern wait_queue_head_t jfs_sync_thread_wait;
+extern int jfs_extendfs(struct super_block *, s64, int);
 
 #endif /*_H_JFS_SUPERBLOCK */
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index e93d01aa12c4..121c981ff453 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -42,7 +42,6 @@
  * hold on to mp+lock thru update of maps
  */
 
-
 #include <linux/fs.h>
 #include <linux/vmalloc.h>
 #include <linux/smp_lock.h>
@@ -51,6 +50,7 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include "jfs_incore.h"
+#include "jfs_inode.h"
 #include "jfs_filsys.h"
 #include "jfs_metapage.h"
 #include "jfs_dinode.h"
@@ -109,7 +109,6 @@ static int TxLockHWM;		/* High water mark for number of txLocks used */
 static int TxLockVHWM;		/* Very High water mark */
 struct tlock *TxLock;           /* transaction lock table */
 
-
 /*
  *      transaction management lock
  */
@@ -149,7 +148,6 @@ static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event)
 
 #define TXN_WAKEUP(event) wake_up_all(event)
 
-
 /*
  *      statistics
  */
@@ -161,16 +159,6 @@ static struct {
 	int waitlock;		/* 4: # of tlock wait */
 } stattx;
 
-
-/*
- * external references
- */
-extern int lmGroupCommit(struct jfs_log *, struct tblock *);
-extern int jfs_commit_inode(struct inode *, int);
-extern int jfs_stop_threads;
-
-extern struct completion jfsIOwait;
-
 /*
  * forward references
  */
@@ -358,7 +346,6 @@ void txExit(void)
 	TxBlock = NULL;
 }
 
-
 /*
  * NAME:        txBegin()
  *
@@ -460,7 +447,6 @@ tid_t txBegin(struct super_block *sb, int flag)
 	return t;
 }
 
-
 /*
  * NAME:        txBeginAnon()
  *
@@ -503,7 +489,6 @@ void txBeginAnon(struct super_block *sb)
 	TXN_UNLOCK();
 }
 
-
 /*
  *      txEnd()
  *
@@ -592,7 +577,6 @@ wakeup:
 	TXN_WAKEUP(&TxAnchor.freewait);
 }
 
-
 /*
  *      txLock()
  *
@@ -868,7 +852,6 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
 	return NULL;
 }
 
-
 /*
  * NAME:        txRelease()
  *
@@ -908,7 +891,6 @@ static void txRelease(struct tblock * tblk)
 	TXN_UNLOCK();
 }
 
-
 /*
  * NAME:        txUnlock()
  *
@@ -996,7 +978,6 @@ static void txUnlock(struct tblock * tblk)
 	}
 }
 
-
 /*
  *      txMaplock()
  *
@@ -1069,7 +1050,6 @@ struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
 	return tlck;
 }
 
-
 /*
  *      txLinelock()
  *
@@ -1103,8 +1083,6 @@ struct linelock *txLinelock(struct linelock * tlock)
 	return linelock;
 }
 
-
-
 /*
  *              transaction commit management
  *              -----------------------------
@@ -1373,7 +1351,6 @@ int txCommit(tid_t tid,		/* transaction identifier */
 	return rc;
 }
 
-
 /*
  * NAME:        txLog()
  *
@@ -1437,7 +1414,6 @@ static int txLog(struct jfs_log * log, struct tblock * tblk, struct commit * cd)
 	return rc;
 }
 
-
 /*
  *      diLog()
  *
@@ -1465,7 +1441,6 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	if (tlck->type & tlckENTRY) {
 		/* log after-image for logredo(): */
 		lrd->type = cpu_to_le16(LOG_REDOPAGE);
-//              *pxd = mp->cm_pxd;
 		PXDaddress(pxd, mp->index);
 		PXDlength(pxd,
 			  mp->logical_size >> tblk->sb->s_blocksize_bits);
@@ -1552,7 +1527,6 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	return rc;
 }
 
-
 /*
  *      dataLog()
  *
@@ -1599,7 +1573,6 @@ static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	return 0;
 }
 
-
 /*
  *      dtLog()
  *
@@ -1639,7 +1612,6 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 			lrd->log.redopage.type |= cpu_to_le16(LOG_EXTEND);
 		else
 			lrd->log.redopage.type |= cpu_to_le16(LOG_NEW);
-//              *pxd = mp->cm_pxd;
 		PXDaddress(pxd, mp->index);
 		PXDlength(pxd,
 			  mp->logical_size >> tblk->sb->s_blocksize_bits);
@@ -1704,7 +1676,6 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	return;
 }
 
-
 /*
  *      xtLog()
  *
@@ -1760,7 +1731,6 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 		 * applying the after-image to the meta-data page.
 		 */
 		lrd->type = cpu_to_le16(LOG_REDOPAGE);
-//              *page_pxd = mp->cm_pxd;
 		PXDaddress(page_pxd, mp->index);
 		PXDlength(page_pxd,
 			  mp->logical_size >> tblk->sb->s_blocksize_bits);
@@ -2093,7 +2063,6 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	return;
 }
 
-
 /*
  *      mapLog()
  *
@@ -2180,7 +2149,6 @@ void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	}
 }
 
-
 /*
  *      txEA()
  *
@@ -2233,7 +2201,6 @@ void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
 	}
 }
 
-
 /*
  *      txForce()
  *
@@ -2300,7 +2267,6 @@ void txForce(struct tblock * tblk)
 	}
 }
 
-
 /*
  *      txUpdateMap()
  *
@@ -2437,7 +2403,6 @@ static void txUpdateMap(struct tblock * tblk)
 	}
 }
 
-
 /*
  *      txAllocPMap()
  *
@@ -2509,7 +2474,6 @@ static void txAllocPMap(struct inode *ip, struct maplock * maplock,
 	}
 }
 
-
 /*
  *      txFreeMap()
  *
@@ -2611,7 +2575,6 @@ void txFreeMap(struct inode *ip,
 	}
 }
 
-
 /*
  *      txFreelock()
  *
@@ -2652,7 +2615,6 @@ void txFreelock(struct inode *ip)
 	TXN_UNLOCK();
 }
 
-
 /*
  *      txAbort()
  *
@@ -2826,9 +2788,9 @@ int jfs_lazycommit(void *arg)
 		/* In case a wakeup came while all threads were active */
 		jfs_commit_thread_waking = 0;
 
-		if (current->flags & PF_FREEZE) {
+		if (freezing(current)) {
 			LAZY_UNLOCK(flags);
-			refrigerator(PF_FREEZE);
+			refrigerator();
 		} else {
 			DECLARE_WAITQUEUE(wq, current);
 
@@ -3025,9 +2987,9 @@ int jfs_sync(void *arg)
 		/* Add anon_list2 back to anon_list */
 		list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);
 
-		if (current->flags & PF_FREEZE) {
+		if (freezing(current)) {
 			TXN_UNLOCK();
-			refrigerator(PF_FREEZE);
+			refrigerator();
 		} else {
 			DECLARE_WAITQUEUE(wq, current);
 
diff --git a/fs/jfs/jfs_txnmgr.h b/fs/jfs/jfs_txnmgr.h
index b71b82c2df04..59ad0f6b7231 100644
--- a/fs/jfs/jfs_txnmgr.h
+++ b/fs/jfs/jfs_txnmgr.h
@@ -285,34 +285,26 @@ struct commit {
 /*
  * external declarations
  */
-extern struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage *mp,
-			    int flag);
-
-extern struct tlock *txMaplock(tid_t tid, struct inode *ip, int flag);
-
-extern int txCommit(tid_t tid, int nip, struct inode **iplist, int flag);
-
-extern tid_t txBegin(struct super_block *sb, int flag);
-
-extern void txBeginAnon(struct super_block *sb);
-
-extern void txEnd(tid_t tid);
-
-extern void txAbort(tid_t tid, int dirty);
-
-extern struct linelock *txLinelock(struct linelock * tlock);
-
-extern void txFreeMap(struct inode *ip, struct maplock * maplock,
-		      struct tblock * tblk, int maptype);
-
-extern void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea);
-
-extern void txFreelock(struct inode *ip);
-
-extern int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
-		 struct tlock * tlck);
-
-extern void txQuiesce(struct super_block *sb);
-
-extern void txResume(struct super_block *sb);
+extern int jfs_tlocks_low;
+
+extern int txInit(void);
+extern void txExit(void);
+extern struct tlock *txLock(tid_t, struct inode *, struct metapage *, int);
+extern struct tlock *txMaplock(tid_t, struct inode *, int);
+extern int txCommit(tid_t, int, struct inode **, int);
+extern tid_t txBegin(struct super_block *, int);
+extern void txBeginAnon(struct super_block *);
+extern void txEnd(tid_t);
+extern void txAbort(tid_t, int);
+extern struct linelock *txLinelock(struct linelock *);
+extern void txFreeMap(struct inode *, struct maplock *, struct tblock *, int);
+extern void txEA(tid_t, struct inode *, dxd_t *, dxd_t *);
+extern void txFreelock(struct inode *);
+extern int lmLog(struct jfs_log *, struct tblock *, struct lrd *,
+		 struct tlock *);
+extern void txQuiesce(struct super_block *);
+extern void txResume(struct super_block *);
+extern void txLazyUnlock(struct tblock *);
+extern int jfs_lazycommit(void *);
+extern int jfs_sync(void *);
 #endif				/* _H_JFS_TXNMGR */
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 8413a368f449..1cae14e741eb 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -31,20 +31,9 @@
 #include "jfs_acl.h"
 #include "jfs_debug.h"
 
-extern struct inode_operations jfs_file_inode_operations;
-extern struct inode_operations jfs_symlink_inode_operations;
-extern struct file_operations jfs_file_operations;
-extern struct address_space_operations jfs_aops;
-
-extern int jfs_fsync(struct file *, struct dentry *, int);
-extern void jfs_truncate_nolock(struct inode *, loff_t);
-extern int jfs_init_acl(struct inode *, struct inode *);
-
 /*
  * forward references
  */
-struct inode_operations jfs_dir_inode_operations;
-struct file_operations jfs_dir_operations;
 struct dentry_operations jfs_ci_dentry_operations;
 
 static s64 commitZeroLink(tid_t, struct inode *);
@@ -655,7 +644,7 @@ static s64 commitZeroLink(tid_t tid, struct inode *ip)
 
 
 /*
- * NAME:	freeZeroLink()
+ * NAME:	jfs_free_zero_link()
  *
  * FUNCTION:    for non-directory, called by iClose(),
  *		free resources of a file from cache and WORKING map 
@@ -663,15 +652,12 @@ static s64 commitZeroLink(tid_t tid, struct inode *ip)
  *		while associated with a pager object,
  *
  * PARAMETER:	ip	- pointer to inode of file.
- *
- * RETURN:	0 -ok
  */
-int freeZeroLink(struct inode *ip)
+void jfs_free_zero_link(struct inode *ip)
 {
-	int rc = 0;
 	int type;
 
-	jfs_info("freeZeroLink: ip = 0x%p", ip);
+	jfs_info("jfs_free_zero_link: ip = 0x%p", ip);
 
 	/* return if not reg or symbolic link or if size is
 	 * already ok.
@@ -684,10 +670,10 @@ int freeZeroLink(struct inode *ip)
 	case S_IFLNK:
 		/* if its contained in inode nothing to do */
 		if (ip->i_size < IDATASIZE)
-			return 0;
+			return;
 		break;
 	default:
-		return 0;
+		return;
 	}
 
 	/*
@@ -737,9 +723,7 @@ int freeZeroLink(struct inode *ip)
 	 * free xtree/data blocks from working block map;
 	 */
 	if (ip->i_size)
-		rc = xtTruncate(0, ip, 0, COMMIT_WMAP);
-
-	return rc;
+		xtTruncate(0, ip, 0, COMMIT_WMAP);
 }
 
 /*
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 5e774ed7fb64..ee32211288ce 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -24,10 +24,12 @@
 #include <linux/completion.h>
 #include <linux/vfs.h>
 #include <linux/moduleparam.h>
+#include <linux/posix_acl.h>
 #include <asm/uaccess.h>
 
 #include "jfs_incore.h"
 #include "jfs_filsys.h"
+#include "jfs_inode.h"
 #include "jfs_metapage.h"
 #include "jfs_superblock.h"
 #include "jfs_dmap.h"
@@ -62,37 +64,6 @@ module_param(jfsloglevel, int, 0644);
 MODULE_PARM_DESC(jfsloglevel, "Specify JFS loglevel (0, 1 or 2)");
 #endif
 
-/*
- * External declarations
- */
-extern int jfs_mount(struct super_block *);
-extern int jfs_mount_rw(struct super_block *, int);
-extern int jfs_umount(struct super_block *);
-extern int jfs_umount_rw(struct super_block *);
-
-extern int jfsIOWait(void *);
-extern int jfs_lazycommit(void *);
-extern int jfs_sync(void *);
-
-extern void jfs_read_inode(struct inode *inode);
-extern void jfs_dirty_inode(struct inode *inode);
-extern void jfs_delete_inode(struct inode *inode);
-extern int jfs_write_inode(struct inode *inode, int wait);
-
-extern struct dentry *jfs_get_parent(struct dentry *dentry);
-extern int jfs_extendfs(struct super_block *, s64, int);
-
-extern struct dentry_operations jfs_ci_dentry_operations;
-
-#ifdef PROC_FS_JFS		/* see jfs_debug.h */
-extern void jfs_proc_init(void);
-extern void jfs_proc_clean(void);
-#endif
-
-extern wait_queue_head_t jfs_IO_thread_wait;
-extern wait_queue_head_t jfs_commit_thread_wait;
-extern wait_queue_head_t jfs_sync_thread_wait;
-
 static void jfs_handle_error(struct super_block *sb)
 {
 	struct jfs_sb_info *sbi = JFS_SBI(sb);
@@ -593,11 +564,6 @@ static struct file_system_type jfs_fs_type = {
 	.fs_flags	= FS_REQUIRES_DEV,
 };
 
-extern int metapage_init(void);
-extern int txInit(void);
-extern void txExit(void);
-extern void metapage_exit(void);
-
 static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags)
 {
 	struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo;
diff --git a/fs/jfs/symlink.c b/fs/jfs/symlink.c
index ef4c07ee92b2..287d8d6c3cfd 100644
--- a/fs/jfs/symlink.c
+++ b/fs/jfs/symlink.c
@@ -1,5 +1,5 @@
 /*
- *   Copyright (c) Christoph Hellwig, 2001-2002
+ *   Copyright (C) Christoph Hellwig, 2001-2002
  *
  *   This program is free software;  you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
@@ -19,6 +19,7 @@
 #include <linux/fs.h>
 #include <linux/namei.h>
 #include "jfs_incore.h"
+#include "jfs_inode.h"
 #include "jfs_xattr.h"
 
 static int jfs_follow_link(struct dentry *dentry, struct nameidata *nd)
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 7a9ffd5d03dc..ee438d429d45 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -19,6 +19,7 @@
 
 #include <linux/fs.h>
 #include <linux/xattr.h>
+#include <linux/posix_acl_xattr.h>
 #include <linux/quotaops.h>
 #include "jfs_incore.h"
 #include "jfs_superblock.h"
@@ -718,9 +719,9 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
 		return -EPERM;
 
 	/*
-	 * XATTR_NAME_ACL_ACCESS is tied to i_mode
+	 * POSIX_ACL_XATTR_ACCESS is tied to i_mode
 	 */
-	if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0) {
+	if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0) {
 		acl = posix_acl_from_xattr(value, value_len);
 		if (IS_ERR(acl)) {
 			rc = PTR_ERR(acl);
@@ -750,7 +751,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
 		JFS_IP(inode)->i_acl = JFS_ACL_NOT_CACHED;
 
 		return 0;
-	} else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) {
+	} else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) {
 		acl = posix_acl_from_xattr(value, value_len);
 		if (IS_ERR(acl)) {
 			rc = PTR_ERR(acl);
@@ -946,8 +947,7 @@ int __jfs_setxattr(struct inode *inode, const char *name, const void *value,
       out:
 	up_write(&JFS_IP(inode)->xattr_sem);
 
-	if (os2name)
-		kfree(os2name);
+	kfree(os2name);
 
 	return rc;
 }
@@ -1042,8 +1042,7 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data,
       out:
 	up_read(&JFS_IP(inode)->xattr_sem);
 
-	if (os2name)
-		kfree(os2name);
+	kfree(os2name);
 
 	return size;
 }
diff --git a/fs/libfs.c b/fs/libfs.c
index f90b29595927..58101dff2c66 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -183,6 +183,7 @@ struct file_operations simple_dir_operations = {
 	.llseek		= dcache_dir_lseek,
 	.read		= generic_read_dir,
 	.readdir	= dcache_readdir,
+	.fsync		= simple_sync_file,
 };
 
 struct inode_operations simple_dir_inode_operations = {
@@ -519,6 +520,102 @@ int simple_transaction_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+/* Simple attribute files */
+
+struct simple_attr {
+	u64 (*get)(void *);
+	void (*set)(void *, u64);
+	char get_buf[24];	/* enough to store a u64 and "\n\0" */
+	char set_buf[24];
+	void *data;
+	const char *fmt;	/* format for read operation */
+	struct semaphore sem;	/* protects access to these buffers */
+};
+
+/* simple_attr_open is called by an actual attribute open file operation
+ * to set the attribute specific access operations. */
+int simple_attr_open(struct inode *inode, struct file *file,
+		     u64 (*get)(void *), void (*set)(void *, u64),
+		     const char *fmt)
+{
+	struct simple_attr *attr;
+
+	attr = kmalloc(sizeof(*attr), GFP_KERNEL);
+	if (!attr)
+		return -ENOMEM;
+
+	attr->get = get;
+	attr->set = set;
+	attr->data = inode->u.generic_ip;
+	attr->fmt = fmt;
+	init_MUTEX(&attr->sem);
+
+	file->private_data = attr;
+
+	return nonseekable_open(inode, file);
+}
+
+int simple_attr_close(struct inode *inode, struct file *file)
+{
+	kfree(file->private_data);
+	return 0;
+}
+
+/* read from the buffer that is filled with the get function */
+ssize_t simple_attr_read(struct file *file, char __user *buf,
+			 size_t len, loff_t *ppos)
+{
+	struct simple_attr *attr;
+	size_t size;
+	ssize_t ret;
+
+	attr = file->private_data;
+
+	if (!attr->get)
+		return -EACCES;
+
+	down(&attr->sem);
+	if (*ppos) /* continued read */
+		size = strlen(attr->get_buf);
+	else	  /* first read */
+		size = scnprintf(attr->get_buf, sizeof(attr->get_buf),
+				 attr->fmt,
+				 (unsigned long long)attr->get(attr->data));
+
+	ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, size);
+	up(&attr->sem);
+	return ret;
+}
+
+/* interpret the buffer as a number to call the set function with */
+ssize_t simple_attr_write(struct file *file, const char __user *buf,
+			  size_t len, loff_t *ppos)
+{
+	struct simple_attr *attr;
+	u64 val;
+	size_t size;
+	ssize_t ret;
+
+	attr = file->private_data;
+
+	if (!attr->set)
+		return -EACCES;
+
+	down(&attr->sem);
+	ret = -EFAULT;
+	size = min(sizeof(attr->set_buf) - 1, len);
+	if (copy_from_user(attr->set_buf, buf, size))
+		goto out;
+
+	ret = len; /* claim we got the whole input */
+	attr->set_buf[size] = '\0';
+	val = simple_strtol(attr->set_buf, NULL, 0);
+	attr->set(attr->data, val);
+out:
+	up(&attr->sem);
+	return ret;
+}
+
 EXPORT_SYMBOL(dcache_dir_close);
 EXPORT_SYMBOL(dcache_dir_lseek);
 EXPORT_SYMBOL(dcache_dir_open);
@@ -547,3 +644,7 @@ EXPORT_SYMBOL(simple_read_from_buffer);
 EXPORT_SYMBOL(simple_transaction_get);
 EXPORT_SYMBOL(simple_transaction_read);
 EXPORT_SYMBOL(simple_transaction_release);
+EXPORT_SYMBOL_GPL(simple_attr_open);
+EXPORT_SYMBOL_GPL(simple_attr_close);
+EXPORT_SYMBOL_GPL(simple_attr_read);
+EXPORT_SYMBOL_GPL(simple_attr_write);
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index ef7103b8c5bd..006bb9e14579 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -31,7 +31,7 @@ static int			reclaimer(void *ptr);
  * This is the representation of a blocked client lock.
  */
 struct nlm_wait {
-	struct nlm_wait *	b_next;		/* linked list */
+	struct list_head	b_list;		/* linked list */
 	wait_queue_head_t	b_wait;		/* where to wait on */
 	struct nlm_host *	b_host;
 	struct file_lock *	b_lock;		/* local file lock */
@@ -39,27 +39,54 @@ struct nlm_wait {
 	u32			b_status;	/* grant callback status */
 };
 
-static struct nlm_wait *	nlm_blocked;
+static LIST_HEAD(nlm_blocked);
 
 /*
- * Block on a lock
+ * Queue up a lock for blocking so that the GRANTED request can see it
  */
-int
-nlmclnt_block(struct nlm_host *host, struct file_lock *fl, u32 *statp)
+int nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl)
+{
+	struct nlm_wait *block;
+
+	BUG_ON(req->a_block != NULL);
+	block = kmalloc(sizeof(*block), GFP_KERNEL);
+	if (block == NULL)
+		return -ENOMEM;
+	block->b_host = host;
+	block->b_lock = fl;
+	init_waitqueue_head(&block->b_wait);
+	block->b_status = NLM_LCK_BLOCKED;
+
+	list_add(&block->b_list, &nlm_blocked);
+	req->a_block = block;
+
+	return 0;
+}
+
+void nlmclnt_finish_block(struct nlm_rqst *req)
 {
-	struct nlm_wait	block, **head;
-	int		err;
-	u32		pstate;
+	struct nlm_wait *block = req->a_block;
 
-	block.b_host   = host;
-	block.b_lock   = fl;
-	init_waitqueue_head(&block.b_wait);
-	block.b_status = NLM_LCK_BLOCKED;
-	block.b_next   = nlm_blocked;
-	nlm_blocked    = &block;
+	if (block == NULL)
+		return;
+	req->a_block = NULL;
+	list_del(&block->b_list);
+	kfree(block);
+}
+
+/*
+ * Block on a lock
+ */
+long nlmclnt_block(struct nlm_rqst *req, long timeout)
+{
+	struct nlm_wait	*block = req->a_block;
+	long ret;
 
-	/* Remember pseudo nsm state */
-	pstate = host->h_state;
+	/* A borken server might ask us to block even if we didn't
+	 * request it. Just say no!
+	 */
+	if (!req->a_args.block)
+		return -EAGAIN;
 
 	/* Go to sleep waiting for GRANT callback. Some servers seem
 	 * to lose callbacks, however, so we're going to poll from
@@ -69,28 +96,16 @@ nlmclnt_block(struct nlm_host *host, struct file_lock *fl, u32 *statp)
 	 * a 1 minute timeout would do. See the comment before
 	 * nlmclnt_lock for an explanation.
 	 */
-	sleep_on_timeout(&block.b_wait, 30*HZ);
-
-	for (head = &nlm_blocked; *head; head = &(*head)->b_next) {
-		if (*head == &block) {
-			*head = block.b_next;
-			break;
-		}
-	}
+	ret = wait_event_interruptible_timeout(block->b_wait,
+			block->b_status != NLM_LCK_BLOCKED,
+			timeout);
 
-	if (!signalled()) {
-		*statp = block.b_status;
-		return 0;
+	if (block->b_status != NLM_LCK_BLOCKED) {
+		req->a_res.status = block->b_status;
+		block->b_status = NLM_LCK_BLOCKED;
 	}
 
-	/* Okay, we were interrupted. Cancel the pending request
-	 * unless the server has rebooted.
-	 */
-	if (pstate == host->h_state && (err = nlmclnt_cancel(host, fl)) < 0)
-		printk(KERN_NOTICE
-			"lockd: CANCEL call failed (errno %d)\n", -err);
-
-	return -ERESTARTSYS;
+	return ret;
 }
 
 /*
@@ -100,27 +115,23 @@ u32
 nlmclnt_grant(struct nlm_lock *lock)
 {
 	struct nlm_wait	*block;
+	u32 res = nlm_lck_denied;
 
 	/*
 	 * Look up blocked request based on arguments. 
 	 * Warning: must not use cookie to match it!
 	 */
-	for (block = nlm_blocked; block; block = block->b_next) {
-		if (nlm_compare_locks(block->b_lock, &lock->fl))
-			break;
+	list_for_each_entry(block, &nlm_blocked, b_list) {
+		if (nlm_compare_locks(block->b_lock, &lock->fl)) {
+			/* Alright, we found a lock. Set the return status
+			 * and wake up the caller
+			 */
+			block->b_status = NLM_LCK_GRANTED;
+			wake_up(&block->b_wait);
+			res = nlm_granted;
+		}
 	}
-
-	/* Ooops, no blocked request found. */
-	if (block == NULL)
-		return nlm_lck_denied;
-
-	/* Alright, we found the lock. Set the return status and
-	 * wake up the caller.
-	 */
-	block->b_status = NLM_LCK_GRANTED;
-	wake_up(&block->b_wait);
-
-	return nlm_granted;
+	return res;
 }
 
 /*
@@ -230,7 +241,7 @@ restart:
 	host->h_reclaiming = 0;
 
 	/* Now, wake up all processes that sleep on a blocked lock */
-	for (block = nlm_blocked; block; block = block->b_next) {
+	list_for_each_entry(block, &nlm_blocked, b_list) {
 		if (block->b_host == host) {
 			block->b_status = NLM_LCK_DENIED_GRACE_PERIOD;
 			wake_up(&block->b_wait);
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index a4407619b1f1..14b3ce87fa29 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -21,6 +21,7 @@
 
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
 #define NLMCLNT_GRACE_WAIT	(5*HZ)
+#define NLMCLNT_POLL_TIMEOUT	(30*HZ)
 
 static int	nlmclnt_test(struct nlm_rqst *, struct file_lock *);
 static int	nlmclnt_lock(struct nlm_rqst *, struct file_lock *);
@@ -312,7 +313,7 @@ static int nlm_wait_on_grace(wait_queue_head_t *queue)
 	prepare_to_wait(queue, &wait, TASK_INTERRUPTIBLE);
 	if (!signalled ()) {
 		schedule_timeout(NLMCLNT_GRACE_WAIT);
-		try_to_freeze(PF_FREEZE);
+		try_to_freeze();
 		if (!signalled ())
 			status = 0;
 	}
@@ -553,7 +554,8 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 {
 	struct nlm_host	*host = req->a_host;
 	struct nlm_res	*resp = &req->a_res;
-	int		status;
+	long timeout;
+	int status;
 
 	if (!host->h_monitored && nsm_monitor(host) < 0) {
 		printk(KERN_NOTICE "lockd: failed to monitor %s\n",
@@ -562,15 +564,32 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 		goto out;
 	}
 
-	do {
-		if ((status = nlmclnt_call(req, NLMPROC_LOCK)) >= 0) {
-			if (resp->status != NLM_LCK_BLOCKED)
-				break;
-			status = nlmclnt_block(host, fl, &resp->status);
-		}
+	if (req->a_args.block) {
+		status = nlmclnt_prepare_block(req, host, fl);
 		if (status < 0)
 			goto out;
-	} while (resp->status == NLM_LCK_BLOCKED && req->a_args.block);
+	}
+	for(;;) {
+		status = nlmclnt_call(req, NLMPROC_LOCK);
+		if (status < 0)
+			goto out_unblock;
+		if (resp->status != NLM_LCK_BLOCKED)
+			break;
+		/* Wait on an NLM blocking lock */
+		timeout = nlmclnt_block(req, NLMCLNT_POLL_TIMEOUT);
+		/* Did a reclaimer thread notify us of a server reboot? */
+		if (resp->status ==  NLM_LCK_DENIED_GRACE_PERIOD)
+			continue;
+		if (resp->status != NLM_LCK_BLOCKED)
+			break;
+		if (timeout >= 0)
+			continue;
+		/* We were interrupted. Send a CANCEL request to the server
+		 * and exit
+		 */
+		status = (int)timeout;
+		goto out_unblock;
+	}
 
 	if (resp->status == NLM_LCK_GRANTED) {
 		fl->fl_u.nfs_fl.state = host->h_state;
@@ -579,6 +598,11 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 		do_vfs_lock(fl);
 	}
 	status = nlm_stat_to_errno(resp->status);
+out_unblock:
+	nlmclnt_finish_block(req);
+	/* Cancel the blocked request if it is still pending */
+	if (resp->status == NLM_LCK_BLOCKED)
+		nlmclnt_cancel(host, fl);
 out:
 	nlmclnt_release_lockargs(req);
 	return status;
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 52707c5ad6ea..82c77df81c5f 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -189,17 +189,15 @@ nlm_bind_host(struct nlm_host *host)
 			goto forgetit;
 
 		xprt_set_timeout(&xprt->timeout, 5, nlmsvc_timeout);
+		xprt->nocong = 1;	/* No congestion control for NLM */
+		xprt->resvport = 1;	/* NLM requires a reserved port */
 
 		/* Existing NLM servers accept AUTH_UNIX only */
 		clnt = rpc_create_client(xprt, host->h_name, &nlm_program,
 					host->h_version, RPC_AUTH_UNIX);
-		if (IS_ERR(clnt)) {
-			xprt_destroy(xprt);
+		if (IS_ERR(clnt))
 			goto forgetit;
-		}
 		clnt->cl_autobind = 1;	/* turn on pmap queries */
-		xprt->nocong = 1;	/* No congestion control for NLM */
-		xprt->resvport = 1;	/* NLM requires a reserved port */
 
 		host->h_rpcclnt = clnt;
 	}
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 6fc1bebeec1d..2d144abe84ad 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -115,20 +115,19 @@ nsm_create(void)
 	xprt = xprt_create_proto(IPPROTO_UDP, &sin, NULL);
 	if (IS_ERR(xprt))
 		return (struct rpc_clnt *)xprt;
+	xprt->resvport = 1;	/* NSM requires a reserved port */
 
 	clnt = rpc_create_client(xprt, "localhost",
 				&nsm_program, SM_VERSION,
 				RPC_AUTH_NULL);
 	if (IS_ERR(clnt))
-		goto out_destroy;
+		goto out_err;
 	clnt->cl_softrtry = 1;
 	clnt->cl_chatty   = 1;
 	clnt->cl_oneshot  = 1;
-	xprt->resvport = 1;	/* NSM requires a reserved port */
 	return clnt;
 
-out_destroy:
-	xprt_destroy(xprt);
+out_err:
 	return clnt;
 }
 
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index b82e470912e8..6e242556b903 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -191,7 +191,9 @@ lockd(struct svc_rqst *rqstp)
 		printk(KERN_DEBUG
 			"lockd: new process, skipping host shutdown\n");
 	wake_up(&lockd_exit);
-		
+
+	flush_signals(current);
+
 	/* Exit the RPC thread */
 	svc_exit_thread(rqstp);
 
diff --git a/fs/locks.c b/fs/locks.c
index 3fa6a7ce57a7..a0bc03495bd4 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1548,6 +1548,8 @@ int fcntl_getlk(struct file *filp, struct flock __user *l)
 
 	if (filp->f_op && filp->f_op->lock) {
 		error = filp->f_op->lock(filp, F_GETLK, &file_lock);
+		if (file_lock.fl_ops && file_lock.fl_ops->fl_release_private)
+			file_lock.fl_ops->fl_release_private(&file_lock);
 		if (error < 0)
 			goto out;
 		else
@@ -1690,6 +1692,8 @@ int fcntl_getlk64(struct file *filp, struct flock64 __user *l)
 
 	if (filp->f_op && filp->f_op->lock) {
 		error = filp->f_op->lock(filp, F_GETLK, &file_lock);
+		if (file_lock.fl_ops && file_lock.fl_ops->fl_release_private)
+			file_lock.fl_ops->fl_release_private(&file_lock);
 		if (error < 0)
 			goto out;
 		else
@@ -1873,6 +1877,8 @@ void locks_remove_flock(struct file *filp)
 			.fl_end = OFFSET_MAX,
 		};
 		filp->f_op->flock(filp, F_SETLKW, &fl);
+		if (fl.fl_ops && fl.fl_ops->fl_release_private)
+			fl.fl_ops->fl_release_private(&fl);
 	}
 
 	lock_kernel();
diff --git a/fs/namei.c b/fs/namei.c
index 6e888dd10461..56e9f0f7e761 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1577,19 +1577,35 @@ do_link:
  *
  * Simple function to lookup and return a dentry and create it
  * if it doesn't exist.  Is SMP-safe.
+ *
+ * Returns with nd->dentry->d_inode->i_sem locked.
  */
 struct dentry *lookup_create(struct nameidata *nd, int is_dir)
 {
-	struct dentry *dentry;
+	struct dentry *dentry = ERR_PTR(-EEXIST);
 
 	down(&nd->dentry->d_inode->i_sem);
-	dentry = ERR_PTR(-EEXIST);
+	/*
+	 * Yucky last component or no last component at all?
+	 * (foo/., foo/.., /////)
+	 */
 	if (nd->last_type != LAST_NORM)
 		goto fail;
 	nd->flags &= ~LOOKUP_PARENT;
+
+	/*
+	 * Do the final lookup.
+	 */
 	dentry = lookup_hash(&nd->last, nd->dentry);
 	if (IS_ERR(dentry))
 		goto fail;
+
+	/*
+	 * Special case - lookup gave negative, but... we had foo/bar/
+	 * From the vfs_mknod() POV we just have a negative dentry -
+	 * all is fine. Let's be bastards - you had / on the end, you've
+	 * been asking for (non-existent) directory. -ENOENT for you.
+	 */
 	if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
 		goto enoent;
 	return dentry;
diff --git a/fs/namespace.c b/fs/namespace.c
index 3b93e5d750eb..208c079e9fdb 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -337,7 +337,7 @@ int may_umount(struct vfsmount *mnt)
 
 EXPORT_SYMBOL(may_umount);
 
-void umount_tree(struct vfsmount *mnt)
+static void umount_tree(struct vfsmount *mnt)
 {
 	struct vfsmount *p;
 	LIST_HEAD(kill);
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 2dc2d8693968..a9f7a8ab1d59 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -705,18 +705,6 @@ ncp_do_readdir(struct file *filp, void *dirent, filldir_t filldir,
 		DPRINTK("ncp_do_readdir: init failed, err=%d\n", err);
 		return;
 	}
-#ifdef USE_OLD_SLOW_DIRECTORY_LISTING
-	for (;;) {
-		err = ncp_search_for_file_or_subdir(server, &seq, &entry.i);
-		if (err) {
-			DPRINTK("ncp_do_readdir: search failed, err=%d\n", err);
-			break;
-		}
-		entry.volume = entry.i.volNumber;
-		if (!ncp_fill_cache(filp, dirent, filldir, ctl, &entry))
-			break;
-	}
-#else
 	/* We MUST NOT use server->buffer_size handshaked with server if we are
 	   using UDP, as for UDP server uses max. buffer size determined by
 	   MTU, and for TCP server uses hardwired value 65KB (== 66560 bytes). 
@@ -754,7 +742,6 @@ ncp_do_readdir(struct file *filp, void *dirent, filldir_t filldir,
 		}
 	} while (more);
 	vfree(buf);
-#endif
 	return;
 }
 
diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c
index e4eb5ed4bee4..c755e1848a42 100644
--- a/fs/ncpfs/ncplib_kernel.c
+++ b/fs/ncpfs/ncplib_kernel.c
@@ -845,46 +845,6 @@ out:
 	return result;
 }
 
-/* Search for everything */
-int ncp_search_for_file_or_subdir(struct ncp_server *server,
-				  struct nw_search_sequence *seq,
-				  struct nw_info_struct *target)
-{
-	int result;
-
-	ncp_init_request(server);
-	ncp_add_byte(server, 3);	/* subfunction */
-	ncp_add_byte(server, server->name_space[seq->volNumber]);
-	ncp_add_byte(server, 0);	/* data stream (???) */
-	ncp_add_word(server, cpu_to_le16(0x8006));	/* Search attribs */
-	ncp_add_dword(server, RIM_ALL);		/* return info mask */
-	ncp_add_mem(server, seq, 9);
-#ifdef CONFIG_NCPFS_NFS_NS
-	if (server->name_space[seq->volNumber] == NW_NS_NFS) {
-		ncp_add_byte(server, 0);	/* 0 byte pattern */
-	} else 
-#endif
-	{
-		ncp_add_byte(server, 2);	/* 2 byte pattern */
-		ncp_add_byte(server, 0xff);	/* following is a wildcard */
-		ncp_add_byte(server, '*');
-	}
-	
-	if ((result = ncp_request(server, 87)) != 0)
-		goto out;
-	memcpy(seq, ncp_reply_data(server, 0), sizeof(*seq));
-	ncp_extract_file_info(ncp_reply_data(server, 10), target);
-
-	ncp_unlock_server(server);
-	
-	result = ncp_obtain_nfs_info(server, target);
-	return result;
-
-out:
-	ncp_unlock_server(server);
-	return result;
-}
-
 int ncp_search_for_fileset(struct ncp_server *server,
 			   struct nw_search_sequence *seq,
 			   int* more,
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
index 05ec2e9d90c6..9e4dc30c2435 100644
--- a/fs/ncpfs/ncplib_kernel.h
+++ b/fs/ncpfs/ncplib_kernel.h
@@ -87,9 +87,6 @@ int ncp_open_create_file_or_subdir(struct ncp_server *, struct inode *, char *,
 
 int ncp_initialize_search(struct ncp_server *, struct inode *,
 		      struct nw_search_sequence *target);
-int ncp_search_for_file_or_subdir(struct ncp_server *server,
-			      struct nw_search_sequence *seq,
-			      struct nw_info_struct *target);
 int ncp_search_for_fileset(struct ncp_server *server,
 			   struct nw_search_sequence *seq,
 			   int* more, int* cnt,
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index b4baa031edf4..8b3bb715d177 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -8,6 +8,7 @@ nfs-y 			:= dir.o file.o inode.o nfs2xdr.o pagelist.o \
 			   proc.o read.o symlink.o unlink.o write.o
 nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o mount_clnt.o      
 nfs-$(CONFIG_NFS_V3)	+= nfs3proc.o nfs3xdr.o
+nfs-$(CONFIG_NFS_V3_ACL)	+= nfs3acl.o
 nfs-$(CONFIG_NFS_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
 			   delegation.o idmap.o \
 			   callback.o callback_xdr.o callback_proc.o
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 560d6175dd58..f2ca782aba33 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -14,6 +14,7 @@
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/svcsock.h>
 #include <linux/nfs_fs.h>
+#include "nfs4_fs.h"
 #include "callback.h"
 
 #define NFSDBG_FACILITY NFSDBG_CALLBACK
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index ece27e42b93b..65f1e19e4d19 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -8,6 +8,7 @@
 #include <linux/config.h>
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
+#include "nfs4_fs.h"
 #include "callback.h"
 #include "delegation.h"
 
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index d271df9df2b2..7c33b9a81a94 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -10,6 +10,7 @@
 #include <linux/sunrpc/svc.h>
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
+#include "nfs4_fs.h"
 #include "callback.h"
 
 #define CB_OP_TAGLEN_MAXSZ	(512)
@@ -410,7 +411,6 @@ static int nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp
 	xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base);
 
 	p = (uint32_t*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len);
-	rqstp->rq_res.head[0].iov_len = PAGE_SIZE;
 	xdr_init_encode(&xdr_out, &rqstp->rq_res, p);
 
 	decode_compound_hdr_arg(&xdr_in, &hdr_arg);
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 5b9c60f97791..d7f7eb669d03 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -16,6 +16,7 @@
 #include <linux/nfs_fs.h>
 #include <linux/nfs_xdr.h>
 
+#include "nfs4_fs.h"
 #include "delegation.h"
 
 static struct nfs_delegation *nfs_alloc_delegation(void)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ff6155f5e8d9..b38a57e78a63 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -32,6 +32,7 @@
 #include <linux/smp_lock.h>
 #include <linux/namei.h>
 
+#include "nfs4_fs.h"
 #include "delegation.h"
 
 #define NFS_PARANOIA 1
@@ -50,8 +51,10 @@ static int nfs_mknod(struct inode *, struct dentry *, int, dev_t);
 static int nfs_rename(struct inode *, struct dentry *,
 		      struct inode *, struct dentry *);
 static int nfs_fsync_dir(struct file *, struct dentry *, int);
+static loff_t nfs_llseek_dir(struct file *, loff_t, int);
 
 struct file_operations nfs_dir_operations = {
+	.llseek		= nfs_llseek_dir,
 	.read		= generic_read_dir,
 	.readdir	= nfs_readdir,
 	.open		= nfs_opendir,
@@ -74,6 +77,27 @@ struct inode_operations nfs_dir_inode_operations = {
 	.setattr	= nfs_setattr,
 };
 
+#ifdef CONFIG_NFS_V3
+struct inode_operations nfs3_dir_inode_operations = {
+	.create		= nfs_create,
+	.lookup		= nfs_lookup,
+	.link		= nfs_link,
+	.unlink		= nfs_unlink,
+	.symlink	= nfs_symlink,
+	.mkdir		= nfs_mkdir,
+	.rmdir		= nfs_rmdir,
+	.mknod		= nfs_mknod,
+	.rename		= nfs_rename,
+	.permission	= nfs_permission,
+	.getattr	= nfs_getattr,
+	.setattr	= nfs_setattr,
+	.listxattr	= nfs3_listxattr,
+	.getxattr	= nfs3_getxattr,
+	.setxattr	= nfs3_setxattr,
+	.removexattr	= nfs3_removexattr,
+};
+#endif  /* CONFIG_NFS_V3 */
+
 #ifdef CONFIG_NFS_V4
 
 static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *);
@@ -90,6 +114,9 @@ struct inode_operations nfs4_dir_inode_operations = {
 	.permission	= nfs_permission,
 	.getattr	= nfs_getattr,
 	.setattr	= nfs_setattr,
+	.getxattr       = nfs4_getxattr,
+	.setxattr       = nfs4_setxattr,
+	.listxattr      = nfs4_listxattr,
 };
 
 #endif /* CONFIG_NFS_V4 */
@@ -116,7 +143,8 @@ typedef struct {
 	struct page	*page;
 	unsigned long	page_index;
 	u32		*ptr;
-	u64		target;
+	u64		*dir_cookie;
+	loff_t		current_index;
 	struct nfs_entry *entry;
 	decode_dirent_t	decode;
 	int		plus;
@@ -164,12 +192,10 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
 	NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME;
 	/* Ensure consistent page alignment of the data.
 	 * Note: assumes we have exclusive access to this mapping either
-	 *	 throught inode->i_sem or some other mechanism.
+	 *	 through inode->i_sem or some other mechanism.
 	 */
-	if (page->index == 0) {
-		invalidate_inode_pages(inode->i_mapping);
-		NFS_I(inode)->readdir_timestamp = timestamp;
-	}
+	if (page->index == 0)
+		invalidate_inode_pages2_range(inode->i_mapping, PAGE_CACHE_SIZE, -1);
 	unlock_page(page);
 	return 0;
  error:
@@ -202,22 +228,22 @@ void dir_page_release(nfs_readdir_descriptor_t *desc)
 
 /*
  * Given a pointer to a buffer that has already been filled by a call
- * to readdir, find the next entry.
+ * to readdir, find the next entry with cookie '*desc->dir_cookie'.
  *
  * If the end of the buffer has been reached, return -EAGAIN, if not,
  * return the offset within the buffer of the next entry to be
  * read.
  */
 static inline
-int find_dirent(nfs_readdir_descriptor_t *desc, struct page *page)
+int find_dirent(nfs_readdir_descriptor_t *desc)
 {
 	struct nfs_entry *entry = desc->entry;
 	int		loop_count = 0,
 			status;
 
 	while((status = dir_decode(desc)) == 0) {
-		dfprintk(VFS, "NFS: found cookie %Lu\n", (long long)entry->cookie);
-		if (entry->prev_cookie == desc->target)
+		dfprintk(VFS, "NFS: found cookie %Lu\n", (unsigned long long)entry->cookie);
+		if (entry->prev_cookie == *desc->dir_cookie)
 			break;
 		if (loop_count++ > 200) {
 			loop_count = 0;
@@ -229,8 +255,44 @@ int find_dirent(nfs_readdir_descriptor_t *desc, struct page *page)
 }
 
 /*
- * Find the given page, and call find_dirent() in order to try to
- * return the next entry.
+ * Given a pointer to a buffer that has already been filled by a call
+ * to readdir, find the entry at offset 'desc->file->f_pos'.
+ *
+ * If the end of the buffer has been reached, return -EAGAIN, if not,
+ * return the offset within the buffer of the next entry to be
+ * read.
+ */
+static inline
+int find_dirent_index(nfs_readdir_descriptor_t *desc)
+{
+	struct nfs_entry *entry = desc->entry;
+	int		loop_count = 0,
+			status;
+
+	for(;;) {
+		status = dir_decode(desc);
+		if (status)
+			break;
+
+		dfprintk(VFS, "NFS: found cookie %Lu at index %Ld\n", (unsigned long long)entry->cookie, desc->current_index);
+
+		if (desc->file->f_pos == desc->current_index) {
+			*desc->dir_cookie = entry->cookie;
+			break;
+		}
+		desc->current_index++;
+		if (loop_count++ > 200) {
+			loop_count = 0;
+			schedule();
+		}
+	}
+	dfprintk(VFS, "NFS: find_dirent_index() returns %d\n", status);
+	return status;
+}
+
+/*
+ * Find the given page, and call find_dirent() or find_dirent_index in
+ * order to try to return the next entry.
  */
 static inline
 int find_dirent_page(nfs_readdir_descriptor_t *desc)
@@ -253,7 +315,10 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc)
 	/* NOTE: Someone else may have changed the READDIRPLUS flag */
 	desc->page = page;
 	desc->ptr = kmap(page);		/* matching kunmap in nfs_do_filldir */
-	status = find_dirent(desc, page);
+	if (*desc->dir_cookie != 0)
+		status = find_dirent(desc);
+	else
+		status = find_dirent_index(desc);
 	if (status < 0)
 		dir_page_release(desc);
  out:
@@ -268,7 +333,8 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc)
  * Recurse through the page cache pages, and return a
  * filled nfs_entry structure of the next directory entry if possible.
  *
- * The target for the search is 'desc->target'.
+ * The target for the search is '*desc->dir_cookie' if non-0,
+ * 'desc->file->f_pos' otherwise
  */
 static inline
 int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
@@ -276,7 +342,16 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
 	int		loop_count = 0;
 	int		res;
 
-	dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (long long)desc->target);
+	/* Always search-by-index from the beginning of the cache */
+	if (*desc->dir_cookie == 0) {
+		dfprintk(VFS, "NFS: readdir_search_pagecache() searching for offset %Ld\n", (long long)desc->file->f_pos);
+		desc->page_index = 0;
+		desc->entry->cookie = desc->entry->prev_cookie = 0;
+		desc->entry->eof = 0;
+		desc->current_index = 0;
+	} else
+		dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie);
+
 	for (;;) {
 		res = find_dirent_page(desc);
 		if (res != -EAGAIN)
@@ -313,7 +388,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
 	int		loop_count = 0,
 			res;
 
-	dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)desc->target);
+	dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)entry->cookie);
 
 	for(;;) {
 		unsigned d_type = DT_UNKNOWN;
@@ -333,10 +408,11 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
 		}
 
 		res = filldir(dirent, entry->name, entry->len, 
-			      entry->prev_cookie, fileid, d_type);
+			      file->f_pos, fileid, d_type);
 		if (res < 0)
 			break;
-		file->f_pos = desc->target = entry->cookie;
+		file->f_pos++;
+		*desc->dir_cookie = entry->cookie;
 		if (dir_decode(desc) != 0) {
 			desc->page_index ++;
 			break;
@@ -349,7 +425,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
 	dir_page_release(desc);
 	if (dentry != NULL)
 		dput(dentry);
-	dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (long long)desc->target, res);
+	dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res);
 	return res;
 }
 
@@ -375,14 +451,14 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 	struct page	*page = NULL;
 	int		status;
 
-	dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (long long)desc->target);
+	dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie);
 
 	page = alloc_page(GFP_HIGHUSER);
 	if (!page) {
 		status = -ENOMEM;
 		goto out;
 	}
-	desc->error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, desc->target,
+	desc->error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, *desc->dir_cookie,
 						page,
 						NFS_SERVER(inode)->dtsize,
 						desc->plus);
@@ -391,7 +467,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 	desc->ptr = kmap(page);		/* matching kunmap in nfs_do_filldir */
 	if (desc->error >= 0) {
 		if ((status = dir_decode(desc)) == 0)
-			desc->entry->prev_cookie = desc->target;
+			desc->entry->prev_cookie = *desc->dir_cookie;
 	} else
 		status = -EIO;
 	if (status < 0)
@@ -412,8 +488,9 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 	goto out;
 }
 
-/* The file offset position is now represented as a true offset into the
- * page cache as is the case in most of the other filesystems.
+/* The file offset position represents the dirent entry number.  A
+   last cookie cache takes care of the common case of reading the
+   whole directory.
  */
 static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
@@ -435,15 +512,15 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	}
 
 	/*
-	 * filp->f_pos points to the file offset in the page cache.
-	 * but if the cache has meanwhile been zapped, we need to
-	 * read from the last dirent to revalidate f_pos
-	 * itself.
+	 * filp->f_pos points to the dirent entry number.
+	 * *desc->dir_cookie has the cookie for the next entry. We have
+	 * to either find the entry with the appropriate number or
+	 * revalidate the cookie.
 	 */
 	memset(desc, 0, sizeof(*desc));
 
 	desc->file = filp;
-	desc->target = filp->f_pos;
+	desc->dir_cookie = &((struct nfs_open_context *)filp->private_data)->dir_cookie;
 	desc->decode = NFS_PROTO(inode)->decode_dirent;
 	desc->plus = NFS_USE_READDIRPLUS(inode);
 
@@ -455,9 +532,10 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 
 	while(!desc->entry->eof) {
 		res = readdir_search_pagecache(desc);
+
 		if (res == -EBADCOOKIE) {
 			/* This means either end of directory */
-			if (desc->entry->cookie != desc->target) {
+			if (*desc->dir_cookie && desc->entry->cookie != *desc->dir_cookie) {
 				/* Or that the server has 'lost' a cookie */
 				res = uncached_readdir(desc, dirent, filldir);
 				if (res >= 0)
@@ -490,6 +568,28 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	return 0;
 }
 
+loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
+{
+	down(&filp->f_dentry->d_inode->i_sem);
+	switch (origin) {
+		case 1:
+			offset += filp->f_pos;
+		case 0:
+			if (offset >= 0)
+				break;
+		default:
+			offset = -EINVAL;
+			goto out;
+	}
+	if (offset != filp->f_pos) {
+		filp->f_pos = offset;
+		((struct nfs_open_context *)filp->private_data)->dir_cookie = 0;
+	}
+out:
+	up(&filp->f_dentry->d_inode->i_sem);
+	return offset;
+}
+
 /*
  * All directory operations under NFS are synchronous, so fsync()
  * is a dummy operation.
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 68df803f27ca..6537f2c4ae44 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -517,7 +517,7 @@ retry:
 	result = tot_bytes;
 
 out:
-	nfs_end_data_update_defer(inode);
+	nfs_end_data_update(inode);
 	nfs_writedata_free(wdata);
 	return result;
 
@@ -751,11 +751,6 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count,
 	retval = -EFAULT;
 	if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len))
 		goto out;
-        if (file->f_error) {
-                retval = file->f_error;
-                file->f_error = 0;
-                goto out;
-        }
 	retval = -EFBIG;
 	if (limit != RLIM_INFINITY) {
 		if (pos >= limit) {
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 55c907592490..5621ba9885f4 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -71,6 +71,18 @@ struct inode_operations nfs_file_inode_operations = {
 	.setattr	= nfs_setattr,
 };
 
+#ifdef CONFIG_NFS_V3
+struct inode_operations nfs3_file_inode_operations = {
+	.permission	= nfs_permission,
+	.getattr	= nfs_getattr,
+	.setattr	= nfs_setattr,
+	.listxattr	= nfs3_listxattr,
+	.getxattr	= nfs3_getxattr,
+	.setxattr	= nfs3_setxattr,
+	.removexattr	= nfs3_removexattr,
+};
+#endif  /* CONFIG_NFS_v3 */
+
 /* Hack for future NFS swap support */
 #ifndef IS_SWAPFILE
 # define IS_SWAPFILE(inode)	(0)
@@ -116,6 +128,21 @@ nfs_file_release(struct inode *inode, struct file *filp)
 }
 
 /**
+ * nfs_revalidate_file - Revalidate the page cache & related metadata
+ * @inode - pointer to inode struct
+ * @file - pointer to file
+ */
+static int nfs_revalidate_file(struct inode *inode, struct file *filp)
+{
+	int retval = 0;
+
+	if ((NFS_FLAGS(inode) & NFS_INO_REVAL_PAGECACHE) || nfs_attribute_timeout(inode))
+		retval = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	nfs_revalidate_mapping(inode, filp->f_mapping);
+	return 0;
+}
+
+/**
  * nfs_revalidate_size - Revalidate the file size
  * @inode - pointer to inode struct
  * @file - pointer to struct file
@@ -137,7 +164,8 @@ static int nfs_revalidate_file_size(struct inode *inode, struct file *filp)
 		goto force_reval;
 	if (nfsi->npages != 0)
 		return 0;
-	return nfs_revalidate_inode(server, inode);
+	if (!(NFS_FLAGS(inode) & NFS_INO_REVAL_PAGECACHE) && !nfs_attribute_timeout(inode))
+		return 0;
 force_reval:
 	return __nfs_revalidate_inode(server, inode);
 }
@@ -198,7 +226,7 @@ nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos)
 		dentry->d_parent->d_name.name, dentry->d_name.name,
 		(unsigned long) count, (unsigned long) pos);
 
-	result = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	result = nfs_revalidate_file(inode, iocb->ki_filp);
 	if (!result)
 		result = generic_file_aio_read(iocb, buf, count, pos);
 	return result;
@@ -216,7 +244,7 @@ nfs_file_sendfile(struct file *filp, loff_t *ppos, size_t count,
 		dentry->d_parent->d_name.name, dentry->d_name.name,
 		(unsigned long) count, (unsigned long long) *ppos);
 
-	res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	res = nfs_revalidate_file(inode, filp);
 	if (!res)
 		res = generic_file_sendfile(filp, ppos, count, actor, target);
 	return res;
@@ -232,7 +260,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
 	dfprintk(VFS, "nfs: mmap(%s/%s)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name);
 
-	status = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	status = nfs_revalidate_file(inode, file);
 	if (!status)
 		status = generic_file_mmap(file, vma);
 	return status;
@@ -321,9 +349,15 @@ nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t
 	result = -EBUSY;
 	if (IS_SWAPFILE(inode))
 		goto out_swapfile;
-	result = nfs_revalidate_inode(NFS_SERVER(inode), inode);
-	if (result)
-		goto out;
+	/*
+	 * O_APPEND implies that we must revalidate the file length.
+	 */
+	if (iocb->ki_filp->f_flags & O_APPEND) {
+		result = nfs_revalidate_file_size(inode, iocb->ki_filp);
+		if (result)
+			goto out;
+	}
+	nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
 
 	result = count;
 	if (!count)
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 87f4f9aeac86..ffb8df91dc34 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -50,6 +50,7 @@
 #include <linux/nfs_fs.h>
 
 #include <linux/nfs_idmap.h>
+#include "nfs4_fs.h"
 
 #define IDMAP_HASH_SZ          128
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index f2317f3e29f9..4845911f1c63 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -39,6 +39,7 @@
 #include <asm/system.h>
 #include <asm/uaccess.h>
 
+#include "nfs4_fs.h"
 #include "delegation.h"
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
@@ -63,6 +64,7 @@ static void nfs_clear_inode(struct inode *);
 static void nfs_umount_begin(struct super_block *);
 static int  nfs_statfs(struct super_block *, struct kstatfs *);
 static int  nfs_show_options(struct seq_file *, struct vfsmount *);
+static void nfs_zap_acl_cache(struct inode *);
 
 static struct rpc_program	nfs_program;
 
@@ -106,6 +108,21 @@ static struct rpc_program	nfs_program = {
 	.pipe_dir_name		= "/nfs",
 };
 
+#ifdef CONFIG_NFS_V3_ACL
+static struct rpc_stat		nfsacl_rpcstat = { &nfsacl_program };
+static struct rpc_version *	nfsacl_version[] = {
+	[3]			= &nfsacl_version3,
+};
+
+struct rpc_program		nfsacl_program = {
+	.name =			"nfsacl",
+	.number =		NFS_ACL_PROGRAM,
+	.nrvers =		sizeof(nfsacl_version) / sizeof(nfsacl_version[0]),
+	.version =		nfsacl_version,
+	.stats =		&nfsacl_rpcstat,
+};
+#endif  /* CONFIG_NFS_V3_ACL */
+
 static inline unsigned long
 nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
 {
@@ -118,7 +135,7 @@ nfs_write_inode(struct inode *inode, int sync)
 	int flags = sync ? FLUSH_WAIT : 0;
 	int ret;
 
-	ret = nfs_commit_inode(inode, 0, 0, flags);
+	ret = nfs_commit_inode(inode, flags);
 	if (ret < 0)
 		return ret;
 	return 0;
@@ -140,10 +157,6 @@ nfs_delete_inode(struct inode * inode)
 	clear_inode(inode);
 }
 
-/*
- * For the moment, the only task for the NFS clear_inode method is to
- * release the mmap credential
- */
 static void
 nfs_clear_inode(struct inode *inode)
 {
@@ -152,6 +165,7 @@ nfs_clear_inode(struct inode *inode)
 
 	nfs_wb_all(inode);
 	BUG_ON (!list_empty(&nfsi->open_files));
+	nfs_zap_acl_cache(inode);
 	cred = nfsi->cache_access.cred;
 	if (cred)
 		put_rpccred(cred);
@@ -161,11 +175,13 @@ nfs_clear_inode(struct inode *inode)
 void
 nfs_umount_begin(struct super_block *sb)
 {
-	struct nfs_server *server = NFS_SB(sb);
-	struct rpc_clnt	*rpc;
+	struct rpc_clnt	*rpc = NFS_SB(sb)->client;
 
 	/* -EIO all pending I/O */
-	if ((rpc = server->client) != NULL)
+	if (!IS_ERR(rpc))
+		rpc_killall_tasks(rpc);
+	rpc = NFS_SB(sb)->client_acl;
+	if (!IS_ERR(rpc))
 		rpc_killall_tasks(rpc);
 }
 
@@ -366,13 +382,15 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
 	xprt = xprt_create_proto(tcp ? IPPROTO_TCP : IPPROTO_UDP,
 				 &server->addr, &timeparms);
 	if (IS_ERR(xprt)) {
-		printk(KERN_WARNING "NFS: cannot create RPC transport.\n");
+		dprintk("%s: cannot create RPC transport. Error = %ld\n",
+				__FUNCTION__, PTR_ERR(xprt));
 		return (struct rpc_clnt *)xprt;
 	}
 	clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
 				 server->rpc_ops->version, data->pseudoflavor);
 	if (IS_ERR(clnt)) {
-		printk(KERN_WARNING "NFS: cannot create RPC client.\n");
+		dprintk("%s: cannot create RPC client. Error = %ld\n",
+				__FUNCTION__, PTR_ERR(xprt));
 		goto out_fail;
 	}
 
@@ -383,7 +401,6 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
 	return clnt;
 
 out_fail:
-	xprt_destroy(xprt);
 	return clnt;
 }
 
@@ -427,21 +444,16 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent)
 
 	/* Check NFS protocol revision and initialize RPC op vector
 	 * and file handle pool. */
-	if (server->flags & NFS_MOUNT_VER3) {
 #ifdef CONFIG_NFS_V3
+	if (server->flags & NFS_MOUNT_VER3) {
 		server->rpc_ops = &nfs_v3_clientops;
 		server->caps |= NFS_CAP_READDIRPLUS;
-		if (data->version < 4) {
-			printk(KERN_NOTICE "NFS: NFSv3 not supported by mount program.\n");
-			return -EIO;
-		}
-#else
-		printk(KERN_NOTICE "NFS: NFSv3 not supported.\n");
-		return -EIO;
-#endif
 	} else {
 		server->rpc_ops = &nfs_v2_clientops;
 	}
+#else
+	server->rpc_ops = &nfs_v2_clientops;
+#endif
 
 	/* Fill in pseudoflavor for mount version < 5 */
 	if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
@@ -455,17 +467,34 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent)
 		return PTR_ERR(server->client);
 	/* RFC 2623, sec 2.3.2 */
 	if (authflavor != RPC_AUTH_UNIX) {
+		struct rpc_auth *auth;
+
 		server->client_sys = rpc_clone_client(server->client);
 		if (IS_ERR(server->client_sys))
 			return PTR_ERR(server->client_sys);
-		if (!rpcauth_create(RPC_AUTH_UNIX, server->client_sys))
-			return -ENOMEM;
+		auth = rpcauth_create(RPC_AUTH_UNIX, server->client_sys);
+		if (IS_ERR(auth))
+			return PTR_ERR(auth);
 	} else {
 		atomic_inc(&server->client->cl_count);
 		server->client_sys = server->client;
 	}
-
 	if (server->flags & NFS_MOUNT_VER3) {
+#ifdef CONFIG_NFS_V3_ACL
+		if (!(server->flags & NFS_MOUNT_NOACL)) {
+			server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3);
+			/* No errors! Assume that Sun nfsacls are supported */
+			if (!IS_ERR(server->client_acl))
+				server->caps |= NFS_CAP_ACLS;
+		}
+#else
+		server->flags &= ~NFS_MOUNT_NOACL;
+#endif /* CONFIG_NFS_V3_ACL */
+		/*
+		 * The VFS shouldn't apply the umask to mode bits. We will
+		 * do so ourselves when necessary.
+		 */
+		sb->s_flags |= MS_POSIXACL;
 		if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
 			server->namelen = NFS3_MAXNAMLEN;
 		sb->s_time_gran = 1;
@@ -549,6 +578,7 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
 		{ NFS_MOUNT_NOCTO, ",nocto", "" },
 		{ NFS_MOUNT_NOAC, ",noac", "" },
 		{ NFS_MOUNT_NONLM, ",nolock", ",lock" },
+		{ NFS_MOUNT_NOACL, ",noacl", "" },
 		{ 0, NULL, NULL }
 	};
 	struct proc_nfs_info *nfs_infop;
@@ -590,9 +620,19 @@ nfs_zap_caches(struct inode *inode)
 
 	memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
 	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))
-		nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS;
+		nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE;
 	else
-		nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS;
+		nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE;
+}
+
+static void nfs_zap_acl_cache(struct inode *inode)
+{
+	void (*clear_acl_cache)(struct inode *);
+
+	clear_acl_cache = NFS_PROTO(inode)->clear_acl_cache;
+	if (clear_acl_cache != NULL)
+		clear_acl_cache(inode);
+	NFS_I(inode)->flags &= ~NFS_INO_INVALID_ACL;
 }
 
 /*
@@ -689,7 +729,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 		/* Why so? Because we want revalidate for devices/FIFOs, and
 		 * that's precisely what we have in nfs_file_inode_operations.
 		 */
-		inode->i_op = &nfs_file_inode_operations;
+		inode->i_op = NFS_SB(sb)->rpc_ops->file_inode_ops;
 		if (S_ISREG(inode->i_mode)) {
 			inode->i_fop = &nfs_file_operations;
 			inode->i_data.a_ops = &nfs_file_aops;
@@ -792,7 +832,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
 		}
 	}
 	if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0)
-		NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS;
+		NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
 	nfs_end_data_update(inode);
 	unlock_kernel();
 	return error;
@@ -851,7 +891,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rp
 		ctx->state = NULL;
 		ctx->lockowner = current->files;
 		ctx->error = 0;
-		init_waitqueue_head(&ctx->waitq);
+		ctx->dir_cookie = 0;
 	}
 	return ctx;
 }
@@ -1015,6 +1055,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 		goto out;
 	}
 	flags = nfsi->flags;
+	nfsi->flags &= ~NFS_INO_REVAL_PAGECACHE;
 	/*
 	 * We may need to keep the attributes marked as invalid if
 	 * we raced with nfs_end_attr_update().
@@ -1022,21 +1063,9 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 	if (verifier == nfsi->cache_change_attribute)
 		nfsi->flags &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME);
 	/* Do the page cache invalidation */
-	if (flags & NFS_INO_INVALID_DATA) {
-		if (S_ISREG(inode->i_mode)) {
-			if (filemap_fdatawrite(inode->i_mapping) == 0)
-				filemap_fdatawait(inode->i_mapping);
-			nfs_wb_all(inode);
-		}
-		nfsi->flags &= ~NFS_INO_INVALID_DATA;
-		invalidate_inode_pages2(inode->i_mapping);
-		memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
-		dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n",
-				inode->i_sb->s_id,
-				(long long)NFS_FILEID(inode));
-		/* This ensures we revalidate dentries */
-		nfsi->cache_change_attribute++;
-	}
+	nfs_revalidate_mapping(inode, inode->i_mapping);
+	if (flags & NFS_INO_INVALID_ACL)
+		nfs_zap_acl_cache(inode);
 	dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n",
 		inode->i_sb->s_id,
 		(long long)NFS_FILEID(inode));
@@ -1074,6 +1103,34 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 }
 
 /**
+ * nfs_revalidate_mapping - Revalidate the pagecache
+ * @inode - pointer to host inode
+ * @mapping - pointer to mapping
+ */
+void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	if (nfsi->flags & NFS_INO_INVALID_DATA) {
+		if (S_ISREG(inode->i_mode)) {
+			if (filemap_fdatawrite(mapping) == 0)
+				filemap_fdatawait(mapping);
+			nfs_wb_all(inode);
+		}
+		invalidate_inode_pages2(mapping);
+		nfsi->flags &= ~NFS_INO_INVALID_DATA;
+		if (S_ISDIR(inode->i_mode)) {
+			memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
+			/* This ensures we revalidate child dentries */
+			nfsi->cache_change_attribute++;
+		}
+		dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n",
+				inode->i_sb->s_id,
+				(long long)NFS_FILEID(inode));
+	}
+}
+
+/**
  * nfs_begin_data_update
  * @inode - pointer to inode
  * Declare that a set of operations will update file data on the server
@@ -1106,27 +1163,6 @@ void nfs_end_data_update(struct inode *inode)
 }
 
 /**
- * nfs_end_data_update_defer
- * @inode - pointer to inode
- * Declare end of the operations that will update file data
- * This will defer marking the inode as needing revalidation
- * unless there are no other pending updates.
- */
-void nfs_end_data_update_defer(struct inode *inode)
-{
-	struct nfs_inode *nfsi = NFS_I(inode);
-
-	if (atomic_dec_and_test(&nfsi->data_updates)) {
-		/* Mark the attribute cache for revalidation */
-		nfsi->flags |= NFS_INO_INVALID_ATTR;
-		/* Directories and symlinks: invalidate page cache too */
-		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
-			nfsi->flags |= NFS_INO_INVALID_DATA;
-		nfsi->cache_change_attribute ++;
-	}
-}
-
-/**
  * nfs_refresh_inode - verify consistency of the inode attribute cache
  * @inode - pointer to inode
  * @fattr - updated attributes
@@ -1152,8 +1188,11 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
 		if ((fattr->valid & NFS_ATTR_PRE_CHANGE) != 0
 				&& nfsi->change_attr == fattr->pre_change_attr)
 			nfsi->change_attr = fattr->change_attr;
-		if (!data_unstable && nfsi->change_attr != fattr->change_attr)
+		if (nfsi->change_attr != fattr->change_attr) {
 			nfsi->flags |= NFS_INO_INVALID_ATTR;
+			if (!data_unstable)
+				nfsi->flags |= NFS_INO_REVAL_PAGECACHE;
+		}
 	}
 
 	if ((fattr->valid & NFS_ATTR_FATTR) == 0)
@@ -1176,18 +1215,22 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
 	}
 
 	/* Verify a few of the more important attributes */
-	if (!data_unstable) {
-		if (!timespec_equal(&inode->i_mtime, &fattr->mtime)
-				|| cur_size != new_isize)
-			nfsi->flags |= NFS_INO_INVALID_ATTR;
-	} else if (S_ISREG(inode->i_mode) && new_isize > cur_size)
-			nfsi->flags |= NFS_INO_INVALID_ATTR;
+	if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
+		nfsi->flags |= NFS_INO_INVALID_ATTR;
+		if (!data_unstable)
+			nfsi->flags |= NFS_INO_REVAL_PAGECACHE;
+	}
+	if (cur_size != new_isize) {
+		nfsi->flags |= NFS_INO_INVALID_ATTR;
+		if (nfsi->npages == 0)
+			nfsi->flags |= NFS_INO_REVAL_PAGECACHE;
+	}
 
 	/* Have any file permissions changed? */
 	if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
 			|| inode->i_uid != fattr->uid
 			|| inode->i_gid != fattr->gid)
-		nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS;
+		nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
 
 	/* Has the link count changed? */
 	if (inode->i_nlink != fattr->nlink)
@@ -1215,10 +1258,8 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
 static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsigned long verifier)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
-	__u64		new_size;
-	loff_t		new_isize;
+	loff_t cur_isize, new_isize;
 	unsigned int	invalid = 0;
-	loff_t		cur_isize;
 	int data_unstable;
 
 	dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n",
@@ -1251,61 +1292,56 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
 	/* Are we racing with known updates of the metadata on the server? */
 	data_unstable = ! nfs_verify_change_attribute(inode, verifier);
 
-	/* Check if the file size agrees */
-	new_size = fattr->size;
+	/* Check if our cached file size is stale */
  	new_isize = nfs_size_to_loff_t(fattr->size);
 	cur_isize = i_size_read(inode);
-	if (cur_isize != new_size) {
-#ifdef NFS_DEBUG_VERBOSE
-		printk(KERN_DEBUG "NFS: isize change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino);
-#endif
-		/*
-		 * If we have pending writebacks, things can get
-		 * messy.
-		 */
-		if (S_ISREG(inode->i_mode) && data_unstable) {
-			if (new_isize > cur_isize) {
+	if (new_isize != cur_isize) {
+		/* Do we perhaps have any outstanding writes? */
+		if (nfsi->npages == 0) {
+			/* No, but did we race with nfs_end_data_update()? */
+			if (verifier  ==  nfsi->cache_change_attribute) {
 				inode->i_size = new_isize;
-				invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
+				invalid |= NFS_INO_INVALID_DATA;
 			}
-		} else {
+			invalid |= NFS_INO_INVALID_ATTR;
+		} else if (new_isize > cur_isize) {
 			inode->i_size = new_isize;
 			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
 		}
+		dprintk("NFS: isize change on server for file %s/%ld\n",
+				inode->i_sb->s_id, inode->i_ino);
 	}
 
-	/*
-	 * Note: we don't check inode->i_mtime since pipes etc.
-	 *       can change this value in VFS without requiring a
-	 *	 cache revalidation.
-	 */
+	/* Check if the mtime agrees */
 	if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
 		memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
-#ifdef NFS_DEBUG_VERBOSE
-		printk(KERN_DEBUG "NFS: mtime change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino);
-#endif
+		dprintk("NFS: mtime change on server for file %s/%ld\n",
+				inode->i_sb->s_id, inode->i_ino);
 		if (!data_unstable)
 			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
 	}
 
 	if ((fattr->valid & NFS_ATTR_FATTR_V4)
 	    && nfsi->change_attr != fattr->change_attr) {
-#ifdef NFS_DEBUG_VERBOSE
-		printk(KERN_DEBUG "NFS: change_attr change on %s/%ld\n",
+		dprintk("NFS: change_attr change on server for file %s/%ld\n",
 		       inode->i_sb->s_id, inode->i_ino);
-#endif
 		nfsi->change_attr = fattr->change_attr;
 		if (!data_unstable)
-			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS;
+			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
 	}
 
-	memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
+	/* If ctime has changed we should definitely clear access+acl caches */
+	if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
+		if (!data_unstable)
+			invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+		memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
+	}
 	memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
 
 	if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) ||
 	    inode->i_uid != fattr->uid ||
 	    inode->i_gid != fattr->gid)
-		invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS;
+		invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
 
 	inode->i_mode = fattr->mode;
 	inode->i_nlink = fattr->nlink;
@@ -1385,74 +1421,95 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *raw_data)
 {
 	int error;
-	struct nfs_server *server;
+	struct nfs_server *server = NULL;
 	struct super_block *s;
 	struct nfs_fh *root;
 	struct nfs_mount_data *data = raw_data;
 
-	if (!data) {
-		printk("nfs_read_super: missing data argument\n");
-		return ERR_PTR(-EINVAL);
+	s = ERR_PTR(-EINVAL);
+	if (data == NULL) {
+		dprintk("%s: missing data argument\n", __FUNCTION__);
+		goto out_err;
+	}
+	if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) {
+		dprintk("%s: bad mount version\n", __FUNCTION__);
+		goto out_err;
 	}
+	switch (data->version) {
+		case 1:
+			data->namlen = 0;
+		case 2:
+			data->bsize  = 0;
+		case 3:
+			if (data->flags & NFS_MOUNT_VER3) {
+				dprintk("%s: mount structure version %d does not support NFSv3\n",
+						__FUNCTION__,
+						data->version);
+				goto out_err;
+			}
+			data->root.size = NFS2_FHSIZE;
+			memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
+		case 4:
+			if (data->flags & NFS_MOUNT_SECFLAVOUR) {
+				dprintk("%s: mount structure version %d does not support strong security\n",
+						__FUNCTION__,
+						data->version);
+				goto out_err;
+			}
+		case 5:
+			memset(data->context, 0, sizeof(data->context));
+	}
+#ifndef CONFIG_NFS_V3
+	/* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */
+	s = ERR_PTR(-EPROTONOSUPPORT);
+	if (data->flags & NFS_MOUNT_VER3) {
+		dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__);
+		goto out_err;
+	}
+#endif /* CONFIG_NFS_V3 */
 
+	s = ERR_PTR(-ENOMEM);
 	server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
 	if (!server)
-		return ERR_PTR(-ENOMEM);
+		goto out_err;
 	memset(server, 0, sizeof(struct nfs_server));
 	/* Zero out the NFS state stuff */
 	init_nfsv4_state(server);
-
-	if (data->version != NFS_MOUNT_VERSION) {
-		printk("nfs warning: mount version %s than kernel\n",
-			data->version < NFS_MOUNT_VERSION ? "older" : "newer");
-		if (data->version < 2)
-			data->namlen = 0;
-		if (data->version < 3)
-			data->bsize  = 0;
-		if (data->version < 4) {
-			data->flags &= ~NFS_MOUNT_VER3;
-			data->root.size = NFS2_FHSIZE;
-			memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
-		}
-		if (data->version < 5)
-			data->flags &= ~NFS_MOUNT_SECFLAVOUR;
-	}
+	server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
 
 	root = &server->fh;
 	if (data->flags & NFS_MOUNT_VER3)
 		root->size = data->root.size;
 	else
 		root->size = NFS2_FHSIZE;
+	s = ERR_PTR(-EINVAL);
 	if (root->size > sizeof(root->data)) {
-		printk("nfs_get_sb: invalid root filehandle\n");
-		kfree(server);
-		return ERR_PTR(-EINVAL);
+		dprintk("%s: invalid root filehandle\n", __FUNCTION__);
+		goto out_err;
 	}
 	memcpy(root->data, data->root.data, root->size);
 
 	/* We now require that the mount process passes the remote address */
 	memcpy(&server->addr, &data->addr, sizeof(server->addr));
 	if (server->addr.sin_addr.s_addr == INADDR_ANY) {
-		printk("NFS: mount program didn't pass remote address!\n");
-		kfree(server);
-		return ERR_PTR(-EINVAL);
+		dprintk("%s: mount program didn't pass remote address!\n",
+				__FUNCTION__);
+		goto out_err;
 	}
 
-	s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
-
-	if (IS_ERR(s) || s->s_root) {
-		kfree(server);
-		return s;
+	/* Fire up rpciod if not yet running */
+	s = ERR_PTR(rpciod_up());
+	if (IS_ERR(s)) {
+		dprintk("%s: couldn't start rpciod! Error = %ld\n",
+				__FUNCTION__, PTR_ERR(s));
+		goto out_err;
 	}
 
-	s->s_flags = flags;
+	s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
+	if (IS_ERR(s) || s->s_root)
+		goto out_rpciod_down;
 
-	/* Fire up rpciod if not yet running */
-	if (rpciod_up() != 0) {
-		printk(KERN_WARNING "NFS: couldn't start rpciod!\n");
-		kfree(server);
-		return ERR_PTR(-EIO);
-	}
+	s->s_flags = flags;
 
 	error = nfs_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
 	if (error) {
@@ -1462,6 +1519,11 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type,
 	}
 	s->s_flags |= MS_ACTIVE;
 	return s;
+out_rpciod_down:
+	rpciod_down();
+out_err:
+	kfree(server);
+	return s;
 }
 
 static void nfs_kill_super(struct super_block *s)
@@ -1470,10 +1532,12 @@ static void nfs_kill_super(struct super_block *s)
 
 	kill_anon_super(s);
 
-	if (server->client != NULL && !IS_ERR(server->client))
+	if (!IS_ERR(server->client))
 		rpc_shutdown_client(server->client);
-	if (server->client_sys != NULL && !IS_ERR(server->client_sys))
+	if (!IS_ERR(server->client_sys))
 		rpc_shutdown_client(server->client_sys);
+	if (!IS_ERR(server->client_acl))
+		rpc_shutdown_client(server->client_acl);
 
 	if (!(server->flags & NFS_MOUNT_NONLM))
 		lockd_down();	/* release rpc.lockd */
@@ -1594,15 +1658,19 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 
 	clp = nfs4_get_client(&server->addr.sin_addr);
 	if (!clp) {
-		printk(KERN_WARNING "NFS: failed to create NFS4 client.\n");
+		dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__);
 		return -EIO;
 	}
 
 	/* Now create transport and client */
 	authflavour = RPC_AUTH_UNIX;
 	if (data->auth_flavourlen != 0) {
-		if (data->auth_flavourlen > 1)
-			printk(KERN_INFO "NFS: cannot yet deal with multiple auth flavours.\n");
+		if (data->auth_flavourlen != 1) {
+			dprintk("%s: Invalid number of RPC auth flavours %d.\n",
+					__FUNCTION__, data->auth_flavourlen);
+			err = -EINVAL;
+			goto out_fail;
+		}
 		if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) {
 			err = -EFAULT;
 			goto out_fail;
@@ -1610,21 +1678,22 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 	}
 
 	down_write(&clp->cl_sem);
-	if (clp->cl_rpcclient == NULL) {
+	if (IS_ERR(clp->cl_rpcclient)) {
 		xprt = xprt_create_proto(proto, &server->addr, &timeparms);
 		if (IS_ERR(xprt)) {
 			up_write(&clp->cl_sem);
-			printk(KERN_WARNING "NFS: cannot create RPC transport.\n");
 			err = PTR_ERR(xprt);
+			dprintk("%s: cannot create RPC transport. Error = %d\n",
+					__FUNCTION__, err);
 			goto out_fail;
 		}
 		clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
 				server->rpc_ops->version, authflavour);
 		if (IS_ERR(clnt)) {
 			up_write(&clp->cl_sem);
-			printk(KERN_WARNING "NFS: cannot create RPC client.\n");
-			xprt_destroy(xprt);
 			err = PTR_ERR(clnt);
+			dprintk("%s: cannot create RPC client. Error = %d\n",
+					__FUNCTION__, err);
 			goto out_fail;
 		}
 		clnt->cl_intr     = 1;
@@ -1656,21 +1725,26 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 	clp = NULL;
 
 	if (IS_ERR(clnt)) {
-		printk(KERN_WARNING "NFS: cannot create RPC client.\n");
-		return PTR_ERR(clnt);
+		err = PTR_ERR(clnt);
+		dprintk("%s: cannot create RPC client. Error = %d\n",
+				__FUNCTION__, err);
+		return err;
 	}
 
 	server->client    = clnt;
 
 	if (server->nfs4_state->cl_idmap == NULL) {
-		printk(KERN_WARNING "NFS: failed to create idmapper.\n");
+		dprintk("%s: failed to create idmapper.\n", __FUNCTION__);
 		return -ENOMEM;
 	}
 
 	if (clnt->cl_auth->au_flavor != authflavour) {
-		if (rpcauth_create(authflavour, clnt) == NULL) {
-			printk(KERN_WARNING "NFS: couldn't create credcache!\n");
-			return -ENOMEM;
+		struct rpc_auth *auth;
+
+		auth = rpcauth_create(authflavour, clnt);
+		if (IS_ERR(auth)) {
+			dprintk("%s: couldn't create credcache!\n", __FUNCTION__);
+			return PTR_ERR(auth);
 		}
 	}
 
@@ -1730,8 +1804,12 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
 	struct nfs4_mount_data *data = raw_data;
 	void *p;
 
-	if (!data) {
-		printk("nfs_read_super: missing data argument\n");
+	if (data == NULL) {
+		dprintk("%s: missing data argument\n", __FUNCTION__);
+		return ERR_PTR(-EINVAL);
+	}
+	if (data->version <= 0 || data->version > NFS4_MOUNT_VERSION) {
+		dprintk("%s: bad mount version\n", __FUNCTION__);
 		return ERR_PTR(-EINVAL);
 	}
 
@@ -1741,11 +1819,7 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
 	memset(server, 0, sizeof(struct nfs_server));
 	/* Zero out the NFS state stuff */
 	init_nfsv4_state(server);
-
-	if (data->version != NFS4_MOUNT_VERSION) {
-		printk("nfs warning: mount version %s than kernel\n",
-			data->version < NFS4_MOUNT_VERSION ? "older" : "newer");
-	}
+	server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
 
 	p = nfs_copy_user_string(NULL, &data->hostname, 256);
 	if (IS_ERR(p))
@@ -1773,11 +1847,20 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
 	}
 	if (server->addr.sin_family != AF_INET ||
 	    server->addr.sin_addr.s_addr == INADDR_ANY) {
-		printk("NFS: mount program didn't pass remote IP address!\n");
+		dprintk("%s: mount program didn't pass remote IP address!\n",
+				__FUNCTION__);
 		s = ERR_PTR(-EINVAL);
 		goto out_free;
 	}
 
+	/* Fire up rpciod if not yet running */
+	s = ERR_PTR(rpciod_up());
+	if (IS_ERR(s)) {
+		dprintk("%s: couldn't start rpciod! Error = %ld\n",
+				__FUNCTION__, PTR_ERR(s));
+		goto out_free;
+	}
+
 	s = sget(fs_type, nfs4_compare_super, nfs_set_super, server);
 
 	if (IS_ERR(s) || s->s_root)
@@ -1785,13 +1868,6 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
 
 	s->s_flags = flags;
 
-	/* Fire up rpciod if not yet running */
-	if (rpciod_up() != 0) {
-		printk(KERN_WARNING "NFS: couldn't start rpciod!\n");
-		s = ERR_PTR(-EIO);
-		goto out_free;
-	}
-
 	error = nfs4_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
 	if (error) {
 		up_write(&s->s_umount);
@@ -1875,6 +1951,13 @@ static struct inode *nfs_alloc_inode(struct super_block *sb)
 	if (!nfsi)
 		return NULL;
 	nfsi->flags = 0;
+#ifdef CONFIG_NFS_V3_ACL
+	nfsi->acl_access = ERR_PTR(-EAGAIN);
+	nfsi->acl_default = ERR_PTR(-EAGAIN);
+#endif
+#ifdef CONFIG_NFS_V4
+	nfsi->nfs4_acl = NULL;
+#endif /* CONFIG_NFS_V4 */
 	return &nfsi->vfs_inode;
 }
 
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 9d3ddad96d9e..0e82617f2de0 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -80,9 +80,7 @@ mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version,
 	clnt = rpc_create_client(xprt, hostname,
 				&mnt_program, version,
 				RPC_AUTH_UNIX);
-	if (IS_ERR(clnt)) {
-		xprt_destroy(xprt);
-	} else {
+	if (!IS_ERR(clnt)) {
 		clnt->cl_softrtry = 1;
 		clnt->cl_chatty   = 1;
 		clnt->cl_oneshot  = 1;
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
new file mode 100644
index 000000000000..1b7a3ef2f813
--- /dev/null
+++ b/fs/nfs/nfs3acl.c
@@ -0,0 +1,403 @@
+#include <linux/fs.h>
+#include <linux/nfs.h>
+#include <linux/nfs3.h>
+#include <linux/nfs_fs.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/nfsacl.h>
+
+#define NFSDBG_FACILITY	NFSDBG_PROC
+
+ssize_t nfs3_listxattr(struct dentry *dentry, char *buffer, size_t size)
+{
+	struct inode *inode = dentry->d_inode;
+	struct posix_acl *acl;
+	int pos=0, len=0;
+
+#	define output(s) do {						\
+			if (pos + sizeof(s) <= size) {			\
+				memcpy(buffer + pos, s, sizeof(s));	\
+				pos += sizeof(s);			\
+			}						\
+			len += sizeof(s);				\
+		} while(0)
+
+	acl = nfs3_proc_getacl(inode, ACL_TYPE_ACCESS);
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (acl) {
+		output("system.posix_acl_access");
+		posix_acl_release(acl);
+	}
+
+	if (S_ISDIR(inode->i_mode)) {
+		acl = nfs3_proc_getacl(inode, ACL_TYPE_DEFAULT);
+		if (IS_ERR(acl))
+			return PTR_ERR(acl);
+		if (acl) {
+			output("system.posix_acl_default");
+			posix_acl_release(acl);
+		}
+	}
+
+#	undef output
+
+	if (!buffer || len <= size)
+		return len;
+	return -ERANGE;
+}
+
+ssize_t nfs3_getxattr(struct dentry *dentry, const char *name,
+		void *buffer, size_t size)
+{
+	struct inode *inode = dentry->d_inode;
+	struct posix_acl *acl;
+	int type, error = 0;
+
+	if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0)
+		type = ACL_TYPE_ACCESS;
+	else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0)
+		type = ACL_TYPE_DEFAULT;
+	else
+		return -EOPNOTSUPP;
+
+	acl = nfs3_proc_getacl(inode, type);
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	else if (acl) {
+		if (type == ACL_TYPE_ACCESS && acl->a_count == 0)
+			error = -ENODATA;
+		else
+			error = posix_acl_to_xattr(acl, buffer, size);
+		posix_acl_release(acl);
+	} else
+		error = -ENODATA;
+
+	return error;
+}
+
+int nfs3_setxattr(struct dentry *dentry, const char *name,
+	     const void *value, size_t size, int flags)
+{
+	struct inode *inode = dentry->d_inode;
+	struct posix_acl *acl;
+	int type, error;
+
+	if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0)
+		type = ACL_TYPE_ACCESS;
+	else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0)
+		type = ACL_TYPE_DEFAULT;
+	else
+		return -EOPNOTSUPP;
+
+	acl = posix_acl_from_xattr(value, size);
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	error = nfs3_proc_setacl(inode, type, acl);
+	posix_acl_release(acl);
+
+	return error;
+}
+
+int nfs3_removexattr(struct dentry *dentry, const char *name)
+{
+	struct inode *inode = dentry->d_inode;
+	int type;
+
+	if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0)
+		type = ACL_TYPE_ACCESS;
+	else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0)
+		type = ACL_TYPE_DEFAULT;
+	else
+		return -EOPNOTSUPP;
+
+	return nfs3_proc_setacl(inode, type, NULL);
+}
+
+static void __nfs3_forget_cached_acls(struct nfs_inode *nfsi)
+{
+	if (!IS_ERR(nfsi->acl_access)) {
+		posix_acl_release(nfsi->acl_access);
+		nfsi->acl_access = ERR_PTR(-EAGAIN);
+	}
+	if (!IS_ERR(nfsi->acl_default)) {
+		posix_acl_release(nfsi->acl_default);
+		nfsi->acl_default = ERR_PTR(-EAGAIN);
+	}
+}
+
+void nfs3_forget_cached_acls(struct inode *inode)
+{
+	dprintk("NFS: nfs3_forget_cached_acls(%s/%ld)\n", inode->i_sb->s_id,
+		inode->i_ino);
+	spin_lock(&inode->i_lock);
+	__nfs3_forget_cached_acls(NFS_I(inode));
+	spin_unlock(&inode->i_lock);
+}
+
+static struct posix_acl *nfs3_get_cached_acl(struct inode *inode, int type)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct posix_acl *acl = ERR_PTR(-EINVAL);
+
+	spin_lock(&inode->i_lock);
+	switch(type) {
+		case ACL_TYPE_ACCESS:
+			acl = nfsi->acl_access;
+			break;
+
+		case ACL_TYPE_DEFAULT:
+			acl = nfsi->acl_default;
+			break;
+
+		default:
+			goto out;
+	}
+	if (IS_ERR(acl))
+		acl = ERR_PTR(-EAGAIN);
+	else
+		acl = posix_acl_dup(acl);
+out:
+	spin_unlock(&inode->i_lock);
+	dprintk("NFS: nfs3_get_cached_acl(%s/%ld, %d) = %p\n", inode->i_sb->s_id,
+		inode->i_ino, type, acl);
+	return acl;
+}
+
+static void nfs3_cache_acls(struct inode *inode, struct posix_acl *acl,
+		    struct posix_acl *dfacl)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	dprintk("nfs3_cache_acls(%s/%ld, %p, %p)\n", inode->i_sb->s_id,
+		inode->i_ino, acl, dfacl);
+	spin_lock(&inode->i_lock);
+	__nfs3_forget_cached_acls(NFS_I(inode));
+	nfsi->acl_access = posix_acl_dup(acl);
+	nfsi->acl_default = posix_acl_dup(dfacl);
+	spin_unlock(&inode->i_lock);
+}
+
+struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs_fattr fattr;
+	struct page *pages[NFSACL_MAXPAGES] = { };
+	struct nfs3_getaclargs args = {
+		.fh = NFS_FH(inode),
+		/* The xdr layer may allocate pages here. */
+		.pages = pages,
+	};
+	struct nfs3_getaclres res = {
+		.fattr =	&fattr,
+	};
+	struct posix_acl *acl;
+	int status, count;
+
+	if (!nfs_server_capable(inode, NFS_CAP_ACLS))
+		return ERR_PTR(-EOPNOTSUPP);
+
+	status = nfs_revalidate_inode(server, inode);
+	if (status < 0)
+		return ERR_PTR(status);
+	acl = nfs3_get_cached_acl(inode, type);
+	if (acl != ERR_PTR(-EAGAIN))
+		return acl;
+	acl = NULL;
+
+	/*
+	 * Only get the access acl when explicitly requested: We don't
+	 * need it for access decisions, and only some applications use
+	 * it. Applications which request the access acl first are not
+	 * penalized from this optimization.
+	 */
+	if (type == ACL_TYPE_ACCESS)
+		args.mask |= NFS_ACLCNT|NFS_ACL;
+	if (S_ISDIR(inode->i_mode))
+		args.mask |= NFS_DFACLCNT|NFS_DFACL;
+	if (args.mask == 0)
+		return NULL;
+
+	dprintk("NFS call getacl\n");
+	status = rpc_call(server->client_acl, ACLPROC3_GETACL,
+			  &args, &res, 0);
+	dprintk("NFS reply getacl: %d\n", status);
+
+	/* pages may have been allocated at the xdr layer. */
+	for (count = 0; count < NFSACL_MAXPAGES && args.pages[count]; count++)
+		__free_page(args.pages[count]);
+
+	switch (status) {
+		case 0:
+			status = nfs_refresh_inode(inode, &fattr);
+			break;
+		case -EPFNOSUPPORT:
+		case -EPROTONOSUPPORT:
+			dprintk("NFS_V3_ACL extension not supported; disabling\n");
+			server->caps &= ~NFS_CAP_ACLS;
+		case -ENOTSUPP:
+			status = -EOPNOTSUPP;
+		default:
+			goto getout;
+	}
+	if ((args.mask & res.mask) != args.mask) {
+		status = -EIO;
+		goto getout;
+	}
+
+	if (res.acl_access != NULL) {
+		if (posix_acl_equiv_mode(res.acl_access, NULL) == 0) {
+			posix_acl_release(res.acl_access);
+			res.acl_access = NULL;
+		}
+	}
+	nfs3_cache_acls(inode, res.acl_access, res.acl_default);
+
+	switch(type) {
+		case ACL_TYPE_ACCESS:
+			acl = res.acl_access;
+			res.acl_access = NULL;
+			break;
+
+		case ACL_TYPE_DEFAULT:
+			acl = res.acl_default;
+			res.acl_default = NULL;
+	}
+
+getout:
+	posix_acl_release(res.acl_access);
+	posix_acl_release(res.acl_default);
+
+	if (status != 0) {
+		posix_acl_release(acl);
+		acl = ERR_PTR(status);
+	}
+	return acl;
+}
+
+static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
+		  struct posix_acl *dfacl)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs_fattr fattr;
+	struct page *pages[NFSACL_MAXPAGES] = { };
+	struct nfs3_setaclargs args = {
+		.inode = inode,
+		.mask = NFS_ACL,
+		.acl_access = acl,
+		.pages = pages,
+	};
+	int status, count;
+
+	status = -EOPNOTSUPP;
+	if (!nfs_server_capable(inode, NFS_CAP_ACLS))
+		goto out;
+
+	/* We are doing this here, because XDR marshalling can only
+	   return -ENOMEM. */
+	status = -ENOSPC;
+	if (acl != NULL && acl->a_count > NFS_ACL_MAX_ENTRIES)
+		goto out;
+	if (dfacl != NULL && dfacl->a_count > NFS_ACL_MAX_ENTRIES)
+		goto out;
+	if (S_ISDIR(inode->i_mode)) {
+		args.mask |= NFS_DFACL;
+		args.acl_default = dfacl;
+	}
+
+	dprintk("NFS call setacl\n");
+	nfs_begin_data_update(inode);
+	status = rpc_call(server->client_acl, ACLPROC3_SETACL,
+			  &args, &fattr, 0);
+	NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS;
+	nfs_end_data_update(inode);
+	dprintk("NFS reply setacl: %d\n", status);
+
+	/* pages may have been allocated at the xdr layer. */
+	for (count = 0; count < NFSACL_MAXPAGES && args.pages[count]; count++)
+		__free_page(args.pages[count]);
+
+	switch (status) {
+		case 0:
+			status = nfs_refresh_inode(inode, &fattr);
+			break;
+		case -EPFNOSUPPORT:
+		case -EPROTONOSUPPORT:
+			dprintk("NFS_V3_ACL SETACL RPC not supported"
+					"(will not retry)\n");
+			server->caps &= ~NFS_CAP_ACLS;
+		case -ENOTSUPP:
+			status = -EOPNOTSUPP;
+	}
+out:
+	return status;
+}
+
+int nfs3_proc_setacl(struct inode *inode, int type, struct posix_acl *acl)
+{
+	struct posix_acl *alloc = NULL, *dfacl = NULL;
+	int status;
+
+	if (S_ISDIR(inode->i_mode)) {
+		switch(type) {
+			case ACL_TYPE_ACCESS:
+				alloc = dfacl = nfs3_proc_getacl(inode,
+						ACL_TYPE_DEFAULT);
+				if (IS_ERR(alloc))
+					goto fail;
+				break;
+
+			case ACL_TYPE_DEFAULT:
+				dfacl = acl;
+				alloc = acl = nfs3_proc_getacl(inode,
+						ACL_TYPE_ACCESS);
+				if (IS_ERR(alloc))
+					goto fail;
+				break;
+
+			default:
+				return -EINVAL;
+		}
+	} else if (type != ACL_TYPE_ACCESS)
+			return -EINVAL;
+
+	if (acl == NULL) {
+		alloc = acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
+		if (IS_ERR(alloc))
+			goto fail;
+	}
+	status = nfs3_proc_setacls(inode, acl, dfacl);
+	posix_acl_release(alloc);
+	return status;
+
+fail:
+	return PTR_ERR(alloc);
+}
+
+int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode,
+		mode_t mode)
+{
+	struct posix_acl *dfacl, *acl;
+	int error = 0;
+
+	dfacl = nfs3_proc_getacl(dir, ACL_TYPE_DEFAULT);
+	if (IS_ERR(dfacl)) {
+		error = PTR_ERR(dfacl);
+		return (error == -EOPNOTSUPP) ? 0 : error;
+	}
+	if (!dfacl)
+		return 0;
+	acl = posix_acl_clone(dfacl, GFP_KERNEL);
+	error = -ENOMEM;
+	if (!acl)
+		goto out_release_dfacl;
+	error = posix_acl_create_masq(acl, &mode);
+	if (error < 0)
+		goto out_release_acl;
+	error = nfs3_proc_setacls(inode, acl, S_ISDIR(inode->i_mode) ?
+						      dfacl : NULL);
+out_release_acl:
+	posix_acl_release(acl);
+out_release_dfacl:
+	posix_acl_release(dfacl);
+	return error;
+}
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 3878494dfc2c..7851569b31c6 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -17,6 +17,7 @@
 #include <linux/nfs_page.h>
 #include <linux/lockd/bind.h>
 #include <linux/smp_lock.h>
+#include <linux/nfs_mount.h>
 
 #define NFSDBG_FACILITY		NFSDBG_PROC
 
@@ -45,7 +46,7 @@ static inline int
 nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags)
 {
 	struct rpc_message msg = {
-		.rpc_proc	= &nfs3_procedures[proc],
+		.rpc_proc	= &clnt->cl_procinfo[proc],
 		.rpc_argp	= argp,
 		.rpc_resp	= resp,
 	};
@@ -313,7 +314,8 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		.fh		= &fhandle,
 		.fattr		= &fattr
 	};
-	int			status;
+	mode_t mode = sattr->ia_mode;
+	int status;
 
 	dprintk("NFS call  create %s\n", dentry->d_name.name);
 	arg.createmode = NFS3_CREATE_UNCHECKED;
@@ -323,6 +325,8 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		arg.verifier[1] = current->pid;
 	}
 
+	sattr->ia_mode &= ~current->fs->umask;
+
 again:
 	dir_attr.valid = 0;
 	fattr.valid = 0;
@@ -369,6 +373,9 @@ again:
 		nfs_refresh_inode(dentry->d_inode, &fattr);
 		dprintk("NFS reply setattr (post-create): %d\n", status);
 	}
+	if (status != 0)
+		goto out;
+	status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode);
 out:
 	dprintk("NFS reply create: %d\n", status);
 	return status;
@@ -538,15 +545,24 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
 		.fh		= &fhandle,
 		.fattr		= &fattr
 	};
-	int			status;
+	int mode = sattr->ia_mode;
+	int status;
 
 	dprintk("NFS call  mkdir %s\n", dentry->d_name.name);
 	dir_attr.valid = 0;
 	fattr.valid = 0;
+
+	sattr->ia_mode &= ~current->fs->umask;
+
 	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0);
 	nfs_refresh_inode(dir, &dir_attr);
-	if (status == 0)
-		status = nfs_instantiate(dentry, &fhandle, &fattr);
+	if (status != 0)
+		goto out;
+	status = nfs_instantiate(dentry, &fhandle, &fattr);
+	if (status != 0)
+		goto out;
+	status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode);
+out:
 	dprintk("NFS reply mkdir: %d\n", status);
 	return status;
 }
@@ -641,6 +657,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		.fh		= &fh,
 		.fattr		= &fattr
 	};
+	mode_t mode = sattr->ia_mode;
 	int status;
 
 	switch (sattr->ia_mode & S_IFMT) {
@@ -653,12 +670,20 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 
 	dprintk("NFS call  mknod %s %u:%u\n", dentry->d_name.name,
 			MAJOR(rdev), MINOR(rdev));
+
+	sattr->ia_mode &= ~current->fs->umask;
+
 	dir_attr.valid = 0;
 	fattr.valid = 0;
 	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0);
 	nfs_refresh_inode(dir, &dir_attr);
-	if (status == 0)
-		status = nfs_instantiate(dentry, &fh, &fattr);
+	if (status != 0)
+		goto out;
+	status = nfs_instantiate(dentry, &fh, &fattr);
+	if (status != 0)
+		goto out;
+	status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode);
+out:
 	dprintk("NFS reply mknod: %d\n", status);
 	return status;
 }
@@ -825,7 +850,8 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
 struct nfs_rpc_ops	nfs_v3_clientops = {
 	.version	= 3,			/* protocol version */
 	.dentry_ops	= &nfs_dentry_operations,
-	.dir_inode_ops	= &nfs_dir_inode_operations,
+	.dir_inode_ops	= &nfs3_dir_inode_operations,
+	.file_inode_ops	= &nfs3_file_inode_operations,
 	.getroot	= nfs3_proc_get_root,
 	.getattr	= nfs3_proc_getattr,
 	.setattr	= nfs3_proc_setattr,
@@ -856,4 +882,5 @@ struct nfs_rpc_ops	nfs_v3_clientops = {
 	.file_open	= nfs_open,
 	.file_release	= nfs_release,
 	.lock		= nfs3_proc_lock,
+	.clear_acl_cache = nfs3_forget_cached_acls,
 };
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index a3593d47e5ab..db4a904810a4 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -21,6 +21,7 @@
 #include <linux/nfs.h>
 #include <linux/nfs3.h>
 #include <linux/nfs_fs.h>
+#include <linux/nfsacl.h>
 
 #define NFSDBG_FACILITY		NFSDBG_XDR
 
@@ -79,6 +80,11 @@ extern int			nfs_stat_to_errno(int);
 #define NFS3_pathconfres_sz	(1+NFS3_post_op_attr_sz+6)
 #define NFS3_commitres_sz	(1+NFS3_wcc_data_sz+2)
 
+#define ACL3_getaclargs_sz	(NFS3_fh_sz+1)
+#define ACL3_setaclargs_sz	(NFS3_fh_sz+1+2*(2+5*3))
+#define ACL3_getaclres_sz	(1+NFS3_post_op_attr_sz+1+2*(2+5*3))
+#define ACL3_setaclres_sz	(1+NFS3_post_op_attr_sz)
+
 /*
  * Map file type to S_IFMT bits
  */
@@ -627,6 +633,74 @@ nfs3_xdr_commitargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args)
 	return 0;
 }
 
+#ifdef CONFIG_NFS_V3_ACL
+/*
+ * Encode GETACL arguments
+ */
+static int
+nfs3_xdr_getaclargs(struct rpc_rqst *req, u32 *p,
+		    struct nfs3_getaclargs *args)
+{
+	struct rpc_auth *auth = req->rq_task->tk_auth;
+	unsigned int replen;
+
+	p = xdr_encode_fhandle(p, args->fh);
+	*p++ = htonl(args->mask);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+
+	if (args->mask & (NFS_ACL | NFS_DFACL)) {
+		/* Inline the page array */
+		replen = (RPC_REPHDRSIZE + auth->au_rslack +
+			  ACL3_getaclres_sz) << 2;
+		xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0,
+				 NFSACL_MAXPAGES << PAGE_SHIFT);
+	}
+	return 0;
+}
+
+/*
+ * Encode SETACL arguments
+ */
+static int
+nfs3_xdr_setaclargs(struct rpc_rqst *req, u32 *p,
+                   struct nfs3_setaclargs *args)
+{
+	struct xdr_buf *buf = &req->rq_snd_buf;
+	unsigned int base, len_in_head, len = nfsacl_size(
+		(args->mask & NFS_ACL)   ? args->acl_access  : NULL,
+		(args->mask & NFS_DFACL) ? args->acl_default : NULL);
+	int count, err;
+
+	p = xdr_encode_fhandle(p, NFS_FH(args->inode));
+	*p++ = htonl(args->mask);
+	base = (char *)p - (char *)buf->head->iov_base;
+	/* put as much of the acls into head as possible. */
+	len_in_head = min_t(unsigned int, buf->head->iov_len - base, len);
+	len -= len_in_head;
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p + (len_in_head >> 2));
+
+	for (count = 0; (count << PAGE_SHIFT) < len; count++) {
+		args->pages[count] = alloc_page(GFP_KERNEL);
+		if (!args->pages[count]) {
+			while (count)
+				__free_page(args->pages[--count]);
+			return -ENOMEM;
+		}
+	}
+	xdr_encode_pages(buf, args->pages, 0, len);
+
+	err = nfsacl_encode(buf, base, args->inode,
+			    (args->mask & NFS_ACL) ?
+			    args->acl_access : NULL, 1, 0);
+	if (err > 0)
+		err = nfsacl_encode(buf, base + err, args->inode,
+				    (args->mask & NFS_DFACL) ?
+				    args->acl_default : NULL, 1,
+				    NFS_ACL_DEFAULT);
+	return (err > 0) ? 0 : err;
+}
+#endif  /* CONFIG_NFS_V3_ACL */
+
 /*
  * NFS XDR decode functions
  */
@@ -978,6 +1052,54 @@ nfs3_xdr_commitres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res)
 	return 0;
 }
 
+#ifdef CONFIG_NFS_V3_ACL
+/*
+ * Decode GETACL reply
+ */
+static int
+nfs3_xdr_getaclres(struct rpc_rqst *req, u32 *p,
+		   struct nfs3_getaclres *res)
+{
+	struct xdr_buf *buf = &req->rq_rcv_buf;
+	int status = ntohl(*p++);
+	struct posix_acl **acl;
+	unsigned int *aclcnt;
+	int err, base;
+
+	if (status != 0)
+		return -nfs_stat_to_errno(status);
+	p = xdr_decode_post_op_attr(p, res->fattr);
+	res->mask = ntohl(*p++);
+	if (res->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
+		return -EINVAL;
+	base = (char *)p - (char *)req->rq_rcv_buf.head->iov_base;
+
+	acl = (res->mask & NFS_ACL) ? &res->acl_access : NULL;
+	aclcnt = (res->mask & NFS_ACLCNT) ? &res->acl_access_count : NULL;
+	err = nfsacl_decode(buf, base, aclcnt, acl);
+
+	acl = (res->mask & NFS_DFACL) ? &res->acl_default : NULL;
+	aclcnt = (res->mask & NFS_DFACLCNT) ? &res->acl_default_count : NULL;
+	if (err > 0)
+		err = nfsacl_decode(buf, base + err, aclcnt, acl);
+	return (err > 0) ? 0 : err;
+}
+
+/*
+ * Decode setacl reply.
+ */
+static int
+nfs3_xdr_setaclres(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
+{
+	int status = ntohl(*p++);
+
+	if (status)
+		return -nfs_stat_to_errno(status);
+	xdr_decode_post_op_attr(p, fattr);
+	return 0;
+}
+#endif  /* CONFIG_NFS_V3_ACL */
+
 #ifndef MAX
 # define MAX(a, b)	(((a) > (b))? (a) : (b))
 #endif
@@ -1021,3 +1143,28 @@ struct rpc_version		nfs_version3 = {
 	.procs			= nfs3_procedures
 };
 
+#ifdef CONFIG_NFS_V3_ACL
+static struct rpc_procinfo	nfs3_acl_procedures[] = {
+	[ACLPROC3_GETACL] = {
+		.p_proc = ACLPROC3_GETACL,
+		.p_encode = (kxdrproc_t) nfs3_xdr_getaclargs,
+		.p_decode = (kxdrproc_t) nfs3_xdr_getaclres,
+		.p_bufsiz = MAX(ACL3_getaclargs_sz, ACL3_getaclres_sz) << 2,
+		.p_timer = 1,
+	},
+	[ACLPROC3_SETACL] = {
+		.p_proc = ACLPROC3_SETACL,
+		.p_encode = (kxdrproc_t) nfs3_xdr_setaclargs,
+		.p_decode = (kxdrproc_t) nfs3_xdr_setaclres,
+		.p_bufsiz = MAX(ACL3_setaclargs_sz, ACL3_setaclres_sz) << 2,
+		.p_timer = 0,
+	},
+};
+
+struct rpc_version		nfsacl_version3 = {
+	.number			= 3,
+	.nrprocs		= sizeof(nfs3_acl_procedures)/
+				  sizeof(nfs3_acl_procedures[0]),
+	.procs			= nfs3_acl_procedures,
+};
+#endif  /* CONFIG_NFS_V3_ACL */
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
new file mode 100644
index 000000000000..ec1a22d7b876
--- /dev/null
+++ b/fs/nfs/nfs4_fs.h
@@ -0,0 +1,253 @@
+/*
+ * linux/fs/nfs/nfs4_fs.h
+ *
+ * Copyright (C) 2005 Trond Myklebust
+ *
+ * NFSv4-specific filesystem definitions and declarations
+ */
+
+#ifndef __LINUX_FS_NFS_NFS4_FS_H
+#define __LINUX_FS_NFS_NFS4_FS_H
+
+#ifdef CONFIG_NFS_V4
+
+struct idmap;
+
+/*
+ * In a seqid-mutating op, this macro controls which error return
+ * values trigger incrementation of the seqid.
+ *
+ * from rfc 3010:
+ * The client MUST monotonically increment the sequence number for the
+ * CLOSE, LOCK, LOCKU, OPEN, OPEN_CONFIRM, and OPEN_DOWNGRADE
+ * operations.  This is true even in the event that the previous
+ * operation that used the sequence number received an error.  The only
+ * exception to this rule is if the previous operation received one of
+ * the following errors: NFSERR_STALE_CLIENTID, NFSERR_STALE_STATEID,
+ * NFSERR_BAD_STATEID, NFSERR_BAD_SEQID, NFSERR_BADXDR,
+ * NFSERR_RESOURCE, NFSERR_NOFILEHANDLE.
+ *
+ */
+#define seqid_mutating_err(err)       \
+(((err) != NFSERR_STALE_CLIENTID) &&  \
+ ((err) != NFSERR_STALE_STATEID)  &&  \
+ ((err) != NFSERR_BAD_STATEID)    &&  \
+ ((err) != NFSERR_BAD_SEQID)      &&  \
+ ((err) != NFSERR_BAD_XDR)        &&  \
+ ((err) != NFSERR_RESOURCE)       &&  \
+ ((err) != NFSERR_NOFILEHANDLE))
+
+enum nfs4_client_state {
+	NFS4CLNT_OK  = 0,
+};
+
+/*
+ * The nfs4_client identifies our client state to the server.
+ */
+struct nfs4_client {
+	struct list_head	cl_servers;	/* Global list of servers */
+	struct in_addr		cl_addr;	/* Server identifier */
+	u64			cl_clientid;	/* constant */
+	nfs4_verifier		cl_confirm;
+	unsigned long		cl_state;
+
+	u32			cl_lockowner_id;
+
+	/*
+	 * The following rwsem ensures exclusive access to the server
+	 * while we recover the state following a lease expiration.
+	 */
+	struct rw_semaphore	cl_sem;
+
+	struct list_head	cl_delegations;
+	struct list_head	cl_state_owners;
+	struct list_head	cl_unused;
+	int			cl_nunused;
+	spinlock_t		cl_lock;
+	atomic_t		cl_count;
+
+	struct rpc_clnt *	cl_rpcclient;
+	struct rpc_cred *	cl_cred;
+
+	struct list_head	cl_superblocks;	/* List of nfs_server structs */
+
+	unsigned long		cl_lease_time;
+	unsigned long		cl_last_renewal;
+	struct work_struct	cl_renewd;
+	struct work_struct	cl_recoverd;
+
+	wait_queue_head_t	cl_waitq;
+	struct rpc_wait_queue	cl_rpcwaitq;
+
+	/* used for the setclientid verifier */
+	struct timespec		cl_boot_time;
+
+	/* idmapper */
+	struct idmap *		cl_idmap;
+
+	/* Our own IP address, as a null-terminated string.
+	 * This is used to generate the clientid, and the callback address.
+	 */
+	char			cl_ipaddr[16];
+	unsigned char		cl_id_uniquifier;
+};
+
+/*
+ * NFS4 state_owners and lock_owners are simply labels for ordered
+ * sequences of RPC calls. Their sole purpose is to provide once-only
+ * semantics by allowing the server to identify replayed requests.
+ *
+ * The ->so_sema is held during all state_owner seqid-mutating operations:
+ * OPEN, OPEN_DOWNGRADE, and CLOSE. Its purpose is to properly serialize
+ * so_seqid.
+ */
+struct nfs4_state_owner {
+	struct list_head     so_list;	 /* per-clientid list of state_owners */
+	struct nfs4_client   *so_client;
+	u32                  so_id;      /* 32-bit identifier, unique */
+	struct semaphore     so_sema;
+	u32                  so_seqid;   /* protected by so_sema */
+	atomic_t	     so_count;
+
+	struct rpc_cred	     *so_cred;	 /* Associated cred */
+	struct list_head     so_states;
+	struct list_head     so_delegations;
+};
+
+/*
+ * struct nfs4_state maintains the client-side state for a given
+ * (state_owner,inode) tuple (OPEN) or state_owner (LOCK).
+ *
+ * OPEN:
+ * In order to know when to OPEN_DOWNGRADE or CLOSE the state on the server,
+ * we need to know how many files are open for reading or writing on a
+ * given inode. This information too is stored here.
+ *
+ * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN)
+ */
+
+struct nfs4_lock_state {
+	struct list_head	ls_locks;	/* Other lock stateids */
+	struct nfs4_state *	ls_state;	/* Pointer to open state */
+	fl_owner_t		ls_owner;	/* POSIX lock owner */
+#define NFS_LOCK_INITIALIZED 1
+	int			ls_flags;
+	u32			ls_seqid;
+	u32			ls_id;
+	nfs4_stateid		ls_stateid;
+	atomic_t		ls_count;
+};
+
+/* bits for nfs4_state->flags */
+enum {
+	LK_STATE_IN_USE,
+	NFS_DELEGATED_STATE,
+};
+
+struct nfs4_state {
+	struct list_head open_states;	/* List of states for the same state_owner */
+	struct list_head inode_states;	/* List of states for the same inode */
+	struct list_head lock_states;	/* List of subservient lock stateids */
+
+	struct nfs4_state_owner *owner;	/* Pointer to the open owner */
+	struct inode *inode;		/* Pointer to the inode */
+
+	unsigned long flags;		/* Do we hold any locks? */
+	struct semaphore lock_sema;	/* Serializes file locking operations */
+	spinlock_t state_lock;		/* Protects the lock_states list */
+
+	nfs4_stateid stateid;
+
+	unsigned int nreaders;
+	unsigned int nwriters;
+	int state;			/* State on the server (R,W, or RW) */
+	atomic_t count;
+};
+
+
+struct nfs4_exception {
+	long timeout;
+	int retry;
+};
+
+struct nfs4_state_recovery_ops {
+	int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *);
+	int (*recover_lock)(struct nfs4_state *, struct file_lock *);
+};
+
+extern struct dentry_operations nfs4_dentry_operations;
+extern struct inode_operations nfs4_dir_inode_operations;
+
+/* inode.c */
+extern ssize_t nfs4_getxattr(struct dentry *, const char *, void *, size_t);
+extern int nfs4_setxattr(struct dentry *, const char *, const void *, size_t, int);
+extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t);
+
+
+/* nfs4proc.c */
+extern int nfs4_map_errors(int err);
+extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short);
+extern int nfs4_proc_setclientid_confirm(struct nfs4_client *);
+extern int nfs4_proc_async_renew(struct nfs4_client *);
+extern int nfs4_proc_renew(struct nfs4_client *);
+extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode);
+extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
+extern int nfs4_open_revalidate(struct inode *, struct dentry *, int);
+
+extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops;
+extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops;
+
+extern const u32 nfs4_fattr_bitmap[2];
+extern const u32 nfs4_statfs_bitmap[2];
+extern const u32 nfs4_pathconf_bitmap[2];
+extern const u32 nfs4_fsinfo_bitmap[2];
+
+/* nfs4renewd.c */
+extern void nfs4_schedule_state_renewal(struct nfs4_client *);
+extern void nfs4_renewd_prepare_shutdown(struct nfs_server *);
+extern void nfs4_kill_renewd(struct nfs4_client *);
+extern void nfs4_renew_state(void *);
+
+/* nfs4state.c */
+extern void init_nfsv4_state(struct nfs_server *);
+extern void destroy_nfsv4_state(struct nfs_server *);
+extern struct nfs4_client *nfs4_get_client(struct in_addr *);
+extern void nfs4_put_client(struct nfs4_client *clp);
+extern int nfs4_init_client(struct nfs4_client *clp);
+extern struct nfs4_client *nfs4_find_client(struct in_addr *);
+extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *);
+
+extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
+extern void nfs4_put_state_owner(struct nfs4_state_owner *);
+extern void nfs4_drop_state_owner(struct nfs4_state_owner *);
+extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
+extern void nfs4_put_open_state(struct nfs4_state *);
+extern void nfs4_close_state(struct nfs4_state *, mode_t);
+extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode);
+extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp);
+extern void nfs4_schedule_state_recovery(struct nfs4_client *);
+extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
+extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls);
+extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
+
+extern const nfs4_stateid zero_stateid;
+
+/* nfs4xdr.c */
+extern uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus);
+extern struct rpc_procinfo nfs4_procedures[];
+
+struct nfs4_mount_data;
+
+/* callback_xdr.c */
+extern struct svc_version nfs4_callback_version1;
+
+#else
+
+#define init_nfsv4_state(server)  do { } while (0)
+#define destroy_nfsv4_state(server)       do { } while (0)
+#define nfs4_put_state_owner(inode, owner) do { } while (0)
+#define nfs4_put_open_state(state) do { } while (0)
+#define nfs4_close_state(a, b) do { } while (0)
+
+#endif /* CONFIG_NFS_V4 */
+#endif /* __LINUX_FS_NFS_NFS4_FS.H */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 1d5cb3e80c3e..1b76f80aedb9 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -48,6 +48,7 @@
 #include <linux/smp_lock.h>
 #include <linux/namei.h>
 
+#include "nfs4_fs.h"
 #include "delegation.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PROC
@@ -62,8 +63,6 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
 extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus);
 extern struct rpc_procinfo nfs4_procedures[];
 
-extern nfs4_stateid zero_stateid;
-
 /* Prevent leaks of NFSv4 errors into userland */
 int nfs4_map_errors(int err)
 {
@@ -104,7 +103,7 @@ const u32 nfs4_statfs_bitmap[2] = {
 	| FATTR4_WORD1_SPACE_TOTAL
 };
 
-u32 nfs4_pathconf_bitmap[2] = {
+const u32 nfs4_pathconf_bitmap[2] = {
 	FATTR4_WORD0_MAXLINK
 	| FATTR4_WORD0_MAXNAME,
 	0
@@ -124,7 +123,7 @@ static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry,
 
 	BUG_ON(readdir->count < 80);
 	if (cookie > 2) {
-		readdir->cookie = (cookie > 2) ? cookie : 0;
+		readdir->cookie = cookie;
 		memcpy(&readdir->verifier, verifier, sizeof(readdir->verifier));
 		return;
 	}
@@ -270,14 +269,9 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta
 	int err;
 	do {
 		err = _nfs4_open_reclaim(sp, state);
-		switch (err) {
-			case 0:
-			case -NFS4ERR_STALE_CLIENTID:
-			case -NFS4ERR_STALE_STATEID:
-			case -NFS4ERR_EXPIRED:
-				return err;
-		}
-		err = nfs4_handle_exception(server, err, &exception);
+		if (err != -NFS4ERR_DELAY)
+			break;
+		nfs4_handle_exception(server, err, &exception);
 	} while (exception.retry);
 	return err;
 }
@@ -509,6 +503,20 @@ out_stale:
 	goto out_nodeleg;
 }
 
+static inline int nfs4_do_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
+{
+	struct nfs_server *server = NFS_SERVER(dentry->d_inode);
+	struct nfs4_exception exception = { };
+	int err;
+
+	do {
+		err = _nfs4_open_expired(sp, state, dentry);
+		if (err == -NFS4ERR_DELAY)
+			nfs4_handle_exception(server, err, &exception);
+	} while (exception.retry);
+	return err;
+}
+
 static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state)
 {
 	struct nfs_inode *nfsi = NFS_I(state->inode);
@@ -521,7 +529,7 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta
 			continue;
 		get_nfs_open_context(ctx);
 		spin_unlock(&state->inode->i_lock);
-		status = _nfs4_open_expired(sp, state, ctx->dentry);
+		status = nfs4_do_open_expired(sp, state, ctx->dentry);
 		put_nfs_open_context(ctx);
 		return status;
 	}
@@ -748,11 +756,10 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
 
         fattr->valid = 0;
 
-	if (state != NULL)
+	if (state != NULL) {
 		msg.rpc_cred = state->owner->so_cred;
-	if (sattr->ia_valid & ATTR_SIZE)
-		nfs4_copy_stateid(&arg.stateid, state, NULL);
-	else
+		nfs4_copy_stateid(&arg.stateid, state, current->files);
+	} else
 		memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid));
 
 	return rpc_call_sync(server->client, &msg, 0);
@@ -1116,47 +1123,31 @@ static int
 nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 		  struct iattr *sattr)
 {
-	struct inode *		inode = dentry->d_inode;
-	int			size_change = sattr->ia_valid & ATTR_SIZE;
-	struct nfs4_state	*state = NULL;
-	int need_iput = 0;
+	struct rpc_cred *cred;
+	struct inode *inode = dentry->d_inode;
+	struct nfs4_state *state;
 	int status;
 
 	fattr->valid = 0;
 	
-	if (size_change) {
-		struct rpc_cred *cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
-		if (IS_ERR(cred))
-			return PTR_ERR(cred);
+	cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
+	if (IS_ERR(cred))
+		return PTR_ERR(cred);
+	/* Search for an existing WRITE delegation first */
+	state = nfs4_open_delegated(inode, FMODE_WRITE, cred);
+	if (!IS_ERR(state)) {
+		/* NB: nfs4_open_delegated() bumps the inode->i_count */
+		iput(inode);
+	} else {
+		/* Search for an existing open(O_WRITE) stateid */
 		state = nfs4_find_state(inode, cred, FMODE_WRITE);
-		if (state == NULL) {
-			state = nfs4_open_delegated(dentry->d_inode,
-					FMODE_WRITE, cred);
-			if (IS_ERR(state))
-				state = nfs4_do_open(dentry->d_parent->d_inode,
-						dentry, FMODE_WRITE,
-						NULL, cred);
-			need_iput = 1;
-		}
-		put_rpccred(cred);
-		if (IS_ERR(state))
-			return PTR_ERR(state);
-
-		if (state->inode != inode) {
-			printk(KERN_WARNING "nfs: raced in setattr (%p != %p), returning -EIO\n", inode, state->inode);
-			status = -EIO;
-			goto out;
-		}
 	}
+
 	status = nfs4_do_setattr(NFS_SERVER(inode), fattr,
 			NFS_FH(inode), sattr, state);
-out:
-	if (state) {
-		inode = state->inode;
+	if (state != NULL)
 		nfs4_close_state(state, FMODE_WRITE);
-		if (need_iput)
-			iput(inode);
-	}
+	put_rpccred(cred);
 	return status;
 }
 
@@ -1731,6 +1722,10 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 	};
 	int			status;
 
+	dprintk("%s: dentry = %s/%s, cookie = %Lu\n", __FUNCTION__,
+			dentry->d_parent->d_name.name,
+			dentry->d_name.name,
+			(unsigned long long)cookie);
 	lock_kernel();
 	nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args);
 	res.pgbase = args.pgbase;
@@ -1738,6 +1733,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 	if (status == 0)
 		memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE);
 	unlock_kernel();
+	dprintk("%s: returns %d\n", __FUNCTION__, status);
 	return status;
 }
 
@@ -2163,6 +2159,193 @@ nfs4_proc_file_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+static inline int nfs4_server_supports_acls(struct nfs_server *server)
+{
+	return (server->caps & NFS_CAP_ACLS)
+		&& (server->acl_bitmask & ACL4_SUPPORT_ALLOW_ACL)
+		&& (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL);
+}
+
+/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_CACHE_SIZE, and that
+ * it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_CACHE_SIZE) bytes on
+ * the stack.
+ */
+#define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT)
+
+static void buf_to_pages(const void *buf, size_t buflen,
+		struct page **pages, unsigned int *pgbase)
+{
+	const void *p = buf;
+
+	*pgbase = offset_in_page(buf);
+	p -= *pgbase;
+	while (p < buf + buflen) {
+		*(pages++) = virt_to_page(p);
+		p += PAGE_CACHE_SIZE;
+	}
+}
+
+struct nfs4_cached_acl {
+	int cached;
+	size_t len;
+	char data[0];
+};
+
+static void nfs4_set_cached_acl(struct inode *inode, struct nfs4_cached_acl *acl)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	spin_lock(&inode->i_lock);
+	kfree(nfsi->nfs4_acl);
+	nfsi->nfs4_acl = acl;
+	spin_unlock(&inode->i_lock);
+}
+
+static void nfs4_zap_acl_attr(struct inode *inode)
+{
+	nfs4_set_cached_acl(inode, NULL);
+}
+
+static inline ssize_t nfs4_read_cached_acl(struct inode *inode, char *buf, size_t buflen)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs4_cached_acl *acl;
+	int ret = -ENOENT;
+
+	spin_lock(&inode->i_lock);
+	acl = nfsi->nfs4_acl;
+	if (acl == NULL)
+		goto out;
+	if (buf == NULL) /* user is just asking for length */
+		goto out_len;
+	if (acl->cached == 0)
+		goto out;
+	ret = -ERANGE; /* see getxattr(2) man page */
+	if (acl->len > buflen)
+		goto out;
+	memcpy(buf, acl->data, acl->len);
+out_len:
+	ret = acl->len;
+out:
+	spin_unlock(&inode->i_lock);
+	return ret;
+}
+
+static void nfs4_write_cached_acl(struct inode *inode, const char *buf, size_t acl_len)
+{
+	struct nfs4_cached_acl *acl;
+
+	if (buf && acl_len <= PAGE_SIZE) {
+		acl = kmalloc(sizeof(*acl) + acl_len, GFP_KERNEL);
+		if (acl == NULL)
+			goto out;
+		acl->cached = 1;
+		memcpy(acl->data, buf, acl_len);
+	} else {
+		acl = kmalloc(sizeof(*acl), GFP_KERNEL);
+		if (acl == NULL)
+			goto out;
+		acl->cached = 0;
+	}
+	acl->len = acl_len;
+out:
+	nfs4_set_cached_acl(inode, acl);
+}
+
+static inline ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
+{
+	struct page *pages[NFS4ACL_MAXPAGES];
+	struct nfs_getaclargs args = {
+		.fh = NFS_FH(inode),
+		.acl_pages = pages,
+		.acl_len = buflen,
+	};
+	size_t resp_len = buflen;
+	void *resp_buf;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL],
+		.rpc_argp = &args,
+		.rpc_resp = &resp_len,
+	};
+	struct page *localpage = NULL;
+	int ret;
+
+	if (buflen < PAGE_SIZE) {
+		/* As long as we're doing a round trip to the server anyway,
+		 * let's be prepared for a page of acl data. */
+		localpage = alloc_page(GFP_KERNEL);
+		resp_buf = page_address(localpage);
+		if (localpage == NULL)
+			return -ENOMEM;
+		args.acl_pages[0] = localpage;
+		args.acl_pgbase = 0;
+		args.acl_len = PAGE_SIZE;
+	} else {
+		resp_buf = buf;
+		buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase);
+	}
+	ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+	if (ret)
+		goto out_free;
+	if (resp_len > args.acl_len)
+		nfs4_write_cached_acl(inode, NULL, resp_len);
+	else
+		nfs4_write_cached_acl(inode, resp_buf, resp_len);
+	if (buf) {
+		ret = -ERANGE;
+		if (resp_len > buflen)
+			goto out_free;
+		if (localpage)
+			memcpy(buf, resp_buf, resp_len);
+	}
+	ret = resp_len;
+out_free:
+	if (localpage)
+		__free_page(localpage);
+	return ret;
+}
+
+static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	int ret;
+
+	if (!nfs4_server_supports_acls(server))
+		return -EOPNOTSUPP;
+	ret = nfs_revalidate_inode(server, inode);
+	if (ret < 0)
+		return ret;
+	ret = nfs4_read_cached_acl(inode, buf, buflen);
+	if (ret != -ENOENT)
+		return ret;
+	return nfs4_get_acl_uncached(inode, buf, buflen);
+}
+
+static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct page *pages[NFS4ACL_MAXPAGES];
+	struct nfs_setaclargs arg = {
+		.fh		= NFS_FH(inode),
+		.acl_pages	= pages,
+		.acl_len	= buflen,
+	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_SETACL],
+		.rpc_argp	= &arg,
+		.rpc_resp	= NULL,
+	};
+	int ret;
+
+	if (!nfs4_server_supports_acls(server))
+		return -EOPNOTSUPP;
+	buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
+	ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0);
+	if (ret == 0)
+		nfs4_write_cached_acl(inode, buf, buflen);
+	return ret;
+}
+
 static int
 nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server)
 {
@@ -2448,14 +2631,11 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
 	down_read(&clp->cl_sem);
 	nlo.clientid = clp->cl_clientid;
 	down(&state->lock_sema);
-	lsp = nfs4_find_lock_state(state, request->fl_owner);
-	if (lsp)
-		nlo.id = lsp->ls_id; 
-	else {
-		spin_lock(&clp->cl_lock);
-		nlo.id = nfs4_alloc_lockowner_id(clp);
-		spin_unlock(&clp->cl_lock);
-	}
+	status = nfs4_set_lock_state(state, request);
+	if (status != 0)
+		goto out;
+	lsp = request->fl_u.nfs4_fl.owner;
+	nlo.id = lsp->ls_id; 
 	arg.u.lockt = &nlo;
 	status = rpc_call_sync(server->client, &msg, 0);
 	if (!status) {
@@ -2476,8 +2656,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
 		request->fl_pid = 0;
 		status = 0;
 	}
-	if (lsp)
-		nfs4_put_lock_state(lsp);
+out:
 	up(&state->lock_sema);
 	up_read(&clp->cl_sem);
 	return status;
@@ -2537,28 +2716,26 @@ static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock
 	};
 	struct nfs4_lock_state *lsp;
 	struct nfs_locku_opargs luargs;
-	int status = 0;
+	int status;
 			
 	down_read(&clp->cl_sem);
 	down(&state->lock_sema);
-	lsp = nfs4_find_lock_state(state, request->fl_owner);
-	if (!lsp)
+	status = nfs4_set_lock_state(state, request);
+	if (status != 0)
 		goto out;
+	lsp = request->fl_u.nfs4_fl.owner;
 	/* We might have lost the locks! */
-	if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) {
-		luargs.seqid = lsp->ls_seqid;
-		memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid));
-		arg.u.locku = &luargs;
-		status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
-		nfs4_increment_lock_seqid(status, lsp);
-	}
+	if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0)
+		goto out;
+	luargs.seqid = lsp->ls_seqid;
+	memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid));
+	arg.u.locku = &luargs;
+	status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
+	nfs4_increment_lock_seqid(status, lsp);
 
-	if (status == 0) {
+	if (status == 0)
 		memcpy(&lsp->ls_stateid,  &res.u.stateid, 
 				sizeof(lsp->ls_stateid));
-		nfs4_notify_unlck(state, request, lsp);
-	}
-	nfs4_put_lock_state(lsp);
 out:
 	up(&state->lock_sema);
 	if (status == 0)
@@ -2584,7 +2761,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r
 {
 	struct inode *inode = state->inode;
 	struct nfs_server *server = NFS_SERVER(inode);
-	struct nfs4_lock_state *lsp;
+	struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner;
 	struct nfs_lockargs arg = {
 		.fh = NFS_FH(inode),
 		.type = nfs4_lck_type(cmd, request),
@@ -2606,9 +2783,6 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r
 	};
 	int status;
 
-	lsp = nfs4_get_lock_state(state, request->fl_owner);
-	if (lsp == NULL)
-		return -ENOMEM;
 	if (!(lsp->ls_flags & NFS_LOCK_INITIALIZED)) {
 		struct nfs4_state_owner *owner = state->owner;
 		struct nfs_open_to_lock otl = {
@@ -2630,38 +2804,57 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r
 		* seqid mutating errors */
 		nfs4_increment_seqid(status, owner);
 		up(&owner->so_sema);
+		if (status == 0) {
+			lsp->ls_flags |= NFS_LOCK_INITIALIZED;
+			lsp->ls_seqid++;
+		}
 	} else {
 		struct nfs_exist_lock el = {
 			.seqid = lsp->ls_seqid,
 		};
 		memcpy(&el.stateid, &lsp->ls_stateid, sizeof(el.stateid));
 		largs.u.exist_lock = &el;
-		largs.new_lock_owner = 0;
 		arg.u.lock = &largs;
 		status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
+		/* increment seqid on success, and * seqid mutating errors*/
+		nfs4_increment_lock_seqid(status, lsp);
 	}
-	/* increment seqid on success, and * seqid mutating errors*/
-	nfs4_increment_lock_seqid(status, lsp);
 	/* save the returned stateid. */
-	if (status == 0) {
+	if (status == 0)
 		memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(nfs4_stateid));
-		lsp->ls_flags |= NFS_LOCK_INITIALIZED;
-		if (!reclaim)
-			nfs4_notify_setlk(state, request, lsp);
-	} else if (status == -NFS4ERR_DENIED)
+	else if (status == -NFS4ERR_DENIED)
 		status = -EAGAIN;
-	nfs4_put_lock_state(lsp);
 	return status;
 }
 
 static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request)
 {
-	return _nfs4_do_setlk(state, F_SETLK, request, 1);
+	struct nfs_server *server = NFS_SERVER(state->inode);
+	struct nfs4_exception exception = { };
+	int err;
+
+	do {
+		err = _nfs4_do_setlk(state, F_SETLK, request, 1);
+		if (err != -NFS4ERR_DELAY)
+			break;
+		nfs4_handle_exception(server, err, &exception);
+	} while (exception.retry);
+	return err;
 }
 
 static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request)
 {
-	return _nfs4_do_setlk(state, F_SETLK, request, 0);
+	struct nfs_server *server = NFS_SERVER(state->inode);
+	struct nfs4_exception exception = { };
+	int err;
+
+	do {
+		err = _nfs4_do_setlk(state, F_SETLK, request, 0);
+		if (err != -NFS4ERR_DELAY)
+			break;
+		nfs4_handle_exception(server, err, &exception);
+	} while (exception.retry);
+	return err;
 }
 
 static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
@@ -2671,7 +2864,9 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
 
 	down_read(&clp->cl_sem);
 	down(&state->lock_sema);
-	status = _nfs4_do_setlk(state, cmd, request, 0);
+	status = nfs4_set_lock_state(state, request);
+	if (status == 0)
+		status = _nfs4_do_setlk(state, cmd, request, 0);
 	up(&state->lock_sema);
 	if (status == 0) {
 		/* Note: we always want to sleep here! */
@@ -2729,10 +2924,53 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
 		if (signalled())
 			break;
 	} while(status < 0);
-
 	return status;
 }
 
+
+#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
+
+int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf,
+		size_t buflen, int flags)
+{
+	struct inode *inode = dentry->d_inode;
+
+	if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0)
+		return -EOPNOTSUPP;
+
+	if (!S_ISREG(inode->i_mode) &&
+	    (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
+		return -EPERM;
+
+	return nfs4_proc_set_acl(inode, buf, buflen);
+}
+
+/* The getxattr man page suggests returning -ENODATA for unknown attributes,
+ * and that's what we'll do for e.g. user attributes that haven't been set.
+ * But we'll follow ext2/ext3's lead by returning -EOPNOTSUPP for unsupported
+ * attributes in kernel-managed attribute namespaces. */
+ssize_t nfs4_getxattr(struct dentry *dentry, const char *key, void *buf,
+		size_t buflen)
+{
+	struct inode *inode = dentry->d_inode;
+
+	if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0)
+		return -EOPNOTSUPP;
+
+	return nfs4_proc_get_acl(inode, buf, buflen);
+}
+
+ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen)
+{
+	size_t len = strlen(XATTR_NAME_NFSV4_ACL) + 1;
+
+	if (buf && buflen < len)
+		return -ERANGE;
+	if (buf)
+		memcpy(buf, XATTR_NAME_NFSV4_ACL, len);
+	return len;
+}
+
 struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = {
 	.recover_open	= nfs4_open_reclaim,
 	.recover_lock	= nfs4_lock_reclaim,
@@ -2743,10 +2981,20 @@ struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops = {
 	.recover_lock	= nfs4_lock_expired,
 };
 
+static struct inode_operations nfs4_file_inode_operations = {
+	.permission	= nfs_permission,
+	.getattr	= nfs_getattr,
+	.setattr	= nfs_setattr,
+	.getxattr	= nfs4_getxattr,
+	.setxattr	= nfs4_setxattr,
+	.listxattr	= nfs4_listxattr,
+};
+
 struct nfs_rpc_ops	nfs_v4_clientops = {
 	.version	= 4,			/* protocol version */
 	.dentry_ops	= &nfs4_dentry_operations,
 	.dir_inode_ops	= &nfs4_dir_inode_operations,
+	.file_inode_ops	= &nfs4_file_inode_operations,
 	.getroot	= nfs4_proc_get_root,
 	.getattr	= nfs4_proc_getattr,
 	.setattr	= nfs4_proc_setattr,
@@ -2777,6 +3025,7 @@ struct nfs_rpc_ops	nfs_v4_clientops = {
 	.file_open      = nfs4_proc_file_open,
 	.file_release   = nfs4_proc_file_release,
 	.lock		= nfs4_proc_lock,
+	.clear_acl_cache = nfs4_zap_acl_attr,
 };
 
 /*
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 667e06f1c647..a3001628ad32 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -53,6 +53,7 @@
 #include <linux/nfs.h>
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
+#include "nfs4_fs.h"
 
 #define NFSDBG_FACILITY	NFSDBG_PROC
 
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 231cebce3c87..afe587d82f1e 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -46,24 +46,18 @@
 #include <linux/workqueue.h>
 #include <linux/bitops.h>
 
+#include "nfs4_fs.h"
 #include "callback.h"
 #include "delegation.h"
 
 #define OPENOWNER_POOL_SIZE	8
 
-static DEFINE_SPINLOCK(state_spinlock);
-
-nfs4_stateid zero_stateid;
-
-#if 0
-nfs4_stateid one_stateid =
-	{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
-#endif
+const nfs4_stateid zero_stateid;
 
+static DEFINE_SPINLOCK(state_spinlock);
 static LIST_HEAD(nfs4_clientid_list);
 
 static void nfs4_recover_state(void *);
-extern void nfs4_renew_state(void *);
 
 void
 init_nfsv4_state(struct nfs_server *server)
@@ -116,6 +110,7 @@ nfs4_alloc_client(struct in_addr *addr)
 	INIT_LIST_HEAD(&clp->cl_superblocks);
 	init_waitqueue_head(&clp->cl_waitq);
 	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client");
+	clp->cl_rpcclient = ERR_PTR(-EINVAL);
 	clp->cl_boot_time = CURRENT_TIME;
 	clp->cl_state = 1 << NFS4CLNT_OK;
 	return clp;
@@ -137,7 +132,7 @@ nfs4_free_client(struct nfs4_client *clp)
 	if (clp->cl_cred)
 		put_rpccred(clp->cl_cred);
 	nfs_idmap_delete(clp);
-	if (clp->cl_rpcclient)
+	if (!IS_ERR(clp->cl_rpcclient))
 		rpc_shutdown_client(clp->cl_rpcclient);
 	kfree(clp);
 	nfs_callback_down();
@@ -365,7 +360,7 @@ nfs4_alloc_open_state(void)
 	atomic_set(&state->count, 1);
 	INIT_LIST_HEAD(&state->lock_states);
 	init_MUTEX(&state->lock_sema);
-	rwlock_init(&state->state_lock);
+	spin_lock_init(&state->state_lock);
 	return state;
 }
 
@@ -547,16 +542,6 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
 	return NULL;
 }
 
-struct nfs4_lock_state *
-nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
-{
-	struct nfs4_lock_state *lsp;
-	read_lock(&state->state_lock);
-	lsp = __nfs4_find_lock_state(state, fl_owner);
-	read_unlock(&state->state_lock);
-	return lsp;
-}
-
 /*
  * Return a compatible lock_state. If no initialized lock_state structure
  * exists, return an uninitialized one.
@@ -573,14 +558,13 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
 		return NULL;
 	lsp->ls_flags = 0;
 	lsp->ls_seqid = 0;	/* arbitrary */
-	lsp->ls_id = -1; 
 	memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data));
 	atomic_set(&lsp->ls_count, 1);
 	lsp->ls_owner = fl_owner;
-	INIT_LIST_HEAD(&lsp->ls_locks);
 	spin_lock(&clp->cl_lock);
 	lsp->ls_id = nfs4_alloc_lockowner_id(clp);
 	spin_unlock(&clp->cl_lock);
+	INIT_LIST_HEAD(&lsp->ls_locks);
 	return lsp;
 }
 
@@ -590,121 +574,112 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
  *
  * The caller must be holding state->lock_sema and clp->cl_sem
  */
-struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
+static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
 {
-	struct nfs4_lock_state * lsp;
+	struct nfs4_lock_state *lsp, *new = NULL;
 	
-	lsp = nfs4_find_lock_state(state, owner);
-	if (lsp == NULL)
-		lsp = nfs4_alloc_lock_state(state, owner);
+	for(;;) {
+		spin_lock(&state->state_lock);
+		lsp = __nfs4_find_lock_state(state, owner);
+		if (lsp != NULL)
+			break;
+		if (new != NULL) {
+			new->ls_state = state;
+			list_add(&new->ls_locks, &state->lock_states);
+			set_bit(LK_STATE_IN_USE, &state->flags);
+			lsp = new;
+			new = NULL;
+			break;
+		}
+		spin_unlock(&state->state_lock);
+		new = nfs4_alloc_lock_state(state, owner);
+		if (new == NULL)
+			return NULL;
+	}
+	spin_unlock(&state->state_lock);
+	kfree(new);
 	return lsp;
 }
 
 /*
- * Byte-range lock aware utility to initialize the stateid of read/write
- * requests.
+ * Release reference to lock_state, and free it if we see that
+ * it is no longer in use
  */
-void
-nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner)
+static void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
 {
-	if (test_bit(LK_STATE_IN_USE, &state->flags)) {
-		struct nfs4_lock_state *lsp;
+	struct nfs4_state *state;
 
-		lsp = nfs4_find_lock_state(state, fl_owner);
-		if (lsp) {
-			memcpy(dst, &lsp->ls_stateid, sizeof(*dst));
-			nfs4_put_lock_state(lsp);
-			return;
-		}
-	}
-	memcpy(dst, &state->stateid, sizeof(*dst));
+	if (lsp == NULL)
+		return;
+	state = lsp->ls_state;
+	if (!atomic_dec_and_lock(&lsp->ls_count, &state->state_lock))
+		return;
+	list_del(&lsp->ls_locks);
+	if (list_empty(&state->lock_states))
+		clear_bit(LK_STATE_IN_USE, &state->flags);
+	spin_unlock(&state->state_lock);
+	kfree(lsp);
 }
 
-/*
-* Called with state->lock_sema and clp->cl_sem held.
-*/
-void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp)
+static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
 {
-	if (status == NFS_OK || seqid_mutating_err(-status))
-		lsp->ls_seqid++;
-}
+	struct nfs4_lock_state *lsp = src->fl_u.nfs4_fl.owner;
 
-/* 
-* Check to see if the request lock (type FL_UNLK) effects the fl lock.
-*
-* fl and request must have the same posix owner
-*
-* return: 
-* 0 -> fl not effected by request
-* 1 -> fl consumed by request
-*/
+	dst->fl_u.nfs4_fl.owner = lsp;
+	atomic_inc(&lsp->ls_count);
+}
 
-static int
-nfs4_check_unlock(struct file_lock *fl, struct file_lock *request)
+static void nfs4_fl_release_lock(struct file_lock *fl)
 {
-	if (fl->fl_start >= request->fl_start && fl->fl_end <= request->fl_end)
-		return 1;
-	return 0;
+	nfs4_put_lock_state(fl->fl_u.nfs4_fl.owner);
 }
 
-/*
- * Post an initialized lock_state on the state->lock_states list.
- */
-void nfs4_notify_setlk(struct nfs4_state *state, struct file_lock *request, struct nfs4_lock_state *lsp)
+static struct file_lock_operations nfs4_fl_lock_ops = {
+	.fl_copy_lock = nfs4_fl_copy_lock,
+	.fl_release_private = nfs4_fl_release_lock,
+};
+
+int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
 {
-	if (!list_empty(&lsp->ls_locks))
-		return;
-	atomic_inc(&lsp->ls_count);
-	write_lock(&state->state_lock);
-	list_add(&lsp->ls_locks, &state->lock_states);
-	set_bit(LK_STATE_IN_USE, &state->flags);
-	write_unlock(&state->state_lock);
+	struct nfs4_lock_state *lsp;
+
+	if (fl->fl_ops != NULL)
+		return 0;
+	lsp = nfs4_get_lock_state(state, fl->fl_owner);
+	if (lsp == NULL)
+		return -ENOMEM;
+	fl->fl_u.nfs4_fl.owner = lsp;
+	fl->fl_ops = &nfs4_fl_lock_ops;
+	return 0;
 }
 
-/* 
- * to decide to 'reap' lock state:
- * 1) search i_flock for file_locks with fl.lock_state = to ls.
- * 2) determine if unlock will consume found lock. 
- * 	if so, reap
- *
- * 	else, don't reap.
- *
+/*
+ * Byte-range lock aware utility to initialize the stateid of read/write
+ * requests.
  */
-void
-nfs4_notify_unlck(struct nfs4_state *state, struct file_lock *request, struct nfs4_lock_state *lsp)
+void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner)
 {
-	struct inode *inode = state->inode;
-	struct file_lock *fl;
+	struct nfs4_lock_state *lsp;
 
-	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
-		if (!(fl->fl_flags & FL_POSIX))
-			continue;
-		if (fl->fl_owner != lsp->ls_owner)
-			continue;
-		/* Exit if we find at least one lock which is not consumed */
-		if (nfs4_check_unlock(fl,request) == 0)
-			return;
-	}
+	memcpy(dst, &state->stateid, sizeof(*dst));
+	if (test_bit(LK_STATE_IN_USE, &state->flags) == 0)
+		return;
 
-	write_lock(&state->state_lock);
-	list_del_init(&lsp->ls_locks);
-	if (list_empty(&state->lock_states))
-		clear_bit(LK_STATE_IN_USE, &state->flags);
-	write_unlock(&state->state_lock);
+	spin_lock(&state->state_lock);
+	lsp = __nfs4_find_lock_state(state, fl_owner);
+	if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
+		memcpy(dst, &lsp->ls_stateid, sizeof(*dst));
+	spin_unlock(&state->state_lock);
 	nfs4_put_lock_state(lsp);
 }
 
 /*
- * Release reference to lock_state, and free it if we see that
- * it is no longer in use
- */
-void
-nfs4_put_lock_state(struct nfs4_lock_state *lsp)
+* Called with state->lock_sema and clp->cl_sem held.
+*/
+void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp)
 {
-	if (!atomic_dec_and_test(&lsp->ls_count))
-		return;
-	BUG_ON (!list_empty(&lsp->ls_locks));
-	kfree(lsp);
+	if (status == NFS_OK || seqid_mutating_err(-status))
+		lsp->ls_seqid++;
 }
 
 /*
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 5f4de05763c9..6c564ef9489e 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -51,6 +51,7 @@
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_idmap.h>
+#include "nfs4_fs.h"
 
 #define NFSDBG_FACILITY		NFSDBG_XDR
 
@@ -82,12 +83,16 @@ static int nfs_stat_to_errno(int);
 #define encode_getfh_maxsz      (op_encode_hdr_maxsz)
 #define decode_getfh_maxsz      (op_decode_hdr_maxsz + 1 + \
 				((3+NFS4_FHSIZE) >> 2))
-#define encode_getattr_maxsz    (op_encode_hdr_maxsz + 3)
+#define nfs4_fattr_bitmap_maxsz 3
+#define encode_getattr_maxsz    (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
 #define nfs4_name_maxsz		(1 + ((3 + NFS4_MAXNAMLEN) >> 2))
 #define nfs4_path_maxsz		(1 + ((3 + NFS4_MAXPATHLEN) >> 2))
-#define nfs4_fattr_bitmap_maxsz (36 + 2 * nfs4_name_maxsz)
-#define decode_getattr_maxsz    (op_decode_hdr_maxsz + 3 + \
-                                nfs4_fattr_bitmap_maxsz)
+/* This is based on getfattr, which uses the most attributes: */
+#define nfs4_fattr_value_maxsz	(1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \
+				3 + 3 + 3 + 2 * nfs4_name_maxsz))
+#define nfs4_fattr_maxsz	(nfs4_fattr_bitmap_maxsz + \
+				nfs4_fattr_value_maxsz)
+#define decode_getattr_maxsz    (op_decode_hdr_maxsz + nfs4_fattr_maxsz)
 #define encode_savefh_maxsz     (op_encode_hdr_maxsz)
 #define decode_savefh_maxsz     (op_decode_hdr_maxsz)
 #define encode_fsinfo_maxsz	(op_encode_hdr_maxsz + 2)
@@ -122,11 +127,11 @@ static int nfs_stat_to_errno(int);
 #define encode_symlink_maxsz	(op_encode_hdr_maxsz + \
 				1 + nfs4_name_maxsz + \
 				nfs4_path_maxsz + \
-				nfs4_fattr_bitmap_maxsz)
+				nfs4_fattr_maxsz)
 #define decode_symlink_maxsz	(op_decode_hdr_maxsz + 8)
 #define encode_create_maxsz	(op_encode_hdr_maxsz + \
 				2 + nfs4_name_maxsz + \
-				nfs4_fattr_bitmap_maxsz)
+				nfs4_fattr_maxsz)
 #define decode_create_maxsz	(op_decode_hdr_maxsz + 8)
 #define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4)
 #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
@@ -205,7 +210,7 @@ static int nfs_stat_to_errno(int);
 #define NFS4_enc_setattr_sz     (compound_encode_hdr_maxsz + \
                                 encode_putfh_maxsz + \
                                 op_encode_hdr_maxsz + 4 + \
-                                nfs4_fattr_bitmap_maxsz + \
+                                nfs4_fattr_maxsz + \
                                 encode_getattr_maxsz)
 #define NFS4_dec_setattr_sz     (compound_decode_hdr_maxsz + \
                                 decode_putfh_maxsz + \
@@ -360,6 +365,20 @@ static int nfs_stat_to_errno(int);
 				encode_delegreturn_maxsz)
 #define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \
 				decode_delegreturn_maxsz)
+#define NFS4_enc_getacl_sz	(compound_encode_hdr_maxsz + \
+				encode_putfh_maxsz + \
+				encode_getattr_maxsz)
+#define NFS4_dec_getacl_sz	(compound_decode_hdr_maxsz + \
+				decode_putfh_maxsz + \
+				op_decode_hdr_maxsz + \
+				nfs4_fattr_bitmap_maxsz + 1)
+#define NFS4_enc_setacl_sz	(compound_encode_hdr_maxsz + \
+				encode_putfh_maxsz + \
+				op_encode_hdr_maxsz + 4 + \
+				nfs4_fattr_bitmap_maxsz + 1)
+#define NFS4_dec_setacl_sz	(compound_decode_hdr_maxsz + \
+				decode_putfh_maxsz + \
+				op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
 
 static struct {
 	unsigned int	mode;
@@ -459,7 +478,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s
 	 * In the worst-case, this would be
 	 *   12(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime)
 	 *          = 36 bytes, plus any contribution from variable-length fields
-	 *            such as owner/group/acl's.
+	 *            such as owner/group.
 	 */
 	len = 16;
 
@@ -660,8 +679,6 @@ static int encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1
 
 static int encode_getfattr(struct xdr_stream *xdr, const u32* bitmask)
 {
-	extern u32 nfs4_fattr_bitmap[];
-
 	return encode_getattr_two(xdr,
 			bitmask[0] & nfs4_fattr_bitmap[0],
 			bitmask[1] & nfs4_fattr_bitmap[1]);
@@ -669,8 +686,6 @@ static int encode_getfattr(struct xdr_stream *xdr, const u32* bitmask)
 
 static int encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask)
 {
-	extern u32 nfs4_fsinfo_bitmap[];
-
 	return encode_getattr_two(xdr, bitmask[0] & nfs4_fsinfo_bitmap[0],
 			bitmask[1] & nfs4_fsinfo_bitmap[1]);
 }
@@ -969,7 +984,6 @@ static int encode_putrootfh(struct xdr_stream *xdr)
 
 static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx)
 {
-	extern nfs4_stateid zero_stateid;
 	nfs4_stateid stateid;
 	uint32_t *p;
 
@@ -1000,6 +1014,10 @@ static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args)
 static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req)
 {
 	struct rpc_auth *auth = req->rq_task->tk_auth;
+	uint32_t attrs[2] = {
+		FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID,
+		FATTR4_WORD1_MOUNTED_ON_FILEID,
+	};
 	int replen;
 	uint32_t *p;
 
@@ -1010,13 +1028,20 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
 	WRITE32(readdir->count >> 1);  /* We're not doing readdirplus */
 	WRITE32(readdir->count);
 	WRITE32(2);
-	if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) {
-		WRITE32(0);
-		WRITE32(FATTR4_WORD1_MOUNTED_ON_FILEID);
-	} else {
-		WRITE32(FATTR4_WORD0_FILEID);
-		WRITE32(0);
-	}
+	/* Switch to mounted_on_fileid if the server supports it */
+	if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)
+		attrs[0] &= ~FATTR4_WORD0_FILEID;
+	else
+		attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
+	WRITE32(attrs[0] & readdir->bitmask[0]);
+	WRITE32(attrs[1] & readdir->bitmask[1]);
+	dprintk("%s: cookie = %Lu, verifier = 0x%x%x, bitmap = 0x%x%x\n",
+			__FUNCTION__,
+			(unsigned long long)readdir->cookie,
+			((u32 *)readdir->verifier.data)[0],
+			((u32 *)readdir->verifier.data)[1],
+			attrs[0] & readdir->bitmask[0],
+			attrs[1] & readdir->bitmask[1]);
 
 	/* set up reply kvec
 	 *    toplevel_status + taglen + rescount + OP_PUTFH + status
@@ -1025,6 +1050,9 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
 	replen = (RPC_REPHDRSIZE + auth->au_rslack + 9) << 2;
 	xdr_inline_pages(&req->rq_rcv_buf, replen, readdir->pages,
 			 readdir->pgbase, readdir->count);
+	dprintk("%s: inlined page args = (%u, %p, %u, %u)\n",
+			__FUNCTION__, replen, readdir->pages,
+			readdir->pgbase, readdir->count);
 
 	return 0;
 }
@@ -1089,6 +1117,25 @@ static int encode_renew(struct xdr_stream *xdr, const struct nfs4_client *client
 }
 
 static int
+encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(4+sizeof(zero_stateid.data));
+	WRITE32(OP_SETATTR);
+	WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data));
+	RESERVE_SPACE(2*4);
+	WRITE32(1);
+	WRITE32(FATTR4_WORD0_ACL);
+	if (arg->acl_len % 4)
+		return -EINVAL;
+	RESERVE_SPACE(4);
+	WRITE32(arg->acl_len);
+	xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len);
+	return 0;
+}
+
+static int
 encode_savefh(struct xdr_stream *xdr)
 {
 	uint32_t *p;
@@ -1632,6 +1679,34 @@ out:
 }
 
 /*
+ * Encode a GETACL request
+ */
+static int
+nfs4_xdr_enc_getacl(struct rpc_rqst *req, uint32_t *p,
+		struct nfs_getaclargs *args)
+{
+	struct xdr_stream xdr;
+	struct rpc_auth *auth = req->rq_task->tk_auth;
+	struct compound_hdr hdr = {
+		.nops   = 2,
+	};
+	int replen, status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	status = encode_putfh(&xdr, args->fh);
+	if (status)
+		goto out;
+	status = encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0);
+	/* set up reply buffer: */
+	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_getacl_sz) << 2;
+	xdr_inline_pages(&req->rq_rcv_buf, replen,
+		args->acl_pages, args->acl_pgbase, args->acl_len);
+out:
+	return status;
+}
+
+/*
  * Encode a WRITE request
  */
 static int nfs4_xdr_enc_write(struct rpc_rqst *req, uint32_t *p, struct nfs_writeargs *args)
@@ -1697,7 +1772,6 @@ static int nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, uint32_t *p, struct nfs4_fs
  */
 static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, uint32_t *p, const struct nfs4_pathconf_arg *args)
 {
-	extern u32 nfs4_pathconf_bitmap[2];
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.nops = 2,
@@ -1718,7 +1792,6 @@ static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, uint32_t *p, const struct
  */
 static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, uint32_t *p, const struct nfs4_statfs_arg *args)
 {
-	extern u32 nfs4_statfs_bitmap[];
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.nops = 2,
@@ -3003,6 +3076,11 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
 		return status;
 	READ_BUF(8);
 	COPYMEM(readdir->verifier.data, 8);
+	dprintk("%s: verifier = 0x%x%x\n",
+			__FUNCTION__,
+			((u32 *)readdir->verifier.data)[0],
+			((u32 *)readdir->verifier.data)[1]);
+
 
 	hdrlen = (char *) p - (char *) iov->iov_base;
 	recvd = rcvbuf->len - hdrlen;
@@ -3017,12 +3095,14 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
 	for (nr = 0; *p++; nr++) {
 		if (p + 3 > end)
 			goto short_pkt;
+		dprintk("cookie = %Lu, ", *((unsigned long long *)p));
 		p += 2;			/* cookie */
 		len = ntohl(*p++);	/* filename length */
 		if (len > NFS4_MAXNAMLEN) {
 			printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len);
 			goto err_unmap;
 		}
+		dprintk("filename = %*s\n", len, (char *)p);
 		p += XDR_QUADLEN(len);
 		if (p + 1 > end)
 			goto short_pkt;
@@ -3042,6 +3122,7 @@ out:
 	kunmap_atomic(kaddr, KM_USER0);
 	return 0;
 short_pkt:
+	dprintk("%s: short packet at entry %d\n", __FUNCTION__, nr);
 	entry[0] = entry[1] = 0;
 	/* truncate listing ? */
 	if (!nr) {
@@ -3127,6 +3208,47 @@ static int decode_renew(struct xdr_stream *xdr)
 	return decode_op_hdr(xdr, OP_RENEW);
 }
 
+static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
+		size_t *acl_len)
+{
+	uint32_t *savep;
+	uint32_t attrlen,
+		 bitmap[2] = {0};
+	struct kvec *iov = req->rq_rcv_buf.head;
+	int status;
+
+	*acl_len = 0;
+	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+		goto out;
+	if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
+		goto out;
+	if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
+		goto out;
+
+	if (unlikely(bitmap[0] & (FATTR4_WORD0_ACL - 1U)))
+		return -EIO;
+	if (likely(bitmap[0] & FATTR4_WORD0_ACL)) {
+		int hdrlen, recvd;
+
+		/* We ignore &savep and don't do consistency checks on
+		 * the attr length.  Let userspace figure it out.... */
+		hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base;
+		recvd = req->rq_rcv_buf.len - hdrlen;
+		if (attrlen > recvd) {
+			printk(KERN_WARNING "NFS: server cheating in getattr"
+					" acl reply: attrlen %u > recvd %u\n",
+					attrlen, recvd);
+			return -EINVAL;
+		}
+		if (attrlen <= *acl_len)
+			xdr_read_pages(xdr, attrlen);
+		*acl_len = attrlen;
+	}
+
+out:
+	return status;
+}
+
 static int
 decode_savefh(struct xdr_stream *xdr)
 {
@@ -3418,6 +3540,71 @@ out:
 
 }
 
+/*
+ * Encode an SETACL request
+ */
+static int
+nfs4_xdr_enc_setacl(struct rpc_rqst *req, uint32_t *p, struct nfs_setaclargs *args)
+{
+        struct xdr_stream xdr;
+        struct compound_hdr hdr = {
+                .nops   = 2,
+        };
+        int status;
+
+        xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+        encode_compound_hdr(&xdr, &hdr);
+        status = encode_putfh(&xdr, args->fh);
+        if (status)
+                goto out;
+        status = encode_setacl(&xdr, args);
+out:
+        return status;
+}
+/*
+ * Decode SETACL response
+ */
+static int
+nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, uint32_t *p, void *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_putfh(&xdr);
+	if (status)
+		goto out;
+	status = decode_setattr(&xdr, res);
+out:
+	return status;
+}
+
+/*
+ * Decode GETACL response
+ */
+static int
+nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, uint32_t *p, size_t *acl_len)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_putfh(&xdr);
+	if (status)
+		goto out;
+	status = decode_getacl(&xdr, rqstp, acl_len);
+
+out:
+	return status;
+}
 
 /*
  * Decode CLOSE response
@@ -3895,6 +4082,12 @@ uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus)
 	}
 	len = XDR_QUADLEN(ntohl(*p++));	/* attribute buffer length */
 	if (len > 0) {
+		if (bitmap[0] & FATTR4_WORD0_RDATTR_ERROR) {
+			bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR;
+			/* Ignore the return value of rdattr_error for now */
+			p++;
+			len--;
+		}
 		if (bitmap[0] == 0 && bitmap[1] == FATTR4_WORD1_MOUNTED_ON_FILEID)
 			xdr_decode_hyper(p, &entry->ino);
 		else if (bitmap[0] == FATTR4_WORD0_FILEID)
@@ -3934,6 +4127,8 @@ static struct {
 	{ NFS4ERR_DQUOT,	EDQUOT		},
 	{ NFS4ERR_STALE,	ESTALE		},
 	{ NFS4ERR_BADHANDLE,	EBADHANDLE	},
+	{ NFS4ERR_BADOWNER,	EINVAL		},
+	{ NFS4ERR_BADNAME,	EINVAL		},
 	{ NFS4ERR_BAD_COOKIE,	EBADCOOKIE	},
 	{ NFS4ERR_NOTSUPP,	ENOTSUPP	},
 	{ NFS4ERR_TOOSMALL,	ETOOSMALL	},
@@ -4019,6 +4214,8 @@ struct rpc_procinfo	nfs4_procedures[] = {
   PROC(READDIR,		enc_readdir,	dec_readdir),
   PROC(SERVER_CAPS,	enc_server_caps, dec_server_caps),
   PROC(DELEGRETURN,	enc_delegreturn, dec_delegreturn),
+  PROC(GETACL,		enc_getacl,	dec_getacl),
+  PROC(SETACL,		enc_setacl,	dec_setacl),
 };
 
 struct rpc_version		nfs_version4 = {
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index fd5bc596fe8a..1b272a135a31 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -124,6 +124,7 @@ enum {
 	Opt_soft, Opt_hard, Opt_intr,
 	Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac, 
 	Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, Opt_udp, Opt_tcp,
+	Opt_acl, Opt_noacl,
 	/* Error token */
 	Opt_err
 };
@@ -158,6 +159,8 @@ static match_table_t __initdata tokens = {
 	{Opt_udp, "udp"},
 	{Opt_tcp, "proto=tcp"},
 	{Opt_tcp, "tcp"},
+	{Opt_acl, "acl"},
+	{Opt_noacl, "noacl"},
 	{Opt_err, NULL}
 	
 };
@@ -266,6 +269,12 @@ static int __init root_nfs_parse(char *name, char *buf)
 			case Opt_tcp:
 				nfs_data.flags |= NFS_MOUNT_TCP;
 				break;
+			case Opt_acl:
+				nfs_data.flags &= ~NFS_MOUNT_NOACL;
+				break;
+			case Opt_noacl:
+				nfs_data.flags |= NFS_MOUNT_NOACL;
+				break;
 			default : 
 				return 0;
 		}
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 4f1ba723848d..d53857b148e2 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -107,11 +107,38 @@ void nfs_unlock_request(struct nfs_page *req)
 	smp_mb__before_clear_bit();
 	clear_bit(PG_BUSY, &req->wb_flags);
 	smp_mb__after_clear_bit();
-	wake_up_all(&req->wb_context->waitq);
+	wake_up_bit(&req->wb_flags, PG_BUSY);
 	nfs_release_request(req);
 }
 
 /**
+ * nfs_set_page_writeback_locked - Lock a request for writeback
+ * @req:
+ */
+int nfs_set_page_writeback_locked(struct nfs_page *req)
+{
+	struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
+
+	if (!nfs_lock_request(req))
+		return 0;
+	radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
+	return 1;
+}
+
+/**
+ * nfs_clear_page_writeback - Unlock request and wake up sleepers
+ */
+void nfs_clear_page_writeback(struct nfs_page *req)
+{
+	struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
+
+	spin_lock(&nfsi->req_lock);
+	radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
+	spin_unlock(&nfsi->req_lock);
+	nfs_unlock_request(req);
+}
+
+/**
  * nfs_clear_request - Free up all resources allocated to the request
  * @req:
  *
@@ -150,34 +177,15 @@ nfs_release_request(struct nfs_page *req)
 	nfs_page_free(req);
 }
 
-/**
- * nfs_list_add_request - Insert a request into a sorted list
- * @req: request
- * @head: head of list into which to insert the request.
- *
- * Note that the wb_list is sorted by page index in order to facilitate
- * coalescing of requests.
- * We use an insertion sort that is optimized for the case of appended
- * writes.
- */
-void
-nfs_list_add_request(struct nfs_page *req, struct list_head *head)
+static int nfs_wait_bit_interruptible(void *word)
 {
-	struct list_head *pos;
+	int ret = 0;
 
-#ifdef NFS_PARANOIA
-	if (!list_empty(&req->wb_list)) {
-		printk(KERN_ERR "NFS: Add to list failed!\n");
-		BUG();
-	}
-#endif
-	list_for_each_prev(pos, head) {
-		struct nfs_page	*p = nfs_list_entry(pos);
-		if (p->wb_index < req->wb_index)
-			break;
-	}
-	list_add(&req->wb_list, pos);
-	req->wb_list_head = head;
+	if (signal_pending(current))
+		ret = -ERESTARTSYS;
+	else
+		schedule();
+	return ret;
 }
 
 /**
@@ -190,12 +198,22 @@ nfs_list_add_request(struct nfs_page *req, struct list_head *head)
 int
 nfs_wait_on_request(struct nfs_page *req)
 {
-	struct inode	*inode = req->wb_context->dentry->d_inode;
-        struct rpc_clnt	*clnt = NFS_CLIENT(inode);
-
-	if (!NFS_WBACK_BUSY(req))
-		return 0;
-	return nfs_wait_event(clnt, req->wb_context->waitq, !NFS_WBACK_BUSY(req));
+        struct rpc_clnt	*clnt = NFS_CLIENT(req->wb_context->dentry->d_inode);
+	sigset_t oldmask;
+	int ret = 0;
+
+	if (!test_bit(PG_BUSY, &req->wb_flags))
+		goto out;
+	/*
+	 * Note: the call to rpc_clnt_sigmask() suffices to ensure that we
+	 *	 are not interrupted if intr flag is not set
+	 */
+	rpc_clnt_sigmask(clnt, &oldmask);
+	ret = out_of_line_wait_on_bit(&req->wb_flags, PG_BUSY,
+			nfs_wait_bit_interruptible, TASK_INTERRUPTIBLE);
+	rpc_clnt_sigunmask(clnt, &oldmask);
+out:
+	return ret;
 }
 
 /**
@@ -243,6 +261,62 @@ nfs_coalesce_requests(struct list_head *head, struct list_head *dst,
 	return npages;
 }
 
+#define NFS_SCAN_MAXENTRIES 16
+/**
+ * nfs_scan_lock_dirty - Scan the radix tree for dirty requests
+ * @nfsi: NFS inode
+ * @dst: Destination list
+ * @idx_start: lower bound of page->index to scan
+ * @npages: idx_start + npages sets the upper bound to scan.
+ *
+ * Moves elements from one of the inode request lists.
+ * If the number of requests is set to 0, the entire address_space
+ * starting at index idx_start, is scanned.
+ * The requests are *not* checked to ensure that they form a contiguous set.
+ * You must be holding the inode's req_lock when calling this function
+ */
+int
+nfs_scan_lock_dirty(struct nfs_inode *nfsi, struct list_head *dst,
+	      unsigned long idx_start, unsigned int npages)
+{
+	struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
+	struct nfs_page *req;
+	unsigned long idx_end;
+	int found, i;
+	int res;
+
+	res = 0;
+	if (npages == 0)
+		idx_end = ~0;
+	else
+		idx_end = idx_start + npages - 1;
+
+	for (;;) {
+		found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree,
+				(void **)&pgvec[0], idx_start, NFS_SCAN_MAXENTRIES,
+				NFS_PAGE_TAG_DIRTY);
+		if (found <= 0)
+			break;
+		for (i = 0; i < found; i++) {
+			req = pgvec[i];
+			if (req->wb_index > idx_end)
+				goto out;
+
+			idx_start = req->wb_index + 1;
+
+			if (nfs_set_page_writeback_locked(req)) {
+				radix_tree_tag_clear(&nfsi->nfs_page_tree,
+						req->wb_index, NFS_PAGE_TAG_DIRTY);
+				nfs_list_remove_request(req);
+				nfs_list_add_request(req, dst);
+				res++;
+			}
+		}
+	}
+out:
+	return res;
+}
+
 /**
  * nfs_scan_list - Scan a list for matching requests
  * @head: One of the NFS inode request lists
@@ -280,7 +354,7 @@ nfs_scan_list(struct list_head *head, struct list_head *dst,
 		if (req->wb_index > idx_end)
 			break;
 
-		if (!nfs_lock_request(req))
+		if (!nfs_set_page_writeback_locked(req))
 			continue;
 		nfs_list_remove_request(req);
 		nfs_list_add_request(req, dst);
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index d31b4d6e5a5e..cedf636bcf3c 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -622,6 +622,7 @@ struct nfs_rpc_ops	nfs_v2_clientops = {
 	.version	= 2,		       /* protocol version */
 	.dentry_ops	= &nfs_dentry_operations,
 	.dir_inode_ops	= &nfs_dir_inode_operations,
+	.file_inode_ops	= &nfs_file_inode_operations,
 	.getroot	= nfs_proc_get_root,
 	.getattr	= nfs_proc_getattr,
 	.setattr	= nfs_proc_setattr,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index a0042fb58634..6f866b8aa2d5 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -173,7 +173,6 @@ static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
 	if (len < PAGE_CACHE_SIZE)
 		memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
 
-	nfs_lock_request(new);
 	nfs_list_add_request(new, &one_request);
 	nfs_pagein_one(&one_request, inode);
 	return 0;
@@ -185,7 +184,6 @@ static void nfs_readpage_release(struct nfs_page *req)
 
 	nfs_clear_request(req);
 	nfs_release_request(req);
-	nfs_unlock_request(req);
 
 	dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
 			req->wb_context->dentry->d_inode->i_sb->s_id,
@@ -553,7 +551,6 @@ readpage_async_filler(void *data, struct page *page)
 	}
 	if (len < PAGE_CACHE_SIZE)
 		memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
-	nfs_lock_request(new);
 	nfs_list_add_request(new, desc->head);
 	return 0;
 }
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 6f7a4af3bc46..5130eda231d7 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -220,7 +220,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
 		ClearPageError(page);
 
 io_error:
-	nfs_end_data_update_defer(inode);
+	nfs_end_data_update(inode);
 	nfs_writedata_free(wdata);
 	return written ? written : result;
 }
@@ -352,7 +352,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 		if (err < 0)
 			goto out;
 	}
-	err = nfs_commit_inode(inode, 0, 0, wb_priority(wbc));
+	err = nfs_commit_inode(inode, wb_priority(wbc));
 	if (err > 0) {
 		wbc->nr_to_write -= err;
 		err = 0;
@@ -401,7 +401,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
 	nfsi->npages--;
 	if (!nfsi->npages) {
 		spin_unlock(&nfsi->req_lock);
-		nfs_end_data_update_defer(inode);
+		nfs_end_data_update(inode);
 		iput(inode);
 	} else
 		spin_unlock(&nfsi->req_lock);
@@ -446,6 +446,8 @@ nfs_mark_request_dirty(struct nfs_page *req)
 	struct nfs_inode *nfsi = NFS_I(inode);
 
 	spin_lock(&nfsi->req_lock);
+	radix_tree_tag_set(&nfsi->nfs_page_tree,
+			req->wb_index, NFS_PAGE_TAG_DIRTY);
 	nfs_list_add_request(req, &nfsi->dirty);
 	nfsi->ndirty++;
 	spin_unlock(&nfsi->req_lock);
@@ -503,13 +505,12 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int
 
 	spin_lock(&nfsi->req_lock);
 	next = idx_start;
-	while (radix_tree_gang_lookup(&nfsi->nfs_page_tree, (void **)&req, next, 1)) {
+	while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) {
 		if (req->wb_index > idx_end)
 			break;
 
 		next = req->wb_index + 1;
-		if (!NFS_WBACK_BUSY(req))
-			continue;
+		BUG_ON(!NFS_WBACK_BUSY(req));
 
 		atomic_inc(&req->wb_count);
 		spin_unlock(&nfsi->req_lock);
@@ -538,12 +539,15 @@ static int
 nfs_scan_dirty(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
-	int	res;
-	res = nfs_scan_list(&nfsi->dirty, dst, idx_start, npages);
-	nfsi->ndirty -= res;
-	sub_page_state(nr_dirty,res);
-	if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty))
-		printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n");
+	int res = 0;
+
+	if (nfsi->ndirty != 0) {
+		res = nfs_scan_lock_dirty(nfsi, dst, idx_start, npages);
+		nfsi->ndirty -= res;
+		sub_page_state(nr_dirty,res);
+		if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty))
+			printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n");
+	}
 	return res;
 }
 
@@ -562,11 +566,14 @@ static int
 nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
-	int	res;
-	res = nfs_scan_list(&nfsi->commit, dst, idx_start, npages);
-	nfsi->ncommit -= res;
-	if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit))
-		printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
+	int res = 0;
+
+	if (nfsi->ncommit != 0) {
+		res = nfs_scan_list(&nfsi->commit, dst, idx_start, npages);
+		nfsi->ncommit -= res;
+		if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit))
+			printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
+	}
 	return res;
 }
 #endif
@@ -750,7 +757,7 @@ int nfs_updatepage(struct file *file, struct page *page,
 	 * is entirely in cache, it may be more efficient to avoid
 	 * fragmenting write requests.
 	 */
-	if (PageUptodate(page) && inode->i_flock == NULL) {
+	if (PageUptodate(page) && inode->i_flock == NULL && !(file->f_mode & O_SYNC)) {
 		loff_t end_offs = i_size_read(inode) - 1;
 		unsigned long end_index = end_offs >> PAGE_CACHE_SHIFT;
 
@@ -821,7 +828,7 @@ out:
 #else
 	nfs_inode_remove_request(req);
 #endif
-	nfs_unlock_request(req);
+	nfs_clear_page_writeback(req);
 }
 
 static inline int flush_task_priority(int how)
@@ -952,7 +959,7 @@ out_bad:
 		nfs_writedata_free(data);
 	}
 	nfs_mark_request_dirty(req);
-	nfs_unlock_request(req);
+	nfs_clear_page_writeback(req);
 	return -ENOMEM;
 }
 
@@ -1002,7 +1009,7 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
 		struct nfs_page *req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
 		nfs_mark_request_dirty(req);
-		nfs_unlock_request(req);
+		nfs_clear_page_writeback(req);
 	}
 	return -ENOMEM;
 }
@@ -1029,7 +1036,7 @@ nfs_flush_list(struct list_head *head, int wpages, int how)
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
 		nfs_mark_request_dirty(req);
-		nfs_unlock_request(req);
+		nfs_clear_page_writeback(req);
 	}
 	return error;
 }
@@ -1121,7 +1128,7 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
 		nfs_inode_remove_request(req);
 #endif
 	next:
-		nfs_unlock_request(req);
+		nfs_clear_page_writeback(req);
 	}
 }
 
@@ -1210,36 +1217,24 @@ static void nfs_commit_rpcsetup(struct list_head *head,
 		struct nfs_write_data *data, int how)
 {
 	struct rpc_task		*task = &data->task;
-	struct nfs_page		*first, *last;
+	struct nfs_page		*first;
 	struct inode		*inode;
-	loff_t			start, end, len;
 
 	/* Set up the RPC argument and reply structs
 	 * NB: take care not to mess about with data->commit et al. */
 
 	list_splice_init(head, &data->pages);
 	first = nfs_list_entry(data->pages.next);
-	last = nfs_list_entry(data->pages.prev);
 	inode = first->wb_context->dentry->d_inode;
 
-	/*
-	 * Determine the offset range of requests in the COMMIT call.
-	 * We rely on the fact that data->pages is an ordered list...
-	 */
-	start = req_offset(first);
-	end = req_offset(last) + last->wb_bytes;
-	len = end - start;
-	/* If 'len' is not a 32-bit quantity, pass '0' in the COMMIT call */
-	if (end >= i_size_read(inode) || len < 0 || len > (~((u32)0) >> 1))
-		len = 0;
-
 	data->inode	  = inode;
 	data->cred	  = first->wb_context->cred;
 
 	data->args.fh     = NFS_FH(data->inode);
-	data->args.offset = start;
-	data->args.count  = len;
-	data->res.count   = len;
+	/* Note: we always request a commit of the entire inode */
+	data->args.offset = 0;
+	data->args.count  = 0;
+	data->res.count   = 0;
 	data->res.fattr   = &data->fattr;
 	data->res.verf    = &data->verf;
 	
@@ -1278,7 +1273,7 @@ nfs_commit_list(struct list_head *head, int how)
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
 		nfs_mark_request_commit(req);
-		nfs_unlock_request(req);
+		nfs_clear_page_writeback(req);
 	}
 	return -ENOMEM;
 }
@@ -1324,7 +1319,7 @@ nfs_commit_done(struct rpc_task *task)
 		dprintk(" mismatch\n");
 		nfs_mark_request_dirty(req);
 	next:
-		nfs_unlock_request(req);
+		nfs_clear_page_writeback(req);
 		res++;
 	}
 	sub_page_state(nr_unstable,res);
@@ -1342,16 +1337,23 @@ static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
 	spin_lock(&nfsi->req_lock);
 	res = nfs_scan_dirty(inode, &head, idx_start, npages);
 	spin_unlock(&nfsi->req_lock);
-	if (res)
-		error = nfs_flush_list(&head, NFS_SERVER(inode)->wpages, how);
+	if (res) {
+		struct nfs_server *server = NFS_SERVER(inode);
+
+		/* For single writes, FLUSH_STABLE is more efficient */
+		if (res == nfsi->npages && nfsi->npages <= server->wpages) {
+			if (res > 1 || nfs_list_entry(head.next)->wb_bytes <= server->wsize)
+				how |= FLUSH_STABLE;
+		}
+		error = nfs_flush_list(&head, server->wpages, how);
+	}
 	if (error < 0)
 		return error;
 	return res;
 }
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-int nfs_commit_inode(struct inode *inode, unsigned long idx_start,
-		    unsigned int npages, int how)
+int nfs_commit_inode(struct inode *inode, int how)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	LIST_HEAD(head);
@@ -1359,15 +1361,13 @@ int nfs_commit_inode(struct inode *inode, unsigned long idx_start,
 				error = 0;
 
 	spin_lock(&nfsi->req_lock);
-	res = nfs_scan_commit(inode, &head, idx_start, npages);
+	res = nfs_scan_commit(inode, &head, 0, 0);
+	spin_unlock(&nfsi->req_lock);
 	if (res) {
-		res += nfs_scan_commit(inode, &head, 0, 0);
-		spin_unlock(&nfsi->req_lock);
 		error = nfs_commit_list(&head, how);
-	} else
-		spin_unlock(&nfsi->req_lock);
-	if (error < 0)
-		return error;
+		if (error < 0)
+			return error;
+	}
 	return res;
 }
 #endif
@@ -1389,7 +1389,7 @@ int nfs_sync_inode(struct inode *inode, unsigned long idx_start,
 			error = nfs_flush_inode(inode, idx_start, npages, how);
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 		if (error == 0)
-			error = nfs_commit_inode(inode, idx_start, npages, how);
+			error = nfs_commit_inode(inode, how);
 #endif
 	} while (error > 0);
 	return error;
diff --git a/fs/nfs_common/Makefile b/fs/nfs_common/Makefile
new file mode 100644
index 000000000000..f689ed82af3a
--- /dev/null
+++ b/fs/nfs_common/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for Linux filesystem routines that are shared by client and server.
+#
+
+obj-$(CONFIG_NFS_ACL_SUPPORT) += nfs_acl.o
+
+nfs_acl-objs := nfsacl.o
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c
new file mode 100644
index 000000000000..18c58c32e326
--- /dev/null
+++ b/fs/nfs_common/nfsacl.c
@@ -0,0 +1,257 @@
+/*
+ * fs/nfs_common/nfsacl.c
+ *
+ *  Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de>
+ */
+
+/*
+ * The Solaris nfsacl protocol represents some ACLs slightly differently
+ * than POSIX 1003.1e draft 17 does (and we do):
+ *
+ *  - Minimal ACLs always have an ACL_MASK entry, so they have
+ *    four instead of three entries.
+ *  - The ACL_MASK entry in such minimal ACLs always has the same
+ *    permissions as the ACL_GROUP_OBJ entry. (In extended ACLs
+ *    the ACL_MASK and ACL_GROUP_OBJ entries may differ.)
+ *  - The identifier fields of the ACL_USER_OBJ and ACL_GROUP_OBJ
+ *    entries contain the identifiers of the owner and owning group.
+ *    (In POSIX ACLs we always set them to ACL_UNDEFINED_ID).
+ *  - ACL entries in the kernel are kept sorted in ascending order
+ *    of (e_tag, e_id). Solaris ACLs are unsorted.
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/nfsacl.h>
+#include <linux/nfs3.h>
+#include <linux/sort.h>
+
+MODULE_LICENSE("GPL");
+
+EXPORT_SYMBOL(nfsacl_encode);
+EXPORT_SYMBOL(nfsacl_decode);
+
+struct nfsacl_encode_desc {
+	struct xdr_array2_desc desc;
+	unsigned int count;
+	struct posix_acl *acl;
+	int typeflag;
+	uid_t uid;
+	gid_t gid;
+};
+
+static int
+xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem)
+{
+	struct nfsacl_encode_desc *nfsacl_desc =
+		(struct nfsacl_encode_desc *) desc;
+	u32 *p = (u32 *) elem;
+
+	if (nfsacl_desc->count < nfsacl_desc->acl->a_count) {
+		struct posix_acl_entry *entry =
+			&nfsacl_desc->acl->a_entries[nfsacl_desc->count++];
+
+		*p++ = htonl(entry->e_tag | nfsacl_desc->typeflag);
+		switch(entry->e_tag) {
+			case ACL_USER_OBJ:
+				*p++ = htonl(nfsacl_desc->uid);
+				break;
+			case ACL_GROUP_OBJ:
+				*p++ = htonl(nfsacl_desc->gid);
+				break;
+			case ACL_USER:
+			case ACL_GROUP:
+				*p++ = htonl(entry->e_id);
+				break;
+			default:  /* Solaris depends on that! */
+				*p++ = 0;
+				break;
+		}
+		*p++ = htonl(entry->e_perm & S_IRWXO);
+	} else {
+		const struct posix_acl_entry *pa, *pe;
+		int group_obj_perm = ACL_READ|ACL_WRITE|ACL_EXECUTE;
+
+		FOREACH_ACL_ENTRY(pa, nfsacl_desc->acl, pe) {
+			if (pa->e_tag == ACL_GROUP_OBJ) {
+				group_obj_perm = pa->e_perm & S_IRWXO;
+				break;
+			}
+		}
+		/* fake up ACL_MASK entry */
+		*p++ = htonl(ACL_MASK | nfsacl_desc->typeflag);
+		*p++ = htonl(0);
+		*p++ = htonl(group_obj_perm);
+	}
+
+	return 0;
+}
+
+unsigned int
+nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
+	      struct posix_acl *acl, int encode_entries, int typeflag)
+{
+	int entries = (acl && acl->a_count) ? max_t(int, acl->a_count, 4) : 0;
+	struct nfsacl_encode_desc nfsacl_desc = {
+		.desc = {
+			.elem_size = 12,
+			.array_len = encode_entries ? entries : 0,
+			.xcode = xdr_nfsace_encode,
+		},
+		.acl = acl,
+		.typeflag = typeflag,
+		.uid = inode->i_uid,
+		.gid = inode->i_gid,
+	};
+	int err;
+
+	if (entries > NFS_ACL_MAX_ENTRIES ||
+	    xdr_encode_word(buf, base, entries))
+		return -EINVAL;
+	err = xdr_encode_array2(buf, base + 4, &nfsacl_desc.desc);
+	if (!err)
+		err = 8 + nfsacl_desc.desc.elem_size *
+			  nfsacl_desc.desc.array_len;
+	return err;
+}
+
+struct nfsacl_decode_desc {
+	struct xdr_array2_desc desc;
+	unsigned int count;
+	struct posix_acl *acl;
+};
+
+static int
+xdr_nfsace_decode(struct xdr_array2_desc *desc, void *elem)
+{
+	struct nfsacl_decode_desc *nfsacl_desc =
+		(struct nfsacl_decode_desc *) desc;
+	u32 *p = (u32 *) elem;
+	struct posix_acl_entry *entry;
+
+	if (!nfsacl_desc->acl) {
+		if (desc->array_len > NFS_ACL_MAX_ENTRIES)
+			return -EINVAL;
+		nfsacl_desc->acl = posix_acl_alloc(desc->array_len, GFP_KERNEL);
+		if (!nfsacl_desc->acl)
+			return -ENOMEM;
+		nfsacl_desc->count = 0;
+	}
+
+	entry = &nfsacl_desc->acl->a_entries[nfsacl_desc->count++];
+	entry->e_tag = ntohl(*p++) & ~NFS_ACL_DEFAULT;
+	entry->e_id = ntohl(*p++);
+	entry->e_perm = ntohl(*p++);
+
+	switch(entry->e_tag) {
+		case ACL_USER_OBJ:
+		case ACL_USER:
+		case ACL_GROUP_OBJ:
+		case ACL_GROUP:
+		case ACL_OTHER:
+			if (entry->e_perm & ~S_IRWXO)
+				return -EINVAL;
+			break;
+		case ACL_MASK:
+			/* Solaris sometimes sets additonal bits in the mask */
+			entry->e_perm &= S_IRWXO;
+			break;
+		default:
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+cmp_acl_entry(const void *x, const void *y)
+{
+	const struct posix_acl_entry *a = x, *b = y;
+
+	if (a->e_tag != b->e_tag)
+		return a->e_tag - b->e_tag;
+	else if (a->e_id > b->e_id)
+		return 1;
+	else if (a->e_id < b->e_id)
+		return -1;
+	else
+		return 0;
+}
+
+/*
+ * Convert from a Solaris ACL to a POSIX 1003.1e draft 17 ACL.
+ */
+static int
+posix_acl_from_nfsacl(struct posix_acl *acl)
+{
+	struct posix_acl_entry *pa, *pe,
+	       *group_obj = NULL, *mask = NULL;
+
+	if (!acl)
+		return 0;
+
+	sort(acl->a_entries, acl->a_count, sizeof(struct posix_acl_entry),
+	     cmp_acl_entry, NULL);
+
+	/* Clear undefined identifier fields and find the ACL_GROUP_OBJ
+	   and ACL_MASK entries. */
+	FOREACH_ACL_ENTRY(pa, acl, pe) {
+		switch(pa->e_tag) {
+			case ACL_USER_OBJ:
+				pa->e_id = ACL_UNDEFINED_ID;
+				break;
+			case ACL_GROUP_OBJ:
+				pa->e_id = ACL_UNDEFINED_ID;
+				group_obj = pa;
+				break;
+			case ACL_MASK:
+				mask = pa;
+				/* fall through */
+			case ACL_OTHER:
+				pa->e_id = ACL_UNDEFINED_ID;
+				break;
+		}
+	}
+	if (acl->a_count == 4 && group_obj && mask &&
+	    mask->e_perm == group_obj->e_perm) {
+		/* remove bogus ACL_MASK entry */
+		memmove(mask, mask+1, (3 - (mask - acl->a_entries)) *
+				      sizeof(struct posix_acl_entry));
+		acl->a_count = 3;
+	}
+	return 0;
+}
+
+unsigned int
+nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt,
+	      struct posix_acl **pacl)
+{
+	struct nfsacl_decode_desc nfsacl_desc = {
+		.desc = {
+			.elem_size = 12,
+			.xcode = pacl ? xdr_nfsace_decode : NULL,
+		},
+	};
+	u32 entries;
+	int err;
+
+	if (xdr_decode_word(buf, base, &entries) ||
+	    entries > NFS_ACL_MAX_ENTRIES)
+		return -EINVAL;
+	err = xdr_decode_array2(buf, base + 4, &nfsacl_desc.desc);
+	if (err)
+		return err;
+	if (pacl) {
+		if (entries != nfsacl_desc.desc.array_len ||
+		    posix_acl_from_nfsacl(nfsacl_desc.acl) != 0) {
+			posix_acl_release(nfsacl_desc.acl);
+			return -EINVAL;
+		}
+		*pacl = nfsacl_desc.acl;
+	}
+	if (aclcnt)
+		*aclcnt = entries;
+	return 8 + nfsacl_desc.desc.elem_size *
+		   nfsacl_desc.desc.array_len;
+}
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index b8680a247f8b..ce341dc76d5e 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -6,7 +6,9 @@ obj-$(CONFIG_NFSD)	+= nfsd.o
 
 nfsd-y 			:= nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
 			   export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o
+nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
 nfsd-$(CONFIG_NFSD_V3)	+= nfs3proc.o nfs3xdr.o
+nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
 nfsd-$(CONFIG_NFSD_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
-			   nfs4acl.o nfs4callback.o
+			   nfs4acl.o nfs4callback.o nfs4recover.o
 nfsd-objs		:= $(nfsd-y)
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
new file mode 100644
index 000000000000..7cbf0682b2f0
--- /dev/null
+++ b/fs/nfsd/nfs2acl.c
@@ -0,0 +1,336 @@
+/*
+ * linux/fs/nfsd/nfsacl.c
+ *
+ * Process version 2 NFSACL requests.
+ *
+ * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de>
+ */
+
+#include <linux/sunrpc/svc.h>
+#include <linux/nfs.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/cache.h>
+#include <linux/nfsd/xdr.h>
+#include <linux/nfsd/xdr3.h>
+#include <linux/posix_acl.h>
+#include <linux/nfsacl.h>
+
+#define NFSDDBG_FACILITY		NFSDDBG_PROC
+#define RETURN_STATUS(st)	{ resp->status = (st); return (st); }
+
+/*
+ * NULL call.
+ */
+static int
+nfsacld_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+{
+	return nfs_ok;
+}
+
+/*
+ * Get the Access and/or Default ACL of a file.
+ */
+static int nfsacld_proc_getacl(struct svc_rqst * rqstp,
+		struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp)
+{
+	svc_fh *fh;
+	struct posix_acl *acl;
+	int nfserr = 0;
+
+	dprintk("nfsd: GETACL(2acl)   %s\n", SVCFH_fmt(&argp->fh));
+
+	fh = fh_copy(&resp->fh, &argp->fh);
+	if ((nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP)))
+		RETURN_STATUS(nfserr_inval);
+
+	if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
+		RETURN_STATUS(nfserr_inval);
+	resp->mask = argp->mask;
+
+	if (resp->mask & (NFS_ACL|NFS_ACLCNT)) {
+		acl = nfsd_get_posix_acl(fh, ACL_TYPE_ACCESS);
+		if (IS_ERR(acl)) {
+			int err = PTR_ERR(acl);
+
+			if (err == -ENODATA || err == -EOPNOTSUPP)
+				acl = NULL;
+			else {
+				nfserr = nfserrno(err);
+				goto fail;
+			}
+		}
+		if (acl == NULL) {
+			/* Solaris returns the inode's minimum ACL. */
+
+			struct inode *inode = fh->fh_dentry->d_inode;
+			acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
+		}
+		resp->acl_access = acl;
+	}
+	if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) {
+		/* Check how Solaris handles requests for the Default ACL
+		   of a non-directory! */
+
+		acl = nfsd_get_posix_acl(fh, ACL_TYPE_DEFAULT);
+		if (IS_ERR(acl)) {
+			int err = PTR_ERR(acl);
+
+			if (err == -ENODATA || err == -EOPNOTSUPP)
+				acl = NULL;
+			else {
+				nfserr = nfserrno(err);
+				goto fail;
+			}
+		}
+		resp->acl_default = acl;
+	}
+
+	/* resp->acl_{access,default} are released in nfssvc_release_getacl. */
+	RETURN_STATUS(0);
+
+fail:
+	posix_acl_release(resp->acl_access);
+	posix_acl_release(resp->acl_default);
+	RETURN_STATUS(nfserr);
+}
+
+/*
+ * Set the Access and/or Default ACL of a file.
+ */
+static int nfsacld_proc_setacl(struct svc_rqst * rqstp,
+		struct nfsd3_setaclargs *argp,
+		struct nfsd_attrstat *resp)
+{
+	svc_fh *fh;
+	int nfserr = 0;
+
+	dprintk("nfsd: SETACL(2acl)   %s\n", SVCFH_fmt(&argp->fh));
+
+	fh = fh_copy(&resp->fh, &argp->fh);
+	nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
+
+	if (!nfserr) {
+		nfserr = nfserrno( nfsd_set_posix_acl(
+			fh, ACL_TYPE_ACCESS, argp->acl_access) );
+	}
+	if (!nfserr) {
+		nfserr = nfserrno( nfsd_set_posix_acl(
+			fh, ACL_TYPE_DEFAULT, argp->acl_default) );
+	}
+
+	/* argp->acl_{access,default} may have been allocated in
+	   nfssvc_decode_setaclargs. */
+	posix_acl_release(argp->acl_access);
+	posix_acl_release(argp->acl_default);
+	return nfserr;
+}
+
+/*
+ * Check file attributes
+ */
+static int nfsacld_proc_getattr(struct svc_rqst * rqstp,
+		struct nfsd_fhandle *argp, struct nfsd_attrstat *resp)
+{
+	dprintk("nfsd: GETATTR  %s\n", SVCFH_fmt(&argp->fh));
+
+	fh_copy(&resp->fh, &argp->fh);
+	return fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
+}
+
+/*
+ * Check file access
+ */
+static int nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp,
+		struct nfsd3_accessres *resp)
+{
+	int nfserr;
+
+	dprintk("nfsd: ACCESS(2acl)   %s 0x%x\n",
+			SVCFH_fmt(&argp->fh),
+			argp->access);
+
+	fh_copy(&resp->fh, &argp->fh);
+	resp->access = argp->access;
+	nfserr = nfsd_access(rqstp, &resp->fh, &resp->access, NULL);
+	return nfserr;
+}
+
+/*
+ * XDR decode functions
+ */
+static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd3_getaclargs *argp)
+{
+	if (!(p = nfs2svc_decode_fh(p, &argp->fh)))
+		return 0;
+	argp->mask = ntohl(*p); p++;
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+
+static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd3_setaclargs *argp)
+{
+	struct kvec *head = rqstp->rq_arg.head;
+	unsigned int base;
+	int n;
+
+	if (!(p = nfs2svc_decode_fh(p, &argp->fh)))
+		return 0;
+	argp->mask = ntohl(*p++);
+	if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) ||
+	    !xdr_argsize_check(rqstp, p))
+		return 0;
+
+	base = (char *)p - (char *)head->iov_base;
+	n = nfsacl_decode(&rqstp->rq_arg, base, NULL,
+			  (argp->mask & NFS_ACL) ?
+			  &argp->acl_access : NULL);
+	if (n > 0)
+		n = nfsacl_decode(&rqstp->rq_arg, base + n, NULL,
+				  (argp->mask & NFS_DFACL) ?
+				  &argp->acl_default : NULL);
+	return (n > 0);
+}
+
+static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd_fhandle *argp)
+{
+	if (!(p = nfs2svc_decode_fh(p, &argp->fh)))
+		return 0;
+	return xdr_argsize_check(rqstp, p);
+}
+
+static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd3_accessargs *argp)
+{
+	if (!(p = nfs2svc_decode_fh(p, &argp->fh)))
+		return 0;
+	argp->access = ntohl(*p++);
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+/*
+ * XDR encode functions
+ */
+
+/* GETACL */
+static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd3_getaclres *resp)
+{
+	struct dentry *dentry = resp->fh.fh_dentry;
+	struct inode *inode = dentry->d_inode;
+	int w = nfsacl_size(
+		(resp->mask & NFS_ACL)   ? resp->acl_access  : NULL,
+		(resp->mask & NFS_DFACL) ? resp->acl_default : NULL);
+	struct kvec *head = rqstp->rq_res.head;
+	unsigned int base;
+	int n;
+
+	if (dentry == NULL || dentry->d_inode == NULL)
+		return 0;
+	inode = dentry->d_inode;
+
+	p = nfs2svc_encode_fattr(rqstp, p, &resp->fh);
+	*p++ = htonl(resp->mask);
+	if (!xdr_ressize_check(rqstp, p))
+		return 0;
+	base = (char *)p - (char *)head->iov_base;
+
+	rqstp->rq_res.page_len = w;
+	while (w > 0) {
+		if (!svc_take_res_page(rqstp))
+			return 0;
+		w -= PAGE_SIZE;
+	}
+
+	n = nfsacl_encode(&rqstp->rq_res, base, inode,
+			  resp->acl_access,
+			  resp->mask & NFS_ACL, 0);
+	if (n > 0)
+		n = nfsacl_encode(&rqstp->rq_res, base + n, inode,
+				  resp->acl_default,
+				  resp->mask & NFS_DFACL,
+				  NFS_ACL_DEFAULT);
+	if (n <= 0)
+		return 0;
+	return 1;
+}
+
+static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd_attrstat *resp)
+{
+	p = nfs2svc_encode_fattr(rqstp, p, &resp->fh);
+	return xdr_ressize_check(rqstp, p);
+}
+
+/* ACCESS */
+static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd3_accessres *resp)
+{
+	p = nfs2svc_encode_fattr(rqstp, p, &resp->fh);
+	*p++ = htonl(resp->access);
+	return xdr_ressize_check(rqstp, p);
+}
+
+/*
+ * XDR release functions
+ */
+static int nfsaclsvc_release_getacl(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd3_getaclres *resp)
+{
+	fh_put(&resp->fh);
+	posix_acl_release(resp->acl_access);
+	posix_acl_release(resp->acl_default);
+	return 1;
+}
+
+static int nfsaclsvc_release_fhandle(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd_fhandle *resp)
+{
+	fh_put(&resp->fh);
+	return 1;
+}
+
+#define nfsaclsvc_decode_voidargs	NULL
+#define nfsaclsvc_encode_voidres	NULL
+#define nfsaclsvc_release_void		NULL
+#define nfsd3_fhandleargs	nfsd_fhandle
+#define nfsd3_attrstatres	nfsd_attrstat
+#define nfsd3_voidres		nfsd3_voidargs
+struct nfsd3_voidargs { int dummy; };
+
+#define PROC(name, argt, rest, relt, cache, respsize)	\
+ { (svc_procfunc) nfsacld_proc_##name,		\
+   (kxdrproc_t) nfsaclsvc_decode_##argt##args,	\
+   (kxdrproc_t) nfsaclsvc_encode_##rest##res,	\
+   (kxdrproc_t) nfsaclsvc_release_##relt,		\
+   sizeof(struct nfsd3_##argt##args),		\
+   sizeof(struct nfsd3_##rest##res),		\
+   0,						\
+   cache,					\
+   respsize,					\
+ }
+
+#define ST 1		/* status*/
+#define AT 21		/* attributes */
+#define pAT (1+AT)	/* post attributes - conditional */
+#define ACL (1+NFS_ACL_MAX_ENTRIES*3)  /* Access Control List */
+
+static struct svc_procedure		nfsd_acl_procedures2[] = {
+  PROC(null,	void,		void,		void,	  RC_NOCACHE, ST),
+  PROC(getacl,	getacl,		getacl,		getacl,	  RC_NOCACHE, ST+1+2*(1+ACL)),
+  PROC(setacl,	setacl,		attrstat,	fhandle,  RC_NOCACHE, ST+AT),
+  PROC(getattr, fhandle,	attrstat,	fhandle,  RC_NOCACHE, ST+AT),
+  PROC(access,	access,		access,		fhandle,  RC_NOCACHE, ST+AT+1),
+};
+
+struct svc_version	nfsd_acl_version2 = {
+		.vs_vers	= 2,
+		.vs_nproc	= 5,
+		.vs_proc	= nfsd_acl_procedures2,
+		.vs_dispatch	= nfsd_dispatch,
+		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
+};
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
new file mode 100644
index 000000000000..64ba40572fea
--- /dev/null
+++ b/fs/nfsd/nfs3acl.c
@@ -0,0 +1,267 @@
+/*
+ * linux/fs/nfsd/nfs3acl.c
+ *
+ * Process version 3 NFSACL requests.
+ *
+ * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de>
+ */
+
+#include <linux/sunrpc/svc.h>
+#include <linux/nfs3.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/cache.h>
+#include <linux/nfsd/xdr3.h>
+#include <linux/posix_acl.h>
+#include <linux/nfsacl.h>
+
+#define RETURN_STATUS(st)	{ resp->status = (st); return (st); }
+
+/*
+ * NULL call.
+ */
+static int
+nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+{
+	return nfs_ok;
+}
+
+/*
+ * Get the Access and/or Default ACL of a file.
+ */
+static int nfsd3_proc_getacl(struct svc_rqst * rqstp,
+		struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp)
+{
+	svc_fh *fh;
+	struct posix_acl *acl;
+	int nfserr = 0;
+
+	fh = fh_copy(&resp->fh, &argp->fh);
+	if ((nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP)))
+		RETURN_STATUS(nfserr_inval);
+
+	if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
+		RETURN_STATUS(nfserr_inval);
+	resp->mask = argp->mask;
+
+	if (resp->mask & (NFS_ACL|NFS_ACLCNT)) {
+		acl = nfsd_get_posix_acl(fh, ACL_TYPE_ACCESS);
+		if (IS_ERR(acl)) {
+			int err = PTR_ERR(acl);
+
+			if (err == -ENODATA || err == -EOPNOTSUPP)
+				acl = NULL;
+			else {
+				nfserr = nfserrno(err);
+				goto fail;
+			}
+		}
+		if (acl == NULL) {
+			/* Solaris returns the inode's minimum ACL. */
+
+			struct inode *inode = fh->fh_dentry->d_inode;
+			acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
+		}
+		resp->acl_access = acl;
+	}
+	if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) {
+		/* Check how Solaris handles requests for the Default ACL
+		   of a non-directory! */
+
+		acl = nfsd_get_posix_acl(fh, ACL_TYPE_DEFAULT);
+		if (IS_ERR(acl)) {
+			int err = PTR_ERR(acl);
+
+			if (err == -ENODATA || err == -EOPNOTSUPP)
+				acl = NULL;
+			else {
+				nfserr = nfserrno(err);
+				goto fail;
+			}
+		}
+		resp->acl_default = acl;
+	}
+
+	/* resp->acl_{access,default} are released in nfs3svc_release_getacl. */
+	RETURN_STATUS(0);
+
+fail:
+	posix_acl_release(resp->acl_access);
+	posix_acl_release(resp->acl_default);
+	RETURN_STATUS(nfserr);
+}
+
+/*
+ * Set the Access and/or Default ACL of a file.
+ */
+static int nfsd3_proc_setacl(struct svc_rqst * rqstp,
+		struct nfsd3_setaclargs *argp,
+		struct nfsd3_attrstat *resp)
+{
+	svc_fh *fh;
+	int nfserr = 0;
+
+	fh = fh_copy(&resp->fh, &argp->fh);
+	nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
+
+	if (!nfserr) {
+		nfserr = nfserrno( nfsd_set_posix_acl(
+			fh, ACL_TYPE_ACCESS, argp->acl_access) );
+	}
+	if (!nfserr) {
+		nfserr = nfserrno( nfsd_set_posix_acl(
+			fh, ACL_TYPE_DEFAULT, argp->acl_default) );
+	}
+
+	/* argp->acl_{access,default} may have been allocated in
+	   nfs3svc_decode_setaclargs. */
+	posix_acl_release(argp->acl_access);
+	posix_acl_release(argp->acl_default);
+	RETURN_STATUS(nfserr);
+}
+
+/*
+ * XDR decode functions
+ */
+static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd3_getaclargs *args)
+{
+	if (!(p = nfs3svc_decode_fh(p, &args->fh)))
+		return 0;
+	args->mask = ntohl(*p); p++;
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+
+static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd3_setaclargs *args)
+{
+	struct kvec *head = rqstp->rq_arg.head;
+	unsigned int base;
+	int n;
+
+	if (!(p = nfs3svc_decode_fh(p, &args->fh)))
+		return 0;
+	args->mask = ntohl(*p++);
+	if (args->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) ||
+	    !xdr_argsize_check(rqstp, p))
+		return 0;
+
+	base = (char *)p - (char *)head->iov_base;
+	n = nfsacl_decode(&rqstp->rq_arg, base, NULL,
+			  (args->mask & NFS_ACL) ?
+			  &args->acl_access : NULL);
+	if (n > 0)
+		n = nfsacl_decode(&rqstp->rq_arg, base + n, NULL,
+				  (args->mask & NFS_DFACL) ?
+				  &args->acl_default : NULL);
+	return (n > 0);
+}
+
+/*
+ * XDR encode functions
+ */
+
+/* GETACL */
+static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd3_getaclres *resp)
+{
+	struct dentry *dentry = resp->fh.fh_dentry;
+
+	p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh);
+	if (resp->status == 0 && dentry && dentry->d_inode) {
+		struct inode *inode = dentry->d_inode;
+		int w = nfsacl_size(
+			(resp->mask & NFS_ACL)   ? resp->acl_access  : NULL,
+			(resp->mask & NFS_DFACL) ? resp->acl_default : NULL);
+		struct kvec *head = rqstp->rq_res.head;
+		unsigned int base;
+		int n;
+
+		*p++ = htonl(resp->mask);
+		if (!xdr_ressize_check(rqstp, p))
+			return 0;
+		base = (char *)p - (char *)head->iov_base;
+
+		rqstp->rq_res.page_len = w;
+		while (w > 0) {
+			if (!svc_take_res_page(rqstp))
+				return 0;
+			w -= PAGE_SIZE;
+		}
+
+		n = nfsacl_encode(&rqstp->rq_res, base, inode,
+				  resp->acl_access,
+				  resp->mask & NFS_ACL, 0);
+		if (n > 0)
+			n = nfsacl_encode(&rqstp->rq_res, base + n, inode,
+					  resp->acl_default,
+					  resp->mask & NFS_DFACL,
+					  NFS_ACL_DEFAULT);
+		if (n <= 0)
+			return 0;
+	} else
+		if (!xdr_ressize_check(rqstp, p))
+			return 0;
+
+	return 1;
+}
+
+/* SETACL */
+static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd3_attrstat *resp)
+{
+	p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh);
+
+	return xdr_ressize_check(rqstp, p);
+}
+
+/*
+ * XDR release functions
+ */
+static int nfs3svc_release_getacl(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd3_getaclres *resp)
+{
+	fh_put(&resp->fh);
+	posix_acl_release(resp->acl_access);
+	posix_acl_release(resp->acl_default);
+	return 1;
+}
+
+#define nfs3svc_decode_voidargs		NULL
+#define nfs3svc_release_void		NULL
+#define nfsd3_setaclres			nfsd3_attrstat
+#define nfsd3_voidres			nfsd3_voidargs
+struct nfsd3_voidargs { int dummy; };
+
+#define PROC(name, argt, rest, relt, cache, respsize)	\
+ { (svc_procfunc) nfsd3_proc_##name,		\
+   (kxdrproc_t) nfs3svc_decode_##argt##args,	\
+   (kxdrproc_t) nfs3svc_encode_##rest##res,	\
+   (kxdrproc_t) nfs3svc_release_##relt,		\
+   sizeof(struct nfsd3_##argt##args),		\
+   sizeof(struct nfsd3_##rest##res),		\
+   0,						\
+   cache,					\
+   respsize,					\
+ }
+
+#define ST 1		/* status*/
+#define AT 21		/* attributes */
+#define pAT (1+AT)	/* post attributes - conditional */
+#define ACL (1+NFS_ACL_MAX_ENTRIES*3)  /* Access Control List */
+
+static struct svc_procedure		nfsd_acl_procedures3[] = {
+  PROC(null,	void,		void,		void,	  RC_NOCACHE, ST),
+  PROC(getacl,	getacl,		getacl,		getacl,	  RC_NOCACHE, ST+1+2*(1+ACL)),
+  PROC(setacl,	setacl,		setacl,		fhandle,  RC_NOCACHE, ST+pAT),
+};
+
+struct svc_version	nfsd_acl_version3 = {
+		.vs_vers	= 3,
+		.vs_nproc	= 3,
+		.vs_proc	= nfsd_acl_procedures3,
+		.vs_dispatch	= nfsd_dispatch,
+		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
+};
+
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 11f806835c5a..e0e134d6baba 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -71,6 +71,12 @@ decode_fh(u32 *p, struct svc_fh *fhp)
 	return p + XDR_QUADLEN(size);
 }
 
+/* Helper function for NFSv3 ACL code */
+u32 *nfs3svc_decode_fh(u32 *p, struct svc_fh *fhp)
+{
+	return decode_fh(p, fhp);
+}
+
 static inline u32 *
 encode_fh(u32 *p, struct svc_fh *fhp)
 {
@@ -233,6 +239,13 @@ encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
 	return p;
 }
 
+/* Helper for NFSv3 ACLs */
+u32 *
+nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
+{
+	return encode_post_op_attr(rqstp, p, fhp);
+}
+
 /*
  * Enocde weak cache consistency data
  */
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 11ebf6c4aa54..4a2105552ac4 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -125,7 +125,7 @@ static short ace2type(struct nfs4_ace *);
 static int _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *, unsigned int);
 static struct posix_acl *_nfsv4_to_posix_one(struct nfs4_acl *, unsigned int);
 int nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, u32, int, uid_t);
-int nfs4_acl_split(struct nfs4_acl *, struct nfs4_acl *);
+static int nfs4_acl_split(struct nfs4_acl *, struct nfs4_acl *);
 
 struct nfs4_acl *
 nfs4_acl_posix_to_nfsv4(struct posix_acl *pacl, struct posix_acl *dpacl,
@@ -775,7 +775,7 @@ out_err:
 	return pacl;
 }
 
-int
+static int
 nfs4_acl_split(struct nfs4_acl *acl, struct nfs4_acl *dacl)
 {
 	struct list_head *h, *n;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 1a55dfcb74bc..583c0710e45e 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -54,7 +54,6 @@
 
 /* declarations */
 static void nfs4_cb_null(struct rpc_task *task);
-extern spinlock_t recall_lock;
 
 /* Index of predefined Linux callback client operations */
 
@@ -329,12 +328,12 @@ out:
         .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2,  \
 }
 
-struct rpc_procinfo     nfs4_cb_procedures[] = {
+static struct rpc_procinfo     nfs4_cb_procedures[] = {
     PROC(CB_NULL,      NULL,     enc_cb_null,     dec_cb_null),
     PROC(CB_RECALL,    COMPOUND,   enc_cb_recall,      dec_cb_recall),
 };
 
-struct rpc_version              nfs_cb_version4 = {
+static struct rpc_version       nfs_cb_version4 = {
         .number                 = 1,
         .nrprocs                = sizeof(nfs4_cb_procedures)/sizeof(nfs4_cb_procedures[0]),
         .procs                  = nfs4_cb_procedures
@@ -348,7 +347,7 @@ static struct rpc_version *	nfs_cb_version[] = {
 /*
  * Use the SETCLIENTID credential
  */
-struct rpc_cred *
+static struct rpc_cred *
 nfsd4_lookupcred(struct nfs4_client *clp, int taskflags)
 {
         struct auth_cred acred;
@@ -387,9 +386,7 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 	char                    hostname[32];
 	int status;
 
-	dprintk("NFSD: probe_callback. cb_parsed %d cb_set %d\n",
-			cb->cb_parsed, atomic_read(&cb->cb_set));
-	if (!cb->cb_parsed || atomic_read(&cb->cb_set))
+	if (atomic_read(&cb->cb_set))
 		return;
 
 	/* Initialize address */
@@ -427,10 +424,10 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 	 * XXX AUTH_UNIX only - need AUTH_GSS....
 	 */
 	sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr.sin_addr.s_addr));
-	clnt = rpc_create_client(xprt, hostname, program, 1, RPC_AUTH_UNIX);
+	clnt = rpc_new_client(xprt, hostname, program, 1, RPC_AUTH_UNIX);
 	if (IS_ERR(clnt)) {
 		dprintk("NFSD: couldn't create callback client\n");
-		goto out_xprt;
+		goto out_err;
 	}
 	clnt->cl_intr = 0;
 	clnt->cl_softrtry = 1;
@@ -465,8 +462,6 @@ out_rpciod:
 out_clnt:
 	rpc_shutdown_client(clnt);
 	goto out_err;
-out_xprt:
-	xprt_destroy(xprt);
 out_err:
 	dprintk("NFSD: warning: no callback path to client %.*s\n",
 		(int)clp->cl_name.len, clp->cl_name.data);
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 4ba540841cf6..5605a26efc57 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -104,7 +104,7 @@ ent_update(struct ent *new, struct ent *itm)
 	ent_init(new, itm);
 }
 
-void
+static void
 ent_put(struct cache_head *ch, struct cache_detail *cd)
 {
 	if (cache_put(ch, cd)) {
@@ -186,7 +186,7 @@ warn_no_idmapd(struct cache_detail *detail)
 static int         idtoname_parse(struct cache_detail *, char *, int);
 static struct ent *idtoname_lookup(struct ent *, int);
 
-struct cache_detail idtoname_cache = {
+static struct cache_detail idtoname_cache = {
 	.hash_size	= ENT_HASHMAX,
 	.hash_table	= idtoname_table,
 	.name		= "nfs4.idtoname",
@@ -277,7 +277,7 @@ nametoid_hash(struct ent *ent)
 	return hash_str(ent->name, ENT_HASHBITS);
 }
 
-void
+static void
 nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
     int *blen)
 {
@@ -317,9 +317,9 @@ nametoid_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h)
 }
 
 static struct ent *nametoid_lookup(struct ent *, int);
-int                nametoid_parse(struct cache_detail *, char *, int);
+static int         nametoid_parse(struct cache_detail *, char *, int);
 
-struct cache_detail nametoid_cache = {
+static struct cache_detail nametoid_cache = {
 	.hash_size	= ENT_HASHMAX,
 	.hash_table	= nametoid_table,
 	.name		= "nfs4.nametoid",
@@ -330,7 +330,7 @@ struct cache_detail nametoid_cache = {
 	.warn_no_listener = warn_no_idmapd,
 };
 
-int
+static int
 nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
 {
 	struct ent ent, *res;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index e8158741e8b5..d71f14517b9c 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -45,6 +45,7 @@
 #include <linux/param.h>
 #include <linux/major.h>
 #include <linux/slab.h>
+#include <linux/file.h>
 
 #include <linux/sunrpc/svc.h>
 #include <linux/nfsd/nfsd.h>
@@ -198,6 +199,11 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open
 	if (status)
 		goto out;
 	switch (open->op_claim_type) {
+		case NFS4_OPEN_CLAIM_DELEGATE_CUR:
+			status = nfserr_inval;
+			if (open->op_create)
+				goto out;
+			/* fall through */
 		case NFS4_OPEN_CLAIM_NULL:
 			/*
 			 * (1) set CURRENT_FH to the file being opened,
@@ -220,7 +226,6 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open
 			if (status)
 				goto out;
 			break;
-		case NFS4_OPEN_CLAIM_DELEGATE_CUR:
              	case NFS4_OPEN_CLAIM_DELEGATE_PREV:
 			printk("NFSD: unsupported OPEN claim type %d\n",
 				open->op_claim_type);
@@ -473,26 +478,27 @@ static inline int
 nfsd4_read(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_read *read)
 {
 	int status;
-	struct file *filp = NULL;
 
 	/* no need to check permission - this will be done in nfsd_read() */
 
+	read->rd_filp = NULL;
 	if (read->rd_offset >= OFFSET_MAX)
 		return nfserr_inval;
 
 	nfs4_lock_state();
 	/* check stateid */
 	if ((status = nfs4_preprocess_stateid_op(current_fh, &read->rd_stateid,
-					CHECK_FH | RD_STATE, &filp))) {
+				CHECK_FH | RD_STATE, &read->rd_filp))) {
 		dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
 		goto out;
 	}
+	if (read->rd_filp)
+		get_file(read->rd_filp);
 	status = nfs_ok;
 out:
 	nfs4_unlock_state();
 	read->rd_rqstp = rqstp;
 	read->rd_fhp = current_fh;
-	read->rd_filp = filp;
 	return status;
 }
 
@@ -532,6 +538,8 @@ nfsd4_remove(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_rem
 {
 	int status;
 
+	if (nfs4_in_grace())
+		return nfserr_grace;
 	status = nfsd_unlink(rqstp, current_fh, 0, remove->rm_name, remove->rm_namelen);
 	if (status == nfserr_symlink)
 		return nfserr_notdir;
@@ -550,6 +558,9 @@ nfsd4_rename(struct svc_rqst *rqstp, struct svc_fh *current_fh,
 
 	if (!save_fh->fh_dentry)
 		return status;
+	if (nfs4_in_grace() && !(save_fh->fh_export->ex_flags
+					& NFSEXP_NOSUBTREECHECK))
+		return nfserr_grace;
 	status = nfsd_rename(rqstp, save_fh, rename->rn_sname,
 			     rename->rn_snamelen, current_fh,
 			     rename->rn_tname, rename->rn_tnamelen);
@@ -624,6 +635,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ
 		dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
 		goto out;
 	}
+	if (filp)
+		get_file(filp);
 	nfs4_unlock_state();
 
 	write->wr_bytes_written = write->wr_buflen;
@@ -635,6 +648,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ
 	status =  nfsd_write(rqstp, current_fh, filp, write->wr_offset,
 			write->wr_vec, write->wr_vlen, write->wr_buflen,
 			&write->wr_how_written);
+	if (filp)
+		fput(filp);
 
 	if (status == nfserr_symlink)
 		status = nfserr_inval;
@@ -923,6 +938,9 @@ encode_op:
 			nfs4_put_stateowner(replay_owner);
 			replay_owner = NULL;
 		}
+		/* XXX Ugh, we need to get rid of this kind of special case: */
+		if (op->opnum == OP_READ && op->u.read.rd_filp)
+			fput(op->u.read.rd_filp);
 	}
 
 out:
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
new file mode 100644
index 000000000000..095f1740f3ae
--- /dev/null
+++ b/fs/nfsd/nfs4recover.c
@@ -0,0 +1,431 @@
+/*
+*  linux/fs/nfsd/nfs4recover.c
+*
+*  Copyright (c) 2004 The Regents of the University of Michigan.
+*  All rights reserved.
+*
+*  Andy Adamson <andros@citi.umich.edu>
+*
+*  Redistribution and use in source and binary forms, with or without
+*  modification, are permitted provided that the following conditions
+*  are met:
+*
+*  1. Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*  2. Redistributions in binary form must reproduce the above copyright
+*     notice, this list of conditions and the following disclaimer in the
+*     documentation and/or other materials provided with the distribution.
+*  3. Neither the name of the University nor the names of its
+*     contributors may be used to endorse or promote products derived
+*     from this software without specific prior written permission.
+*
+*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+*  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+*  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+*  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+*  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+*  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+*  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+*  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+*  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+*  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+*  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfs4.h>
+#include <linux/nfsd/state.h>
+#include <linux/nfsd/xdr4.h>
+#include <linux/param.h>
+#include <linux/file.h>
+#include <linux/namei.h>
+#include <asm/uaccess.h>
+#include <asm/scatterlist.h>
+#include <linux/crypto.h>
+
+
+#define NFSDDBG_FACILITY                NFSDDBG_PROC
+
+/* Globals */
+static struct nameidata rec_dir;
+static int rec_dir_init = 0;
+
+static void
+nfs4_save_user(uid_t *saveuid, gid_t *savegid)
+{
+	*saveuid = current->fsuid;
+	*savegid = current->fsgid;
+	current->fsuid = 0;
+	current->fsgid = 0;
+}
+
+static void
+nfs4_reset_user(uid_t saveuid, gid_t savegid)
+{
+	current->fsuid = saveuid;
+	current->fsgid = savegid;
+}
+
+static void
+md5_to_hex(char *out, char *md5)
+{
+	int i;
+
+	for (i=0; i<16; i++) {
+		unsigned char c = md5[i];
+
+		*out++ = '0' + ((c&0xf0)>>4) + (c>=0xa0)*('a'-'9'-1);
+		*out++ = '0' + (c&0x0f) + ((c&0x0f)>=0x0a)*('a'-'9'-1);
+	}
+	*out = '\0';
+}
+
+int
+nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname)
+{
+	struct xdr_netobj cksum;
+	struct crypto_tfm *tfm;
+	struct scatterlist sg[1];
+	int status = nfserr_resource;
+
+	dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
+			clname->len, clname->data);
+	tfm = crypto_alloc_tfm("md5", 0);
+	if (tfm == NULL)
+		goto out;
+	cksum.len = crypto_tfm_alg_digestsize(tfm);
+	cksum.data = kmalloc(cksum.len, GFP_KERNEL);
+	if (cksum.data == NULL)
+ 		goto out;
+	crypto_digest_init(tfm);
+
+	sg[0].page = virt_to_page(clname->data);
+	sg[0].offset = offset_in_page(clname->data);
+	sg[0].length = clname->len;
+
+	crypto_digest_update(tfm, sg, 1);
+	crypto_digest_final(tfm, cksum.data);
+
+	md5_to_hex(dname, cksum.data);
+
+	kfree(cksum.data);
+	status = nfs_ok;
+out:
+	if (tfm)
+		crypto_free_tfm(tfm);
+	return status;
+}
+
+static int
+nfsd4_rec_fsync(struct dentry *dentry)
+{
+	struct file *filp;
+	int status = nfs_ok;
+
+	dprintk("NFSD: nfs4_fsync_rec_dir\n");
+	filp = dentry_open(dget(dentry), mntget(rec_dir.mnt), O_RDWR);
+	if (IS_ERR(filp)) {
+		status = PTR_ERR(filp);
+		goto out;
+	}
+	if (filp->f_op && filp->f_op->fsync)
+		status = filp->f_op->fsync(filp, filp->f_dentry, 0);
+	fput(filp);
+out:
+	if (status)
+		printk("nfsd4: unable to sync recovery directory\n");
+	return status;
+}
+
+int
+nfsd4_create_clid_dir(struct nfs4_client *clp)
+{
+	char *dname = clp->cl_recdir;
+	struct dentry *dentry;
+	uid_t uid;
+	gid_t gid;
+	int status;
+
+	dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname);
+
+	if (!rec_dir_init || clp->cl_firststate)
+		return 0;
+
+	nfs4_save_user(&uid, &gid);
+
+	/* lock the parent */
+	down(&rec_dir.dentry->d_inode->i_sem);
+
+	dentry = lookup_one_len(dname, rec_dir.dentry, HEXDIR_LEN-1);
+	if (IS_ERR(dentry)) {
+		status = PTR_ERR(dentry);
+		goto out_unlock;
+	}
+	status = -EEXIST;
+	if (dentry->d_inode) {
+		dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n");
+		goto out_put;
+	}
+	status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, S_IRWXU);
+out_put:
+	dput(dentry);
+out_unlock:
+	up(&rec_dir.dentry->d_inode->i_sem);
+	if (status == 0) {
+		clp->cl_firststate = 1;
+		status = nfsd4_rec_fsync(rec_dir.dentry);
+	}
+	nfs4_reset_user(uid, gid);
+	dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status);
+	return status;
+}
+
+typedef int (recdir_func)(struct dentry *, struct dentry *);
+
+struct dentry_list {
+	struct dentry *dentry;
+	struct list_head list;
+};
+
+struct dentry_list_arg {
+	struct list_head dentries;
+	struct dentry *parent;
+};
+
+static int
+nfsd4_build_dentrylist(void *arg, const char *name, int namlen,
+		loff_t offset, ino_t ino, unsigned int d_type)
+{
+	struct dentry_list_arg *dla = arg;
+	struct list_head *dentries = &dla->dentries;
+	struct dentry *parent = dla->parent;
+	struct dentry *dentry;
+	struct dentry_list *child;
+
+	if (name && isdotent(name, namlen))
+		return nfs_ok;
+	dentry = lookup_one_len(name, parent, namlen);
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
+	child = kmalloc(sizeof(*child), GFP_KERNEL);
+	if (child == NULL)
+		return -ENOMEM;
+	child->dentry = dentry;
+	list_add(&child->list, dentries);
+	return 0;
+}
+
+static int
+nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f)
+{
+	struct file *filp;
+	struct dentry_list_arg dla = {
+		.parent = dir,
+	};
+	struct list_head *dentries = &dla.dentries;
+	struct dentry_list *child;
+	uid_t uid;
+	gid_t gid;
+	int status;
+
+	if (!rec_dir_init)
+		return 0;
+
+	nfs4_save_user(&uid, &gid);
+
+	filp = dentry_open(dget(dir), mntget(rec_dir.mnt),
+			O_RDWR);
+	status = PTR_ERR(filp);
+	if (IS_ERR(filp))
+		goto out;
+	INIT_LIST_HEAD(dentries);
+	status = vfs_readdir(filp, nfsd4_build_dentrylist, &dla);
+	fput(filp);
+	while (!list_empty(dentries)) {
+		child = list_entry(dentries->next, struct dentry_list, list);
+		status = f(dir, child->dentry);
+		if (status)
+			goto out;
+		list_del(&child->list);
+		dput(child->dentry);
+		kfree(child);
+	}
+out:
+	while (!list_empty(dentries)) {
+		child = list_entry(dentries->next, struct dentry_list, list);
+		list_del(&child->list);
+		dput(child->dentry);
+		kfree(child);
+	}
+	nfs4_reset_user(uid, gid);
+	return status;
+}
+
+static int
+nfsd4_remove_clid_file(struct dentry *dir, struct dentry *dentry)
+{
+	int status;
+
+	if (!S_ISREG(dir->d_inode->i_mode)) {
+		printk("nfsd4: non-file found in client recovery directory\n");
+		return -EINVAL;
+	}
+	down(&dir->d_inode->i_sem);
+	status = vfs_unlink(dir->d_inode, dentry);
+	up(&dir->d_inode->i_sem);
+	return status;
+}
+
+static int
+nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry)
+{
+	int status;
+
+	/* For now this directory should already be empty, but we empty it of
+	 * any regular files anyway, just in case the directory was created by
+	 * a kernel from the future.... */
+	nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file);
+	down(&dir->d_inode->i_sem);
+	status = vfs_rmdir(dir->d_inode, dentry);
+	up(&dir->d_inode->i_sem);
+	return status;
+}
+
+static int
+nfsd4_unlink_clid_dir(char *name, int namlen)
+{
+	struct dentry *dentry;
+	int status;
+
+	dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
+
+	dentry = lookup_one_len(name, rec_dir.dentry, namlen);
+	if (IS_ERR(dentry)) {
+		status = PTR_ERR(dentry);
+		return status;
+	}
+	status = -ENOENT;
+	if (!dentry->d_inode)
+		goto out;
+
+	status = nfsd4_clear_clid_dir(rec_dir.dentry, dentry);
+out:
+	dput(dentry);
+	return status;
+}
+
+void
+nfsd4_remove_clid_dir(struct nfs4_client *clp)
+{
+	uid_t uid;
+	gid_t gid;
+	int status;
+
+	if (!rec_dir_init || !clp->cl_firststate)
+		return;
+
+	nfs4_save_user(&uid, &gid);
+	status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1);
+	nfs4_reset_user(uid, gid);
+	if (status == 0)
+		status = nfsd4_rec_fsync(rec_dir.dentry);
+	if (status)
+		printk("NFSD: Failed to remove expired client state directory"
+				" %.*s\n", HEXDIR_LEN, clp->cl_recdir);
+	return;
+}
+
+static int
+purge_old(struct dentry *parent, struct dentry *child)
+{
+	int status;
+
+	if (nfs4_has_reclaimed_state(child->d_name.name))
+		return nfs_ok;
+
+	status = nfsd4_clear_clid_dir(parent, child);
+	if (status)
+		printk("failed to remove client recovery directory %s\n",
+				child->d_name.name);
+	/* Keep trying, success or failure: */
+	return nfs_ok;
+}
+
+void
+nfsd4_recdir_purge_old(void) {
+	int status;
+
+	if (!rec_dir_init)
+		return;
+	status = nfsd4_list_rec_dir(rec_dir.dentry, purge_old);
+	if (status == 0)
+		status = nfsd4_rec_fsync(rec_dir.dentry);
+	if (status)
+		printk("nfsd4: failed to purge old clients from recovery"
+			" directory %s\n", rec_dir.dentry->d_name.name);
+	return;
+}
+
+static int
+load_recdir(struct dentry *parent, struct dentry *child)
+{
+	if (child->d_name.len != HEXDIR_LEN - 1) {
+		printk("nfsd4: illegal name %s in recovery directory\n",
+				child->d_name.name);
+		/* Keep trying; maybe the others are OK: */
+		return nfs_ok;
+	}
+	nfs4_client_to_reclaim(child->d_name.name);
+	return nfs_ok;
+}
+
+int
+nfsd4_recdir_load(void) {
+	int status;
+
+	status = nfsd4_list_rec_dir(rec_dir.dentry, load_recdir);
+	if (status)
+		printk("nfsd4: failed loading clients from recovery"
+			" directory %s\n", rec_dir.dentry->d_name.name);
+	return status;
+}
+
+/*
+ * Hold reference to the recovery directory.
+ */
+
+void
+nfsd4_init_recdir(char *rec_dirname)
+{
+	uid_t			uid = 0;
+	gid_t			gid = 0;
+	int 			status;
+
+	printk("NFSD: Using %s as the NFSv4 state recovery directory\n",
+			rec_dirname);
+
+	BUG_ON(rec_dir_init);
+
+	nfs4_save_user(&uid, &gid);
+
+	status = path_lookup(rec_dirname, LOOKUP_FOLLOW, &rec_dir);
+	if (status == -ENOENT)
+		printk("NFSD: recovery directory %s doesn't exist\n",
+				rec_dirname);
+
+	if (!status)
+		rec_dir_init = 1;
+	nfs4_reset_user(uid, gid);
+}
+
+void
+nfsd4_shutdown_recdir(void)
+{
+	if (!rec_dir_init)
+		return;
+	rec_dir_init = 0;
+	path_release(&rec_dir);
+}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 75e8b137580c..89e36526d7f2 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -48,39 +48,32 @@
 #include <linux/nfs4.h>
 #include <linux/nfsd/state.h>
 #include <linux/nfsd/xdr4.h>
+#include <linux/namei.h>
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
 /* Globals */
 static time_t lease_time = 90;     /* default lease time */
-static time_t old_lease_time = 90; /* past incarnation lease time */
-static u32 nfs4_reclaim_init = 0;
-time_t boot_time;
-static time_t grace_end = 0;
+static time_t user_lease_time = 90;
+static time_t boot_time;
+static int in_grace = 1;
 static u32 current_clientid = 1;
 static u32 current_ownerid = 1;
 static u32 current_fileid = 1;
 static u32 current_delegid = 1;
 static u32 nfs4_init;
-stateid_t zerostateid;             /* bits all 0 */
-stateid_t onestateid;              /* bits all 1 */
-
-/* debug counters */
-u32 list_add_perfile = 0; 
-u32 list_del_perfile = 0;
-u32 add_perclient = 0;
-u32 del_perclient = 0;
-u32 alloc_file = 0;
-u32 free_file = 0;
-u32 vfsopen = 0;
-u32 vfsclose = 0;
-u32 alloc_delegation= 0;
-u32 free_delegation= 0;
+static stateid_t zerostateid;             /* bits all 0 */
+static stateid_t onestateid;              /* bits all 1 */
+
+#define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t)))
+#define ONE_STATEID(stateid)  (!memcmp((stateid), &onestateid, sizeof(stateid_t)))
 
 /* forward declarations */
-struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
+static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
 static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid);
 static void release_stateid_lockowners(struct nfs4_stateid *open_stp);
+static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
+static void nfs4_set_recdir(char *recdir);
 
 /* Locking:
  *
@@ -90,6 +83,11 @@ static void release_stateid_lockowners(struct nfs4_stateid *open_stp);
  */
 static DECLARE_MUTEX(client_sema);
 
+static kmem_cache_t *stateowner_slab = NULL;
+static kmem_cache_t *file_slab = NULL;
+static kmem_cache_t *stateid_slab = NULL;
+static kmem_cache_t *deleg_slab = NULL;
+
 void
 nfs4_lock_state(void)
 {
@@ -118,16 +116,36 @@ opaque_hashval(const void *ptr, int nbytes)
 /* forward declarations */
 static void release_stateowner(struct nfs4_stateowner *sop);
 static void release_stateid(struct nfs4_stateid *stp, int flags);
-static void release_file(struct nfs4_file *fp);
 
 /*
  * Delegation state
  */
 
 /* recall_lock protects the del_recall_lru */
-spinlock_t recall_lock;
+static spinlock_t recall_lock = SPIN_LOCK_UNLOCKED;
 static struct list_head del_recall_lru;
 
+static void
+free_nfs4_file(struct kref *kref)
+{
+	struct nfs4_file *fp = container_of(kref, struct nfs4_file, fi_ref);
+	list_del(&fp->fi_hash);
+	iput(fp->fi_inode);
+	kmem_cache_free(file_slab, fp);
+}
+
+static inline void
+put_nfs4_file(struct nfs4_file *fi)
+{
+	kref_put(&fi->fi_ref, free_nfs4_file);
+}
+
+static inline void
+get_nfs4_file(struct nfs4_file *fi)
+{
+	kref_get(&fi->fi_ref);
+}
+
 static struct nfs4_delegation *
 alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type)
 {
@@ -136,13 +154,14 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
 	struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback;
 
 	dprintk("NFSD alloc_init_deleg\n");
-	if ((dp = kmalloc(sizeof(struct nfs4_delegation),
-		GFP_KERNEL)) == NULL)
+	dp = kmem_cache_alloc(deleg_slab, GFP_KERNEL);
+	if (dp == NULL)
 		return dp;
-	INIT_LIST_HEAD(&dp->dl_del_perfile);
-	INIT_LIST_HEAD(&dp->dl_del_perclnt);
+	INIT_LIST_HEAD(&dp->dl_perfile);
+	INIT_LIST_HEAD(&dp->dl_perclnt);
 	INIT_LIST_HEAD(&dp->dl_recall_lru);
 	dp->dl_client = clp;
+	get_nfs4_file(fp);
 	dp->dl_file = fp;
 	dp->dl_flock = NULL;
 	get_file(stp->st_vfs_file);
@@ -160,9 +179,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
 		        current_fh->fh_handle.fh_size);
 	dp->dl_time = 0;
 	atomic_set(&dp->dl_count, 1);
-	list_add(&dp->dl_del_perfile, &fp->fi_del_perfile);
-	list_add(&dp->dl_del_perclnt, &clp->cl_del_perclnt);
-	alloc_delegation++;
+	list_add(&dp->dl_perfile, &fp->fi_delegations);
+	list_add(&dp->dl_perclnt, &clp->cl_delegations);
 	return dp;
 }
 
@@ -171,8 +189,8 @@ nfs4_put_delegation(struct nfs4_delegation *dp)
 {
 	if (atomic_dec_and_test(&dp->dl_count)) {
 		dprintk("NFSD: freeing dp %p\n",dp);
-		kfree(dp);
-		free_delegation++;
+		put_nfs4_file(dp->dl_file);
+		kmem_cache_free(deleg_slab, dp);
 	}
 }
 
@@ -193,15 +211,14 @@ nfs4_close_delegation(struct nfs4_delegation *dp)
 	if (dp->dl_flock)
 		setlease(filp, F_UNLCK, &dp->dl_flock);
 	nfsd_close(filp);
-	vfsclose++;
 }
 
 /* Called under the state lock. */
 static void
 unhash_delegation(struct nfs4_delegation *dp)
 {
-	list_del_init(&dp->dl_del_perfile);
-	list_del_init(&dp->dl_del_perclnt);
+	list_del_init(&dp->dl_perfile);
+	list_del_init(&dp->dl_perclnt);
 	spin_lock(&recall_lock);
 	list_del_init(&dp->dl_recall_lru);
 	spin_unlock(&recall_lock);
@@ -220,8 +237,8 @@ unhash_delegation(struct nfs4_delegation *dp)
 
 #define clientid_hashval(id) \
 	((id) & CLIENT_HASH_MASK)
-#define clientstr_hashval(name, namelen) \
-	(opaque_hashval((name), (namelen)) & CLIENT_HASH_MASK)
+#define clientstr_hashval(name) \
+	(opaque_hashval((name), 8) & CLIENT_HASH_MASK)
 /*
  * reclaim_str_hashtbl[] holds known client info from previous reset/reboot
  * used in reboot/reset lease grace period processing
@@ -331,11 +348,11 @@ expire_client(struct nfs4_client *clp)
 
 	INIT_LIST_HEAD(&reaplist);
 	spin_lock(&recall_lock);
-	while (!list_empty(&clp->cl_del_perclnt)) {
-		dp = list_entry(clp->cl_del_perclnt.next, struct nfs4_delegation, dl_del_perclnt);
+	while (!list_empty(&clp->cl_delegations)) {
+		dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
 		dprintk("NFSD: expire client. dp %p, fp %p\n", dp,
 				dp->dl_flock);
-		list_del_init(&dp->dl_del_perclnt);
+		list_del_init(&dp->dl_perclnt);
 		list_move(&dp->dl_recall_lru, &reaplist);
 	}
 	spin_unlock(&recall_lock);
@@ -347,26 +364,26 @@ expire_client(struct nfs4_client *clp)
 	list_del(&clp->cl_idhash);
 	list_del(&clp->cl_strhash);
 	list_del(&clp->cl_lru);
-	while (!list_empty(&clp->cl_perclient)) {
-		sop = list_entry(clp->cl_perclient.next, struct nfs4_stateowner, so_perclient);
+	while (!list_empty(&clp->cl_openowners)) {
+		sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient);
 		release_stateowner(sop);
 	}
 	put_nfs4_client(clp);
 }
 
 static struct nfs4_client *
-create_client(struct xdr_netobj name) {
+create_client(struct xdr_netobj name, char *recdir) {
 	struct nfs4_client *clp;
 
 	if (!(clp = alloc_client(name)))
 		goto out;
+	memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
 	atomic_set(&clp->cl_count, 1);
 	atomic_set(&clp->cl_callback.cb_set, 0);
-	clp->cl_callback.cb_parsed = 0;
 	INIT_LIST_HEAD(&clp->cl_idhash);
 	INIT_LIST_HEAD(&clp->cl_strhash);
-	INIT_LIST_HEAD(&clp->cl_perclient);
-	INIT_LIST_HEAD(&clp->cl_del_perclnt);
+	INIT_LIST_HEAD(&clp->cl_openowners);
+	INIT_LIST_HEAD(&clp->cl_delegations);
 	INIT_LIST_HEAD(&clp->cl_lru);
 out:
 	return clp;
@@ -392,11 +409,9 @@ copy_cred(struct svc_cred *target, struct svc_cred *source) {
 	get_group_info(target->cr_group_info);
 }
 
-static int
-cmp_name(struct xdr_netobj *n1, struct xdr_netobj *n2) {
-	if (!n1 || !n2)
-		return 0;
-	return((n1->len == n2->len) && !memcmp(n1->data, n2->data, n2->len));
+static inline int
+same_name(const char *n1, const char *n2) {
+	return 0 == memcmp(n1, n2, HEXDIR_LEN);
 }
 
 static int
@@ -446,7 +461,7 @@ check_name(struct xdr_netobj name) {
 	return 1;
 }
 
-void
+static void
 add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval)
 {
 	unsigned int idhashval;
@@ -458,7 +473,7 @@ add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval)
 	clp->cl_time = get_seconds();
 }
 
-void
+static void
 move_to_confirmed(struct nfs4_client *clp)
 {
 	unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id);
@@ -468,8 +483,7 @@ move_to_confirmed(struct nfs4_client *clp)
 	list_del_init(&clp->cl_strhash);
 	list_del_init(&clp->cl_idhash);
 	list_add(&clp->cl_idhash, &conf_id_hashtbl[idhashval]);
-	strhashval = clientstr_hashval(clp->cl_name.data, 
-			clp->cl_name.len);
+	strhashval = clientstr_hashval(clp->cl_recdir);
 	list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]);
 	renew_client(clp);
 }
@@ -500,6 +514,30 @@ find_unconfirmed_client(clientid_t *clid)
 	return NULL;
 }
 
+static struct nfs4_client *
+find_confirmed_client_by_str(const char *dname, unsigned int hashval)
+{
+	struct nfs4_client *clp;
+
+	list_for_each_entry(clp, &conf_str_hashtbl[hashval], cl_strhash) {
+		if (same_name(clp->cl_recdir, dname))
+			return clp;
+	}
+	return NULL;
+}
+
+static struct nfs4_client *
+find_unconfirmed_client_by_str(const char *dname, unsigned int hashval)
+{
+	struct nfs4_client *clp;
+
+	list_for_each_entry(clp, &unconf_str_hashtbl[hashval], cl_strhash) {
+		if (same_name(clp->cl_recdir, dname))
+			return clp;
+	}
+	return NULL;
+}
+
 /* a helper function for parse_callback */
 static int
 parse_octet(unsigned int *lenp, char **addrp)
@@ -534,7 +572,7 @@ parse_octet(unsigned int *lenp, char **addrp)
 }
 
 /* parse and set the setclientid ipv4 callback address */
-int
+static int
 parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigned short *cbportp)
 {
 	int temp = 0;
@@ -570,7 +608,7 @@ parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigne
 	return 1;
 }
 
-void
+static void
 gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
 {
 	struct nfs4_callback *cb = &clp->cl_callback;
@@ -584,14 +622,12 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
 		goto out_err;
 	cb->cb_prog = se->se_callback_prog;
 	cb->cb_ident = se->se_callback_ident;
-	cb->cb_parsed = 1;
 	return;
 out_err:
 	printk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
 		"will not receive delegations\n",
 		clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
 
-	cb->cb_parsed = 0;
 	return;
 }
 
@@ -638,59 +674,43 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 	};
 	nfs4_verifier		clverifier = setclid->se_verf;
 	unsigned int 		strhashval;
-	struct nfs4_client *	conf, * unconf, * new, * clp;
+	struct nfs4_client	*conf, *unconf, *new;
 	int 			status;
+	char                    dname[HEXDIR_LEN];
 	
 	status = nfserr_inval;
 	if (!check_name(clname))
 		goto out;
 
+	status = nfs4_make_rec_clidname(dname, &clname);
+	if (status)
+		goto out;
+
 	/* 
 	 * XXX The Duplicate Request Cache (DRC) has been checked (??)
 	 * We get here on a DRC miss.
 	 */
 
-	strhashval = clientstr_hashval(clname.data, clname.len);
+	strhashval = clientstr_hashval(dname);
 
-	conf = NULL;
 	nfs4_lock_state();
-	list_for_each_entry(clp, &conf_str_hashtbl[strhashval], cl_strhash) {
-		if (!cmp_name(&clp->cl_name, &clname))
-			continue;
+	conf = find_confirmed_client_by_str(dname, strhashval);
+	if (conf) {
 		/* 
 		 * CASE 0:
 		 * clname match, confirmed, different principal
 		 * or different ip_address
 		 */
 		status = nfserr_clid_inuse;
-		if (!cmp_creds(&clp->cl_cred,&rqstp->rq_cred)) {
-			printk("NFSD: setclientid: string in use by client"
-			"(clientid %08x/%08x)\n",
-			clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
-			goto out;
-		}
-		if (clp->cl_addr != ip_addr) { 
+		if (!cmp_creds(&conf->cl_cred, &rqstp->rq_cred)
+				|| conf->cl_addr != ip_addr) {
 			printk("NFSD: setclientid: string in use by client"
 			"(clientid %08x/%08x)\n",
-			clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
+			conf->cl_clientid.cl_boot, conf->cl_clientid.cl_id);
 			goto out;
 		}
-
-		/* 
-	 	 * cl_name match from a previous SETCLIENTID operation
-	 	 * XXX check for additional matches?
-		 */
-		conf = clp;
-		break;
-	}
-	unconf = NULL;
-	list_for_each_entry(clp, &unconf_str_hashtbl[strhashval], cl_strhash) {
-		if (!cmp_name(&clp->cl_name, &clname))
-			continue;
-		/* cl_name match from a previous SETCLIENTID operation */
-		unconf = clp;
-		break;
 	}
+	unconf = find_unconfirmed_client_by_str(dname, strhashval);
 	status = nfserr_resource;
 	if (!conf) {
 		/* 
@@ -699,7 +719,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 		 */
 		if (unconf)
 			expire_client(unconf);
-		if (!(new = create_client(clname)))
+		new = create_client(clname, dname);
+		if (new == NULL)
 			goto out;
 		copy_verf(new, &clverifier);
 		new->cl_addr = ip_addr;
@@ -722,12 +743,16 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 		 * nfs4_client,  but with the new callback info and a 
 		 * new cl_confirm
 		 */
-		if ((unconf) && 
-		    cmp_verf(&unconf->cl_verifier, &conf->cl_verifier) &&
-		     cmp_clid(&unconf->cl_clientid, &conf->cl_clientid)) {
-				expire_client(unconf);
+		if (unconf) {
+			/* Note this is removing unconfirmed {*x***},
+			 * which is stronger than RFC recommended {vxc**}.
+			 * This has the advantage that there is at most
+			 * one {*x***} in either list at any time.
+			 */
+			expire_client(unconf);
 		}
-		if (!(new = create_client(clname)))
+		new = create_client(clname, dname);
+		if (new == NULL)
 			goto out;
 		copy_verf(new,&conf->cl_verifier);
 		new->cl_addr = ip_addr;
@@ -745,7 +770,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 		 * using input clverifier, clname, and callback info
 		 * and generate a new cl_clientid and cl_confirm.
 		 */
-		if (!(new = create_client(clname)))
+		new = create_client(clname, dname);
+		if (new == NULL)
 			goto out;
 		copy_verf(new,&clverifier);
 		new->cl_addr = ip_addr;
@@ -771,7 +797,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 		 * new cl_verifier and a new cl_confirm
 		 */
 		expire_client(unconf);
-		if (!(new = create_client(clname)))
+		new = create_client(clname, dname);
+		if (new == NULL)
 			goto out;
 		copy_verf(new,&clverifier);
 		new->cl_addr = ip_addr;
@@ -807,7 +834,7 @@ int
 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confirm *setclientid_confirm)
 {
 	u32 ip_addr = rqstp->rq_addr.sin_addr.s_addr;
-	struct nfs4_client *clp, *conf = NULL, *unconf = NULL;
+	struct nfs4_client *conf, *unconf;
 	nfs4_verifier confirm = setclientid_confirm->sc_confirm; 
 	clientid_t * clid = &setclientid_confirm->sc_clientid;
 	int status;
@@ -820,102 +847,90 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 	 */
 
 	nfs4_lock_state();
-	clp = find_confirmed_client(clid);
-	if (clp) {
-		status = nfserr_inval;
-		/* 
-		 * Found a record for this clientid. If the IP addresses
-		 * don't match, return ERR_INVAL just as if the record had
-		 * not been found.
-		 */
-		if (clp->cl_addr != ip_addr) { 
-			printk("NFSD: setclientid: string in use by client"
-			"(clientid %08x/%08x)\n",
-			clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
-			goto out;
-		}
-		conf = clp;
-	}
-	clp = find_unconfirmed_client(clid);
-	if (clp) {
-		status = nfserr_inval;
-		if (clp->cl_addr != ip_addr) { 
-			printk("NFSD: setclientid: string in use by client"
-			"(clientid %08x/%08x)\n",
-			clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
-			goto out;
-		}
-		unconf = clp;
-	}
-	/* CASE 1: 
-	* unconf record that matches input clientid and input confirm.
-	* conf record that matches input clientid.
-	* conf  and unconf records match names, verifiers 
-	*/
+
+	conf = find_confirmed_client(clid);
+	unconf = find_unconfirmed_client(clid);
+
+	status = nfserr_clid_inuse;
+	if (conf && conf->cl_addr != ip_addr)
+		goto out;
+	if (unconf && unconf->cl_addr != ip_addr)
+		goto out;
+
 	if ((conf && unconf) && 
 	    (cmp_verf(&unconf->cl_confirm, &confirm)) &&
 	    (cmp_verf(&conf->cl_verifier, &unconf->cl_verifier)) &&
-	    (cmp_name(&conf->cl_name,&unconf->cl_name))  &&
+	    (same_name(conf->cl_recdir,unconf->cl_recdir))  &&
 	    (!cmp_verf(&conf->cl_confirm, &unconf->cl_confirm))) {
+		/* CASE 1:
+		* unconf record that matches input clientid and input confirm.
+		* conf record that matches input clientid.
+		* conf and unconf records match names, verifiers
+		*/
 		if (!cmp_creds(&conf->cl_cred, &unconf->cl_cred)) 
 			status = nfserr_clid_inuse;
 		else {
-			expire_client(conf);
-			clp = unconf;
-			move_to_confirmed(unconf);
+			/* XXX: We just turn off callbacks until we can handle
+			  * change request correctly. */
+			atomic_set(&conf->cl_callback.cb_set, 0);
+			gen_confirm(conf);
+			expire_client(unconf);
 			status = nfs_ok;
+
 		}
-		goto out;
-	} 
-	/* CASE 2:
-	 * conf record that matches input clientid.
-	 * if unconf record that matches input clientid, then unconf->cl_name
-	 * or unconf->cl_verifier don't match the conf record.
-	 */
-	if ((conf && !unconf) || 
+	} else if ((conf && !unconf) ||
 	    ((conf && unconf) && 
 	     (!cmp_verf(&conf->cl_verifier, &unconf->cl_verifier) ||
-	      !cmp_name(&conf->cl_name, &unconf->cl_name)))) {
-		if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred)) {
+	      !same_name(conf->cl_recdir, unconf->cl_recdir)))) {
+		/* CASE 2:
+		 * conf record that matches input clientid.
+		 * if unconf record matches input clientid, then
+		 * unconf->cl_name or unconf->cl_verifier don't match the
+		 * conf record.
+		 */
+		if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred))
 			status = nfserr_clid_inuse;
-		} else {
-			clp = conf;
+		else
 			status = nfs_ok;
-		}
-		goto out;
-	}
-	/* CASE 3:
-	 * conf record not found.
-	 * unconf record found. 
-	 * unconf->cl_confirm matches input confirm
-	 */ 
-	if (!conf && unconf && cmp_verf(&unconf->cl_confirm, &confirm)) {
+	} else if (!conf && unconf
+			&& cmp_verf(&unconf->cl_confirm, &confirm)) {
+		/* CASE 3:
+		 * conf record not found.
+		 * unconf record found.
+		 * unconf->cl_confirm matches input confirm
+		 */
 		if (!cmp_creds(&unconf->cl_cred, &rqstp->rq_cred)) {
 			status = nfserr_clid_inuse;
 		} else {
-			status = nfs_ok;
-			clp = unconf;
+			unsigned int hash =
+				clientstr_hashval(unconf->cl_recdir);
+			conf = find_confirmed_client_by_str(unconf->cl_recdir,
+									hash);
+			if (conf) {
+				nfsd4_remove_clid_dir(conf);
+				expire_client(conf);
+			}
 			move_to_confirmed(unconf);
+			conf = unconf;
+			status = nfs_ok;
 		}
-		goto out;
-	}
-	/* CASE 4:
-	 * conf record not found, or if conf, then conf->cl_confirm does not
-	 * match input confirm.
-	 * unconf record not found, or if unconf, then unconf->cl_confirm 
-	 * does not match input confirm.
-	 */
-	if ((!conf || (conf && !cmp_verf(&conf->cl_confirm, &confirm))) &&
-	    (!unconf || (unconf && !cmp_verf(&unconf->cl_confirm, &confirm)))) {
+	} else if ((!conf || (conf && !cmp_verf(&conf->cl_confirm, &confirm)))
+	    && (!unconf || (unconf && !cmp_verf(&unconf->cl_confirm,
+				    				&confirm)))) {
+		/* CASE 4:
+		 * conf record not found, or if conf, conf->cl_confirm does not
+		 * match input confirm.
+		 * unconf record not found, or if unconf, unconf->cl_confirm
+		 * does not match input confirm.
+		 */
 		status = nfserr_stale_clientid;
-		goto out;
+	} else {
+		/* check that we have hit one of the cases...*/
+		status = nfserr_clid_inuse;
 	}
-	/* check that we have hit one of the cases...*/
-	status = nfserr_inval;
-	goto out;
 out:
 	if (!status)
-		nfsd4_probe_callback(clp);
+		nfsd4_probe_callback(conf);
 	nfs4_unlock_state();
 	return status;
 }
@@ -961,60 +976,65 @@ alloc_init_file(struct inode *ino)
 	struct nfs4_file *fp;
 	unsigned int hashval = file_hashval(ino);
 
-	if ((fp = kmalloc(sizeof(struct nfs4_file),GFP_KERNEL))) {
+	fp = kmem_cache_alloc(file_slab, GFP_KERNEL);
+	if (fp) {
+		kref_init(&fp->fi_ref);
 		INIT_LIST_HEAD(&fp->fi_hash);
-		INIT_LIST_HEAD(&fp->fi_perfile);
-		INIT_LIST_HEAD(&fp->fi_del_perfile);
+		INIT_LIST_HEAD(&fp->fi_stateids);
+		INIT_LIST_HEAD(&fp->fi_delegations);
 		list_add(&fp->fi_hash, &file_hashtbl[hashval]);
 		fp->fi_inode = igrab(ino);
 		fp->fi_id = current_fileid++;
-		alloc_file++;
 		return fp;
 	}
 	return NULL;
 }
 
 static void
-release_all_files(void)
+nfsd4_free_slab(kmem_cache_t **slab)
 {
-	int i;
-	struct nfs4_file *fp;
+	int status;
 
-	for (i=0;i<FILE_HASH_SIZE;i++) {
-		while (!list_empty(&file_hashtbl[i])) {
-			fp = list_entry(file_hashtbl[i].next, struct nfs4_file, fi_hash);
-			/* this should never be more than once... */
-			if (!list_empty(&fp->fi_perfile) || !list_empty(&fp->fi_del_perfile)) {
-				printk("ERROR: release_all_files: file %p is open, creating dangling state !!!\n",fp);
-			}
-			release_file(fp);
-		}
-	}
+	if (*slab == NULL)
+		return;
+	status = kmem_cache_destroy(*slab);
+	*slab = NULL;
+	WARN_ON(status);
 }
 
-kmem_cache_t *stateowner_slab = NULL;
+static void
+nfsd4_free_slabs(void)
+{
+	nfsd4_free_slab(&stateowner_slab);
+	nfsd4_free_slab(&file_slab);
+	nfsd4_free_slab(&stateid_slab);
+	nfsd4_free_slab(&deleg_slab);
+}
 
 static int
 nfsd4_init_slabs(void)
 {
 	stateowner_slab = kmem_cache_create("nfsd4_stateowners",
 			sizeof(struct nfs4_stateowner), 0, 0, NULL, NULL);
-	if (stateowner_slab == NULL) {
-		dprintk("nfsd4: out of memory while initializing nfsv4\n");
-		return -ENOMEM;
-	}
+	if (stateowner_slab == NULL)
+		goto out_nomem;
+	file_slab = kmem_cache_create("nfsd4_files",
+			sizeof(struct nfs4_file), 0, 0, NULL, NULL);
+	if (file_slab == NULL)
+		goto out_nomem;
+	stateid_slab = kmem_cache_create("nfsd4_stateids",
+			sizeof(struct nfs4_stateid), 0, 0, NULL, NULL);
+	if (stateid_slab == NULL)
+		goto out_nomem;
+	deleg_slab = kmem_cache_create("nfsd4_delegations",
+			sizeof(struct nfs4_delegation), 0, 0, NULL, NULL);
+	if (deleg_slab == NULL)
+		goto out_nomem;
 	return 0;
-}
-
-static void
-nfsd4_free_slabs(void)
-{
-	int status = 0;
-
-	if (stateowner_slab)
-		status = kmem_cache_destroy(stateowner_slab);
-	stateowner_slab = NULL;
-	BUG_ON(status);
+out_nomem:
+	nfsd4_free_slabs();
+	dprintk("nfsd4: out of memory while initializing nfsv4\n");
+	return -ENOMEM;
 }
 
 void
@@ -1055,14 +1075,13 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
 	INIT_LIST_HEAD(&sop->so_idhash);
 	INIT_LIST_HEAD(&sop->so_strhash);
 	INIT_LIST_HEAD(&sop->so_perclient);
-	INIT_LIST_HEAD(&sop->so_perfilestate);
-	INIT_LIST_HEAD(&sop->so_perlockowner);  /* not used */
+	INIT_LIST_HEAD(&sop->so_stateids);
+	INIT_LIST_HEAD(&sop->so_perstateid);  /* not used */
 	INIT_LIST_HEAD(&sop->so_close_lru);
 	sop->so_time = 0;
 	list_add(&sop->so_idhash, &ownerid_hashtbl[idhashval]);
 	list_add(&sop->so_strhash, &ownerstr_hashtbl[strhashval]);
-	list_add(&sop->so_perclient, &clp->cl_perclient);
-	add_perclient++;
+	list_add(&sop->so_perclient, &clp->cl_openowners);
 	sop->so_is_open_owner = 1;
 	sop->so_id = current_ownerid++;
 	sop->so_client = clp;
@@ -1080,10 +1099,10 @@ release_stateid_lockowners(struct nfs4_stateid *open_stp)
 {
 	struct nfs4_stateowner *lock_sop;
 
-	while (!list_empty(&open_stp->st_perlockowner)) {
-		lock_sop = list_entry(open_stp->st_perlockowner.next,
-				struct nfs4_stateowner, so_perlockowner);
-		/* list_del(&open_stp->st_perlockowner);  */
+	while (!list_empty(&open_stp->st_lockowners)) {
+		lock_sop = list_entry(open_stp->st_lockowners.next,
+				struct nfs4_stateowner, so_perstateid);
+		/* list_del(&open_stp->st_lockowners);  */
 		BUG_ON(lock_sop->so_is_open_owner);
 		release_stateowner(lock_sop);
 	}
@@ -1096,14 +1115,12 @@ unhash_stateowner(struct nfs4_stateowner *sop)
 
 	list_del(&sop->so_idhash);
 	list_del(&sop->so_strhash);
-	if (sop->so_is_open_owner) {
+	if (sop->so_is_open_owner)
 		list_del(&sop->so_perclient);
-		del_perclient++;
-	}
-	list_del(&sop->so_perlockowner);
-	while (!list_empty(&sop->so_perfilestate)) {
-		stp = list_entry(sop->so_perfilestate.next, 
-			struct nfs4_stateid, st_perfilestate);
+	list_del(&sop->so_perstateid);
+	while (!list_empty(&sop->so_stateids)) {
+		stp = list_entry(sop->so_stateids.next,
+			struct nfs4_stateid, st_perstateowner);
 		if (sop->so_is_open_owner)
 			release_stateid(stp, OPEN_STATE);
 		else
@@ -1125,14 +1142,14 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *
 	unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id);
 
 	INIT_LIST_HEAD(&stp->st_hash);
-	INIT_LIST_HEAD(&stp->st_perfilestate);
-	INIT_LIST_HEAD(&stp->st_perlockowner);
+	INIT_LIST_HEAD(&stp->st_perstateowner);
+	INIT_LIST_HEAD(&stp->st_lockowners);
 	INIT_LIST_HEAD(&stp->st_perfile);
 	list_add(&stp->st_hash, &stateid_hashtbl[hashval]);
-	list_add(&stp->st_perfilestate, &sop->so_perfilestate);
-	list_add_perfile++;
-	list_add(&stp->st_perfile, &fp->fi_perfile);
+	list_add(&stp->st_perstateowner, &sop->so_stateids);
+	list_add(&stp->st_perfile, &fp->fi_stateids);
 	stp->st_stateowner = sop;
+	get_nfs4_file(fp);
 	stp->st_file = fp;
 	stp->st_stateid.si_boot = boot_time;
 	stp->st_stateid.si_stateownerid = sop->so_id;
@@ -1150,30 +1167,20 @@ release_stateid(struct nfs4_stateid *stp, int flags)
 	struct file *filp = stp->st_vfs_file;
 
 	list_del(&stp->st_hash);
-	list_del_perfile++;
 	list_del(&stp->st_perfile);
-	list_del(&stp->st_perfilestate);
+	list_del(&stp->st_perstateowner);
 	if (flags & OPEN_STATE) {
 		release_stateid_lockowners(stp);
 		stp->st_vfs_file = NULL;
 		nfsd_close(filp);
-		vfsclose++;
 	} else if (flags & LOCK_STATE)
 		locks_remove_posix(filp, (fl_owner_t) stp->st_stateowner);
-	kfree(stp);
+	put_nfs4_file(stp->st_file);
+	kmem_cache_free(stateid_slab, stp);
 	stp = NULL;
 }
 
 static void
-release_file(struct nfs4_file *fp)
-{
-	free_file++;
-	list_del(&fp->fi_hash);
-	iput(fp->fi_inode);
-	kfree(fp);
-}	
-
-void
 move_to_close_lru(struct nfs4_stateowner *sop)
 {
 	dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop);
@@ -1183,11 +1190,10 @@ move_to_close_lru(struct nfs4_stateowner *sop)
 	sop->so_time = get_seconds();
 }
 
-void
+static void
 release_state_owner(struct nfs4_stateid *stp, int flag)
 {
 	struct nfs4_stateowner *sop = stp->st_stateowner;
-	struct nfs4_file *fp = stp->st_file;
 
 	dprintk("NFSD: release_state_owner\n");
 	release_stateid(stp, flag);
@@ -1196,12 +1202,8 @@ release_state_owner(struct nfs4_stateid *stp, int flag)
 	 * released by the laundromat service after the lease period
 	 * to enable us to handle CLOSE replay
 	 */
-	if (sop->so_confirmed && list_empty(&sop->so_perfilestate))
+	if (sop->so_confirmed && list_empty(&sop->so_stateids))
 		move_to_close_lru(sop);
-	/* unused nfs4_file's are releseed. XXX slab cache? */
-	if (list_empty(&fp->fi_perfile) && list_empty(&fp->fi_del_perfile)) {
-		release_file(fp);
-	}
 }
 
 static int
@@ -1231,8 +1233,10 @@ find_file(struct inode *ino)
 	struct nfs4_file *fp;
 
 	list_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) {
-		if (fp->fi_inode == ino)
+		if (fp->fi_inode == ino) {
+			get_nfs4_file(fp);
 			return fp;
+		}
 	}
 	return NULL;
 }
@@ -1240,7 +1244,7 @@ find_file(struct inode *ino)
 #define TEST_ACCESS(x) ((x > 0 || x < 4)?1:0)
 #define TEST_DENY(x) ((x >= 0 || x < 5)?1:0)
 
-void
+static void
 set_access(unsigned int *access, unsigned long bmap) {
 	int i;
 
@@ -1251,7 +1255,7 @@ set_access(unsigned int *access, unsigned long bmap) {
 	}
 }
 
-void
+static void
 set_deny(unsigned int *deny, unsigned long bmap) {
 	int i;
 
@@ -1277,25 +1281,30 @@ test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) {
  * Called to check deny when READ with all zero stateid or
  * WRITE with all zero or all one stateid
  */
-int
+static int
 nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
 {
 	struct inode *ino = current_fh->fh_dentry->d_inode;
 	struct nfs4_file *fp;
 	struct nfs4_stateid *stp;
+	int ret;
 
 	dprintk("NFSD: nfs4_share_conflict\n");
 
 	fp = find_file(ino);
-	if (fp) {
+	if (!fp)
+		return nfs_ok;
+	ret = nfserr_share_denied;
 	/* Search for conflicting share reservations */
-		list_for_each_entry(stp, &fp->fi_perfile, st_perfile) {
-			if (test_bit(deny_type, &stp->st_deny_bmap) ||
-			    test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap))
-				return nfserr_share_denied;
-		}
+	list_for_each_entry(stp, &fp->fi_stateids, st_perfile) {
+		if (test_bit(deny_type, &stp->st_deny_bmap) ||
+		    test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap))
+			goto out;
 	}
-	return nfs_ok;
+	ret = nfs_ok;
+out:
+	put_nfs4_file(fp);
+	return ret;
 }
 
 static inline void
@@ -1427,7 +1436,7 @@ int nfsd_change_deleg_cb(struct file_lock **onlist, int arg)
 		return -EAGAIN;
 }
 
-struct lock_manager_operations nfsd_lease_mng_ops = {
+static struct lock_manager_operations nfsd_lease_mng_ops = {
 	.fl_break = nfsd_break_deleg_cb,
 	.fl_release_private = nfsd_release_deleg_cb,
 	.fl_copy_lock = nfsd_copy_lock_deleg_cb,
@@ -1526,6 +1535,51 @@ out:
 	return status;
 }
 
+static inline int
+nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
+{
+	if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ))
+		return nfserr_openmode;
+	else
+		return nfs_ok;
+}
+
+static struct nfs4_delegation *
+find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
+{
+	struct nfs4_delegation *dp;
+
+	list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) {
+		if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid)
+			return dp;
+	}
+	return NULL;
+}
+
+static int
+nfs4_check_deleg(struct nfs4_file *fp, struct nfsd4_open *open,
+		struct nfs4_delegation **dp)
+{
+	int flags;
+	int status = nfserr_bad_stateid;
+
+	*dp = find_delegation_file(fp, &open->op_delegate_stateid);
+	if (*dp == NULL)
+		goto out;
+	flags = open->op_share_access == NFS4_SHARE_ACCESS_READ ?
+						RD_STATE : WR_STATE;
+	status = nfs4_check_delegmode(*dp, flags);
+	if (status)
+		*dp = NULL;
+out:
+	if (open->op_claim_type != NFS4_OPEN_CLAIM_DELEGATE_CUR)
+		return nfs_ok;
+	if (status)
+		return status;
+	open->op_stateowner->so_confirmed = 1;
+	return nfs_ok;
+}
+
 static int
 nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_stateid **stpp)
 {
@@ -1533,7 +1587,7 @@ nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_state
 	int status = nfserr_share_denied;
 	struct nfs4_stateowner *sop = open->op_stateowner;
 
-	list_for_each_entry(local, &fp->fi_perfile, st_perfile) {
+	list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
 		/* ignore lock owners */
 		if (local->st_stateowner->so_is_open_owner == 0)
 			continue;
@@ -1549,25 +1603,37 @@ out:
 	return status;
 }
 
+static inline struct nfs4_stateid *
+nfs4_alloc_stateid(void)
+{
+	return kmem_cache_alloc(stateid_slab, GFP_KERNEL);
+}
+
 static int
 nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp,
+		struct nfs4_delegation *dp,
 		struct svc_fh *cur_fh, int flags)
 {
 	struct nfs4_stateid *stp;
-	int status;
 
-	stp = kmalloc(sizeof(struct nfs4_stateid), GFP_KERNEL);
+	stp = nfs4_alloc_stateid();
 	if (stp == NULL)
 		return nfserr_resource;
 
-	status = nfsd_open(rqstp, cur_fh, S_IFREG, flags, &stp->st_vfs_file);
-	if (status) {
-		if (status == nfserr_dropit)
-			status = nfserr_jukebox;
-		kfree(stp);
-		return status;
+	if (dp) {
+		get_file(dp->dl_vfs_file);
+		stp->st_vfs_file = dp->dl_vfs_file;
+	} else {
+		int status;
+		status = nfsd_open(rqstp, cur_fh, S_IFREG, flags,
+				&stp->st_vfs_file);
+		if (status) {
+			if (status == nfserr_dropit)
+				status = nfserr_jukebox;
+			kmem_cache_free(stateid_slab, stp);
+			return status;
+		}
 	}
-	vfsopen++;
 	*stpp = stp;
 	return 0;
 }
@@ -1628,6 +1694,7 @@ nfs4_set_claim_prev(struct nfsd4_open *open, int *status)
 			*status = nfserr_reclaim_bad;
 		else {
 			open->op_stateowner->so_confirmed = 1;
+			open->op_stateowner->so_client->cl_firststate = 1;
 			open->op_stateowner->so_seqid--;
 		}
 	}
@@ -1646,14 +1713,30 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
 	int status, flag = 0;
 
 	flag = NFS4_OPEN_DELEGATE_NONE;
-	if (open->op_claim_type != NFS4_OPEN_CLAIM_NULL
-	     || !atomic_read(&cb->cb_set) || !sop->so_confirmed)
-		goto out;
-
-	if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
-		flag = NFS4_OPEN_DELEGATE_WRITE;
-	else
-		flag = NFS4_OPEN_DELEGATE_READ;
+	open->op_recall = 0;
+	switch (open->op_claim_type) {
+		case NFS4_OPEN_CLAIM_PREVIOUS:
+			if (!atomic_read(&cb->cb_set))
+				open->op_recall = 1;
+			flag = open->op_delegate_type;
+			if (flag == NFS4_OPEN_DELEGATE_NONE)
+				goto out;
+			break;
+		case NFS4_OPEN_CLAIM_NULL:
+			/* Let's not give out any delegations till everyone's
+			 * had the chance to reclaim theirs.... */
+			if (nfs4_in_grace())
+				goto out;
+			if (!atomic_read(&cb->cb_set) || !sop->so_confirmed)
+				goto out;
+			if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
+				flag = NFS4_OPEN_DELEGATE_WRITE;
+			else
+				flag = NFS4_OPEN_DELEGATE_READ;
+			break;
+		default:
+			goto out;
+	}
 
 	dp = alloc_init_deleg(sop->so_client, stp, fh, flag);
 	if (dp == NULL) {
@@ -1687,6 +1770,10 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
 	             dp->dl_stateid.si_fileid,
 	             dp->dl_stateid.si_generation);
 out:
+	if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS
+			&& flag == NFS4_OPEN_DELEGATE_NONE
+			&& open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE)
+		printk("NFSD: WARNING: refusing delegation reclaim\n");
 	open->op_delegate_type = flag;
 }
 
@@ -1699,6 +1786,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	struct nfs4_file *fp = NULL;
 	struct inode *ino = current_fh->fh_dentry->d_inode;
 	struct nfs4_stateid *stp = NULL;
+	struct nfs4_delegation *dp = NULL;
 	int status;
 
 	status = nfserr_inval;
@@ -1713,7 +1801,13 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	if (fp) {
 		if ((status = nfs4_check_open(fp, open, &stp)))
 			goto out;
+		status = nfs4_check_deleg(fp, open, &dp);
+		if (status)
+			goto out;
 	} else {
+		status = nfserr_bad_stateid;
+		if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR)
+			goto out;
 		status = nfserr_resource;
 		fp = alloc_init_file(ino);
 		if (fp == NULL)
@@ -1736,7 +1830,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 			flags = MAY_WRITE;
 		else
 			flags = MAY_READ;
-		if ((status = nfs4_new_open(rqstp, &stp, current_fh, flags)))
+		status = nfs4_new_open(rqstp, &stp, dp, current_fh, flags);
+		if (status)
 			goto out;
 		init_stateid(stp, fp, open);
 		status = nfsd4_truncate(rqstp, current_fh, open);
@@ -1759,10 +1854,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	            stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid,
 	            stp->st_stateid.si_fileid, stp->st_stateid.si_generation);
 out:
-	/* take the opportunity to clean up unused state */
-	if (fp && list_empty(&fp->fi_perfile) && list_empty(&fp->fi_del_perfile))
-		release_file(fp);
-
+	if (fp)
+		put_nfs4_file(fp);
 	/* CLAIM_PREVIOUS has different error returns */
 	nfs4_set_claim_prev(open, &status);
 	/*
@@ -1775,6 +1868,7 @@ out:
 	return status;
 }
 
+static struct workqueue_struct *laundry_wq;
 static struct work_struct laundromat_work;
 static void laundromat_main(void *);
 static DECLARE_WORK(laundromat_work, laundromat_main, NULL);
@@ -1800,7 +1894,7 @@ nfsd4_renew(clientid_t *clid)
 	}
 	renew_client(clp);
 	status = nfserr_cb_path_down;
-	if (!list_empty(&clp->cl_del_perclnt)
+	if (!list_empty(&clp->cl_delegations)
 			&& !atomic_read(&clp->cl_callback.cb_set))
 		goto out;
 	status = nfs_ok;
@@ -1809,7 +1903,15 @@ out:
 	return status;
 }
 
-time_t
+static void
+end_grace(void)
+{
+	dprintk("NFSD: end of grace period\n");
+	nfsd4_recdir_purge_old();
+	in_grace = 0;
+}
+
+static time_t
 nfs4_laundromat(void)
 {
 	struct nfs4_client *clp;
@@ -1823,6 +1925,8 @@ nfs4_laundromat(void)
 	nfs4_lock_state();
 
 	dprintk("NFSD: laundromat service - starting\n");
+	if (in_grace)
+		end_grace();
 	list_for_each_safe(pos, next, &client_lru) {
 		clp = list_entry(pos, struct nfs4_client, cl_lru);
 		if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
@@ -1833,6 +1937,7 @@ nfs4_laundromat(void)
 		}
 		dprintk("NFSD: purging unused client (clientid %08x)\n",
 			clp->cl_clientid.cl_id);
+		nfsd4_remove_clid_dir(clp);
 		expire_client(clp);
 	}
 	INIT_LIST_HEAD(&reaplist);
@@ -1882,13 +1987,13 @@ laundromat_main(void *not_used)
 
 	t = nfs4_laundromat();
 	dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t);
-	schedule_delayed_work(&laundromat_work, t*HZ);
+	queue_delayed_work(laundry_wq, &laundromat_work, t*HZ);
 }
 
 /* search ownerid_hashtbl[] and close_lru for stateid owner
  * (stateid->si_stateownerid)
  */
-struct nfs4_stateowner *
+static struct nfs4_stateowner *
 find_openstateowner_id(u32 st_id, int flags) {
 	struct nfs4_stateowner *local = NULL;
 
@@ -1949,15 +2054,6 @@ out:
 }
 
 static inline int
-nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
-{
-	if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ))
-		return nfserr_openmode;
-	else
-		return nfs_ok;
-}
-
-static inline int
 check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags)
 {
 	/* Trying to call delegreturn with a special stateid? Yuch: */
@@ -2071,7 +2167,7 @@ out:
 /* 
  * Checks for sequence id mutating operations. 
  */
-int
+static int
 nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp, clientid_t *lockclid)
 {
 	int status;
@@ -2230,6 +2326,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
 		         stp->st_stateid.si_stateownerid,
 		         stp->st_stateid.si_fileid,
 		         stp->st_stateid.si_generation);
+
+	nfsd4_create_clid_dir(sop->so_client);
 out:
 	if (oc->oc_stateowner)
 		nfs4_get_stateowner(oc->oc_stateowner);
@@ -2387,7 +2485,7 @@ static struct list_head lock_ownerid_hashtbl[LOCK_HASH_SIZE];
 static struct list_head	lock_ownerstr_hashtbl[LOCK_HASH_SIZE];
 static struct list_head lockstateid_hashtbl[STATEID_HASH_SIZE];
 
-struct nfs4_stateid *
+static struct nfs4_stateid *
 find_stateid(stateid_t *stid, int flags)
 {
 	struct nfs4_stateid *local = NULL;
@@ -2419,25 +2517,19 @@ find_stateid(stateid_t *stid, int flags)
 static struct nfs4_delegation *
 find_delegation_stateid(struct inode *ino, stateid_t *stid)
 {
-	struct nfs4_delegation *dp = NULL;
-	struct nfs4_file *fp = NULL;
-	u32 st_id;
+	struct nfs4_file *fp;
+	struct nfs4_delegation *dl;
 
 	dprintk("NFSD:find_delegation_stateid stateid=(%08x/%08x/%08x/%08x)\n",
                     stid->si_boot, stid->si_stateownerid,
                     stid->si_fileid, stid->si_generation);
 
-	st_id = stid->si_stateownerid;
 	fp = find_file(ino);
-	if (fp) {
-		list_for_each_entry(dp, &fp->fi_del_perfile, dl_del_perfile) {
-			if(dp->dl_stateid.si_stateownerid == st_id) {
-				dprintk("NFSD: find_delegation dp %p\n",dp);
-				return dp;
-			}
-		}
-	}
-	return NULL;
+	if (!fp)
+		return NULL;
+	dl = find_delegation_file(fp, stid);
+	put_nfs4_file(fp);
+	return dl;
 }
 
 /*
@@ -2457,7 +2549,7 @@ nfs4_transform_lock_offset(struct file_lock *lock)
 		lock->fl_end = OFFSET_MAX;
 }
 
-int
+static int
 nfs4_verify_lock_stateowner(struct nfs4_stateowner *sop, unsigned int hashval)
 {
 	struct nfs4_stateowner *local = NULL;
@@ -2498,22 +2590,6 @@ nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
 }
 
 static struct nfs4_stateowner *
-find_lockstateowner(struct xdr_netobj *owner, clientid_t *clid)
-{
-	struct nfs4_stateowner *local = NULL;
-	int i;
-
-	for (i = 0; i < LOCK_HASH_SIZE; i++) {
-		list_for_each_entry(local, &lock_ownerid_hashtbl[i], so_idhash) {
-			if (!cmp_owner_str(local, owner, clid))
-				continue;
-			return local;
-		}
-	}
-	return NULL;
-}
-
-static struct nfs4_stateowner *
 find_lockstateowner_str(struct inode *inode, clientid_t *clid,
 		struct xdr_netobj *owner)
 {
@@ -2548,13 +2624,13 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
 	INIT_LIST_HEAD(&sop->so_idhash);
 	INIT_LIST_HEAD(&sop->so_strhash);
 	INIT_LIST_HEAD(&sop->so_perclient);
-	INIT_LIST_HEAD(&sop->so_perfilestate);
-	INIT_LIST_HEAD(&sop->so_perlockowner);
+	INIT_LIST_HEAD(&sop->so_stateids);
+	INIT_LIST_HEAD(&sop->so_perstateid);
 	INIT_LIST_HEAD(&sop->so_close_lru); /* not used */
 	sop->so_time = 0;
 	list_add(&sop->so_idhash, &lock_ownerid_hashtbl[idhashval]);
 	list_add(&sop->so_strhash, &lock_ownerstr_hashtbl[strhashval]);
-	list_add(&sop->so_perlockowner, &open_stp->st_perlockowner);
+	list_add(&sop->so_perstateid, &open_stp->st_lockowners);
 	sop->so_is_open_owner = 0;
 	sop->so_id = current_ownerid++;
 	sop->so_client = clp;
@@ -2567,24 +2643,24 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
 	return sop;
 }
 
-struct nfs4_stateid *
+static struct nfs4_stateid *
 alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struct nfs4_stateid *open_stp)
 {
 	struct nfs4_stateid *stp;
 	unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id);
 
-	if ((stp = kmalloc(sizeof(struct nfs4_stateid), 
-					GFP_KERNEL)) == NULL)
+	stp = nfs4_alloc_stateid();
+	if (stp == NULL)
 		goto out;
 	INIT_LIST_HEAD(&stp->st_hash);
 	INIT_LIST_HEAD(&stp->st_perfile);
-	INIT_LIST_HEAD(&stp->st_perfilestate);
-	INIT_LIST_HEAD(&stp->st_perlockowner); /* not used */
+	INIT_LIST_HEAD(&stp->st_perstateowner);
+	INIT_LIST_HEAD(&stp->st_lockowners); /* not used */
 	list_add(&stp->st_hash, &lockstateid_hashtbl[hashval]);
-	list_add(&stp->st_perfile, &fp->fi_perfile);
-	list_add_perfile++;
-	list_add(&stp->st_perfilestate, &sop->so_perfilestate);
+	list_add(&stp->st_perfile, &fp->fi_stateids);
+	list_add(&stp->st_perstateowner, &sop->so_stateids);
 	stp->st_stateowner = sop;
+	get_nfs4_file(fp);
 	stp->st_file = fp;
 	stp->st_stateid.si_boot = boot_time;
 	stp->st_stateid.si_stateownerid = sop->so_id;
@@ -2598,7 +2674,7 @@ out:
 	return stp;
 }
 
-int
+static int
 check_lock_length(u64 offset, u64 length)
 {
 	return ((length == 0)  || ((length != ~(u64)0) &&
@@ -2611,7 +2687,7 @@ check_lock_length(u64 offset, u64 length)
 int
 nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock *lock)
 {
-	struct nfs4_stateowner *lock_sop = NULL, *open_sop = NULL;
+	struct nfs4_stateowner *open_sop = NULL;
 	struct nfs4_stateid *lock_stp;
 	struct file *filp;
 	struct file_lock file_lock;
@@ -2670,16 +2746,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 		strhashval = lock_ownerstr_hashval(fp->fi_inode, 
 				open_sop->so_client->cl_clientid.cl_id, 
 				&lock->v.new.owner);
-		/* 
-		 * If we already have this lock owner, the client is in 
-		 * error (or our bookeeping is wrong!) 
-		 * for asking for a 'new lock'.
-		 */
-		status = nfserr_bad_stateid;
-		lock_sop = find_lockstateowner(&lock->v.new.owner,
-						&lock->v.new.clientid);
-		if (lock_sop)
-			goto out;
+		/* XXX: Do we need to check for duplicate stateowners on
+		 * the same file, or should they just be allowed (and
+		 * create new stateids)? */
 		status = nfserr_resource;
 		if (!(lock->lk_stateowner = alloc_init_lock_stateowner(strhashval, open_sop->so_client, open_stp, lock)))
 			goto out;
@@ -2970,8 +3039,11 @@ int
 nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *rlockowner)
 {
 	clientid_t *clid = &rlockowner->rl_clientid;
-	struct nfs4_stateowner *local = NULL;
+	struct nfs4_stateowner *sop;
+	struct nfs4_stateid *stp;
 	struct xdr_netobj *owner = &rlockowner->rl_owner;
+	struct list_head matches;
+	int i;
 	int status;
 
 	dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
@@ -2987,22 +3059,32 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *
 
 	nfs4_lock_state();
 
-	status = nfs_ok;
-	local = find_lockstateowner(owner, clid);
-	if (local) {
-		struct nfs4_stateid *stp;
-
-		/* check for any locks held by any stateid
-		 * associated with the (lock) stateowner */
-		status = nfserr_locks_held;
-		list_for_each_entry(stp, &local->so_perfilestate,
-				st_perfilestate) {
-			if (check_for_locks(stp->st_vfs_file, local))
-				goto out;
+	status = nfserr_locks_held;
+	/* XXX: we're doing a linear search through all the lockowners.
+	 * Yipes!  For now we'll just hope clients aren't really using
+	 * release_lockowner much, but eventually we have to fix these
+	 * data structures. */
+	INIT_LIST_HEAD(&matches);
+	for (i = 0; i < LOCK_HASH_SIZE; i++) {
+		list_for_each_entry(sop, &lock_ownerid_hashtbl[i], so_idhash) {
+			if (!cmp_owner_str(sop, owner, clid))
+				continue;
+			list_for_each_entry(stp, &sop->so_stateids,
+					st_perstateowner) {
+				if (check_for_locks(stp->st_vfs_file, sop))
+					goto out;
+				/* Note: so_perclient unused for lockowners,
+				 * so it's OK to fool with here. */
+				list_add(&sop->so_perclient, &matches);
+			}
 		}
-		/* no locks held by (lock) stateowner */
-		status = nfs_ok;
-		release_stateowner(local);
+	}
+	/* Clients probably won't expect us to return with some (but not all)
+	 * of the lockowner state released; so don't release any until all
+	 * have been checked. */
+	status = nfs_ok;
+	list_for_each_entry(sop, &matches, so_perclient) {
+		release_stateowner(sop);
 	}
 out:
 	nfs4_unlock_state();
@@ -3010,39 +3092,38 @@ out:
 }
 
 static inline struct nfs4_client_reclaim *
-alloc_reclaim(int namelen)
+alloc_reclaim(void)
 {
-	struct nfs4_client_reclaim *crp = NULL;
+	return kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL);
+}
 
-	crp = kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL);
-	if (!crp)
-		return NULL;
-	crp->cr_name.data = kmalloc(namelen, GFP_KERNEL);
-	if (!crp->cr_name.data) {
-		kfree(crp);
-		return NULL;
-	}
-	return crp;
+int
+nfs4_has_reclaimed_state(const char *name)
+{
+	unsigned int strhashval = clientstr_hashval(name);
+	struct nfs4_client *clp;
+
+	clp = find_confirmed_client_by_str(name, strhashval);
+	return clp ? 1 : 0;
 }
 
 /*
  * failure => all reset bets are off, nfserr_no_grace...
  */
-static int
-nfs4_client_to_reclaim(char *name, int namlen)
+int
+nfs4_client_to_reclaim(const char *name)
 {
 	unsigned int strhashval;
 	struct nfs4_client_reclaim *crp = NULL;
 
-	dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", namlen, name);
-	crp = alloc_reclaim(namlen);
+	dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", HEXDIR_LEN, name);
+	crp = alloc_reclaim();
 	if (!crp)
 		return 0;
-	strhashval = clientstr_hashval(name, namlen);
+	strhashval = clientstr_hashval(name);
 	INIT_LIST_HEAD(&crp->cr_strhash);
 	list_add(&crp->cr_strhash, &reclaim_str_hashtbl[strhashval]);
-	memcpy(crp->cr_name.data, name, namlen);
-	crp->cr_name.len = namlen;
+	memcpy(crp->cr_recdir, name, HEXDIR_LEN);
 	reclaim_str_hashtbl_size++;
 	return 1;
 }
@@ -3053,13 +3134,11 @@ nfs4_release_reclaim(void)
 	struct nfs4_client_reclaim *crp = NULL;
 	int i;
 
-	BUG_ON(!nfs4_reclaim_init);
 	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
 		while (!list_empty(&reclaim_str_hashtbl[i])) {
 			crp = list_entry(reclaim_str_hashtbl[i].next,
 			                struct nfs4_client_reclaim, cr_strhash);
 			list_del(&crp->cr_strhash);
-			kfree(crp->cr_name.data);
 			kfree(crp);
 			reclaim_str_hashtbl_size--;
 		}
@@ -3069,7 +3148,7 @@ nfs4_release_reclaim(void)
 
 /*
  * called from OPEN, CLAIM_PREVIOUS with a new clientid. */
-struct nfs4_client_reclaim *
+static struct nfs4_client_reclaim *
 nfs4_find_reclaim_client(clientid_t *clid)
 {
 	unsigned int strhashval;
@@ -3082,13 +3161,14 @@ nfs4_find_reclaim_client(clientid_t *clid)
 	if (clp == NULL)
 		return NULL;
 
-	dprintk("NFSD: nfs4_find_reclaim_client for %.*s\n",
-		            clp->cl_name.len, clp->cl_name.data);
+	dprintk("NFSD: nfs4_find_reclaim_client for %.*s with recdir %s\n",
+		            clp->cl_name.len, clp->cl_name.data,
+			    clp->cl_recdir);
 
 	/* find clp->cl_name in reclaim_str_hashtbl */
-	strhashval = clientstr_hashval(clp->cl_name.data, clp->cl_name.len);
+	strhashval = clientstr_hashval(clp->cl_recdir);
 	list_for_each_entry(crp, &reclaim_str_hashtbl[strhashval], cr_strhash) {
-		if (cmp_name(&crp->cr_name, &clp->cl_name)) {
+		if (same_name(crp->cr_recdir, clp->cl_recdir)) {
 			return crp;
 		}
 	}
@@ -3101,30 +3181,16 @@ nfs4_find_reclaim_client(clientid_t *clid)
 int
 nfs4_check_open_reclaim(clientid_t *clid)
 {
-	struct nfs4_client_reclaim *crp;
-
-	if ((crp = nfs4_find_reclaim_client(clid)) == NULL)
-		return nfserr_reclaim_bad;
-	return nfs_ok;
+	return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad;
 }
 
+/* initialization to perform at module load time: */
 
-/* 
- * Start and stop routines
- */
-
-static void
-__nfs4_state_init(void)
+void
+nfs4_state_init(void)
 {
 	int i;
-	time_t grace_time;
 
-	if (!nfs4_reclaim_init) {
-		for (i = 0; i < CLIENT_HASH_SIZE; i++)
-			INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
-		reclaim_str_hashtbl_size = 0;
-		nfs4_reclaim_init = 1;
-	}
 	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
 		INIT_LIST_HEAD(&conf_id_hashtbl[i]);
 		INIT_LIST_HEAD(&conf_str_hashtbl[i]);
@@ -3146,26 +3212,46 @@ __nfs4_state_init(void)
 		INIT_LIST_HEAD(&lock_ownerid_hashtbl[i]);
 		INIT_LIST_HEAD(&lock_ownerstr_hashtbl[i]);
 	}
-	memset(&zerostateid, 0, sizeof(stateid_t));
 	memset(&onestateid, ~0, sizeof(stateid_t));
-
 	INIT_LIST_HEAD(&close_lru);
 	INIT_LIST_HEAD(&client_lru);
 	INIT_LIST_HEAD(&del_recall_lru);
-	spin_lock_init(&recall_lock);
+	for (i = 0; i < CLIENT_HASH_SIZE; i++)
+		INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
+	reclaim_str_hashtbl_size = 0;
+}
+
+static void
+nfsd4_load_reboot_recovery_data(void)
+{
+	int status;
+
+	nfs4_lock_state();
+	nfsd4_init_recdir(user_recovery_dirname);
+	status = nfsd4_recdir_load();
+	nfs4_unlock_state();
+	if (status)
+		printk("NFSD: Failure reading reboot recovery data\n");
+}
+
+/* initialization to perform when the nfsd service is started: */
+
+static void
+__nfs4_state_start(void)
+{
+	time_t grace_time;
+
 	boot_time = get_seconds();
-	grace_time = max(old_lease_time, lease_time);
-	if (reclaim_str_hashtbl_size == 0)
-		grace_time = 0;
-	if (grace_time)
-		printk("NFSD: starting %ld-second grace period\n", grace_time);
-	grace_end = boot_time + grace_time;
-	INIT_WORK(&laundromat_work,laundromat_main, NULL);
-	schedule_delayed_work(&laundromat_work, NFSD_LEASE_TIME*HZ);
+	grace_time = max(user_lease_time, lease_time);
+	lease_time = user_lease_time;
+	in_grace = 1;
+	printk("NFSD: starting %ld-second grace period\n", grace_time);
+	laundry_wq = create_singlethread_workqueue("nfsd4");
+	queue_delayed_work(laundry_wq, &laundromat_work, grace_time*HZ);
 }
 
 int
-nfs4_state_init(void)
+nfs4_state_start(void)
 {
 	int status;
 
@@ -3174,7 +3260,8 @@ nfs4_state_init(void)
 	status = nfsd4_init_slabs();
 	if (status)
 		return status;
-	__nfs4_state_init();
+	nfsd4_load_reboot_recovery_data();
+	__nfs4_state_start();
 	nfs4_init = 1;
 	return 0;
 }
@@ -3182,14 +3269,7 @@ nfs4_state_init(void)
 int
 nfs4_in_grace(void)
 {
-	return get_seconds() < grace_end;
-}
-
-void
-set_no_grace(void)
-{
-	printk("NFSD: ERROR in reboot recovery.  State reclaims will fail.\n");
-	grace_end = get_seconds();
+	return in_grace;
 }
 
 time_t
@@ -3236,21 +3316,11 @@ __nfs4_state_shutdown(void)
 		unhash_delegation(dp);
 	}
 
-	release_all_files();
 	cancel_delayed_work(&laundromat_work);
-	flush_scheduled_work();
+	flush_workqueue(laundry_wq);
+	destroy_workqueue(laundry_wq);
+	nfsd4_shutdown_recdir();
 	nfs4_init = 0;
-	dprintk("NFSD: list_add_perfile %d list_del_perfile %d\n",
-			list_add_perfile, list_del_perfile);
-	dprintk("NFSD: add_perclient %d del_perclient %d\n",
-			add_perclient, del_perclient);
-	dprintk("NFSD: alloc_file %d free_file %d\n",
-			alloc_file, free_file);
-	dprintk("NFSD: vfsopen %d vfsclose %d\n",
-			vfsopen, vfsclose);
-	dprintk("NFSD: alloc_delegation %d free_delegation %d\n",
-			alloc_delegation, free_delegation);
-
 }
 
 void
@@ -3263,56 +3333,48 @@ nfs4_state_shutdown(void)
 	nfs4_unlock_state();
 }
 
+static void
+nfs4_set_recdir(char *recdir)
+{
+	nfs4_lock_state();
+	strcpy(user_recovery_dirname, recdir);
+	nfs4_unlock_state();
+}
+
+/*
+ * Change the NFSv4 recovery directory to recdir.
+ */
+int
+nfs4_reset_recoverydir(char *recdir)
+{
+	int status;
+	struct nameidata nd;
+
+	status = path_lookup(recdir, LOOKUP_FOLLOW, &nd);
+	if (status)
+		return status;
+	status = -ENOTDIR;
+	if (S_ISDIR(nd.dentry->d_inode->i_mode)) {
+		nfs4_set_recdir(recdir);
+		status = 0;
+	}
+	path_release(&nd);
+	return status;
+}
+
 /*
  * Called when leasetime is changed.
  *
- * if nfsd is not started, simply set the global lease.
- *
- * if nfsd(s) are running, lease change requires nfsv4 state to be reset.
- * e.g: boot_time is reset, existing nfs4_client structs are
- * used to fill reclaim_str_hashtbl, then all state (except for the
- * reclaim_str_hashtbl) is re-initialized.
- *
- * if the old lease time is greater than the new lease time, the grace
- * period needs to be set to the old lease time to allow clients to reclaim
- * their state. XXX - we may want to set the grace period == lease time
- * after an initial grace period == old lease time
- *
- * if an error occurs in this process, the new lease is set, but the server
- * will not honor OPEN or LOCK reclaims, and will return nfserr_no_grace
- * which means OPEN/LOCK/READ/WRITE will fail during grace period.
- *
- * clients will attempt to reset all state with SETCLIENTID/CONFIRM, and
- * OPEN and LOCK reclaims.
+ * The only way the protocol gives us to handle on-the-fly lease changes is to
+ * simulate a reboot.  Instead of doing that, we just wait till the next time
+ * we start to register any changes in lease time.  If the administrator
+ * really wants to change the lease time *now*, they can go ahead and bring
+ * nfsd down and then back up again after changing the lease time.
  */
 void
 nfs4_reset_lease(time_t leasetime)
 {
-	struct nfs4_client *clp;
-	int i;
-
-	printk("NFSD: New leasetime %ld\n",leasetime);
-	if (!nfs4_init)
-		return;
-	nfs4_lock_state();
-	old_lease_time = lease_time;
-	lease_time = leasetime;
-
-	nfs4_release_reclaim();
-
-	/* populate reclaim_str_hashtbl with current confirmed nfs4_clientid */
-	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
-		list_for_each_entry(clp, &conf_id_hashtbl[i], cl_idhash) {
-			if (!nfs4_client_to_reclaim(clp->cl_name.data,
-						clp->cl_name.len)) {
-				nfs4_release_reclaim();
-				goto init_state;
-			}
-		}
-	}
-init_state:
-	__nfs4_state_shutdown();
-	__nfs4_state_init();
-	nfs4_unlock_state();
+	lock_kernel();
+	user_lease_time = leasetime;
+	unlock_kernel();
 }
-
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 36a058a112d5..91fb171d2ace 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -136,7 +136,7 @@ xdr_error:					\
 	}					\
 } while (0)
 
-u32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes)
+static u32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes)
 {
 	/* We want more bytes than seem to be available.
 	 * Maybe we need a new page, maybe we have just run out
@@ -190,7 +190,7 @@ defer_free(struct nfsd4_compoundargs *argp,
 	return 0;
 }
 
-char *savemem(struct nfsd4_compoundargs *argp, u32 *p, int nbytes)
+static char *savemem(struct nfsd4_compoundargs *argp, u32 *p, int nbytes)
 {
 	void *new = NULL;
 	if (p == argp->tmp) {
@@ -1366,7 +1366,10 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
 	if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) {
 		if ((buflen -= 4) < 0)
 			goto out_resource;
-		WRITE32( NFS4_FH_NOEXPIRE_WITH_OPEN | NFS4_FH_VOL_RENAME );
+		if (exp->ex_flags & NFSEXP_NOSUBTREECHECK)
+			WRITE32(NFS4_FH_VOLATILE_ANY);
+		else
+			WRITE32(NFS4_FH_VOLATILE_ANY|NFS4_FH_VOL_RENAME);
 	}
 	if (bmval0 & FATTR4_WORD0_CHANGE) {
 		/*
@@ -1969,7 +1972,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_open
 	case NFS4_OPEN_DELEGATE_READ:
 		RESERVE_SPACE(20 + sizeof(stateid_t));
 		WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t));
-		WRITE32(0);
+		WRITE32(open->op_recall);
 
 		/*
 		 * TODO: ACE's in delegations
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 161afdcb8f7d..841c562991e8 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -51,6 +51,7 @@ enum {
 	NFSD_Fh,
 	NFSD_Threads,
 	NFSD_Leasetime,
+	NFSD_RecoveryDir,
 };
 
 /*
@@ -66,6 +67,7 @@ static ssize_t write_getfs(struct file *file, char *buf, size_t size);
 static ssize_t write_filehandle(struct file *file, char *buf, size_t size);
 static ssize_t write_threads(struct file *file, char *buf, size_t size);
 static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
+static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
 
 static ssize_t (*write_op[])(struct file *, char *, size_t) = {
 	[NFSD_Svc] = write_svc,
@@ -78,6 +80,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
 	[NFSD_Fh] = write_filehandle,
 	[NFSD_Threads] = write_threads,
 	[NFSD_Leasetime] = write_leasetime,
+	[NFSD_RecoveryDir] = write_recoverydir,
 };
 
 static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos)
@@ -349,6 +352,25 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
 	return strlen(buf);
 }
 
+static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
+{
+	char *mesg = buf;
+	char *recdir;
+	int len, status;
+
+	if (size > PATH_MAX || buf[size-1] != '\n')
+		return -EINVAL;
+	buf[size-1] = 0;
+
+	recdir = mesg;
+	len = qword_get(&mesg, recdir, size);
+	if (len <= 0)
+		return -EINVAL;
+
+	status = nfs4_reset_recoverydir(recdir);
+	return strlen(buf);
+}
+
 /*----------------------------------------------------------------------------*/
 /*
  *	populating the filesystem.
@@ -369,6 +391,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
 		[NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR},
 #ifdef CONFIG_NFSD_V4
 		[NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
+		[NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
 #endif
 		/* last one */ {""}
 	};
@@ -397,9 +420,8 @@ static int __init init_nfsd(void)
 	nfsd_cache_init();	/* RPC reply cache */
 	nfsd_export_init();	/* Exports table */
 	nfsd_lockd_init();	/* lockd->nfsd callbacks */
-#ifdef CONFIG_NFSD_V4
+	nfs4_state_init();	/* NFSv4 locking state */
 	nfsd_idmap_init();      /* Name to ID mapping */
-#endif /* CONFIG_NFSD_V4 */
 	if (proc_mkdir("fs/nfs", NULL)) {
 		struct proc_dir_entry *entry;
 		entry = create_proc_entry("fs/nfs/exports", 0, NULL);
@@ -426,9 +448,7 @@ static void __exit exit_nfsd(void)
 	remove_proc_entry("fs/nfs", NULL);
 	nfsd_stat_shutdown();
 	nfsd_lockd_shutdown();
-#ifdef CONFIG_NFSD_V4
 	nfsd_idmap_shutdown();
-#endif /* CONFIG_NFSD_V4 */
 	unregister_filesystem(&nfsd_fs_type);
 }
 
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 757f9d208034..0aa1b9603d7f 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -591,6 +591,7 @@ nfserrno (int errno)
 		{ nfserr_dropit, -ENOMEM },
 		{ nfserr_badname, -ESRCH },
 		{ nfserr_io, -ETXTBSY },
+		{ nfserr_notsupp, -EOPNOTSUPP },
 		{ -1, -EIO }
 	};
 	int	i;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 02ded7cfbdcf..07b9a065e9da 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -31,6 +31,7 @@
 #include <linux/nfsd/stats.h>
 #include <linux/nfsd/cache.h>
 #include <linux/lockd/bind.h>
+#include <linux/nfsacl.h>
 
 #define NFSDDBG_FACILITY	NFSDDBG_SVC
 
@@ -94,7 +95,7 @@ nfsd_svc(unsigned short port, int nrservs)
 	error =	nfsd_racache_init(2*nrservs);
 	if (error<0)
 		goto out;
-	error = nfs4_state_init();
+	error = nfs4_state_start();
 	if (error<0)
 		goto out;
 	if (!nfsd_serv) {
@@ -362,6 +363,32 @@ nfsd_dispatch(struct svc_rqst *rqstp, u32 *statp)
 	return 1;
 }
 
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+static struct svc_stat	nfsd_acl_svcstats;
+static struct svc_version *	nfsd_acl_version[] = {
+	[2] = &nfsd_acl_version2,
+	[3] = &nfsd_acl_version3,
+};
+
+#define NFSD_ACL_NRVERS		(sizeof(nfsd_acl_version)/sizeof(nfsd_acl_version[0]))
+static struct svc_program	nfsd_acl_program = {
+	.pg_prog		= NFS_ACL_PROGRAM,
+	.pg_nvers		= NFSD_ACL_NRVERS,
+	.pg_vers		= nfsd_acl_version,
+	.pg_name		= "nfsd",
+	.pg_class		= "nfsd",
+	.pg_stats		= &nfsd_acl_svcstats,
+};
+
+static struct svc_stat	nfsd_acl_svcstats = {
+	.program	= &nfsd_acl_program,
+};
+
+#define nfsd_acl_program_p	&nfsd_acl_program
+#else
+#define nfsd_acl_program_p	NULL
+#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */
+
 extern struct svc_version nfsd_version2, nfsd_version3, nfsd_version4;
 
 static struct svc_version *	nfsd_version[] = {
@@ -376,6 +403,7 @@ static struct svc_version *	nfsd_version[] = {
 
 #define NFSD_NRVERS		(sizeof(nfsd_version)/sizeof(nfsd_version[0]))
 struct svc_program		nfsd_program = {
+	.pg_next		= nfsd_acl_program_p,
 	.pg_prog		= NFS_PROGRAM,		/* program number */
 	.pg_nvers		= NFSD_NRVERS,		/* nr of entries in nfsd_version */
 	.pg_vers		= nfsd_version,		/* version table */
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 948b08287c99..b45999ff33e6 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -49,6 +49,12 @@ decode_fh(u32 *p, struct svc_fh *fhp)
 	return p + (NFS_FHSIZE >> 2);
 }
 
+/* Helper function for NFSv2 ACL code */
+u32 *nfs2svc_decode_fh(u32 *p, struct svc_fh *fhp)
+{
+	return decode_fh(p, fhp);
+}
+
 static inline u32 *
 encode_fh(u32 *p, struct svc_fh *fhp)
 {
@@ -190,6 +196,11 @@ encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
 	return p;
 }
 
+/* Helper function for NFSv2 ACL code */
+u32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
+{
+	return encode_fattr(rqstp, p, fhp);
+}
 
 /*
  * XDR decode functions
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index e3e9d217236e..be24ead89d94 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -46,10 +46,9 @@
 #include <linux/nfsd/nfsfh.h>
 #include <linux/quotaops.h>
 #include <linux/dnotify.h>
-#ifdef CONFIG_NFSD_V4
 #include <linux/posix_acl.h>
 #include <linux/posix_acl_xattr.h>
-#include <linux/xattr_acl.h>
+#ifdef CONFIG_NFSD_V4
 #include <linux/xattr.h>
 #include <linux/nfs4.h>
 #include <linux/nfs4_acl.h>
@@ -424,13 +423,13 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
 		goto out_nfserr;
 
 	if (pacl) {
-		error = set_nfsv4_acl_one(dentry, pacl, XATTR_NAME_ACL_ACCESS);
+		error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS);
 		if (error < 0)
 			goto out_nfserr;
 	}
 
 	if (dpacl) {
-		error = set_nfsv4_acl_one(dentry, dpacl, XATTR_NAME_ACL_DEFAULT);
+		error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT);
 		if (error < 0)
 			goto out_nfserr;
 	}
@@ -497,7 +496,7 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac
 	struct posix_acl *pacl = NULL, *dpacl = NULL;
 	unsigned int flags = 0;
 
-	pacl = _get_posix_acl(dentry, XATTR_NAME_ACL_ACCESS);
+	pacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_ACCESS);
 	if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA)
 		pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
 	if (IS_ERR(pacl)) {
@@ -507,7 +506,7 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac
 	}
 
 	if (S_ISDIR(inode->i_mode)) {
-		dpacl = _get_posix_acl(dentry, XATTR_NAME_ACL_DEFAULT);
+		dpacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_DEFAULT);
 		if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA)
 			dpacl = NULL;
 		else if (IS_ERR(dpacl)) {
@@ -1857,3 +1856,107 @@ nfsd_racache_init(int cache_size)
 	nfsdstats.ra_size = cache_size;
 	return 0;
 }
+
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+struct posix_acl *
+nfsd_get_posix_acl(struct svc_fh *fhp, int type)
+{
+	struct inode *inode = fhp->fh_dentry->d_inode;
+	char *name;
+	void *value = NULL;
+	ssize_t size;
+	struct posix_acl *acl;
+
+	if (!IS_POSIXACL(inode) || !inode->i_op || !inode->i_op->getxattr)
+		return ERR_PTR(-EOPNOTSUPP);
+	switch(type) {
+		case ACL_TYPE_ACCESS:
+			name = POSIX_ACL_XATTR_ACCESS;
+			break;
+		case ACL_TYPE_DEFAULT:
+			name = POSIX_ACL_XATTR_DEFAULT;
+			break;
+		default:
+			return ERR_PTR(-EOPNOTSUPP);
+	}
+
+	size = inode->i_op->getxattr(fhp->fh_dentry, name, NULL, 0);
+
+	if (size < 0) {
+		acl = ERR_PTR(size);
+		goto getout;
+	} else if (size > 0) {
+		value = kmalloc(size, GFP_KERNEL);
+		if (!value) {
+			acl = ERR_PTR(-ENOMEM);
+			goto getout;
+		}
+		size = inode->i_op->getxattr(fhp->fh_dentry, name, value, size);
+		if (size < 0) {
+			acl = ERR_PTR(size);
+			goto getout;
+		}
+	}
+	acl = posix_acl_from_xattr(value, size);
+
+getout:
+	kfree(value);
+	return acl;
+}
+
+int
+nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
+{
+	struct inode *inode = fhp->fh_dentry->d_inode;
+	char *name;
+	void *value = NULL;
+	size_t size;
+	int error;
+
+	if (!IS_POSIXACL(inode) || !inode->i_op ||
+	    !inode->i_op->setxattr || !inode->i_op->removexattr)
+		return -EOPNOTSUPP;
+	switch(type) {
+		case ACL_TYPE_ACCESS:
+			name = POSIX_ACL_XATTR_ACCESS;
+			break;
+		case ACL_TYPE_DEFAULT:
+			name = POSIX_ACL_XATTR_DEFAULT;
+			break;
+		default:
+			return -EOPNOTSUPP;
+	}
+
+	if (acl && acl->a_count) {
+		size = posix_acl_xattr_size(acl->a_count);
+		value = kmalloc(size, GFP_KERNEL);
+		if (!value)
+			return -ENOMEM;
+		size = posix_acl_to_xattr(acl, value, size);
+		if (size < 0) {
+			error = size;
+			goto getout;
+		}
+	} else
+		size = 0;
+
+	if (!fhp->fh_locked)
+		fh_lock(fhp);  /* unlocking is done automatically */
+	if (size)
+		error = inode->i_op->setxattr(fhp->fh_dentry, name,
+					      value, size, 0);
+	else {
+		if (!S_ISDIR(inode->i_mode) && type == ACL_TYPE_DEFAULT)
+			error = 0;
+		else {
+			error = inode->i_op->removexattr(fhp->fh_dentry, name);
+			if (error == -ENODATA)
+				error = 0;
+		}
+	}
+
+getout:
+	kfree(value);
+	return error;
+}
+#endif  /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */
diff --git a/fs/open.c b/fs/open.c
index 963bd81a44c8..3f4a4286fdc4 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -21,6 +21,7 @@
 #include <linux/vfs.h>
 #include <asm/uaccess.h>
 #include <linux/fs.h>
+#include <linux/personality.h>
 #include <linux/pagemap.h>
 #include <linux/syscalls.h>
 
@@ -807,7 +808,9 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
 
 	/* NB: we're sure to have correct a_ops only after f_op->open */
 	if (f->f_flags & O_DIRECT) {
-		if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO) {
+		if (!f->f_mapping->a_ops ||
+		    ((!f->f_mapping->a_ops->direct_IO) &&
+		    (!f->f_mapping->a_ops->get_xip_page))) {
 			fput(f);
 			f = ERR_PTR(-EINVAL);
 		}
@@ -933,31 +936,27 @@ EXPORT_SYMBOL(fd_install);
 asmlinkage long sys_open(const char __user * filename, int flags, int mode)
 {
 	char * tmp;
-	int fd, error;
+	int fd;
+
+	if (force_o_largefile())
+		flags |= O_LARGEFILE;
 
-#if BITS_PER_LONG != 32
-	flags |= O_LARGEFILE;
-#endif
 	tmp = getname(filename);
 	fd = PTR_ERR(tmp);
 	if (!IS_ERR(tmp)) {
 		fd = get_unused_fd();
 		if (fd >= 0) {
 			struct file *f = filp_open(tmp, flags, mode);
-			error = PTR_ERR(f);
-			if (IS_ERR(f))
-				goto out_error;
-			fd_install(fd, f);
+			if (IS_ERR(f)) {
+				put_unused_fd(fd);
+				fd = PTR_ERR(f);
+			} else {
+				fd_install(fd, f);
+			}
 		}
-out:
 		putname(tmp);
 	}
 	return fd;
-
-out_error:
-	put_unused_fd(fd);
-	fd = error;
-	goto out;
 }
 EXPORT_SYMBOL_GPL(sys_open);
 
@@ -980,23 +979,15 @@ asmlinkage long sys_creat(const char __user * pathname, int mode)
  */
 int filp_close(struct file *filp, fl_owner_t id)
 {
-	int retval;
-
-	/* Report and clear outstanding errors */
-	retval = filp->f_error;
-	if (retval)
-		filp->f_error = 0;
+	int retval = 0;
 
 	if (!file_count(filp)) {
 		printk(KERN_ERR "VFS: Close: file count is 0\n");
-		return retval;
+		return 0;
 	}
 
-	if (filp->f_op && filp->f_op->flush) {
-		int err = filp->f_op->flush(filp);
-		if (!retval)
-			retval = err;
-	}
+	if (filp->f_op && filp->f_op->flush)
+		retval = filp->f_op->flush(filp);
 
 	dnotify_flush(filp, id);
 	locks_remove_posix(filp, id);
diff --git a/fs/partitions/Makefile b/fs/partitions/Makefile
index 4c83c17969e1..66d5cc26fafb 100644
--- a/fs/partitions/Makefile
+++ b/fs/partitions/Makefile
@@ -17,4 +17,3 @@ obj-$(CONFIG_SUN_PARTITION) += sun.o
 obj-$(CONFIG_ULTRIX_PARTITION) += ultrix.o
 obj-$(CONFIG_IBM_PARTITION) += ibm.o
 obj-$(CONFIG_EFI_PARTITION) += efi.o
-obj-$(CONFIG_NEC98_PARTITION) += nec98.o msdos.o
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 2cab98a9a621..77e178f13162 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -79,9 +79,6 @@ static int (*check_part[])(struct parsed_partitions *, struct block_device *) =
 #ifdef CONFIG_LDM_PARTITION
 	ldm_partition,		/* this must come before msdos */
 #endif
-#ifdef CONFIG_NEC98_PARTITION
-	nec98_partition,	/* must be come before `msdos_partition' */
-#endif
 #ifdef CONFIG_MSDOS_PARTITION
 	msdos_partition,
 #endif
diff --git a/fs/partitions/check.h b/fs/partitions/check.h
index 43adcc68e471..17ae8ecd9e8b 100644
--- a/fs/partitions/check.h
+++ b/fs/partitions/check.h
@@ -30,7 +30,3 @@ put_partition(struct parsed_partitions *p, int n, sector_t from, sector_t size)
 
 extern int warn_no_part;
 
-extern void parse_bsd(struct parsed_partitions *state,
-			struct block_device *bdev, u32 offset, u32 size,
-			int origin, char *flavour, int max_partitions);
-
diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c
index 584a27b2bbd5..9935d254186e 100644
--- a/fs/partitions/msdos.c
+++ b/fs/partitions/msdos.c
@@ -202,12 +202,12 @@ parse_solaris_x86(struct parsed_partitions *state, struct block_device *bdev,
 #endif
 }
 
-#if defined(CONFIG_BSD_DISKLABEL) || defined(CONFIG_NEC98_PARTITION)
+#if defined(CONFIG_BSD_DISKLABEL)
 /* 
  * Create devices for BSD partitions listed in a disklabel, under a
  * dos-like partition. See parse_extended() for more information.
  */
-void
+static void
 parse_bsd(struct parsed_partitions *state, struct block_device *bdev,
 		u32 offset, u32 size, int origin, char *flavour,
 		int max_partitions)
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 738b9b602932..7431d7ba2d09 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -11,4 +11,5 @@ proc-y       += inode.o root.o base.o generic.o array.o \
 		kmsg.o proc_tty.o proc_misc.o
 
 proc-$(CONFIG_PROC_KCORE)	+= kcore.o
+proc-$(CONFIG_PROC_VMCORE)	+= vmcore.o
 proc-$(CONFIG_PROC_DEVICETREE)	+= proc_devtree.o
diff --git a/fs/proc/base.c b/fs/proc/base.c
index e31903aadd96..ace151fa4878 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -314,7 +314,7 @@ static int may_ptrace_attach(struct task_struct *task)
 	     (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
 		goto out;
 	rmb();
-	if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE))
+	if (task->mm->dumpable != 1 && !capable(CAP_SYS_PTRACE))
 		goto out;
 	if (security_ptrace(current, task))
 		goto out;
@@ -1113,7 +1113,9 @@ static int task_dumpable(struct task_struct *task)
 	if (mm)
 		dumpable = mm->dumpable;
 	task_unlock(task);
-	return dumpable;
+	if(dumpable == 1)
+		return 1;
+	return 0;
 }
 
 
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index a60a3b3d8a7b..a3453555a94e 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -44,6 +44,7 @@
 #include <linux/jiffies.h>
 #include <linux/sysrq.h>
 #include <linux/vmalloc.h>
+#include <linux/crash_dump.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/io.h>
@@ -219,6 +220,19 @@ static struct file_operations fragmentation_file_operations = {
 	.release	= seq_release,
 };
 
+extern struct seq_operations zoneinfo_op;
+static int zoneinfo_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &zoneinfo_op);
+}
+
+static struct file_operations proc_zoneinfo_file_operations = {
+	.open		= zoneinfo_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
 static int version_read_proc(char *page, char **start, off_t off,
 				 int count, int *eof, void *data)
 {
@@ -438,7 +452,7 @@ static int devices_read_proc(char *page, char **start, off_t off,
 				 int count, int *eof, void *data)
 {
 	int len = get_chrdev_list(page);
-	len += get_blkdev_list(page+len);
+	len += get_blkdev_list(page+len, len);
 	return proc_calc_metrics(page, start, off, count, eof, len);
 }
 
@@ -589,6 +603,7 @@ void __init proc_misc_init(void)
 	create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations);
 	create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations);
 	create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations);
+	create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations);
 	create_seq_entry("diskstats", 0, &proc_diskstats_operations);
 #ifdef CONFIG_MODULES
 	create_seq_entry("modules", 0, &proc_modules_operations);
@@ -604,6 +619,11 @@ void __init proc_misc_init(void)
 				(size_t)high_memory - PAGE_OFFSET + PAGE_SIZE;
 	}
 #endif
+#ifdef CONFIG_PROC_VMCORE
+	proc_vmcore = create_proc_entry("vmcore", S_IRUSR, NULL);
+	if (proc_vmcore)
+		proc_vmcore->proc_fops = &proc_vmcore_operations;
+#endif
 #ifdef CONFIG_MAGIC_SYSRQ
 	entry = create_proc_entry("sysrq-trigger", S_IWUSR, NULL);
 	if (entry)
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
new file mode 100644
index 000000000000..3b2e7b69e63a
--- /dev/null
+++ b/fs/proc/vmcore.c
@@ -0,0 +1,669 @@
+/*
+ *	fs/proc/vmcore.c Interface for accessing the crash
+ * 				 dump from the system's previous life.
+ * 	Heavily borrowed from fs/proc/kcore.c
+ *	Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
+ *	Copyright (C) IBM Corporation, 2004. All rights reserved
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/user.h>
+#include <linux/a.out.h>
+#include <linux/elf.h>
+#include <linux/elfcore.h>
+#include <linux/proc_fs.h>
+#include <linux/highmem.h>
+#include <linux/bootmem.h>
+#include <linux/init.h>
+#include <linux/crash_dump.h>
+#include <linux/list.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+/* List representing chunks of contiguous memory areas and their offsets in
+ * vmcore file.
+ */
+static LIST_HEAD(vmcore_list);
+
+/* Stores the pointer to the buffer containing kernel elf core headers. */
+static char *elfcorebuf;
+static size_t elfcorebuf_sz;
+
+/* Total size of vmcore file. */
+static u64 vmcore_size;
+
+struct proc_dir_entry *proc_vmcore = NULL;
+
+/* Reads a page from the oldmem device from given offset. */
+static ssize_t read_from_oldmem(char *buf, size_t count,
+			     loff_t *ppos, int userbuf)
+{
+	unsigned long pfn, offset;
+	size_t nr_bytes;
+	ssize_t read = 0, tmp;
+
+	if (!count)
+		return 0;
+
+	offset = (unsigned long)(*ppos % PAGE_SIZE);
+	pfn = (unsigned long)(*ppos / PAGE_SIZE);
+	if (pfn > saved_max_pfn)
+		return -EINVAL;
+
+	do {
+		if (count > (PAGE_SIZE - offset))
+			nr_bytes = PAGE_SIZE - offset;
+		else
+			nr_bytes = count;
+
+		tmp = copy_oldmem_page(pfn, buf, nr_bytes, offset, userbuf);
+		if (tmp < 0)
+			return tmp;
+		*ppos += nr_bytes;
+		count -= nr_bytes;
+		buf += nr_bytes;
+		read += nr_bytes;
+		++pfn;
+		offset = 0;
+	} while (count);
+
+	return read;
+}
+
+/* Maps vmcore file offset to respective physical address in memroy. */
+static u64 map_offset_to_paddr(loff_t offset, struct list_head *vc_list,
+					struct vmcore **m_ptr)
+{
+	struct vmcore *m;
+	u64 paddr;
+
+	list_for_each_entry(m, vc_list, list) {
+		u64 start, end;
+		start = m->offset;
+		end = m->offset + m->size - 1;
+		if (offset >= start && offset <= end) {
+			paddr = m->paddr + offset - start;
+			*m_ptr = m;
+			return paddr;
+		}
+	}
+	*m_ptr = NULL;
+	return 0;
+}
+
+/* Read from the ELF header and then the crash dump. On error, negative value is
+ * returned otherwise number of bytes read are returned.
+ */
+static ssize_t read_vmcore(struct file *file, char __user *buffer,
+				size_t buflen, loff_t *fpos)
+{
+	ssize_t acc = 0, tmp;
+	size_t tsz, nr_bytes;
+	u64 start;
+	struct vmcore *curr_m = NULL;
+
+	if (buflen == 0 || *fpos >= vmcore_size)
+		return 0;
+
+	/* trim buflen to not go beyond EOF */
+	if (buflen > vmcore_size - *fpos)
+		buflen = vmcore_size - *fpos;
+
+	/* Read ELF core header */
+	if (*fpos < elfcorebuf_sz) {
+		tsz = elfcorebuf_sz - *fpos;
+		if (buflen < tsz)
+			tsz = buflen;
+		if (copy_to_user(buffer, elfcorebuf + *fpos, tsz))
+			return -EFAULT;
+		buflen -= tsz;
+		*fpos += tsz;
+		buffer += tsz;
+		acc += tsz;
+
+		/* leave now if filled buffer already */
+		if (buflen == 0)
+			return acc;
+	}
+
+	start = map_offset_to_paddr(*fpos, &vmcore_list, &curr_m);
+	if (!curr_m)
+        	return -EINVAL;
+	if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
+		tsz = buflen;
+
+	/* Calculate left bytes in current memory segment. */
+	nr_bytes = (curr_m->size - (start - curr_m->paddr));
+	if (tsz > nr_bytes)
+		tsz = nr_bytes;
+
+	while (buflen) {
+		tmp = read_from_oldmem(buffer, tsz, &start, 1);
+		if (tmp < 0)
+			return tmp;
+		buflen -= tsz;
+		*fpos += tsz;
+		buffer += tsz;
+		acc += tsz;
+		if (start >= (curr_m->paddr + curr_m->size)) {
+			if (curr_m->list.next == &vmcore_list)
+				return acc;	/*EOF*/
+			curr_m = list_entry(curr_m->list.next,
+						struct vmcore, list);
+			start = curr_m->paddr;
+		}
+		if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
+			tsz = buflen;
+		/* Calculate left bytes in current memory segment. */
+		nr_bytes = (curr_m->size - (start - curr_m->paddr));
+		if (tsz > nr_bytes)
+			tsz = nr_bytes;
+	}
+	return acc;
+}
+
+static int open_vmcore(struct inode *inode, struct file *filp)
+{
+	return 0;
+}
+
+struct file_operations proc_vmcore_operations = {
+	.read		= read_vmcore,
+	.open		= open_vmcore,
+};
+
+static struct vmcore* __init get_new_element(void)
+{
+	struct vmcore *p;
+
+	p = kmalloc(sizeof(*p), GFP_KERNEL);
+	if (p)
+		memset(p, 0, sizeof(*p));
+	return p;
+}
+
+static u64 __init get_vmcore_size_elf64(char *elfptr)
+{
+	int i;
+	u64 size;
+	Elf64_Ehdr *ehdr_ptr;
+	Elf64_Phdr *phdr_ptr;
+
+	ehdr_ptr = (Elf64_Ehdr *)elfptr;
+	phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr));
+	size = sizeof(Elf64_Ehdr) + ((ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr));
+	for (i = 0; i < ehdr_ptr->e_phnum; i++) {
+		size += phdr_ptr->p_memsz;
+		phdr_ptr++;
+	}
+	return size;
+}
+
+static u64 __init get_vmcore_size_elf32(char *elfptr)
+{
+	int i;
+	u64 size;
+	Elf32_Ehdr *ehdr_ptr;
+	Elf32_Phdr *phdr_ptr;
+
+	ehdr_ptr = (Elf32_Ehdr *)elfptr;
+	phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr));
+	size = sizeof(Elf32_Ehdr) + ((ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr));
+	for (i = 0; i < ehdr_ptr->e_phnum; i++) {
+		size += phdr_ptr->p_memsz;
+		phdr_ptr++;
+	}
+	return size;
+}
+
+/* Merges all the PT_NOTE headers into one. */
+static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz,
+						struct list_head *vc_list)
+{
+	int i, nr_ptnote=0, rc=0;
+	char *tmp;
+	Elf64_Ehdr *ehdr_ptr;
+	Elf64_Phdr phdr, *phdr_ptr;
+	Elf64_Nhdr *nhdr_ptr;
+	u64 phdr_sz = 0, note_off;
+
+	ehdr_ptr = (Elf64_Ehdr *)elfptr;
+	phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr));
+	for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
+		int j;
+		void *notes_section;
+		struct vmcore *new;
+		u64 offset, max_sz, sz, real_sz = 0;
+		if (phdr_ptr->p_type != PT_NOTE)
+			continue;
+		nr_ptnote++;
+		max_sz = phdr_ptr->p_memsz;
+		offset = phdr_ptr->p_offset;
+		notes_section = kmalloc(max_sz, GFP_KERNEL);
+		if (!notes_section)
+			return -ENOMEM;
+		rc = read_from_oldmem(notes_section, max_sz, &offset, 0);
+		if (rc < 0) {
+			kfree(notes_section);
+			return rc;
+		}
+		nhdr_ptr = notes_section;
+		for (j = 0; j < max_sz; j += sz) {
+			if (nhdr_ptr->n_namesz == 0)
+				break;
+			sz = sizeof(Elf64_Nhdr) +
+				((nhdr_ptr->n_namesz + 3) & ~3) +
+				((nhdr_ptr->n_descsz + 3) & ~3);
+			real_sz += sz;
+			nhdr_ptr = (Elf64_Nhdr*)((char*)nhdr_ptr + sz);
+		}
+
+		/* Add this contiguous chunk of notes section to vmcore list.*/
+		new = get_new_element();
+		if (!new) {
+			kfree(notes_section);
+			return -ENOMEM;
+		}
+		new->paddr = phdr_ptr->p_offset;
+		new->size = real_sz;
+		list_add_tail(&new->list, vc_list);
+		phdr_sz += real_sz;
+		kfree(notes_section);
+	}
+
+	/* Prepare merged PT_NOTE program header. */
+	phdr.p_type    = PT_NOTE;
+	phdr.p_flags   = 0;
+	note_off = sizeof(Elf64_Ehdr) +
+			(ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf64_Phdr);
+	phdr.p_offset  = note_off;
+	phdr.p_vaddr   = phdr.p_paddr = 0;
+	phdr.p_filesz  = phdr.p_memsz = phdr_sz;
+	phdr.p_align   = 0;
+
+	/* Add merged PT_NOTE program header*/
+	tmp = elfptr + sizeof(Elf64_Ehdr);
+	memcpy(tmp, &phdr, sizeof(phdr));
+	tmp += sizeof(phdr);
+
+	/* Remove unwanted PT_NOTE program headers. */
+	i = (nr_ptnote - 1) * sizeof(Elf64_Phdr);
+	*elfsz = *elfsz - i;
+	memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf64_Ehdr)-sizeof(Elf64_Phdr)));
+
+	/* Modify e_phnum to reflect merged headers. */
+	ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1;
+
+	return 0;
+}
+
+/* Merges all the PT_NOTE headers into one. */
+static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz,
+						struct list_head *vc_list)
+{
+	int i, nr_ptnote=0, rc=0;
+	char *tmp;
+	Elf32_Ehdr *ehdr_ptr;
+	Elf32_Phdr phdr, *phdr_ptr;
+	Elf32_Nhdr *nhdr_ptr;
+	u64 phdr_sz = 0, note_off;
+
+	ehdr_ptr = (Elf32_Ehdr *)elfptr;
+	phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr));
+	for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
+		int j;
+		void *notes_section;
+		struct vmcore *new;
+		u64 offset, max_sz, sz, real_sz = 0;
+		if (phdr_ptr->p_type != PT_NOTE)
+			continue;
+		nr_ptnote++;
+		max_sz = phdr_ptr->p_memsz;
+		offset = phdr_ptr->p_offset;
+		notes_section = kmalloc(max_sz, GFP_KERNEL);
+		if (!notes_section)
+			return -ENOMEM;
+		rc = read_from_oldmem(notes_section, max_sz, &offset, 0);
+		if (rc < 0) {
+			kfree(notes_section);
+			return rc;
+		}
+		nhdr_ptr = notes_section;
+		for (j = 0; j < max_sz; j += sz) {
+			if (nhdr_ptr->n_namesz == 0)
+				break;
+			sz = sizeof(Elf32_Nhdr) +
+				((nhdr_ptr->n_namesz + 3) & ~3) +
+				((nhdr_ptr->n_descsz + 3) & ~3);
+			real_sz += sz;
+			nhdr_ptr = (Elf32_Nhdr*)((char*)nhdr_ptr + sz);
+		}
+
+		/* Add this contiguous chunk of notes section to vmcore list.*/
+		new = get_new_element();
+		if (!new) {
+			kfree(notes_section);
+			return -ENOMEM;
+		}
+		new->paddr = phdr_ptr->p_offset;
+		new->size = real_sz;
+		list_add_tail(&new->list, vc_list);
+		phdr_sz += real_sz;
+		kfree(notes_section);
+	}
+
+	/* Prepare merged PT_NOTE program header. */
+	phdr.p_type    = PT_NOTE;
+	phdr.p_flags   = 0;
+	note_off = sizeof(Elf32_Ehdr) +
+			(ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf32_Phdr);
+	phdr.p_offset  = note_off;
+	phdr.p_vaddr   = phdr.p_paddr = 0;
+	phdr.p_filesz  = phdr.p_memsz = phdr_sz;
+	phdr.p_align   = 0;
+
+	/* Add merged PT_NOTE program header*/
+	tmp = elfptr + sizeof(Elf32_Ehdr);
+	memcpy(tmp, &phdr, sizeof(phdr));
+	tmp += sizeof(phdr);
+
+	/* Remove unwanted PT_NOTE program headers. */
+	i = (nr_ptnote - 1) * sizeof(Elf32_Phdr);
+	*elfsz = *elfsz - i;
+	memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf32_Ehdr)-sizeof(Elf32_Phdr)));
+
+	/* Modify e_phnum to reflect merged headers. */
+	ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1;
+
+	return 0;
+}
+
+/* Add memory chunks represented by program headers to vmcore list. Also update
+ * the new offset fields of exported program headers. */
+static int __init process_ptload_program_headers_elf64(char *elfptr,
+						size_t elfsz,
+						struct list_head *vc_list)
+{
+	int i;
+	Elf64_Ehdr *ehdr_ptr;
+	Elf64_Phdr *phdr_ptr;
+	loff_t vmcore_off;
+	struct vmcore *new;
+
+	ehdr_ptr = (Elf64_Ehdr *)elfptr;
+	phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); /* PT_NOTE hdr */
+
+	/* First program header is PT_NOTE header. */
+	vmcore_off = sizeof(Elf64_Ehdr) +
+			(ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr) +
+			phdr_ptr->p_memsz; /* Note sections */
+
+	for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
+		if (phdr_ptr->p_type != PT_LOAD)
+			continue;
+
+		/* Add this contiguous chunk of memory to vmcore list.*/
+		new = get_new_element();
+		if (!new)
+			return -ENOMEM;
+		new->paddr = phdr_ptr->p_offset;
+		new->size = phdr_ptr->p_memsz;
+		list_add_tail(&new->list, vc_list);
+
+		/* Update the program header offset. */
+		phdr_ptr->p_offset = vmcore_off;
+		vmcore_off = vmcore_off + phdr_ptr->p_memsz;
+	}
+	return 0;
+}
+
+static int __init process_ptload_program_headers_elf32(char *elfptr,
+						size_t elfsz,
+						struct list_head *vc_list)
+{
+	int i;
+	Elf32_Ehdr *ehdr_ptr;
+	Elf32_Phdr *phdr_ptr;
+	loff_t vmcore_off;
+	struct vmcore *new;
+
+	ehdr_ptr = (Elf32_Ehdr *)elfptr;
+	phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); /* PT_NOTE hdr */
+
+	/* First program header is PT_NOTE header. */
+	vmcore_off = sizeof(Elf32_Ehdr) +
+			(ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr) +
+			phdr_ptr->p_memsz; /* Note sections */
+
+	for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
+		if (phdr_ptr->p_type != PT_LOAD)
+			continue;
+
+		/* Add this contiguous chunk of memory to vmcore list.*/
+		new = get_new_element();
+		if (!new)
+			return -ENOMEM;
+		new->paddr = phdr_ptr->p_offset;
+		new->size = phdr_ptr->p_memsz;
+		list_add_tail(&new->list, vc_list);
+
+		/* Update the program header offset */
+		phdr_ptr->p_offset = vmcore_off;
+		vmcore_off = vmcore_off + phdr_ptr->p_memsz;
+	}
+	return 0;
+}
+
+/* Sets offset fields of vmcore elements. */
+static void __init set_vmcore_list_offsets_elf64(char *elfptr,
+						struct list_head *vc_list)
+{
+	loff_t vmcore_off;
+	Elf64_Ehdr *ehdr_ptr;
+	struct vmcore *m;
+
+	ehdr_ptr = (Elf64_Ehdr *)elfptr;
+
+	/* Skip Elf header and program headers. */
+	vmcore_off = sizeof(Elf64_Ehdr) +
+			(ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr);
+
+	list_for_each_entry(m, vc_list, list) {
+		m->offset = vmcore_off;
+		vmcore_off += m->size;
+	}
+}
+
+/* Sets offset fields of vmcore elements. */
+static void __init set_vmcore_list_offsets_elf32(char *elfptr,
+						struct list_head *vc_list)
+{
+	loff_t vmcore_off;
+	Elf32_Ehdr *ehdr_ptr;
+	struct vmcore *m;
+
+	ehdr_ptr = (Elf32_Ehdr *)elfptr;
+
+	/* Skip Elf header and program headers. */
+	vmcore_off = sizeof(Elf32_Ehdr) +
+			(ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr);
+
+	list_for_each_entry(m, vc_list, list) {
+		m->offset = vmcore_off;
+		vmcore_off += m->size;
+	}
+}
+
+static int __init parse_crash_elf64_headers(void)
+{
+	int rc=0;
+	Elf64_Ehdr ehdr;
+	u64 addr;
+
+	addr = elfcorehdr_addr;
+
+	/* Read Elf header */
+	rc = read_from_oldmem((char*)&ehdr, sizeof(Elf64_Ehdr), &addr, 0);
+	if (rc < 0)
+		return rc;
+
+	/* Do some basic Verification. */
+	if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
+		(ehdr.e_type != ET_CORE) ||
+		!elf_check_arch(&ehdr) ||
+		ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
+		ehdr.e_ident[EI_VERSION] != EV_CURRENT ||
+		ehdr.e_version != EV_CURRENT ||
+		ehdr.e_ehsize != sizeof(Elf64_Ehdr) ||
+		ehdr.e_phentsize != sizeof(Elf64_Phdr) ||
+		ehdr.e_phnum == 0) {
+		printk(KERN_WARNING "Warning: Core image elf header is not"
+					"sane\n");
+		return -EINVAL;
+	}
+
+	/* Read in all elf headers. */
+	elfcorebuf_sz = sizeof(Elf64_Ehdr) + ehdr.e_phnum * sizeof(Elf64_Phdr);
+	elfcorebuf = kmalloc(elfcorebuf_sz, GFP_KERNEL);
+	if (!elfcorebuf)
+		return -ENOMEM;
+	addr = elfcorehdr_addr;
+	rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz, &addr, 0);
+	if (rc < 0) {
+		kfree(elfcorebuf);
+		return rc;
+	}
+
+	/* Merge all PT_NOTE headers into one. */
+	rc = merge_note_headers_elf64(elfcorebuf, &elfcorebuf_sz, &vmcore_list);
+	if (rc) {
+		kfree(elfcorebuf);
+		return rc;
+	}
+	rc = process_ptload_program_headers_elf64(elfcorebuf, elfcorebuf_sz,
+							&vmcore_list);
+	if (rc) {
+		kfree(elfcorebuf);
+		return rc;
+	}
+	set_vmcore_list_offsets_elf64(elfcorebuf, &vmcore_list);
+	return 0;
+}
+
+static int __init parse_crash_elf32_headers(void)
+{
+	int rc=0;
+	Elf32_Ehdr ehdr;
+	u64 addr;
+
+	addr = elfcorehdr_addr;
+
+	/* Read Elf header */
+	rc = read_from_oldmem((char*)&ehdr, sizeof(Elf32_Ehdr), &addr, 0);
+	if (rc < 0)
+		return rc;
+
+	/* Do some basic Verification. */
+	if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
+		(ehdr.e_type != ET_CORE) ||
+		!elf_check_arch(&ehdr) ||
+		ehdr.e_ident[EI_CLASS] != ELFCLASS32||
+		ehdr.e_ident[EI_VERSION] != EV_CURRENT ||
+		ehdr.e_version != EV_CURRENT ||
+		ehdr.e_ehsize != sizeof(Elf32_Ehdr) ||
+		ehdr.e_phentsize != sizeof(Elf32_Phdr) ||
+		ehdr.e_phnum == 0) {
+		printk(KERN_WARNING "Warning: Core image elf header is not"
+					"sane\n");
+		return -EINVAL;
+	}
+
+	/* Read in all elf headers. */
+	elfcorebuf_sz = sizeof(Elf32_Ehdr) + ehdr.e_phnum * sizeof(Elf32_Phdr);
+	elfcorebuf = kmalloc(elfcorebuf_sz, GFP_KERNEL);
+	if (!elfcorebuf)
+		return -ENOMEM;
+	addr = elfcorehdr_addr;
+	rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz, &addr, 0);
+	if (rc < 0) {
+		kfree(elfcorebuf);
+		return rc;
+	}
+
+	/* Merge all PT_NOTE headers into one. */
+	rc = merge_note_headers_elf32(elfcorebuf, &elfcorebuf_sz, &vmcore_list);
+	if (rc) {
+		kfree(elfcorebuf);
+		return rc;
+	}
+	rc = process_ptload_program_headers_elf32(elfcorebuf, elfcorebuf_sz,
+								&vmcore_list);
+	if (rc) {
+		kfree(elfcorebuf);
+		return rc;
+	}
+	set_vmcore_list_offsets_elf32(elfcorebuf, &vmcore_list);
+	return 0;
+}
+
+static int __init parse_crash_elf_headers(void)
+{
+	unsigned char e_ident[EI_NIDENT];
+	u64 addr;
+	int rc=0;
+
+	addr = elfcorehdr_addr;
+	rc = read_from_oldmem(e_ident, EI_NIDENT, &addr, 0);
+	if (rc < 0)
+		return rc;
+	if (memcmp(e_ident, ELFMAG, SELFMAG) != 0) {
+		printk(KERN_WARNING "Warning: Core image elf header"
+					" not found\n");
+		return -EINVAL;
+	}
+
+	if (e_ident[EI_CLASS] == ELFCLASS64) {
+		rc = parse_crash_elf64_headers();
+		if (rc)
+			return rc;
+
+		/* Determine vmcore size. */
+		vmcore_size = get_vmcore_size_elf64(elfcorebuf);
+	} else if (e_ident[EI_CLASS] == ELFCLASS32) {
+		rc = parse_crash_elf32_headers();
+		if (rc)
+			return rc;
+
+		/* Determine vmcore size. */
+		vmcore_size = get_vmcore_size_elf32(elfcorebuf);
+	} else {
+		printk(KERN_WARNING "Warning: Core image elf header is not"
+					" sane\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* Init function for vmcore module. */
+static int __init vmcore_init(void)
+{
+	int rc = 0;
+
+	/* If elfcorehdr= has been passed in cmdline, then capture the dump.*/
+	if (!(elfcorehdr_addr < ELFCORE_ADDR_MAX))
+		return rc;
+	rc = parse_crash_elf_headers();
+	if (rc) {
+		printk(KERN_WARNING "Kdump: vmcore not initialized\n");
+		return rc;
+	}
+
+	/* Initialize /proc/vmcore size if proc is already up. */
+	if (proc_vmcore)
+		proc_vmcore->size = vmcore_size;
+	return 0;
+}
+module_init(vmcore_init)
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c
index cd66147cca04..7a8f5595c26f 100644
--- a/fs/qnx4/dir.c
+++ b/fs/qnx4/dir.c
@@ -61,7 +61,7 @@ static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir)
 						ino = blknum * QNX4_INODES_PER_BLOCK + ix - 1;
 					else {
 						le  = (struct qnx4_link_info*)de;
-						ino = ( le->dl_inode_blk - 1 ) *
+						ino = ( le32_to_cpu(le->dl_inode_blk) - 1 ) *
 							QNX4_INODES_PER_BLOCK +
 							le->dl_inode_ndx;
 					}
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index aa92d6b76a9a..b79162a35478 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -236,7 +236,7 @@ unsigned long qnx4_block_map( struct inode *inode, long iblock )
 	struct buffer_head *bh = NULL;
 	struct qnx4_xblk *xblk = NULL;
 	struct qnx4_inode_entry *qnx4_inode = qnx4_raw_inode(inode);
-	qnx4_nxtnt_t nxtnt = le16_to_cpu(qnx4_inode->di_num_xtnts);
+	u16 nxtnt = le16_to_cpu(qnx4_inode->di_num_xtnts);
 
 	if ( iblock < le32_to_cpu(qnx4_inode->di_first_xtnt.xtnt_size) ) {
 		// iblock is in the first extent. This is easy.
@@ -372,7 +372,7 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent)
 		printk("qnx4: unable to read the superblock\n");
 		goto outnobh;
 	}
-	if ( le32_to_cpu( *(__u32*)bh->b_data ) != QNX4_SUPER_MAGIC ) {
+	if ( le32_to_cpup((__le32*) bh->b_data) != QNX4_SUPER_MAGIC ) {
 		if (!silent)
 			printk("qnx4: wrong fsid in superblock.\n");
 		goto out;
diff --git a/fs/quota.c b/fs/quota.c
index 3f0333a51a23..f5d1cff55196 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -149,36 +149,6 @@ static int check_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t
 	return error;
 }
 
-static struct super_block *get_super_to_sync(int type)
-{
-	struct list_head *head;
-	int cnt, dirty;
-
-restart:
-	spin_lock(&sb_lock);
-	list_for_each(head, &super_blocks) {
-		struct super_block *sb = list_entry(head, struct super_block, s_list);
-
-		/* This test just improves performance so it needn't be reliable... */
-		for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++)
-			if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt)
-			    && info_any_dirty(&sb_dqopt(sb)->info[cnt]))
-				dirty = 1;
-		if (!dirty)
-			continue;
-		sb->s_count++;
-		spin_unlock(&sb_lock);
-		down_read(&sb->s_umount);
-		if (!sb->s_root) {
-			drop_super(sb);
-			goto restart;
-		}
-		return sb;
-	}
-	spin_unlock(&sb_lock);
-	return NULL;
-}
-
 static void quota_sync_sb(struct super_block *sb, int type)
 {
 	int cnt;
@@ -219,17 +189,35 @@ static void quota_sync_sb(struct super_block *sb, int type)
 
 void sync_dquots(struct super_block *sb, int type)
 {
+	int cnt, dirty;
+
 	if (sb) {
 		if (sb->s_qcop->quota_sync)
 			quota_sync_sb(sb, type);
+		return;
 	}
-	else {
-		while ((sb = get_super_to_sync(type)) != NULL) {
-			if (sb->s_qcop->quota_sync)
-				quota_sync_sb(sb, type);
-			drop_super(sb);
-		}
+
+	spin_lock(&sb_lock);
+restart:
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		/* This test just improves performance so it needn't be reliable... */
+		for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++)
+			if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt)
+			    && info_any_dirty(&sb_dqopt(sb)->info[cnt]))
+				dirty = 1;
+		if (!dirty)
+			continue;
+		sb->s_count++;
+		spin_unlock(&sb_lock);
+		down_read(&sb->s_umount);
+		if (sb->s_root && sb->s_qcop->quota_sync)
+			quota_sync_sb(sb, type);
+		up_read(&sb->s_umount);
+		spin_lock(&sb_lock);
+		if (__put_super_and_need_restart(sb))
+			goto restart;
 	}
+	spin_unlock(&sb_lock);
 }
 
 /* Copy parameters and call proper function */
diff --git a/fs/read_write.c b/fs/read_write.c
index c4c2bee373ed..9292f5fa4d62 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -203,6 +203,16 @@ Einval:
 	return -EINVAL;
 }
 
+static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
+{
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	if (!kiocbIsKicked(iocb))
+		schedule();
+	else
+		kiocbClearKicked(iocb);
+	__set_current_state(TASK_RUNNING);
+}
+
 ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
 {
 	struct kiocb kiocb;
@@ -210,7 +220,10 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp
 
 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = *ppos;
-	ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos);
+	while (-EIOCBRETRY ==
+		(ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos)))
+		wait_on_retry_sync_kiocb(&kiocb);
+
 	if (-EIOCBQUEUED == ret)
 		ret = wait_on_sync_kiocb(&kiocb);
 	*ppos = kiocb.ki_pos;
@@ -258,7 +271,10 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof
 
 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = *ppos;
-	ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos);
+	while (-EIOCBRETRY ==
+	       (ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos)))
+		wait_on_retry_sync_kiocb(&kiocb);
+
 	if (-EIOCBQUEUED == ret)
 		ret = wait_on_sync_kiocb(&kiocb);
 	*ppos = kiocb.ki_pos;
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 2230afff1870..12e91209544e 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -201,7 +201,7 @@ static int reiserfs_allocate_blocks_for_region(
     /* If we came here, it means we absolutely need to open a transaction,
        since we need to allocate some blocks */
     reiserfs_write_lock(inode->i_sb); // Journaling stuff and we need that.
-    res = journal_begin(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS); // Wish I know if this number enough
+    res = journal_begin(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb)); // Wish I know if this number enough
     if (res)
         goto error_exit;
     reiserfs_update_inode_transaction(inode) ;
@@ -576,7 +576,7 @@ error_exit:
         int err;
         // update any changes we made to blk count
         reiserfs_update_sd(th, inode);
-        err = journal_end(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS);
+        err = journal_end(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb));
         if (err)
             res = err;
     }
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 2711dff1b7b4..289d864fe731 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -28,7 +28,7 @@ static int reiserfs_prepare_write(struct file *f, struct page *page,
 void reiserfs_delete_inode (struct inode * inode)
 {
     /* We need blocks for transaction + (user+group) quota update (possibly delete) */
-    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * REISERFS_QUOTA_INIT_BLOCKS;
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
     struct reiserfs_transaction_handle th ;
   
     reiserfs_write_lock(inode->i_sb);
@@ -254,6 +254,7 @@ static int _get_block_create_0 (struct inode * inode, long block,
     char * p = NULL;
     int chars;
     int ret ;
+    int result ;
     int done = 0 ;
     unsigned long offset ;
 
@@ -262,10 +263,13 @@ static int _get_block_create_0 (struct inode * inode, long block,
 		  (loff_t)block * inode->i_sb->s_blocksize + 1, TYPE_ANY, 3);
 
 research:
-    if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND) {
+    result = search_for_position_by_key (inode->i_sb, &key, &path) ;
+    if (result != POSITION_FOUND) {
 	pathrelse (&path);
         if (p)
             kunmap(bh_result->b_page) ;
+	if (result == IO_ERROR)
+	    return -EIO;
 	// We do not return -ENOENT if there is a hole but page is uptodate, because it means
 	// That there is some MMAPED data associated with it that is yet to be written to disk.
 	if ((args & GET_BLOCK_NO_HOLE) && !PageUptodate(bh_result->b_page) ) {
@@ -382,8 +386,9 @@ research:
 
 	// update key to look for the next piece
 	set_cpu_key_k_offset (&key, cpu_key_k_offset (&key) + chars);
-	if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND)
-	    // we read something from tail, even if now we got IO_ERROR
+	result = search_for_position_by_key (inode->i_sb, &key, &path);
+	if (result != POSITION_FOUND)
+	    // i/o error most likely
 	    break;
 	bh = get_last_bh (&path);
 	ih = get_ih (&path);
@@ -394,6 +399,10 @@ research:
 
 finished:
     pathrelse (&path);
+
+    if (result == IO_ERROR)
+	return -EIO;
+
     /* this buffer has valid data, but isn't valid for io.  mapping it to
      * block #0 tells the rest of reiserfs it just has a tail in it
      */
@@ -591,7 +600,7 @@ int reiserfs_get_block (struct inode * inode, sector_t block,
        XXX in practically impossible worst case direct2indirect()
        can incur (much) more than 3 balancings.
        quota update for user, group */
-    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS;
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
     int version;
     int dangle = 1;
     loff_t new_offset = (((loff_t)block) << inode->i_sb->s_blocksize_bits) + 1 ;
@@ -2796,12 +2805,15 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) {
 
                 if (!error) {
 		    struct reiserfs_transaction_handle th;
+		    int jbegin_count = 2*(REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb)+REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb))+2;
 
 		    /* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */
-		    journal_begin(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2);
+		    error = journal_begin(&th, inode->i_sb, jbegin_count);
+ 		    if (error)
+ 			goto out;
                     error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0;
 		    if (error) {
-			journal_end(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2);
+			journal_end(&th, inode->i_sb, jbegin_count);
 			goto out;
 		    }
 		    /* Update corresponding info in inode so that everything is in
@@ -2811,7 +2823,7 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) {
 		    if (attr->ia_valid & ATTR_GID)
 			inode->i_gid = attr->ia_gid;
 		    mark_inode_dirty(inode);
-		    journal_end(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2);
+		    error = journal_end(&th, inode->i_sb, jbegin_count);
 		}
         }
         if (!error)
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 94dc42475a04..76caedf737f2 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -36,10 +36,16 @@ int reiserfs_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
 	/* following two cases are taken from fs/ext2/ioctl.c by Remy
 	   Card (card@masi.ibp.fr) */
 	case REISERFS_IOC_GETFLAGS:
+		if (!reiserfs_attrs (inode->i_sb))
+			return -ENOTTY;
+
 		flags = REISERFS_I(inode) -> i_attrs;
 		i_attrs_to_sd_attrs( inode, ( __u16 * ) &flags );
 		return put_user(flags, (int __user *) arg);
 	case REISERFS_IOC_SETFLAGS: {
+		if (!reiserfs_attrs (inode->i_sb))
+			return -ENOTTY;
+
 		if (IS_RDONLY(inode))
 			return -EROFS;
 
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 3072cfdee959..d1bcf0da6728 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -645,18 +645,22 @@ struct buffer_chunk {
 
 static void write_chunk(struct buffer_chunk *chunk) {
     int i;
+    get_fs_excl();
     for (i = 0; i < chunk->nr ; i++) {
 	submit_logged_buffer(chunk->bh[i]) ;
     }
     chunk->nr = 0;
+    put_fs_excl();
 }
 
 static void write_ordered_chunk(struct buffer_chunk *chunk) {
     int i;
+    get_fs_excl();
     for (i = 0; i < chunk->nr ; i++) {
 	submit_ordered_buffer(chunk->bh[i]) ;
     }
     chunk->nr = 0;
+    put_fs_excl();
 }
 
 static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh,
@@ -918,6 +922,8 @@ static int flush_commit_list(struct super_block *s, struct reiserfs_journal_list
     return 0 ;
   }
 
+  get_fs_excl();
+
   /* before we can put our commit blocks on disk, we have to make sure everyone older than
   ** us is on disk too
   */
@@ -1055,6 +1061,7 @@ put_jl:
 
   if (retval)
     reiserfs_abort (s, retval, "Journal write error in %s", __FUNCTION__);
+  put_fs_excl();
   return retval;
 }
 
@@ -1251,6 +1258,8 @@ static int flush_journal_list(struct super_block *s,
     return 0 ;
   }
 
+  get_fs_excl();
+
   /* if all the work is already done, get out of here */
   if (atomic_read(&(jl->j_nonzerolen)) <= 0 && 
       atomic_read(&(jl->j_commit_left)) <= 0) {
@@ -1450,6 +1459,7 @@ flush_older_and_return:
   put_journal_list(s, jl);
   if (flushall)
     up(&journal->j_flush_sem);
+  put_fs_excl();
   return err ;
 } 
 
@@ -2631,6 +2641,8 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th, struct sup
   int retval;
 
   reiserfs_check_lock_depth(p_s_sb, "journal_begin") ;
+  if (nblocks > journal->j_trans_max)
+	BUG();
 
   PROC_INFO_INC( p_s_sb, journal.journal_being );
   /* set here for journal_join */
@@ -2717,6 +2729,7 @@ relock:
   th->t_trans_id = journal->j_trans_id ;
   unlock_journal(p_s_sb) ;
   INIT_LIST_HEAD (&th->t_list);
+  get_fs_excl();
   return 0 ;
 
 out_fail:
@@ -3524,6 +3537,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, struct super_b
   BUG_ON (th->t_refcount > 1);
   BUG_ON (!th->t_trans_id);
 
+  put_fs_excl();
   current->journal_info = th->t_handle_save;
   reiserfs_check_lock_depth(p_s_sb, "journal end");
   if (journal->j_len == 0) {
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 7d4dc5f5aa8b..4a333255f27a 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -586,7 +586,7 @@ static int reiserfs_create (struct inode * dir, struct dentry *dentry, int mode,
     int retval;
     struct inode * inode;
     /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
-    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS);
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
     struct reiserfs_transaction_handle th ;
     int locked;
 
@@ -653,7 +653,7 @@ static int reiserfs_mknod (struct inode * dir, struct dentry *dentry, int mode,
     struct inode * inode;
     struct reiserfs_transaction_handle th ;
     /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
-    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS);
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
     int locked;
 
     if (!new_valid_dev(rdev))
@@ -727,7 +727,7 @@ static int reiserfs_mkdir (struct inode * dir, struct dentry *dentry, int mode)
     struct inode * inode;
     struct reiserfs_transaction_handle th ;
     /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
-    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS);
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
     int locked;
 
 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
@@ -829,8 +829,10 @@ static int reiserfs_rmdir (struct inode * dir, struct dentry *dentry)
 
 
     /* we will be doing 2 balancings and update 2 stat data, we change quotas
-     * of the owner of the directory and of the owner of the parent directory */
-    jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS);
+     * of the owner of the directory and of the owner of the parent directory.
+     * The quota structure is possibly deleted only on last iput => outside
+     * of this transaction */
+    jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
 
     reiserfs_write_lock(dir->i_sb);
     retval = journal_begin(&th, dir->i_sb, jbegin_count) ;
@@ -913,9 +915,10 @@ static int reiserfs_unlink (struct inode * dir, struct dentry *dentry)
     inode = dentry->d_inode;
 
     /* in this transaction we can be doing at max two balancings and update
-       two stat datas, we change quotas of the owner of the directory and of
-       the owner of the parent directory */
-    jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS);
+     * two stat datas, we change quotas of the owner of the directory and of
+     * the owner of the parent directory. The quota structure is possibly
+     * deleted only on iput => outside of this transaction */
+    jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
 
     reiserfs_write_lock(dir->i_sb);
     retval = journal_begin(&th, dir->i_sb, jbegin_count) ;
@@ -1000,7 +1003,7 @@ static int reiserfs_symlink (struct inode * parent_dir,
     struct reiserfs_transaction_handle th ;
     int mode = S_IFLNK | S_IRWXUGO;
     /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
-    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS);
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS(parent_dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(parent_dir->i_sb));
 
     if (!(inode = new_inode(parent_dir->i_sb))) {
 	return -ENOMEM ;
@@ -1076,7 +1079,7 @@ static int reiserfs_link (struct dentry * old_dentry, struct inode * dir, struct
     struct inode *inode = old_dentry->d_inode;
     struct reiserfs_transaction_handle th ;
     /* We need blocks for transaction + update of quotas for the owners of the directory */
-    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * REISERFS_QUOTA_TRANS_BLOCKS;
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
 
     reiserfs_write_lock(dir->i_sb);
     if (inode->i_nlink >= REISERFS_LINK_MAX) {
@@ -1196,7 +1199,7 @@ static int reiserfs_rename (struct inode * old_dir, struct dentry *old_dentry,
        pointed initially and (5) maybe block containing ".." of
        renamed directory
        quota updates: two parent directories */
-    jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 5 + 4 * REISERFS_QUOTA_TRANS_BLOCKS;
+    jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 5 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb);
 
     old_inode = old_dentry->d_inode;
     new_dentry_inode = new_dentry->d_inode;
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index c47f8fd31a2d..63158491e152 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -223,7 +223,7 @@ extern struct tree_balance * cur_tb;
 const struct reiserfs_key  MIN_KEY = {0, 0, {{0, 0},}};
 
 /* Maximal possible key. It is never in the tree. */
-const struct reiserfs_key  MAX_KEY = {
+static const struct reiserfs_key  MAX_KEY = {
 	__constant_cpu_to_le32(0xffffffff),
 	__constant_cpu_to_le32(0xffffffff),
 	{{__constant_cpu_to_le32(0xffffffff),
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index b35b87744983..4b80ab95d338 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -866,8 +866,9 @@ static int reiserfs_parse_options (struct super_block * s, char * options, /* st
 	{"jdev",	.arg_required = 'j', .values = NULL},
 	{"nolargeio",	.arg_required = 'w', .values = NULL},
 	{"commit",	.arg_required = 'c', .values = NULL},
-	{"usrquota",},
-	{"grpquota",},
+	{"usrquota",	.setmask = 1<<REISERFS_QUOTA},
+	{"grpquota",	.setmask = 1<<REISERFS_QUOTA},
+	{"noquota",	.clrmask = 1<<REISERFS_QUOTA},
 	{"errors", 	.arg_required = 'e', .values = error_actions},
 	{"usrjquota",	.arg_required = 'u'|(1<<REISERFS_OPT_ALLOWEMPTY), .values = NULL},
 	{"grpjquota",	.arg_required = 'g'|(1<<REISERFS_OPT_ALLOWEMPTY), .values = NULL},
@@ -964,6 +965,7 @@ static int reiserfs_parse_options (struct super_block * s, char * options, /* st
 		    return 0;
 		}
 		strcpy(REISERFS_SB(s)->s_qf_names[qtype], arg);
+		*mount_options |= 1<<REISERFS_QUOTA;
 	    }
 	    else {
 		if (REISERFS_SB(s)->s_qf_names[qtype]) {
@@ -995,7 +997,13 @@ static int reiserfs_parse_options (struct super_block * s, char * options, /* st
 	reiserfs_warning(s, "reiserfs_parse_options: journalled quota format not specified.");
 	return 0;
     }
+    /* This checking is not precise wrt the quota type but for our purposes it is sufficient */
+    if (!(*mount_options & (1<<REISERFS_QUOTA)) && sb_any_quota_enabled(s)) {
+	reiserfs_warning(s, "reiserfs_parse_options: quota options must be present when quota is turned on.");
+	return 0;
+    }
 #endif
+
     return 1;
 }
 
@@ -1045,10 +1053,9 @@ static void handle_barrier_mode(struct super_block *s, unsigned long bits) {
 
 static void handle_attrs( struct super_block *s )
 {
-	struct reiserfs_super_block * rs;
+	struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s);
 
 	if( reiserfs_attrs( s ) ) {
-		rs = SB_DISK_SUPER_BLOCK (s);
 		if( old_format_only(s) ) {
 			reiserfs_warning(s, "reiserfs: cannot support attributes on 3.5.x disk format" );
 			REISERFS_SB(s) -> s_mount_opt &= ~ ( 1 << REISERFS_ATTRS );
@@ -1058,6 +1065,8 @@ static void handle_attrs( struct super_block *s )
 				reiserfs_warning(s, "reiserfs: cannot support attributes until flag is set in super-block" );
 				REISERFS_SB(s) -> s_mount_opt &= ~ ( 1 << REISERFS_ATTRS );
 		}
+	} else if (le32_to_cpu( rs -> s_flags ) & reiserfs_attrs_cleared) {
+		REISERFS_SB(s)->s_mount_opt |= REISERFS_ATTRS;
 	}
 }
 
@@ -1105,6 +1114,7 @@ static int reiserfs_remount (struct super_block * s, int * mount_flags, char * a
   safe_mask |= 1 << REISERFS_ERROR_RO;
   safe_mask |= 1 << REISERFS_ERROR_CONTINUE;
   safe_mask |= 1 << REISERFS_ERROR_PANIC;
+  safe_mask |= 1 << REISERFS_QUOTA;
 
   /* Update the bitmask, taking care to keep
    * the bits we're not allowed to change here */
@@ -1841,13 +1851,18 @@ static int reiserfs_statfs (struct super_block * s, struct kstatfs * buf)
 static int reiserfs_dquot_initialize(struct inode *inode, int type)
 {
     struct reiserfs_transaction_handle th;
-    int ret;
+    int ret, err;
 
     /* We may create quota structure so we need to reserve enough blocks */
     reiserfs_write_lock(inode->i_sb);
-    journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    ret = journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb));
+    if (ret)
+	goto out;
     ret = dquot_initialize(inode, type);
-    journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    err = journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb));
+    if (!ret && err)
+	ret = err;
+out:
     reiserfs_write_unlock(inode->i_sb);
     return ret;
 }
@@ -1855,13 +1870,18 @@ static int reiserfs_dquot_initialize(struct inode *inode, int type)
 static int reiserfs_dquot_drop(struct inode *inode)
 {
     struct reiserfs_transaction_handle th;
-    int ret;
+    int ret, err;
 
     /* We may delete quota structure so we need to reserve enough blocks */
     reiserfs_write_lock(inode->i_sb);
-    journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    ret = journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb));
+    if (ret)
+ 	goto out;
     ret = dquot_drop(inode);
-    journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    err = journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb));
+    if (!ret && err)
+	ret = err;
+out:
     reiserfs_write_unlock(inode->i_sb);
     return ret;
 }
@@ -1869,12 +1889,17 @@ static int reiserfs_dquot_drop(struct inode *inode)
 static int reiserfs_write_dquot(struct dquot *dquot)
 {
     struct reiserfs_transaction_handle th;
-    int ret;
+    int ret, err;
 
     reiserfs_write_lock(dquot->dq_sb);
-    journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS);
+    ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
+    if (ret)
+	goto out;
     ret = dquot_commit(dquot);
-    journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS);
+    err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
+    if (!ret && err)
+	ret = err;
+out:
     reiserfs_write_unlock(dquot->dq_sb);
     return ret;
 }
@@ -1882,12 +1907,17 @@ static int reiserfs_write_dquot(struct dquot *dquot)
 static int reiserfs_acquire_dquot(struct dquot *dquot)
 {
     struct reiserfs_transaction_handle th;
-    int ret;
+    int ret, err;
 
     reiserfs_write_lock(dquot->dq_sb);
-    journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
+    if (ret)
+	goto out;
     ret = dquot_acquire(dquot);
-    journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
+    if (!ret && err)
+	ret = err;
+out:
     reiserfs_write_unlock(dquot->dq_sb);
     return ret;
 }
@@ -1895,12 +1925,17 @@ static int reiserfs_acquire_dquot(struct dquot *dquot)
 static int reiserfs_release_dquot(struct dquot *dquot)
 {
     struct reiserfs_transaction_handle th;
-    int ret;
+    int ret, err;
 
     reiserfs_write_lock(dquot->dq_sb);
-    journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
+    if (ret)
+ 	goto out;
     ret = dquot_release(dquot);
-    journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
+    if (!ret && err)
+	ret = err;
+out:
     reiserfs_write_unlock(dquot->dq_sb);
     return ret;
 }
@@ -1920,39 +1955,29 @@ static int reiserfs_mark_dquot_dirty(struct dquot *dquot)
 static int reiserfs_write_info(struct super_block *sb, int type)
 {
     struct reiserfs_transaction_handle th;
-    int ret;
+    int ret, err;
 
     /* Data block + inode block */
     reiserfs_write_lock(sb);
-    journal_begin(&th, sb, 2);
+    ret = journal_begin(&th, sb, 2);
+    if (ret)
+	goto out;
     ret = dquot_commit_info(sb, type);
-    journal_end(&th, sb, 2);
+    err = journal_end(&th, sb, 2);
+    if (!ret && err)
+	ret = err;
+out:
     reiserfs_write_unlock(sb);
     return ret;
 }
 
 /*
- * Turn on quotas during mount time - we need to find
- * the quota file and such...
+ * Turn on quotas during mount time - we need to find the quota file and such...
  */
 static int reiserfs_quota_on_mount(struct super_block *sb, int type)
 {
-    int err;
-    struct dentry *dentry;
-    struct qstr name = { .name = REISERFS_SB(sb)->s_qf_names[type],
-                         .hash = 0,
-                         .len = strlen(REISERFS_SB(sb)->s_qf_names[type])};
-
-    dentry = lookup_hash(&name, sb->s_root);
-    if (IS_ERR(dentry))
-            return PTR_ERR(dentry);
-    err = vfs_quota_on_mount(type, REISERFS_SB(sb)->s_jquota_fmt, dentry);
-    /* Now invalidate and put the dentry - quota got its own reference
-     * to inode and dentry has at least wrong hash so we had better
-     * throw it away */
-    d_invalidate(dentry);
-    dput(dentry);
-    return err;
+	return vfs_quota_on_mount(sb, REISERFS_SB(sb)->s_qf_names[type],
+			REISERFS_SB(sb)->s_jquota_fmt, type);
 }
 
 /*
@@ -1963,6 +1988,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, ch
     int err;
     struct nameidata nd;
 
+    if (!(REISERFS_SB(sb)->s_mount_opt & (1<<REISERFS_QUOTA)))
+	return -EINVAL;
     err = path_lookup(path, LOOKUP_FOLLOW, &nd);
     if (err)
         return err;
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index e302071903a1..c312881c5f53 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -4,7 +4,7 @@
 #include <linux/errno.h>
 #include <linux/pagemap.h>
 #include <linux/xattr.h>
-#include <linux/xattr_acl.h>
+#include <linux/posix_acl_xattr.h>
 #include <linux/reiserfs_xattr.h>
 #include <linux/reiserfs_acl.h>
 #include <asm/uaccess.h>
@@ -192,11 +192,11 @@ reiserfs_get_acl(struct inode *inode, int type)
 
         switch (type) {
             case ACL_TYPE_ACCESS:
-                name = XATTR_NAME_ACL_ACCESS;
+                name = POSIX_ACL_XATTR_ACCESS;
                 p_acl = &reiserfs_i->i_acl_access;
                 break;
             case ACL_TYPE_DEFAULT:
-                name = XATTR_NAME_ACL_DEFAULT;
+                name = POSIX_ACL_XATTR_DEFAULT;
                 p_acl = &reiserfs_i->i_acl_default;
                 break;
             default:
@@ -260,7 +260,7 @@ reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 
         switch (type) {
             case ACL_TYPE_ACCESS:
-                name = XATTR_NAME_ACL_ACCESS;
+                name = POSIX_ACL_XATTR_ACCESS;
                 p_acl = &reiserfs_i->i_acl_access;
                 if (acl) {
                     mode_t mode = inode->i_mode;
@@ -275,7 +275,7 @@ reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
                 }
                 break;
             case ACL_TYPE_DEFAULT:
-                name = XATTR_NAME_ACL_DEFAULT;
+                name = POSIX_ACL_XATTR_DEFAULT;
                 p_acl = &reiserfs_i->i_acl_default;
                 if (!S_ISDIR (inode->i_mode))
                     return acl ? -EACCES : 0;
@@ -468,7 +468,7 @@ static int
 posix_acl_access_get(struct inode *inode, const char *name,
 			  void *buffer, size_t size)
 {
-	if (strlen(name) != sizeof(XATTR_NAME_ACL_ACCESS)-1)
+	if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS)-1)
 		return -EINVAL;
 	return xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
 }
@@ -477,7 +477,7 @@ static int
 posix_acl_access_set(struct inode *inode, const char *name,
 			  const void *value, size_t size, int flags)
 {
-	if (strlen(name) != sizeof(XATTR_NAME_ACL_ACCESS)-1)
+	if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS)-1)
 		return -EINVAL;
 	return xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
 }
@@ -487,7 +487,7 @@ posix_acl_access_del (struct inode *inode, const char *name)
 {
     struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
     struct posix_acl **acl = &reiserfs_i->i_acl_access;
-    if (strlen(name) != sizeof(XATTR_NAME_ACL_ACCESS)-1)
+    if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS)-1)
 	return -EINVAL;
     if (!IS_ERR (*acl) && *acl) {
         posix_acl_release (*acl);
@@ -510,7 +510,7 @@ posix_acl_access_list (struct inode *inode, const char *name, int namelen, char
 }
 
 struct reiserfs_xattr_handler posix_acl_access_handler = {
-	.prefix = XATTR_NAME_ACL_ACCESS,
+	.prefix = POSIX_ACL_XATTR_ACCESS,
 	.get = posix_acl_access_get,
 	.set = posix_acl_access_set,
 	.del = posix_acl_access_del,
@@ -521,7 +521,7 @@ static int
 posix_acl_default_get (struct inode *inode, const char *name,
 			   void *buffer, size_t size)
 {
-	if (strlen(name) != sizeof(XATTR_NAME_ACL_DEFAULT)-1)
+	if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT)-1)
 		return -EINVAL;
 	return xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
 }
@@ -530,7 +530,7 @@ static int
 posix_acl_default_set(struct inode *inode, const char *name,
 			   const void *value, size_t size, int flags)
 {
-	if (strlen(name) != sizeof(XATTR_NAME_ACL_DEFAULT)-1)
+	if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT)-1)
 		return -EINVAL;
 	return xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
 }
@@ -540,7 +540,7 @@ posix_acl_default_del (struct inode *inode, const char *name)
 {
     struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
     struct posix_acl **acl = &reiserfs_i->i_acl_default;
-    if (strlen(name) != sizeof(XATTR_NAME_ACL_DEFAULT)-1)
+    if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT)-1)
 	return -EINVAL;
     if (!IS_ERR (*acl) && *acl) {
         posix_acl_release (*acl);
@@ -563,7 +563,7 @@ posix_acl_default_list (struct inode *inode, const char *name, int namelen, char
 }
 
 struct reiserfs_xattr_handler posix_acl_default_handler = {
-	.prefix = XATTR_NAME_ACL_DEFAULT,
+	.prefix = POSIX_ACL_XATTR_DEFAULT,
 	.get = posix_acl_default_get,
 	.set = posix_acl_default_set,
 	.del = posix_acl_default_del,
diff --git a/fs/super.c b/fs/super.c
index 3a1b8ca04ba6..25bc1ec6bc5d 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -341,20 +341,22 @@ static inline void write_super(struct super_block *sb)
  */
 void sync_supers(void)
 {
-	struct super_block * sb;
-restart:
+	struct super_block *sb;
+
 	spin_lock(&sb_lock);
-	sb = sb_entry(super_blocks.next);
-	while (sb != sb_entry(&super_blocks))
+restart:
+	list_for_each_entry(sb, &super_blocks, s_list) {
 		if (sb->s_dirt) {
 			sb->s_count++;
 			spin_unlock(&sb_lock);
 			down_read(&sb->s_umount);
 			write_super(sb);
-			drop_super(sb);
-			goto restart;
-		} else
-			sb = sb_entry(sb->s_list.next);
+			up_read(&sb->s_umount);
+			spin_lock(&sb_lock);
+			if (__put_super_and_need_restart(sb))
+				goto restart;
+		}
+	}
 	spin_unlock(&sb_lock);
 }
 
@@ -381,20 +383,16 @@ void sync_filesystems(int wait)
 
 	down(&mutex);		/* Could be down_interruptible */
 	spin_lock(&sb_lock);
-	for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks);
-			sb = sb_entry(sb->s_list.next)) {
+	list_for_each_entry(sb, &super_blocks, s_list) {
 		if (!sb->s_op->sync_fs)
 			continue;
 		if (sb->s_flags & MS_RDONLY)
 			continue;
 		sb->s_need_sync_fs = 1;
 	}
-	spin_unlock(&sb_lock);
 
 restart:
-	spin_lock(&sb_lock);
-	for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks);
-			sb = sb_entry(sb->s_list.next)) {
+	list_for_each_entry(sb, &super_blocks, s_list) {
 		if (!sb->s_need_sync_fs)
 			continue;
 		sb->s_need_sync_fs = 0;
@@ -405,8 +403,11 @@ restart:
 		down_read(&sb->s_umount);
 		if (sb->s_root && (wait || sb->s_dirt))
 			sb->s_op->sync_fs(sb, wait);
-		drop_super(sb);
-		goto restart;
+		up_read(&sb->s_umount);
+		/* restart only when sb is no longer on the list */
+		spin_lock(&sb_lock);
+		if (__put_super_and_need_restart(sb))
+			goto restart;
 	}
 	spin_unlock(&sb_lock);
 	up(&mutex);
@@ -422,21 +423,25 @@ restart:
 
 struct super_block * get_super(struct block_device *bdev)
 {
-	struct list_head *p;
+	struct super_block *sb;
+
 	if (!bdev)
 		return NULL;
-rescan:
+
 	spin_lock(&sb_lock);
-	list_for_each(p, &super_blocks) {
-		struct super_block *s = sb_entry(p);
-		if (s->s_bdev == bdev) {
-			s->s_count++;
+rescan:
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		if (sb->s_bdev == bdev) {
+			sb->s_count++;
 			spin_unlock(&sb_lock);
-			down_read(&s->s_umount);
-			if (s->s_root)
-				return s;
-			drop_super(s);
-			goto rescan;
+			down_read(&sb->s_umount);
+			if (sb->s_root)
+				return sb;
+			up_read(&sb->s_umount);
+			/* restart only when sb is no longer on the list */
+			spin_lock(&sb_lock);
+			if (__put_super_and_need_restart(sb))
+				goto rescan;
 		}
 	}
 	spin_unlock(&sb_lock);
@@ -447,20 +452,22 @@ EXPORT_SYMBOL(get_super);
  
 struct super_block * user_get_super(dev_t dev)
 {
-	struct list_head *p;
+	struct super_block *sb;
 
-rescan:
 	spin_lock(&sb_lock);
-	list_for_each(p, &super_blocks) {
-		struct super_block *s = sb_entry(p);
-		if (s->s_dev ==  dev) {
-			s->s_count++;
+rescan:
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		if (sb->s_dev ==  dev) {
+			sb->s_count++;
 			spin_unlock(&sb_lock);
-			down_read(&s->s_umount);
-			if (s->s_root)
-				return s;
-			drop_super(s);
-			goto rescan;
+			down_read(&sb->s_umount);
+			if (sb->s_root)
+				return sb;
+			up_read(&sb->s_umount);
+			/* restart only when sb is no longer on the list */
+			spin_lock(&sb_lock);
+			if (__put_super_and_need_restart(sb))
+				goto rescan;
 		}
 	}
 	spin_unlock(&sb_lock);
@@ -835,6 +842,7 @@ do_kern_mount(const char *fstype, int flags, const char *name, void *data)
 	mnt->mnt_parent = mnt;
 	mnt->mnt_namespace = current->namespace;
 	up_write(&sb->s_umount);
+	free_secdata(secdata);
 	put_filesystem(type);
 	return mnt;
 out_sb:
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index d4aaa88d0214..78899eeab974 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -25,7 +25,7 @@ fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count)
 	struct kobject * kobj = to_kobj(dentry->d_parent);
 
 	if (!attr->read)
-		return -EINVAL;
+		return -EIO;
 
 	return attr->read(kobj, buffer, off, count);
 }
@@ -71,7 +71,7 @@ flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count)
 	struct kobject *kobj = to_kobj(dentry->d_parent);
 
 	if (!attr->write)
-		return -EINVAL;
+		return -EIO;
 
 	return attr->write(kobj, buffer, offset, count);
 }
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index fe198210bc2d..59734ba1ee60 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -8,6 +8,7 @@
 #include <linux/mount.h>
 #include <linux/module.h>
 #include <linux/kobject.h>
+#include <linux/namei.h>
 #include "sysfs.h"
 
 DECLARE_RWSEM(sysfs_rename_sem);
@@ -99,20 +100,21 @@ static int create_dir(struct kobject * k, struct dentry * p,
 	umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
 
 	down(&p->d_inode->i_sem);
-	*d = sysfs_get_dentry(p,n);
+	*d = lookup_one_len(n, p, strlen(n));
 	if (!IS_ERR(*d)) {
-		error = sysfs_create(*d, mode, init_dir);
+		error = sysfs_make_dirent(p->d_fsdata, *d, k, mode, SYSFS_DIR);
 		if (!error) {
-			error = sysfs_make_dirent(p->d_fsdata, *d, k, mode,
-						SYSFS_DIR);
+			error = sysfs_create(*d, mode, init_dir);
 			if (!error) {
 				p->d_inode->i_nlink++;
 				(*d)->d_op = &sysfs_dentry_ops;
 				d_rehash(*d);
 			}
 		}
-		if (error && (error != -EEXIST))
+		if (error && (error != -EEXIST)) {
+			sysfs_put((*d)->d_fsdata);
 			d_drop(*d);
+		}
 		dput(*d);
 	} else
 		error = PTR_ERR(*d);
@@ -171,17 +173,19 @@ static int sysfs_attach_attr(struct sysfs_dirent * sd, struct dentry * dentry)
                 init = init_file;
         }
 
+	dentry->d_fsdata = sysfs_get(sd);
+	sd->s_dentry = dentry;
 	error = sysfs_create(dentry, (attr->mode & S_IALLUGO) | S_IFREG, init);
-	if (error)
+	if (error) {
+		sysfs_put(sd);
 		return error;
+	}
 
         if (bin_attr) {
 		dentry->d_inode->i_size = bin_attr->size;
 		dentry->d_inode->i_fop = &bin_fops;
 	}
 	dentry->d_op = &sysfs_dentry_ops;
-	dentry->d_fsdata = sysfs_get(sd);
-	sd->s_dentry = dentry;
 	d_rehash(dentry);
 
 	return 0;
@@ -191,13 +195,15 @@ static int sysfs_attach_link(struct sysfs_dirent * sd, struct dentry * dentry)
 {
 	int err = 0;
 
+	dentry->d_fsdata = sysfs_get(sd);
+	sd->s_dentry = dentry;
 	err = sysfs_create(dentry, S_IFLNK|S_IRWXUGO, init_symlink);
 	if (!err) {
 		dentry->d_op = &sysfs_dentry_ops;
-		dentry->d_fsdata = sysfs_get(sd);
-		sd->s_dentry = dentry;
 		d_rehash(dentry);
-	}
+	} else
+		sysfs_put(sd);
+
 	return err;
 }
 
@@ -228,6 +234,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
 
 struct inode_operations sysfs_dir_inode_operations = {
 	.lookup		= sysfs_lookup,
+	.setattr	= sysfs_setattr,
 };
 
 static void remove_dir(struct dentry * d)
@@ -309,7 +316,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
 
 	down(&parent->d_inode->i_sem);
 
-	new_dentry = sysfs_get_dentry(parent, new_name);
+	new_dentry = lookup_one_len(new_name, parent, strlen(new_name));
 	if (!IS_ERR(new_dentry)) {
   		if (!new_dentry->d_inode) {
 			error = kobject_set_name(kobj, "%s", new_name);
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 364208071e17..d72c1ce48559 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -5,6 +5,7 @@
 #include <linux/module.h>
 #include <linux/dnotify.h>
 #include <linux/kobject.h>
+#include <linux/namei.h>
 #include <asm/uaccess.h>
 #include <asm/semaphore.h>
 
@@ -13,7 +14,7 @@
 #define to_subsys(k) container_of(k,struct subsystem,kset.kobj)
 #define to_sattr(a) container_of(a,struct subsys_attribute,attr)
 
-/**
+/*
  * Subsystem file operations.
  * These operations allow subsystems to have files that can be 
  * read/written. 
@@ -23,7 +24,7 @@ subsys_attr_show(struct kobject * kobj, struct attribute * attr, char * page)
 {
 	struct subsystem * s = to_subsys(kobj);
 	struct subsys_attribute * sattr = to_sattr(attr);
-	ssize_t ret = 0;
+	ssize_t ret = -EIO;
 
 	if (sattr->show)
 		ret = sattr->show(s,page);
@@ -36,7 +37,7 @@ subsys_attr_store(struct kobject * kobj, struct attribute * attr,
 {
 	struct subsystem * s = to_subsys(kobj);
 	struct subsys_attribute * sattr = to_sattr(attr);
-	ssize_t ret = 0;
+	ssize_t ret = -EIO;
 
 	if (sattr->store)
 		ret = sattr->store(s,page,count);
@@ -182,7 +183,7 @@ fill_write_buffer(struct sysfs_buffer * buffer, const char __user * buf, size_t
 		return -ENOMEM;
 
 	if (count >= PAGE_SIZE)
-		count = PAGE_SIZE - 1;
+		count = PAGE_SIZE;
 	error = copy_from_user(buffer->page,buf,count);
 	buffer->needs_read_fill = 1;
 	return error ? -EFAULT : count;
@@ -191,8 +192,9 @@ fill_write_buffer(struct sysfs_buffer * buffer, const char __user * buf, size_t
 
 /**
  *	flush_write_buffer - push buffer to kobject.
- *	@file:		file pointer.
+ *	@dentry:	dentry to the attribute
  *	@buffer:	data buffer for file.
+ *	@count:		number of bytes
  *
  *	Get the correct pointers for the kobject and the attribute we're
  *	dealing with, then call the store() method for the attribute, 
@@ -400,7 +402,7 @@ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
 	int res = -ENOENT;
 
 	down(&dir->d_inode->i_sem);
-	victim = sysfs_get_dentry(dir, attr->name);
+	victim = lookup_one_len(attr->name, dir, strlen(attr->name));
 	if (!IS_ERR(victim)) {
 		/* make sure dentry is really there */
 		if (victim->d_inode && 
@@ -443,7 +445,7 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
 	int res = -ENOENT;
 
 	down(&dir->d_inode->i_sem);
-	victim = sysfs_get_dentry(dir, attr->name);
+	victim = lookup_one_len(attr->name, dir, strlen(attr->name));
 	if (!IS_ERR(victim)) {
 		if (victim->d_inode &&
 		    (victim->d_parent->d_inode == dir->d_inode)) {
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index f11ac5ea7021..122145b0895c 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -11,6 +11,7 @@
 #include <linux/kobject.h>
 #include <linux/module.h>
 #include <linux/dcache.h>
+#include <linux/namei.h>
 #include <linux/err.h>
 #include "sysfs.h"
 
@@ -68,7 +69,8 @@ void sysfs_remove_group(struct kobject * kobj,
 	struct dentry * dir;
 
 	if (grp->name)
-		dir = sysfs_get_dentry(kobj->dentry,grp->name);
+		dir = lookup_one_len(grp->name, kobj->dentry,
+				strlen(grp->name));
 	else
 		dir = dget(kobj->dentry);
 
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index aff7b2dfa8ee..8de13bafaa76 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -26,18 +26,107 @@ static struct backing_dev_info sysfs_backing_dev_info = {
 	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
 };
 
-struct inode * sysfs_new_inode(mode_t mode)
+static struct inode_operations sysfs_inode_operations ={
+	.setattr	= sysfs_setattr,
+};
+
+int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
+{
+	struct inode * inode = dentry->d_inode;
+	struct sysfs_dirent * sd = dentry->d_fsdata;
+	struct iattr * sd_iattr;
+	unsigned int ia_valid = iattr->ia_valid;
+	int error;
+
+	if (!sd)
+		return -EINVAL;
+
+	sd_iattr = sd->s_iattr;
+
+	error = inode_change_ok(inode, iattr);
+	if (error)
+		return error;
+
+	error = inode_setattr(inode, iattr);
+	if (error)
+		return error;
+
+	if (!sd_iattr) {
+		/* setting attributes for the first time, allocate now */
+		sd_iattr = kmalloc(sizeof(struct iattr), GFP_KERNEL);
+		if (!sd_iattr)
+			return -ENOMEM;
+		/* assign default attributes */
+		memset(sd_iattr, 0, sizeof(struct iattr));
+		sd_iattr->ia_mode = sd->s_mode;
+		sd_iattr->ia_uid = 0;
+		sd_iattr->ia_gid = 0;
+		sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME;
+		sd->s_iattr = sd_iattr;
+	}
+
+	/* attributes were changed atleast once in past */
+
+	if (ia_valid & ATTR_UID)
+		sd_iattr->ia_uid = iattr->ia_uid;
+	if (ia_valid & ATTR_GID)
+		sd_iattr->ia_gid = iattr->ia_gid;
+	if (ia_valid & ATTR_ATIME)
+		sd_iattr->ia_atime = timespec_trunc(iattr->ia_atime,
+						inode->i_sb->s_time_gran);
+	if (ia_valid & ATTR_MTIME)
+		sd_iattr->ia_mtime = timespec_trunc(iattr->ia_mtime,
+						inode->i_sb->s_time_gran);
+	if (ia_valid & ATTR_CTIME)
+		sd_iattr->ia_ctime = timespec_trunc(iattr->ia_ctime,
+						inode->i_sb->s_time_gran);
+	if (ia_valid & ATTR_MODE) {
+		umode_t mode = iattr->ia_mode;
+
+		if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
+			mode &= ~S_ISGID;
+		sd_iattr->ia_mode = mode;
+	}
+
+	return error;
+}
+
+static inline void set_default_inode_attr(struct inode * inode, mode_t mode)
+{
+	inode->i_mode = mode;
+	inode->i_uid = 0;
+	inode->i_gid = 0;
+	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+}
+
+static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
+{
+	inode->i_mode = iattr->ia_mode;
+	inode->i_uid = iattr->ia_uid;
+	inode->i_gid = iattr->ia_gid;
+	inode->i_atime = iattr->ia_atime;
+	inode->i_mtime = iattr->ia_mtime;
+	inode->i_ctime = iattr->ia_ctime;
+}
+
+struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent * sd)
 {
 	struct inode * inode = new_inode(sysfs_sb);
 	if (inode) {
-		inode->i_mode = mode;
-		inode->i_uid = 0;
-		inode->i_gid = 0;
 		inode->i_blksize = PAGE_CACHE_SIZE;
 		inode->i_blocks = 0;
-		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 		inode->i_mapping->a_ops = &sysfs_aops;
 		inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
+		inode->i_op = &sysfs_inode_operations;
+
+		if (sd->s_iattr) {
+			/* sysfs_dirent has non-default attributes
+			 * get them for the new inode from persistent copy
+			 * in sysfs_dirent
+			 */
+			set_inode_attr(inode, sd->s_iattr);
+		} else
+			set_default_inode_attr(inode, mode);
 	}
 	return inode;
 }
@@ -48,7 +137,8 @@ int sysfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *))
 	struct inode * inode = NULL;
 	if (dentry) {
 		if (!dentry->d_inode) {
-			if ((inode = sysfs_new_inode(mode))) {
+			struct sysfs_dirent * sd = dentry->d_fsdata;
+			if ((inode = sysfs_new_inode(mode, sd))) {
 				if (dentry->d_parent && dentry->d_parent->d_inode) {
 					struct inode *p_inode = dentry->d_parent->d_inode;
 					p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME;
@@ -76,16 +166,6 @@ int sysfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *))
 	return error;
 }
 
-struct dentry * sysfs_get_dentry(struct dentry * parent, const char * name)
-{
-	struct qstr qstr;
-
-	qstr.name = name;
-	qstr.len = strlen(name);
-	qstr.hash = full_name_hash(name,qstr.len);
-	return lookup_hash(&qstr,parent);
-}
-
 /*
  * Get the name for corresponding element represented by the given sysfs_dirent
  */
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 5c805bb1a4b7..f1117e885bd6 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -28,6 +28,7 @@ static struct sysfs_dirent sysfs_root = {
 	.s_children	= LIST_HEAD_INIT(sysfs_root.s_children),
 	.s_element	= NULL,
 	.s_type		= SYSFS_ROOT,
+	.s_iattr	= NULL,
 };
 
 static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
@@ -42,7 +43,8 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_time_gran = 1;
 	sysfs_sb = sb;
 
-	inode = sysfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO);
+	inode = sysfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
+				 &sysfs_root);
 	if (inode) {
 		inode->i_op = &sysfs_dir_inode_operations;
 		inode->i_fop = &sysfs_dir_operations;
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index dfdf70174354..fae57c83a722 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -43,7 +43,7 @@ static void fill_object_path(struct kobject * kobj, char * buffer, int length)
 	}
 }
 
-static int sysfs_add_link(struct dentry * parent, char * name, struct kobject * target)
+static int sysfs_add_link(struct dentry * parent, const char * name, struct kobject * target)
 {
 	struct sysfs_dirent * parent_sd = parent->d_fsdata;
 	struct sysfs_symlink * sl;
@@ -79,7 +79,7 @@ exit1:
  *	@target:	object we're pointing to.
  *	@name:		name of the symlink.
  */
-int sysfs_create_link(struct kobject * kobj, struct kobject * target, char * name)
+int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name)
 {
 	struct dentry * dentry = kobj->dentry;
 	int error = 0;
@@ -99,13 +99,13 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, char * nam
  *	@name:	name of the symlink to remove.
  */
 
-void sysfs_remove_link(struct kobject * kobj, char * name)
+void sysfs_remove_link(struct kobject * kobj, const char * name)
 {
 	sysfs_hash_and_remove(kobj->dentry,name);
 }
 
 static int sysfs_get_target_path(struct kobject * kobj, struct kobject * target,
-				   char *path)
+				 char *path)
 {
 	char * s;
 	int depth, size;
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index a8a24a0c0b3b..3f8953e0e5d0 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -2,12 +2,11 @@
 extern struct vfsmount * sysfs_mount;
 extern kmem_cache_t *sysfs_dir_cachep;
 
-extern struct inode * sysfs_new_inode(mode_t mode);
+extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *);
 extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *));
 
 extern int sysfs_make_dirent(struct sysfs_dirent *, struct dentry *, void *,
 				umode_t, int);
-extern struct dentry * sysfs_get_dentry(struct dentry *, const char *);
 
 extern int sysfs_add_file(struct dentry *, const struct attribute *, int);
 extern void sysfs_hash_and_remove(struct dentry * dir, const char * name);
@@ -17,6 +16,7 @@ extern void sysfs_remove_subdir(struct dentry *);
 
 extern const unsigned char * sysfs_get_name(struct sysfs_dirent *sd);
 extern void sysfs_drop_dentry(struct sysfs_dirent *sd, struct dentry *parent);
+extern int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
 
 extern struct rw_semaphore sysfs_rename_sem;
 extern struct super_block * sysfs_sb;
@@ -75,6 +75,7 @@ static inline void release_sysfs_dirent(struct sysfs_dirent * sd)
 		kobject_put(sl->target_kobj);
 		kfree(sl);
 	}
+	kfree(sd->s_iattr);
 	kmem_cache_free(sysfs_dir_cachep, sd);
 }
 
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 3f6dc7112bc6..ac191ed7df0a 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -159,14 +159,12 @@ udf_find_entry(struct inode *dir, struct dentry *dentry,
 	char *nameptr;
 	uint8_t lfi;
 	uint16_t liu;
-	loff_t size = (udf_ext0_offset(dir) + dir->i_size) >> 2;
+	loff_t size;
 	kernel_lb_addr bloc, eloc;
 	uint32_t extoffset, elen, offset;
 	struct buffer_head *bh = NULL;
 
-	if (!dir)
-		return NULL;
-
+	size = (udf_ext0_offset(dir) + dir->i_size) >> 2;
 	f_pos = (udf_ext0_offset(dir) >> 2);
 
 	fibh->soffset = fibh->eoffset = (f_pos & ((dir->i_sb->s_blocksize - 1) >> 2)) << 2;
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 93ce257cd149..a3a4b5aaf5d9 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -149,11 +149,12 @@ linvfs_unwritten_convert(
  */
 STATIC void
 linvfs_unwritten_convert_direct(
-	struct inode	*inode,
+	struct kiocb	*iocb,
 	loff_t		offset,
 	ssize_t		size,
 	void		*private)
 {
+	struct inode	*inode = iocb->ki_filp->f_dentry->d_inode;
 	ASSERT(!private || inode == (struct inode *)private);
 
 	/* private indicates an unwritten extent lay beneath this IO */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 997963e53622..df0cba239dd5 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -61,12 +61,13 @@
  * File wide globals
  */
 
-STATIC kmem_cache_t *pagebuf_cache;
+STATIC kmem_cache_t *pagebuf_zone;
 STATIC kmem_shaker_t pagebuf_shake;
-STATIC int pagebuf_daemon_wakeup(int, unsigned int);
+STATIC int xfsbufd_wakeup(int, unsigned int);
 STATIC void pagebuf_delwri_queue(xfs_buf_t *, int);
-STATIC struct workqueue_struct *pagebuf_logio_workqueue;
-STATIC struct workqueue_struct *pagebuf_dataio_workqueue;
+
+STATIC struct workqueue_struct *xfslogd_workqueue;
+STATIC struct workqueue_struct *xfsdatad_workqueue;
 
 /*
  * Pagebuf debugging
@@ -123,9 +124,9 @@ ktrace_t *pagebuf_trace_buf;
 
 
 #define pagebuf_allocate(flags) \
-	kmem_zone_alloc(pagebuf_cache, pb_to_km(flags))
+	kmem_zone_alloc(pagebuf_zone, pb_to_km(flags))
 #define pagebuf_deallocate(pb) \
-	kmem_zone_free(pagebuf_cache, (pb));
+	kmem_zone_free(pagebuf_zone, (pb));
 
 /*
  * Page Region interfaces.
@@ -425,7 +426,7 @@ _pagebuf_lookup_pages(
 					__FUNCTION__, gfp_mask);
 
 			XFS_STATS_INC(pb_page_retries);
-			pagebuf_daemon_wakeup(0, gfp_mask);
+			xfsbufd_wakeup(0, gfp_mask);
 			blk_congestion_wait(WRITE, HZ/50);
 			goto retry;
 		}
@@ -1136,8 +1137,8 @@ pagebuf_iodone(
 	if ((pb->pb_iodone) || (pb->pb_flags & PBF_ASYNC)) {
 		if (schedule) {
 			INIT_WORK(&pb->pb_iodone_work, pagebuf_iodone_work, pb);
-			queue_work(dataio ? pagebuf_dataio_workqueue :
-				pagebuf_logio_workqueue, &pb->pb_iodone_work);
+			queue_work(dataio ? xfsdatad_workqueue :
+				xfslogd_workqueue, &pb->pb_iodone_work);
 		} else {
 			pagebuf_iodone_work(pb);
 		}
@@ -1562,16 +1563,6 @@ xfs_free_buftarg(
 	kmem_free(btp, sizeof(*btp));
 }
 
-void
-xfs_incore_relse(
-	xfs_buftarg_t		*btp,
-	int			delwri_only,
-	int			wait)
-{
-	invalidate_bdev(btp->pbr_bdev, 1);
-	truncate_inode_pages(btp->pbr_mapping, 0LL);
-}
-
 STATIC int
 xfs_setsize_buftarg_flags(
 	xfs_buftarg_t		*btp,
@@ -1742,27 +1733,27 @@ pagebuf_runall_queues(
 }
 
 /* Defines for pagebuf daemon */
-STATIC DECLARE_COMPLETION(pagebuf_daemon_done);
-STATIC struct task_struct *pagebuf_daemon_task;
-STATIC int pagebuf_daemon_active;
-STATIC int force_flush;
-STATIC int force_sleep;
+STATIC DECLARE_COMPLETION(xfsbufd_done);
+STATIC struct task_struct *xfsbufd_task;
+STATIC int xfsbufd_active;
+STATIC int xfsbufd_force_flush;
+STATIC int xfsbufd_force_sleep;
 
 STATIC int
-pagebuf_daemon_wakeup(
+xfsbufd_wakeup(
 	int			priority,
 	unsigned int		mask)
 {
-	if (force_sleep)
+	if (xfsbufd_force_sleep)
 		return 0;
-	force_flush = 1;
+	xfsbufd_force_flush = 1;
 	barrier();
-	wake_up_process(pagebuf_daemon_task);
+	wake_up_process(xfsbufd_task);
 	return 0;
 }
 
 STATIC int
-pagebuf_daemon(
+xfsbufd(
 	void			*data)
 {
 	struct list_head	tmp;
@@ -1774,17 +1765,17 @@ pagebuf_daemon(
 	daemonize("xfsbufd");
 	current->flags |= PF_MEMALLOC;
 
-	pagebuf_daemon_task = current;
-	pagebuf_daemon_active = 1;
+	xfsbufd_task = current;
+	xfsbufd_active = 1;
 	barrier();
 
 	INIT_LIST_HEAD(&tmp);
 	do {
-		if (unlikely(current->flags & PF_FREEZE)) {
-			force_sleep = 1;
-			refrigerator(PF_FREEZE);
+		if (unlikely(freezing(current))) {
+			xfsbufd_force_sleep = 1;
+			refrigerator();
 		} else {
-			force_sleep = 0;
+			xfsbufd_force_sleep = 0;
 		}
 
 		set_current_state(TASK_INTERRUPTIBLE);
@@ -1797,7 +1788,7 @@ pagebuf_daemon(
 			ASSERT(pb->pb_flags & PBF_DELWRI);
 
 			if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) {
-				if (!force_flush &&
+				if (!xfsbufd_force_flush &&
 				    time_before(jiffies,
 						pb->pb_queuetime + age)) {
 					pagebuf_unlock(pb);
@@ -1824,10 +1815,10 @@ pagebuf_daemon(
 		if (as_list_len > 0)
 			purge_addresses();
 
-		force_flush = 0;
-	} while (pagebuf_daemon_active);
+		xfsbufd_force_flush = 0;
+	} while (xfsbufd_active);
 
-	complete_and_exit(&pagebuf_daemon_done, 0);
+	complete_and_exit(&xfsbufd_done, 0);
 }
 
 /*
@@ -1844,8 +1835,8 @@ xfs_flush_buftarg(
 	xfs_buf_t		*pb, *n;
 	int			pincount = 0;
 
-	pagebuf_runall_queues(pagebuf_dataio_workqueue);
-	pagebuf_runall_queues(pagebuf_logio_workqueue);
+	pagebuf_runall_queues(xfsdatad_workqueue);
+	pagebuf_runall_queues(xfslogd_workqueue);
 
 	INIT_LIST_HEAD(&tmp);
 	spin_lock(&pbd_delwrite_lock);
@@ -1898,43 +1889,43 @@ xfs_flush_buftarg(
 }
 
 STATIC int
-pagebuf_daemon_start(void)
+xfs_buf_daemons_start(void)
 {
-	int		rval;
+	int		error = -ENOMEM;
 
-	pagebuf_logio_workqueue = create_workqueue("xfslogd");
-	if (!pagebuf_logio_workqueue)
-		return -ENOMEM;
+	xfslogd_workqueue = create_workqueue("xfslogd");
+	if (!xfslogd_workqueue)
+		goto out;
 
-	pagebuf_dataio_workqueue = create_workqueue("xfsdatad");
-	if (!pagebuf_dataio_workqueue) {
-		destroy_workqueue(pagebuf_logio_workqueue);
-		return -ENOMEM;
-	}
+	xfsdatad_workqueue = create_workqueue("xfsdatad");
+	if (!xfsdatad_workqueue)
+		goto out_destroy_xfslogd_workqueue;
 
-	rval = kernel_thread(pagebuf_daemon, NULL, CLONE_FS|CLONE_FILES);
-	if (rval < 0) {
-		destroy_workqueue(pagebuf_logio_workqueue);
-		destroy_workqueue(pagebuf_dataio_workqueue);
-	}
+	error = kernel_thread(xfsbufd, NULL, CLONE_FS|CLONE_FILES);
+	if (error < 0)
+		goto out_destroy_xfsdatad_workqueue;
+	return 0;
 
-	return rval;
+ out_destroy_xfsdatad_workqueue:
+	destroy_workqueue(xfsdatad_workqueue);
+ out_destroy_xfslogd_workqueue:
+	destroy_workqueue(xfslogd_workqueue);
+ out:
+	return error;
 }
 
 /*
- * pagebuf_daemon_stop
- *
  * Note: do not mark as __exit, it is called from pagebuf_terminate.
  */
 STATIC void
-pagebuf_daemon_stop(void)
+xfs_buf_daemons_stop(void)
 {
-	pagebuf_daemon_active = 0;
+	xfsbufd_active = 0;
 	barrier();
-	wait_for_completion(&pagebuf_daemon_done);
+	wait_for_completion(&xfsbufd_done);
 
-	destroy_workqueue(pagebuf_logio_workqueue);
-	destroy_workqueue(pagebuf_dataio_workqueue);
+	destroy_workqueue(xfslogd_workqueue);
+	destroy_workqueue(xfsdatad_workqueue);
 }
 
 /*
@@ -1944,27 +1935,37 @@ pagebuf_daemon_stop(void)
 int __init
 pagebuf_init(void)
 {
-	pagebuf_cache = kmem_cache_create("xfs_buf_t", sizeof(xfs_buf_t), 0,
-			SLAB_HWCACHE_ALIGN, NULL, NULL);
-	if (pagebuf_cache == NULL) {
-		printk("XFS: couldn't init xfs_buf_t cache\n");
-		pagebuf_terminate();
-		return -ENOMEM;
-	}
+	int		error = -ENOMEM;
+
+	pagebuf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf");
+	if (!pagebuf_zone)
+		goto out;
 
 #ifdef PAGEBUF_TRACE
 	pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP);
 #endif
 
-	pagebuf_daemon_start();
+	error = xfs_buf_daemons_start();
+	if (error)
+		goto out_free_buf_zone;
 
-	pagebuf_shake = kmem_shake_register(pagebuf_daemon_wakeup);
-	if (pagebuf_shake == NULL) {
-		pagebuf_terminate();
-		return -ENOMEM;
+	pagebuf_shake = kmem_shake_register(xfsbufd_wakeup);
+	if (!pagebuf_shake) {
+		error = -ENOMEM;
+		goto out_stop_daemons;
 	}
 
 	return 0;
+
+ out_stop_daemons:
+	xfs_buf_daemons_stop();
+ out_free_buf_zone:
+#ifdef PAGEBUF_TRACE
+	ktrace_free(pagebuf_trace_buf);
+#endif
+	kmem_zone_destroy(pagebuf_zone);
+ out:
+	return error;
 }
 
 
@@ -1976,12 +1977,12 @@ pagebuf_init(void)
 void
 pagebuf_terminate(void)
 {
-	pagebuf_daemon_stop();
+	xfs_buf_daemons_stop();
 
 #ifdef PAGEBUF_TRACE
 	ktrace_free(pagebuf_trace_buf);
 #endif
 
-	kmem_zone_destroy(pagebuf_cache);
+	kmem_zone_destroy(pagebuf_zone);
 	kmem_shake_deregister(pagebuf_shake);
 }
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 74deed8e6d90..3f8f69a66aea 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -576,7 +576,6 @@ extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);
 extern void xfs_free_buftarg(xfs_buftarg_t *, int);
 extern void xfs_wait_buftarg(xfs_buftarg_t *);
 extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
-extern void xfs_incore_relse(xfs_buftarg_t *, int, int);
 extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
 
 #define xfs_getsize_buftarg(buftarg) \
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 24fa3b101b93..f1ce4323f56e 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -57,7 +57,9 @@
 #include <linux/smp_lock.h>
 
 static struct vm_operations_struct linvfs_file_vm_ops;
-
+#ifdef CONFIG_XFS_DMAPI
+static struct vm_operations_struct linvfs_dmapi_file_vm_ops;
+#endif
 
 STATIC inline ssize_t
 __linvfs_read(
@@ -388,6 +390,14 @@ done:
 	return -error;
 }
 
+#ifdef CONFIG_XFS_DMAPI
+STATIC void
+linvfs_mmap_close(
+	struct vm_area_struct	*vma)
+{
+	xfs_dm_mm_put(vma);
+}
+#endif /* CONFIG_XFS_DMAPI */
 
 STATIC int
 linvfs_file_mmap(
@@ -399,16 +409,19 @@ linvfs_file_mmap(
 	vattr_t		va = { .va_mask = XFS_AT_UPDATIME };
 	int		error;
 
+	vma->vm_ops = &linvfs_file_vm_ops;
+
 	if (vp->v_vfsp->vfs_flag & VFS_DMI) {
 		xfs_mount_t	*mp = XFS_VFSTOM(vp->v_vfsp);
 
 		error = -XFS_SEND_MMAP(mp, vma, 0);
 		if (error)
 			return error;
+#ifdef CONFIG_XFS_DMAPI
+		vma->vm_ops = &linvfs_dmapi_file_vm_ops;
+#endif
 	}
 
-	vma->vm_ops = &linvfs_file_vm_ops;
-
 	VOP_SETATTR(vp, &va, XFS_AT_UPDATIME, NULL, error);
 	if (!error)
 		vn_revalidate(vp);	/* update Linux inode flags */
@@ -609,7 +622,15 @@ struct file_operations linvfs_dir_operations = {
 static struct vm_operations_struct linvfs_file_vm_ops = {
 	.nopage		= filemap_nopage,
 	.populate	= filemap_populate,
+};
+
+#ifdef CONFIG_XFS_DMAPI
+static struct vm_operations_struct linvfs_dmapi_file_vm_ops = {
+	.close		= linvfs_mmap_close,
+	.nopage		= filemap_nopage,
+	.populate	= filemap_populate,
 #ifdef HAVE_VMOP_MPROTECT
 	.mprotect	= linvfs_mprotect,
 #endif
 };
+#endif /* CONFIG_XFS_DMAPI */
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 69809eef8a54..05a447e51cc0 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -1174,7 +1174,8 @@ xfs_ioc_xattr(
 
 	switch (cmd) {
 	case XFS_IOC_FSGETXATTR: {
-		va.va_mask = XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_NEXTENTS;
+		va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | \
+			     XFS_AT_NEXTENTS | XFS_AT_PROJID;
 		VOP_GETATTR(vp, &va, 0, NULL, error);
 		if (error)
 			return -error;
@@ -1182,6 +1183,7 @@ xfs_ioc_xattr(
 		fa.fsx_xflags	= va.va_xflags;
 		fa.fsx_extsize	= va.va_extsize;
 		fa.fsx_nextents = va.va_nextents;
+		fa.fsx_projid	= va.va_projid;
 
 		if (copy_to_user(arg, &fa, sizeof(fa)))
 			return -XFS_ERROR(EFAULT);
@@ -1196,9 +1198,10 @@ xfs_ioc_xattr(
 		if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
 			attr_flags |= ATTR_NONBLOCK;
 
-		va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE;
+		va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | XFS_AT_PROJID;
 		va.va_xflags  = fa.fsx_xflags;
 		va.va_extsize = fa.fsx_extsize;
+		va.va_projid  = fa.fsx_projid;
 
 		VOP_SETATTR(vp, &va, attr_flags, NULL, error);
 		if (!error)
@@ -1207,7 +1210,8 @@ xfs_ioc_xattr(
 	}
 
 	case XFS_IOC_FSGETXATTRA: {
-		va.va_mask = XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_ANEXTENTS;
+		va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | \
+			     XFS_AT_ANEXTENTS | XFS_AT_PROJID;
 		VOP_GETATTR(vp, &va, 0, NULL, error);
 		if (error)
 			return -error;
@@ -1215,6 +1219,7 @@ xfs_ioc_xattr(
 		fa.fsx_xflags	= va.va_xflags;
 		fa.fsx_extsize	= va.va_extsize;
 		fa.fsx_nextents = va.va_anextents;
+		fa.fsx_projid	= va.va_projid;
 
 		if (copy_to_user(arg, &fa, sizeof(fa)))
 			return -XFS_ERROR(EFAULT);
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 71bb41019a12..42dc5e4662ed 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -145,10 +145,10 @@ static inline void set_buffer_unwritten_io(struct buffer_head *bh)
 #define xfs_inherit_nosymlinks	xfs_params.inherit_nosym.val
 #define xfs_rotorstep		xfs_params.rotorstep.val
 
-#ifndef __smp_processor_id
-#define __smp_processor_id()	smp_processor_id()
+#ifndef raw_smp_processor_id
+#define raw_smp_processor_id()	smp_processor_id()
 #endif
-#define current_cpu()		__smp_processor_id()
+#define current_cpu()		raw_smp_processor_id()
 #define current_pid()		(current->pid)
 #define current_fsuid(cred)	(current->fsuid)
 #define current_fsgid(cred)	(current->fsgid)
@@ -230,8 +230,10 @@ static inline void set_buffer_unwritten_io(struct buffer_head *bh)
  * field (see the QCMD macro in quota.h).  These macros help keep the
  * code portable - they are not visible from the syscall interface.
  */
-#define Q_XSETGQLIM	XQM_CMD(0x8)	/* set groups disk limits */
-#define Q_XGETGQUOTA	XQM_CMD(0x9)	/* get groups disk limits */
+#define Q_XSETGQLIM	XQM_CMD(8)	/* set groups disk limits */
+#define Q_XGETGQUOTA	XQM_CMD(9)	/* get groups disk limits */
+#define Q_XSETPQLIM	XQM_CMD(10)	/* set projects disk limits */
+#define Q_XGETPQUOTA	XQM_CMD(11)	/* get projects disk limits */
 
 /* IRIX uses a dynamic sizing algorithm (ndquot = 200 + numprocs*2) */
 /* we may well need to fine-tune this if it ever becomes an issue.  */
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index aa9daaea6c34..acab58c48043 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -209,30 +209,6 @@ unlock:
 	return (-status);
 }
 
-/*
- * xfs_inval_cached_pages
- * 
- * This routine is responsible for keeping direct I/O and buffered I/O
- * somewhat coherent.  From here we make sure that we're at least
- * temporarily holding the inode I/O lock exclusively and then call
- * the page cache to flush and invalidate any cached pages.  If there
- * are no cached pages this routine will be very quick.
- */
-void
-xfs_inval_cached_pages(
-	vnode_t		*vp,
-	xfs_iocore_t	*io,
-	xfs_off_t	offset,
-	int		write,
-	int		relock)
-{
-	if (VN_CACHED(vp)) {
-		xfs_inval_cached_trace(io, offset, -1, ctooff(offtoct(offset)), -1);
-		VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(offset)), -1, FI_REMAPF_LOCKED);
-	}
-
-}
-
 ssize_t			/* bytes read, or (-)  error */
 xfs_read(
 	bhv_desc_t		*bdp,
@@ -304,10 +280,11 @@ xfs_read(
 	if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) &&
 	    !(ioflags & IO_INVIS)) {
 		vrwlock_t locktype = VRWLOCK_READ;
+		int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags);
 
 		ret = -XFS_SEND_DATA(mp, DM_EVENT_READ,
 					BHV_TO_VNODE(bdp), *offset, size,
-					FILP_DELAY_FLAG(file), &locktype);
+					dmflags, &locktype);
 		if (ret) {
 			xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 			goto unlock_isem;
@@ -867,11 +844,15 @@ retry:
 	    !(ioflags & IO_INVIS)) {
 
 		xfs_rwunlock(bdp, locktype);
+		if (need_isem)
+			up(&inode->i_sem);
 		error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp,
 				DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL,
 				0, 0, 0); /* Delay flag intentionally  unused */
 		if (error)
-			goto out_unlock_isem;
+			goto out_nounlocks;
+		if (need_isem)
+			down(&inode->i_sem);
 		xfs_rwlock(bdp, locktype);
 		pos = xip->i_d.di_size;
 		ret = 0;
@@ -986,6 +967,7 @@ retry:
  out_unlock_isem:
 	if (need_isem)
 		up(&inode->i_sem);
+ out_nounlocks:
 	return -error;
 }
 
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index d723e35254a0..f197a720e394 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -94,8 +94,6 @@ extern int xfs_bdstrat_cb(struct xfs_buf *);
 
 extern int xfs_zero_eof(struct vnode *, struct xfs_iocore *, xfs_off_t,
 				xfs_fsize_t, xfs_fsize_t);
-extern void xfs_inval_cached_pages(struct vnode	*, struct xfs_iocore *,
-				xfs_off_t, int, int);
 extern ssize_t xfs_read(struct bhv_desc *, struct kiocb *,
 				const struct iovec *, unsigned int,
 				loff_t *, int, struct cred *);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 455e2b2fb964..f6dd7de25927 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -483,7 +483,7 @@ xfssyncd(
 		set_current_state(TASK_INTERRUPTIBLE);
 		timeleft = schedule_timeout(timeleft);
 		/* swsusp */
-		try_to_freeze(PF_FREEZE);
+		try_to_freeze();
 		if (vfsp->vfs_flag & VFS_UMOUNT)
 			break;
 
@@ -590,8 +590,10 @@ linvfs_sync_super(
 	int		error;
 	int		flags = SYNC_FSDATA;
 
-	if (wait)
-		flags |= SYNC_WAIT;
+	if (unlikely(sb->s_frozen == SB_FREEZE_WRITE))
+		flags = SYNC_QUIESCE;
+	else
+		flags = SYNC_FSDATA | (wait ? SYNC_WAIT : 0);
 
 	VFS_SYNC(vfsp, flags, NULL, error);
 	sb->s_dirt = 0;
@@ -701,7 +703,8 @@ linvfs_getxquota(
 	struct vfs		*vfsp = LINVFS_GET_VFS(sb);
 	int			error, getmode;
 
-	getmode = (type == GRPQUOTA) ? Q_XGETGQUOTA : Q_XGETQUOTA;
+	getmode = (type == USRQUOTA) ? Q_XGETQUOTA :
+		 ((type == GRPQUOTA) ? Q_XGETGQUOTA : Q_XGETPQUOTA);
 	VFS_QUOTACTL(vfsp, getmode, id, (caddr_t)fdq, error);
 	return -error;
 }
@@ -716,7 +719,8 @@ linvfs_setxquota(
 	struct vfs		*vfsp = LINVFS_GET_VFS(sb);
 	int			error, setmode;
 
-	setmode = (type == GRPQUOTA) ? Q_XSETGQLIM : Q_XSETQLIM;
+	setmode = (type == USRQUOTA) ? Q_XSETQLIM :
+		 ((type == GRPQUOTA) ? Q_XSETGQLIM : Q_XSETPQLIM);
 	VFS_QUOTACTL(vfsp, setmode, id, (caddr_t)fdq, error);
 	return -error;
 }
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
index 76493991578f..7ee1f714e9ba 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.h
+++ b/fs/xfs/linux-2.6/xfs_vfs.h
@@ -107,6 +107,7 @@ typedef enum {
 #define SYNC_FSDATA		0x0020	/* flush fs data (e.g. superblocks) */
 #define SYNC_REFCACHE		0x0040  /* prune some of the nfs ref cache */
 #define SYNC_REMOUNT		0x0080  /* remount readonly, no dummy LRs */
+#define SYNC_QUIESCE		0x0100  /* quiesce fileystem for a snapshot */
 
 typedef int	(*vfs_mount_t)(bhv_desc_t *,
 				struct xfs_mount_args *, struct cred *);
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index a832d165f24f..250cad54e892 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -411,13 +411,13 @@ vn_remove(
 /*  0 */		(void *)(__psint_t)(vk),		\
 /*  1 */		(void *)(s),				\
 /*  2 */		(void *)(__psint_t) line,		\
-/*  3 */		(void *)(vn_count(vp)), \
+/*  3 */		(void *)(__psint_t)(vn_count(vp)),	\
 /*  4 */		(void *)(ra),				\
 /*  5 */		(void *)(__psunsigned_t)(vp)->v_flag,	\
 /*  6 */		(void *)(__psint_t)current_cpu(),	\
 /*  7 */		(void *)(__psint_t)current_pid(),	\
 /*  8 */		(void *)__return_address,		\
-/*  9 */		0, 0, 0, 0, 0, 0, 0)
+/*  9 */		NULL, NULL, NULL, NULL, NULL, NULL, NULL)
 
 /*
  * Vnode tracing code.
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 00466c3194ac..a6e57c647be4 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -426,7 +426,7 @@ typedef struct vattr {
 	u_long		va_extsize;	/* file extent size */
 	u_long		va_nextents;	/* number of extents in file */
 	u_long		va_anextents;	/* number of attr extents in file */
-	int		va_projid;	/* project id */
+	prid_t		va_projid;	/* project id */
 } vattr_t;
 
 /*
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 740d20d33187..46ce1e3ce1d6 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -101,7 +101,7 @@ int xfs_dqerror_mod = 33;
  * is the d_id field. The idea is to fill in the entire q_core
  * when we read in the on disk dquot.
  */
-xfs_dquot_t *
+STATIC xfs_dquot_t *
 xfs_qm_dqinit(
 	xfs_mount_t  *mp,
 	xfs_dqid_t   id,
@@ -286,7 +286,9 @@ xfs_qm_adjust_dqlimits(
  * We also return 0 as the values of the timers in Q_GETQUOTA calls, when
  * enforcement's off.
  * In contrast, warnings are a little different in that they don't
- * 'automatically' get started when limits get exceeded.
+ * 'automatically' get started when limits get exceeded.  They do
+ * get reset to zero, however, when we find the count to be under
+ * the soft limit (they are only ever set non-zero via userspace).
  */
 void
 xfs_qm_adjust_dqtimers(
@@ -315,6 +317,8 @@ xfs_qm_adjust_dqtimers(
 				INT_GET(d->d_blk_hardlimit, ARCH_CONVERT)))) {
 			INT_SET(d->d_btimer, ARCH_CONVERT,
 				get_seconds() + XFS_QI_BTIMELIMIT(mp));
+		} else {
+			d->d_bwarns = 0;
 		}
 	} else {
 		if ((!d->d_blk_softlimit ||
@@ -336,6 +340,8 @@ xfs_qm_adjust_dqtimers(
 				INT_GET(d->d_ino_hardlimit, ARCH_CONVERT)))) {
 			INT_SET(d->d_itimer, ARCH_CONVERT,
 				get_seconds() + XFS_QI_ITIMELIMIT(mp));
+		} else {
+			d->d_iwarns = 0;
 		}
 	} else {
 		if ((!d->d_ino_softlimit ||
@@ -357,6 +363,8 @@ xfs_qm_adjust_dqtimers(
 				INT_GET(d->d_rtb_hardlimit, ARCH_CONVERT)))) {
 			INT_SET(d->d_rtbtimer, ARCH_CONVERT,
 				get_seconds() + XFS_QI_RTBTIMELIMIT(mp));
+		} else {
+			d->d_rtbwarns = 0;
 		}
 	} else {
 		if ((!d->d_rtb_softlimit ||
@@ -371,68 +379,6 @@ xfs_qm_adjust_dqtimers(
 }
 
 /*
- * Increment or reset warnings of a given dquot.
- */
-int
-xfs_qm_dqwarn(
-	xfs_disk_dquot_t	*d,
-	uint			flags)
-{
-	int	warned;
-
-	/*
-	 * root's limits are not real limits.
-	 */
-	if (!d->d_id)
-		return (0);
-
-	warned = 0;
-	if (INT_GET(d->d_blk_softlimit, ARCH_CONVERT) &&
-	    (INT_GET(d->d_bcount, ARCH_CONVERT) >=
-	     INT_GET(d->d_blk_softlimit, ARCH_CONVERT))) {
-		if (flags & XFS_QMOPT_DOWARN) {
-			INT_MOD(d->d_bwarns, ARCH_CONVERT, +1);
-			warned++;
-		}
-	} else {
-		if (!d->d_blk_softlimit ||
-		    (INT_GET(d->d_bcount, ARCH_CONVERT) <
-		     INT_GET(d->d_blk_softlimit, ARCH_CONVERT))) {
-			d->d_bwarns = 0;
-		}
-	}
-
-	if (INT_GET(d->d_ino_softlimit, ARCH_CONVERT) > 0 &&
-	    (INT_GET(d->d_icount, ARCH_CONVERT) >=
-	     INT_GET(d->d_ino_softlimit, ARCH_CONVERT))) {
-		if (flags & XFS_QMOPT_DOWARN) {
-			INT_MOD(d->d_iwarns, ARCH_CONVERT, +1);
-			warned++;
-		}
-	} else {
-		if (!d->d_ino_softlimit ||
-		    (INT_GET(d->d_icount, ARCH_CONVERT) <
-		     INT_GET(d->d_ino_softlimit, ARCH_CONVERT))) {
-			d->d_iwarns = 0;
-		}
-	}
-#ifdef QUOTADEBUG
-	if (INT_GET(d->d_iwarns, ARCH_CONVERT))
-		cmn_err(CE_DEBUG,
-			"--------@@Inode warnings running : %Lu >= %Lu",
-			INT_GET(d->d_icount, ARCH_CONVERT),
-			INT_GET(d->d_ino_softlimit, ARCH_CONVERT));
-	if (INT_GET(d->d_bwarns, ARCH_CONVERT))
-		cmn_err(CE_DEBUG,
-			"--------@@Blks warnings running : %Lu >= %Lu",
-			INT_GET(d->d_bcount, ARCH_CONVERT),
-			INT_GET(d->d_blk_softlimit, ARCH_CONVERT));
-#endif
-	return (warned);
-}
-
-
-/*
  * initialize a buffer full of dquots and log the whole thing
  */
 STATIC void
@@ -461,9 +407,9 @@ xfs_qm_init_dquot_blk(
 	for (i = 0; i < XFS_QM_DQPERBLK(mp); i++, d++, curid++)
 		xfs_qm_dqinit_core(curid, type, d);
 	xfs_trans_dquot_buf(tp, bp,
-			    type & XFS_DQ_USER ?
-			    XFS_BLI_UDQUOT_BUF :
-			    XFS_BLI_GDQUOT_BUF);
+			    (type & XFS_DQ_USER ? XFS_BLI_UDQUOT_BUF :
+			    ((type & XFS_DQ_PROJ) ? XFS_BLI_PDQUOT_BUF :
+			     XFS_BLI_GDQUOT_BUF)));
 	xfs_trans_log_buf(tp, bp, 0, BBTOB(XFS_QI_DQCHUNKLEN(mp)) - 1);
 }
 
@@ -544,8 +490,7 @@ xfs_qm_dqalloc(
 	 * the entire thing.
 	 */
 	xfs_qm_init_dquot_blk(tp, mp, INT_GET(dqp->q_core.d_id, ARCH_CONVERT),
-			      dqp->dq_flags & (XFS_DQ_USER|XFS_DQ_GROUP),
-			      bp);
+			      dqp->dq_flags & XFS_DQ_ALLTYPES, bp);
 
 	if ((error = xfs_bmap_finish(&tp, &flist, firstblock, &committed))) {
 		goto error1;
@@ -675,8 +620,7 @@ xfs_qm_dqtobp(
 	/*
 	 * A simple sanity check in case we got a corrupted dquot...
 	 */
-	if (xfs_qm_dqcheck(ddq, id,
-			   dqp->dq_flags & (XFS_DQ_USER|XFS_DQ_GROUP),
+	if (xfs_qm_dqcheck(ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES,
 			   flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN),
 			   "dqtobp")) {
 		if (!(flags & XFS_QMOPT_DQREPAIR)) {
@@ -953,8 +897,8 @@ int
 xfs_qm_dqget(
 	xfs_mount_t	*mp,
 	xfs_inode_t	*ip,	  /* locked inode (optional) */
-	xfs_dqid_t	id,	  /* gid or uid, depending on type */
-	uint		type,	  /* UDQUOT or GDQUOT */
+	xfs_dqid_t	id,	  /* uid/projid/gid depending on type */
+	uint		type,	  /* XFS_DQ_USER/XFS_DQ_PROJ/XFS_DQ_GROUP */
 	uint		flags,	  /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
 	xfs_dquot_t	**O_dqpp) /* OUT : locked incore dquot */
 {
@@ -965,6 +909,7 @@ xfs_qm_dqget(
 
 	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
 	if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
+	    (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) ||
 	    (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
 		return (ESRCH);
 	}
@@ -983,7 +928,9 @@ xfs_qm_dqget(
  again:
 
 #ifdef DEBUG
-	ASSERT(type == XFS_DQ_USER || type == XFS_DQ_GROUP);
+	ASSERT(type == XFS_DQ_USER ||
+	       type == XFS_DQ_PROJ ||
+	       type == XFS_DQ_GROUP);
 	if (ip) {
 		ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
 		if (type == XFS_DQ_USER)
@@ -1306,8 +1253,8 @@ xfs_qm_dqflush(
 		return (error);
 	}
 
-	if (xfs_qm_dqcheck(&dqp->q_core, INT_GET(ddqp->d_id, ARCH_CONVERT), 0, XFS_QMOPT_DOWARN,
-			   "dqflush (incore copy)")) {
+	if (xfs_qm_dqcheck(&dqp->q_core, INT_GET(ddqp->d_id, ARCH_CONVERT),
+			   0, XFS_QMOPT_DOWARN, "dqflush (incore copy)")) {
 		xfs_force_shutdown(dqp->q_mount, XFS_CORRUPT_INCORE);
 		return XFS_ERROR(EIO);
 	}
@@ -1459,7 +1406,8 @@ xfs_dqlock2(
 {
 	if (d1 && d2) {
 		ASSERT(d1 != d2);
-		if (INT_GET(d1->q_core.d_id, ARCH_CONVERT) > INT_GET(d2->q_core.d_id, ARCH_CONVERT)) {
+		if (INT_GET(d1->q_core.d_id, ARCH_CONVERT) >
+		    INT_GET(d2->q_core.d_id, ARCH_CONVERT)) {
 			xfs_dqlock(d2);
 			xfs_dqlock(d1);
 		} else {
@@ -1582,8 +1530,7 @@ xfs_qm_dqprint(xfs_dquot_t *dqp)
 	cmn_err(CE_DEBUG, "-----------KERNEL DQUOT----------------");
 	cmn_err(CE_DEBUG, "---- dquotID =  %d",
 		(int)INT_GET(dqp->q_core.d_id, ARCH_CONVERT));
-	cmn_err(CE_DEBUG, "---- type    =  %s",
-		XFS_QM_ISUDQ(dqp) ? "USR" : "GRP");
+	cmn_err(CE_DEBUG, "---- type    =  %s", DQFLAGTO_TYPESTR(dqp));
 	cmn_err(CE_DEBUG, "---- fs      =  0x%p", dqp->q_mount);
 	cmn_err(CE_DEBUG, "---- blkno   =  0x%x", (int) dqp->q_blkno);
 	cmn_err(CE_DEBUG, "---- boffset =  0x%x", (int) dqp->q_bufoffset);
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index 0c3fe3175baa..39175103c8e0 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -114,25 +114,18 @@ typedef struct xfs_dquot {
 #define XFS_DQHOLD(dqp)		((dqp)->q_nrefs++)
 
 /*
- * Quota Accounting flags
+ * Quota Accounting/Enforcement flags
  */
-#define XFS_ALL_QUOTA_ACCT	(XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT)
-#define XFS_ALL_QUOTA_ENFD	(XFS_UQUOTA_ENFD | XFS_GQUOTA_ENFD)
-#define XFS_ALL_QUOTA_CHKD	(XFS_UQUOTA_CHKD | XFS_GQUOTA_CHKD)
-#define XFS_ALL_QUOTA_ACTV	(XFS_UQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE)
-#define XFS_ALL_QUOTA_ACCT_ENFD (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\
-				 XFS_GQUOTA_ACCT|XFS_GQUOTA_ENFD)
+#define XFS_ALL_QUOTA_ACCT	\
+		(XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT)
+#define XFS_ALL_QUOTA_ENFD	(XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD)
+#define XFS_ALL_QUOTA_CHKD	(XFS_UQUOTA_CHKD | XFS_OQUOTA_CHKD)
 
-#define XFS_IS_QUOTA_RUNNING(mp)  ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT)
-#define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT)
-#define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT)
-
-/*
- * Quota Limit Enforcement flags
- */
+#define XFS_IS_QUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_ALL_QUOTA_ACCT)
 #define XFS_IS_QUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_ALL_QUOTA_ENFD)
-#define XFS_IS_UQUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_UQUOTA_ENFD)
-#define XFS_IS_GQUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_GQUOTA_ENFD)
+#define XFS_IS_UQUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_UQUOTA_ACCT)
+#define XFS_IS_PQUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_PQUOTA_ACCT)
+#define XFS_IS_GQUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_GQUOTA_ACCT)
 
 #ifdef DEBUG
 static inline int
@@ -167,6 +160,8 @@ XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp)
 #define XFS_DQ_IS_ON_FREELIST(dqp)  ((dqp)->dq_flnext != (dqp))
 #define XFS_DQ_IS_DIRTY(dqp)	((dqp)->dq_flags & XFS_DQ_DIRTY)
 #define XFS_QM_ISUDQ(dqp)	((dqp)->dq_flags & XFS_DQ_USER)
+#define XFS_QM_ISPDQ(dqp)	((dqp)->dq_flags & XFS_DQ_PROJ)
+#define XFS_QM_ISGDQ(dqp)	((dqp)->dq_flags & XFS_DQ_GROUP)
 #define XFS_DQ_TO_QINF(dqp)	((dqp)->q_mount->m_quotainfo)
 #define XFS_DQ_TO_QIP(dqp)	(XFS_QM_ISUDQ(dqp) ? \
 				 XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \
@@ -174,7 +169,7 @@ XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp)
 
 #define XFS_IS_THIS_QUOTA_OFF(d) (! (XFS_QM_ISUDQ(d) ? \
 				     (XFS_IS_UQUOTA_ON((d)->q_mount)) : \
-				     (XFS_IS_GQUOTA_ON((d)->q_mount))))
+				     (XFS_IS_OQUOTA_ON((d)->q_mount))))
 
 #ifdef XFS_DQUOT_TRACE
 /*
@@ -211,7 +206,6 @@ extern void		xfs_qm_adjust_dqtimers(xfs_mount_t *,
 					xfs_disk_dquot_t *);
 extern void		xfs_qm_adjust_dqlimits(xfs_mount_t *,
 					xfs_disk_dquot_t *);
-extern int		xfs_qm_dqwarn(xfs_disk_dquot_t *, uint);
 extern int		xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
 					xfs_dqid_t, uint, uint, xfs_dquot_t **);
 extern void		xfs_qm_dqput(xfs_dquot_t *);
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index a5425ee6e7bd..f5271b7b1e84 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -428,7 +428,7 @@ xfs_qm_dquot_logitem_committing(
 /*
  * This is the ops vector for dquots
  */
-struct xfs_item_ops xfs_dquot_item_ops = {
+STATIC struct xfs_item_ops xfs_dquot_item_ops = {
 	.iop_size	= (uint(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_size,
 	.iop_format	= (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
 					xfs_qm_dquot_logitem_format,
@@ -646,7 +646,7 @@ xfs_qm_qoffend_logitem_committing(xfs_qoff_logitem_t *qip, xfs_lsn_t commit_lsn)
 	return;
 }
 
-struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
+STATIC struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
 	.iop_size	= (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size,
 	.iop_format	= (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
 					xfs_qm_qoff_logitem_format,
@@ -669,7 +669,7 @@ struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
 /*
  * This is the ops vector shared by all quotaoff-start log items.
  */
-struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
+STATIC struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
 	.iop_size	= (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size,
 	.iop_format	= (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
 					xfs_qm_qoff_logitem_format,
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 89f2cd656ebf..f665ca8f9e96 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -81,12 +81,18 @@ struct xfs_qm	*xfs_Gqm;
 
 kmem_zone_t	*qm_dqzone;
 kmem_zone_t	*qm_dqtrxzone;
-kmem_shaker_t	xfs_qm_shaker;
+STATIC kmem_shaker_t	xfs_qm_shaker;
 
 STATIC void	xfs_qm_list_init(xfs_dqlist_t *, char *, int);
 STATIC void	xfs_qm_list_destroy(xfs_dqlist_t *);
 
+STATIC void	xfs_qm_freelist_init(xfs_frlist_t *);
+STATIC void	xfs_qm_freelist_destroy(xfs_frlist_t *);
+STATIC int	xfs_qm_mplist_nowait(xfs_mount_t *);
+STATIC int	xfs_qm_dqhashlock_nowait(xfs_dquot_t *);
+
 STATIC int	xfs_qm_init_quotainos(xfs_mount_t *);
+STATIC int	xfs_qm_init_quotainfo(xfs_mount_t *);
 STATIC int	xfs_qm_shake(int, unsigned int);
 
 #ifdef DEBUG
@@ -184,7 +190,7 @@ xfs_Gqm_init(void)
 /*
  * Destroy the global quota manager when its reference count goes to zero.
  */
-void
+STATIC void
 xfs_qm_destroy(
 	struct xfs_qm	*xqm)
 {
@@ -304,9 +310,9 @@ xfs_qm_mount_quotainit(
 	uint		flags)
 {
 	/*
-	 * User or group quotas has to be on.
+	 * User, projects or group quotas has to be on.
 	 */
-	ASSERT(flags & (XFSMNT_UQUOTA | XFSMNT_GQUOTA));
+	ASSERT(flags & (XFSMNT_UQUOTA | XFSMNT_PQUOTA | XFSMNT_GQUOTA));
 
 	/*
 	 * Initialize the flags in the mount structure. From this point
@@ -324,7 +330,11 @@ xfs_qm_mount_quotainit(
 	if (flags & XFSMNT_GQUOTA) {
 		mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
 		if (flags & XFSMNT_GQUOTAENF)
-			mp->m_qflags |= XFS_GQUOTA_ENFD;
+			mp->m_qflags |= XFS_OQUOTA_ENFD;
+	} else if (flags & XFSMNT_PQUOTA) {
+		mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
+		if (flags & XFSMNT_PQUOTAENF)
+			mp->m_qflags |= XFS_OQUOTA_ENFD;
 	}
 }
 
@@ -357,11 +367,11 @@ xfs_qm_mount_quotas(
 
 	/*
 	 * If a file system had quotas running earlier, but decided to
-	 * mount without -o quota/uquota/gquota options, revoke the
+	 * mount without -o uquota/pquota/gquota options, revoke the
 	 * quotachecked license, and bail out.
 	 */
 	if (! XFS_IS_QUOTA_ON(mp) &&
-	    (mp->m_sb.sb_qflags & (XFS_UQUOTA_ACCT|XFS_GQUOTA_ACCT))) {
+	    (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT)) {
 		mp->m_qflags = 0;
 		goto write_changes;
 	}
@@ -509,7 +519,7 @@ out:
  * Flush all dquots of the given file system to disk. The dquots are
  * _not_ purged from memory here, just their data written to disk.
  */
-int
+STATIC int
 xfs_qm_dqflush_all(
 	xfs_mount_t	*mp,
 	int		flags)
@@ -613,7 +623,7 @@ xfs_qm_detach_gdquots(
 STATIC int
 xfs_qm_dqpurge_int(
 	xfs_mount_t	*mp,
-	uint		flags) /* QUOTAOFF/UMOUNTING/UQUOTA/GQUOTA */
+	uint		flags) /* QUOTAOFF/UMOUNTING/UQUOTA/PQUOTA/GQUOTA */
 {
 	xfs_dquot_t	*dqp;
 	uint		dqtype;
@@ -625,6 +635,7 @@ xfs_qm_dqpurge_int(
 		return (0);
 
 	dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
+	dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
 	dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
 
 	xfs_qm_mplist_lock(mp);
@@ -734,11 +745,11 @@ xfs_qm_dqattach_one(
 
 	/*
 	 * udqhint is the i_udquot field in inode, and is non-NULL only
-	 * when the type arg is XFS_DQ_GROUP. Its purpose is to save a
+	 * when the type arg is group/project. Its purpose is to save a
 	 * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
 	 * the user dquot.
 	 */
-	ASSERT(!udqhint || type == XFS_DQ_GROUP);
+	ASSERT(!udqhint || type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
 	if (udqhint && !dolock)
 		xfs_dqlock(udqhint);
 
@@ -897,8 +908,8 @@ xfs_qm_dqattach_grouphint(
 
 
 /*
- * Given a locked inode, attach dquot(s) to it, taking UQUOTAON / GQUOTAON
- * in to account.
+ * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
+ * into account.
  * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
  * If XFS_QMOPT_DQLOCK, the dquot(s) will be returned locked. This option pretty
  * much made this code a complete mess, but it has been pretty useful.
@@ -937,8 +948,13 @@ xfs_qm_dqattach(
 		nquotas++;
 	}
 	ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
-	if (XFS_IS_GQUOTA_ON(mp)) {
-		error = xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
+	if (XFS_IS_OQUOTA_ON(mp)) {
+		error = XFS_IS_GQUOTA_ON(mp) ?
+			xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
+						flags & XFS_QMOPT_DQALLOC,
+						flags & XFS_QMOPT_DQLOCK,
+						ip->i_udquot, &ip->i_gdquot) :
+			xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQ_PROJ,
 						flags & XFS_QMOPT_DQALLOC,
 						flags & XFS_QMOPT_DQLOCK,
 						ip->i_udquot, &ip->i_gdquot);
@@ -989,7 +1005,7 @@ xfs_qm_dqattach(
 		}
 		if (XFS_IS_UQUOTA_ON(mp))
 			ASSERT(ip->i_udquot);
-		if (XFS_IS_GQUOTA_ON(mp))
+		if (XFS_IS_OQUOTA_ON(mp))
 			ASSERT(ip->i_gdquot);
 	}
 #endif
@@ -1018,13 +1034,13 @@ xfs_qm_dqdetach(
 
 	ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino);
 	ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino);
-	if (ip->i_udquot)
-		xfs_dqtrace_entry_ino(ip->i_udquot, "DQDETTACH", ip);
 	if (ip->i_udquot) {
+		xfs_dqtrace_entry_ino(ip->i_udquot, "DQDETTACH", ip);
 		xfs_qm_dqrele(ip->i_udquot);
 		ip->i_udquot = NULL;
 	}
 	if (ip->i_gdquot) {
+		xfs_dqtrace_entry_ino(ip->i_gdquot, "DQDETTACH", ip);
 		xfs_qm_dqrele(ip->i_gdquot);
 		ip->i_gdquot = NULL;
 	}
@@ -1149,7 +1165,7 @@ xfs_qm_sync(
  * This initializes all the quota information that's kept in the
  * mount structure
  */
-int
+STATIC int
 xfs_qm_init_quotainfo(
 	xfs_mount_t	*mp)
 {
@@ -1202,8 +1218,9 @@ xfs_qm_init_quotainfo(
 	 * and group quotas, at least not at this point.
 	 */
 	error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0,
-			     (XFS_IS_UQUOTA_RUNNING(mp)) ?
-			     XFS_DQ_USER : XFS_DQ_GROUP,
+			     XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : 
+			     (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
+				XFS_DQ_PROJ),
 			     XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN,
 			     &dqp);
 	if (! error) {
@@ -1234,6 +1251,10 @@ xfs_qm_init_quotainfo(
 				INT_GET(ddqp->d_iwarns, ARCH_CONVERT) ?
 				INT_GET(ddqp->d_iwarns, ARCH_CONVERT) :
 				XFS_QM_IWARNLIMIT;
+		qinf->qi_rtbwarnlimit =
+				INT_GET(ddqp->d_rtbwarns, ARCH_CONVERT) ?
+				INT_GET(ddqp->d_rtbwarns, ARCH_CONVERT) :
+				XFS_QM_RTBWARNLIMIT;
 		qinf->qi_bhardlimit =
 				INT_GET(ddqp->d_blk_hardlimit, ARCH_CONVERT);
 		qinf->qi_bsoftlimit =
@@ -1259,6 +1280,7 @@ xfs_qm_init_quotainfo(
 		qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
 		qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
 		qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
+		qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
 	}
 
 	return (0);
@@ -1366,13 +1388,20 @@ xfs_qm_dqget_noattach(
 		ASSERT(udqp);
 	}
 
-	if (XFS_IS_GQUOTA_ON(mp)) {
+	if (XFS_IS_OQUOTA_ON(mp)) {
 		ASSERT(ip->i_gdquot == NULL);
 		if (udqp)
 			xfs_dqunlock(udqp);
-		if ((error = xfs_qm_dqget(mp, ip, ip->i_d.di_gid, XFS_DQ_GROUP,
-					 XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
-					 &gdqp))) {
+		error = XFS_IS_GQUOTA_ON(mp) ?
+				xfs_qm_dqget(mp, ip,
+					     ip->i_d.di_gid, XFS_DQ_GROUP,
+					     XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
+					     &gdqp) :
+				xfs_qm_dqget(mp, ip,
+					     ip->i_d.di_projid, XFS_DQ_PROJ,
+					     XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
+					     &gdqp);
+		if (error) {
 			if (udqp)
 				xfs_qm_dqrele(udqp);
 			ASSERT(error != ESRCH);
@@ -1521,8 +1550,10 @@ xfs_qm_reset_dqcounts(
 		INT_SET(ddq->d_rtbcount, ARCH_CONVERT, 0ULL);
 		INT_SET(ddq->d_btimer, ARCH_CONVERT, (time_t)0);
 		INT_SET(ddq->d_itimer, ARCH_CONVERT, (time_t)0);
+		INT_SET(ddq->d_rtbtimer, ARCH_CONVERT, (time_t)0);
 		INT_SET(ddq->d_bwarns, ARCH_CONVERT, 0UL);
 		INT_SET(ddq->d_iwarns, ARCH_CONVERT, 0UL);
+		INT_SET(ddq->d_rtbwarns, ARCH_CONVERT, 0UL);
 		ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
 	}
 
@@ -1541,11 +1572,14 @@ xfs_qm_dqiter_bufs(
 	int		error;
 	int		notcommitted;
 	int		incr;
+	int		type;
 
 	ASSERT(blkcnt > 0);
 	notcommitted = 0;
 	incr = (blkcnt > XFS_QM_MAX_DQCLUSTER_LOGSZ) ?
 		XFS_QM_MAX_DQCLUSTER_LOGSZ : blkcnt;
+	type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER :
+		(flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP);
 	error = 0;
 
 	/*
@@ -1564,9 +1598,7 @@ xfs_qm_dqiter_bufs(
 		if (error)
 			break;
 
-		(void) xfs_qm_reset_dqcounts(mp, bp, firstid,
-					     flags & XFS_QMOPT_UQUOTA ?
-					     XFS_DQ_USER : XFS_DQ_GROUP);
+		(void) xfs_qm_reset_dqcounts(mp, bp, firstid, type);
 		xfs_bdwrite(mp, bp);
 		/*
 		 * goto the next block.
@@ -1578,7 +1610,7 @@ xfs_qm_dqiter_bufs(
 }
 
 /*
- * Iterate over all allocated USR/GRP dquots in the system, calling a
+ * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a
  * caller supplied function for every chunk of dquots that we find.
  */
 STATIC int
@@ -1849,7 +1881,7 @@ xfs_qm_dqusage_adjust(
 		xfs_qm_quotacheck_dqadjust(udqp, nblks, rtblks);
 		xfs_qm_dqput(udqp);
 	}
-	if (XFS_IS_GQUOTA_ON(mp)) {
+	if (XFS_IS_OQUOTA_ON(mp)) {
 		ASSERT(gdqp);
 		xfs_qm_quotacheck_dqadjust(gdqp, nblks, rtblks);
 		xfs_qm_dqput(gdqp);
@@ -1898,7 +1930,7 @@ xfs_qm_quotacheck(
 	cmn_err(CE_NOTE, "XFS quotacheck %s: Please wait.", mp->m_fsname);
 
 	/*
-	 * First we go thru all the dquots on disk, USR and GRP, and reset
+	 * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset
 	 * their counters to zero. We need a clean slate.
 	 * We don't log our changes till later.
 	 */
@@ -1909,9 +1941,10 @@ xfs_qm_quotacheck(
 	}
 
 	if ((gip = XFS_QI_GQIP(mp))) {
-		if ((error = xfs_qm_dqiterate(mp, gip, XFS_QMOPT_GQUOTA)))
+		if ((error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ?
+					XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA)))
 			goto error_return;
-		flags |= XFS_GQUOTA_CHKD;
+		flags |= XFS_OQUOTA_CHKD;
 	}
 
 	do {
@@ -1938,7 +1971,7 @@ xfs_qm_quotacheck(
 	if (error) {
 		xfs_qm_dqpurge_all(mp,
 				   XFS_QMOPT_UQUOTA|XFS_QMOPT_GQUOTA|
-				   XFS_QMOPT_QUOTAOFF);
+				   XFS_QMOPT_PQUOTA|XFS_QMOPT_QUOTAOFF);
 		goto error_return;
 	}
 	/*
@@ -1961,7 +1994,7 @@ xfs_qm_quotacheck(
 	 * quotachecked status, since we won't be doing accounting for
 	 * that type anymore.
 	 */
-	mp->m_qflags &= ~(XFS_GQUOTA_CHKD | XFS_UQUOTA_CHKD);
+	mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD);
 	mp->m_qflags |= flags;
 
 	XQM_LIST_PRINT(&(XFS_QI_MPL_LIST(mp)), MPL_NEXT, "++++ Mp list +++");
@@ -2013,7 +2046,7 @@ xfs_qm_init_quotainos(
 					     0, 0, &uip, 0)))
 				return XFS_ERROR(error);
 		}
-		if (XFS_IS_GQUOTA_ON(mp) &&
+		if (XFS_IS_OQUOTA_ON(mp) &&
 		    mp->m_sb.sb_gquotino != NULLFSINO) {
 			ASSERT(mp->m_sb.sb_gquotino > 0);
 			if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
@@ -2043,10 +2076,12 @@ xfs_qm_init_quotainos(
 
 		flags &= ~XFS_QMOPT_SBVERSION;
 	}
-	if (XFS_IS_GQUOTA_ON(mp) && gip == NULL) {
-		if ((error = xfs_qm_qino_alloc(mp, &gip,
-					      sbflags | XFS_SB_GQUOTINO,
-					      flags | XFS_QMOPT_GQUOTA))) {
+	if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) {
+		flags |= (XFS_IS_GQUOTA_ON(mp) ?
+				XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
+		error = xfs_qm_qino_alloc(mp, &gip,
+					  sbflags | XFS_SB_GQUOTINO, flags);
+		if (error) {
 			if (uip)
 				VN_RELE(XFS_ITOV(uip));
 
@@ -2452,6 +2487,7 @@ xfs_qm_vop_dqalloc(
 	xfs_inode_t	*ip,
 	uid_t		uid,
 	gid_t		gid,
+	prid_t		prid,
 	uint		flags,
 	xfs_dquot_t	**O_udqpp,
 	xfs_dquot_t	**O_gdqpp)
@@ -2483,8 +2519,7 @@ xfs_qm_vop_dqalloc(
 	}
 
 	uq = gq = NULL;
-	if ((flags & XFS_QMOPT_UQUOTA) &&
-	    XFS_IS_UQUOTA_ON(mp)) {
+	if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
 		if (ip->i_d.di_uid != uid) {
 			/*
 			 * What we need is the dquot that has this uid, and
@@ -2522,8 +2557,7 @@ xfs_qm_vop_dqalloc(
 			xfs_dqunlock(uq);
 		}
 	}
-	if ((flags & XFS_QMOPT_GQUOTA) &&
-	    XFS_IS_GQUOTA_ON(mp)) {
+	if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
 		if (ip->i_d.di_gid != gid) {
 			xfs_iunlock(ip, lockflags);
 			if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid,
@@ -2546,6 +2580,29 @@ xfs_qm_vop_dqalloc(
 			XFS_DQHOLD(gq);
 			xfs_dqunlock(gq);
 		}
+	} else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
+		if (ip->i_d.di_projid != prid) {
+			xfs_iunlock(ip, lockflags);
+			if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
+						 XFS_DQ_PROJ,
+						 XFS_QMOPT_DQALLOC |
+						 XFS_QMOPT_DOWARN,
+						 &gq))) {
+				if (uq)
+					xfs_qm_dqrele(uq);
+				ASSERT(error != ENOENT);
+				return (error);
+			}
+			xfs_dqunlock(gq);
+			lockflags = XFS_ILOCK_SHARED;
+			xfs_ilock(ip, lockflags);
+		} else {
+			ASSERT(ip->i_gdquot);
+			gq = ip->i_gdquot;
+			xfs_dqlock(gq);
+			XFS_DQHOLD(gq);
+			xfs_dqunlock(gq);
+		}
 	}
 	if (uq)
 		xfs_dqtrace_entry_ino(uq, "DQALLOC", ip);
@@ -2574,6 +2631,9 @@ xfs_qm_vop_chown(
 	xfs_dquot_t	*newdq)
 {
 	xfs_dquot_t	*prevdq;
+	uint		bfield = XFS_IS_REALTIME_INODE(ip) ?
+				 XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
+
 	ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
 	ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
 
@@ -2582,20 +2642,12 @@ xfs_qm_vop_chown(
 	ASSERT(prevdq);
 	ASSERT(prevdq != newdq);
 
-	xfs_trans_mod_dquot(tp, prevdq,
-			    XFS_TRANS_DQ_BCOUNT,
-			    -(ip->i_d.di_nblocks));
-	xfs_trans_mod_dquot(tp, prevdq,
-			    XFS_TRANS_DQ_ICOUNT,
-			    -1);
+	xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks));
+	xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
 
 	/* the sparkling new dquot */
-	xfs_trans_mod_dquot(tp, newdq,
-			    XFS_TRANS_DQ_BCOUNT,
-			    ip->i_d.di_nblocks);
-	xfs_trans_mod_dquot(tp, newdq,
-			    XFS_TRANS_DQ_ICOUNT,
-			    1);
+	xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks);
+	xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
 
 	/*
 	 * Take an extra reference, because the inode
@@ -2611,7 +2663,7 @@ xfs_qm_vop_chown(
 }
 
 /*
- * Quota reservations for setattr(AT_UID|AT_GID).
+ * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID).
  */
 int
 xfs_qm_vop_chown_reserve(
@@ -2623,7 +2675,7 @@ xfs_qm_vop_chown_reserve(
 {
 	int		error;
 	xfs_mount_t	*mp;
-	uint		delblks;
+	uint		delblks, blkflags;
 	xfs_dquot_t	*unresudq, *unresgdq, *delblksudq, *delblksgdq;
 
 	ASSERT(XFS_ISLOCKED_INODE(ip));
@@ -2632,6 +2684,8 @@ xfs_qm_vop_chown_reserve(
 
 	delblks = ip->i_delayed_blks;
 	delblksudq = delblksgdq = unresudq = unresgdq = NULL;
+	blkflags = XFS_IS_REALTIME_INODE(ip) ?
+			XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
 
 	if (XFS_IS_UQUOTA_ON(mp) && udqp &&
 	    ip->i_d.di_uid != (uid_t)INT_GET(udqp->q_core.d_id, ARCH_CONVERT)) {
@@ -2646,18 +2700,22 @@ xfs_qm_vop_chown_reserve(
 			unresudq = ip->i_udquot;
 		}
 	}
-	if (XFS_IS_GQUOTA_ON(ip->i_mount) && gdqp &&
-	    ip->i_d.di_gid != INT_GET(gdqp->q_core.d_id, ARCH_CONVERT)) {
-		delblksgdq = gdqp;
-		if (delblks) {
-			ASSERT(ip->i_gdquot);
-			unresgdq = ip->i_gdquot;
+	if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) {
+		if ((XFS_IS_GQUOTA_ON(ip->i_mount) && ip->i_d.di_gid !=
+				INT_GET(gdqp->q_core.d_id, ARCH_CONVERT)) ||
+		    (XFS_IS_PQUOTA_ON(ip->i_mount) && ip->i_d.di_projid !=
+				INT_GET(gdqp->q_core.d_id, ARCH_CONVERT))) {
+			delblksgdq = gdqp;
+			if (delblks) {
+				ASSERT(ip->i_gdquot);
+				unresgdq = ip->i_gdquot;
+			}
 		}
 	}
 
 	if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
 				delblksudq, delblksgdq, ip->i_d.di_nblocks, 1,
-				flags | XFS_QMOPT_RES_REGBLKS)))
+				flags | blkflags)))
 		return (error);
 
 	/*
@@ -2674,11 +2732,11 @@ xfs_qm_vop_chown_reserve(
 		ASSERT(unresudq || unresgdq);
 		if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
 				delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0,
-				flags | XFS_QMOPT_RES_REGBLKS)))
+				flags | blkflags)))
 			return (error);
 		xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
 				unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0,
-				XFS_QMOPT_RES_REGBLKS);
+				blkflags);
 	}
 
 	return (0);
@@ -2751,7 +2809,7 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
 }
 
 /* ------------- list stuff -----------------*/
-void
+STATIC void
 xfs_qm_freelist_init(xfs_frlist_t *ql)
 {
 	ql->qh_next = ql->qh_prev = (xfs_dquot_t *) ql;
@@ -2760,7 +2818,7 @@ xfs_qm_freelist_init(xfs_frlist_t *ql)
 	ql->qh_nelems = 0;
 }
 
-void
+STATIC void
 xfs_qm_freelist_destroy(xfs_frlist_t *ql)
 {
 	xfs_dquot_t	*dqp, *nextdqp;
@@ -2786,7 +2844,7 @@ xfs_qm_freelist_destroy(xfs_frlist_t *ql)
 	ASSERT(ql->qh_nelems == 0);
 }
 
-void
+STATIC void
 xfs_qm_freelist_insert(xfs_frlist_t *ql, xfs_dquot_t *dq)
 {
 	dq->dq_flnext = ql->qh_next;
@@ -2816,7 +2874,7 @@ xfs_qm_freelist_append(xfs_frlist_t *ql, xfs_dquot_t *dq)
 	xfs_qm_freelist_insert((xfs_frlist_t *)ql->qh_prev, dq);
 }
 
-int
+STATIC int
 xfs_qm_dqhashlock_nowait(
 	xfs_dquot_t *dqp)
 {
@@ -2836,7 +2894,7 @@ xfs_qm_freelist_lock_nowait(
 	return (locked);
 }
 
-int
+STATIC int
 xfs_qm_mplist_nowait(
 	xfs_mount_t	*mp)
 {
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index dcf1a7a831d8..b03eecf3b6cb 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -133,8 +133,9 @@ typedef struct xfs_quotainfo {
 	time_t		 qi_btimelimit;	 /* limit for blks timer */
 	time_t		 qi_itimelimit;	 /* limit for inodes timer */
 	time_t		 qi_rtbtimelimit;/* limit for rt blks timer */
-	xfs_qwarncnt_t	 qi_bwarnlimit;	 /* limit for num warnings */
-	xfs_qwarncnt_t	 qi_iwarnlimit;	 /* limit for num warnings */
+	xfs_qwarncnt_t	 qi_bwarnlimit;	 /* limit for blks warnings */
+	xfs_qwarncnt_t	 qi_iwarnlimit;	 /* limit for inodes warnings */
+	xfs_qwarncnt_t	 qi_rtbwarnlimit;/* limit for rt blks warnings */
 	mutex_t		 qi_quotaofflock;/* to serialize quotaoff */
 	xfs_filblks_t	 qi_dqchunklen;	 /* # BBs in a chunk of dqs */
 	uint		 qi_dqperchunk;	 /* # ondisk dqs in above chunk */
@@ -176,6 +177,7 @@ typedef struct xfs_dquot_acct {
 
 #define XFS_QM_BWARNLIMIT	5
 #define XFS_QM_IWARNLIMIT	5
+#define XFS_QM_RTBWARNLIMIT	5
 
 #define XFS_QM_LOCK(xqm)	(mutex_lock(&xqm##_lock, PINOD))
 #define XFS_QM_UNLOCK(xqm)	(mutex_unlock(&xqm##_lock))
@@ -184,7 +186,6 @@ typedef struct xfs_dquot_acct {
 
 extern void		xfs_mount_reset_sbqflags(xfs_mount_t *);
 
-extern int		xfs_qm_init_quotainfo(xfs_mount_t *);
 extern void		xfs_qm_destroy_quotainfo(xfs_mount_t *);
 extern int		xfs_qm_mount_quotas(xfs_mount_t *, int);
 extern void		xfs_qm_mount_quotainit(xfs_mount_t *, uint);
@@ -203,7 +204,7 @@ extern void		xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
 
 /* vop stuff */
 extern int		xfs_qm_vop_dqalloc(xfs_mount_t *, xfs_inode_t *,
-					uid_t, gid_t, uint,
+					uid_t, gid_t, prid_t, uint,
 					xfs_dquot_t **, xfs_dquot_t **);
 extern void		xfs_qm_vop_dqattach_and_dqmod_newinode(
 					xfs_trans_t *, xfs_inode_t *,
@@ -215,14 +216,9 @@ extern int		xfs_qm_vop_chown_reserve(xfs_trans_t *, xfs_inode_t *,
 					xfs_dquot_t *, xfs_dquot_t *, uint);
 
 /* list stuff */
-extern void		xfs_qm_freelist_init(xfs_frlist_t *);
-extern void		xfs_qm_freelist_destroy(xfs_frlist_t *);
-extern void		xfs_qm_freelist_insert(xfs_frlist_t *, xfs_dquot_t *);
 extern void		xfs_qm_freelist_append(xfs_frlist_t *, xfs_dquot_t *);
 extern void		xfs_qm_freelist_unlink(xfs_dquot_t *);
 extern int		xfs_qm_freelist_lock_nowait(xfs_qm_t *);
-extern int		xfs_qm_mplist_nowait(xfs_mount_t *);
-extern int		xfs_qm_dqhashlock_nowait(xfs_dquot_t *);
 
 /* system call interface */
 extern int		xfs_qm_quotactl(bhv_desc_t *, int, int, xfs_caddr_t);
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index be67d9c265f8..dc3c37a1e158 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -71,10 +71,13 @@
 #define MNTOPT_NOQUOTA	"noquota"	/* no quotas */
 #define MNTOPT_USRQUOTA	"usrquota"	/* user quota enabled */
 #define MNTOPT_GRPQUOTA	"grpquota"	/* group quota enabled */
+#define MNTOPT_PRJQUOTA	"prjquota"	/* project quota enabled */
 #define MNTOPT_UQUOTA	"uquota"	/* user quota (IRIX variant) */
 #define MNTOPT_GQUOTA	"gquota"	/* group quota (IRIX variant) */
+#define MNTOPT_PQUOTA	"pquota"	/* project quota (IRIX variant) */
 #define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota limit enforcement */
 #define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
+#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */
 #define MNTOPT_QUOTANOENF  "qnoenforce"	/* same as uqnoenforce */
 
 STATIC int
@@ -109,6 +112,14 @@ xfs_qm_parseargs(
 			args->flags |= XFSMNT_UQUOTA;
 			args->flags &= ~XFSMNT_UQUOTAENF;
 			referenced = 1;
+		} else if (!strcmp(this_char, MNTOPT_PQUOTA) ||
+			   !strcmp(this_char, MNTOPT_PRJQUOTA)) {
+			args->flags |= XFSMNT_PQUOTA | XFSMNT_PQUOTAENF;
+			referenced = 1;
+		} else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) {
+			args->flags |= XFSMNT_PQUOTA;
+			args->flags &= ~XFSMNT_PQUOTAENF;
+			referenced = 1;
 		} else if (!strcmp(this_char, MNTOPT_GQUOTA) ||
 			   !strcmp(this_char, MNTOPT_GRPQUOTA)) {
 			args->flags |= XFSMNT_GQUOTA | XFSMNT_GQUOTAENF;
@@ -127,6 +138,12 @@ xfs_qm_parseargs(
 			*this_char++ = ',';
 	}
 
+	if ((args->flags & XFSMNT_GQUOTA) && (args->flags & XFSMNT_PQUOTA)) {
+		cmn_err(CE_WARN,
+			"XFS: cannot mount with both project and group quota");
+		return XFS_ERROR(EINVAL);
+	}
+
 	PVFS_PARSEARGS(BHV_NEXT(bhv), options, args, update, error);
 	if (!error && !referenced)
 		bhv_remove_vfsops(bhvtovfs(bhv), VFS_POSITION_QM);
@@ -148,13 +165,19 @@ xfs_qm_showargs(
 			seq_puts(m, "," MNTOPT_UQUOTANOENF);
 	}
 
+	if (mp->m_qflags & XFS_PQUOTA_ACCT) {
+		(mp->m_qflags & XFS_OQUOTA_ENFD) ?
+			seq_puts(m, "," MNTOPT_PRJQUOTA) :
+			seq_puts(m, "," MNTOPT_PQUOTANOENF);
+	}
+
 	if (mp->m_qflags & XFS_GQUOTA_ACCT) {
-		(mp->m_qflags & XFS_GQUOTA_ENFD) ?
+		(mp->m_qflags & XFS_OQUOTA_ENFD) ?
 			seq_puts(m, "," MNTOPT_GRPQUOTA) :
 			seq_puts(m, "," MNTOPT_GQUOTANOENF);
 	}
 
-	if (!(mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_GQUOTA_ACCT)))
+	if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
 		seq_puts(m, "," MNTOPT_NOQUOTA);
 
 	PVFS_SHOWARGS(BHV_NEXT(bhv), m, error);
@@ -171,7 +194,7 @@ xfs_qm_mount(
 	struct xfs_mount	*mp = XFS_VFSTOM(vfsp);
 	int			error;
 
-	if (args->flags & (XFSMNT_UQUOTA | XFSMNT_GQUOTA))
+	if (args->flags & (XFSMNT_UQUOTA | XFSMNT_GQUOTA | XFSMNT_PQUOTA))
 		xfs_qm_mount_quotainit(mp, args->flags);
 	PVFS_MOUNT(BHV_NEXT(bhv), args, cr, error);
 	return error;
@@ -255,16 +278,17 @@ xfs_qm_newmount(
 	uint		*quotaflags)
 {
 	uint		quotaondisk;
-	uint		uquotaondisk = 0, gquotaondisk = 0;
+	uint		uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0;
 
 	*quotaflags = 0;
 	*needquotamount = B_FALSE;
 
 	quotaondisk = XFS_SB_VERSION_HASQUOTA(&mp->m_sb) &&
-		mp->m_sb.sb_qflags & (XFS_UQUOTA_ACCT|XFS_GQUOTA_ACCT);
+				(mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT);
 
 	if (quotaondisk) {
 		uquotaondisk = mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT;
+		pquotaondisk = mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT;
 		gquotaondisk = mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT;
 	}
 
@@ -277,13 +301,16 @@ xfs_qm_newmount(
 
 	if (((uquotaondisk && !XFS_IS_UQUOTA_ON(mp)) ||
 	    (!uquotaondisk &&  XFS_IS_UQUOTA_ON(mp)) ||
+	     (pquotaondisk && !XFS_IS_PQUOTA_ON(mp)) ||
+	    (!pquotaondisk &&  XFS_IS_PQUOTA_ON(mp)) ||
 	     (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) ||
-	    (!gquotaondisk &&  XFS_IS_GQUOTA_ON(mp)))  &&
+	    (!gquotaondisk &&  XFS_IS_OQUOTA_ON(mp)))  &&
 	    xfs_dev_is_read_only(mp, "changing quota state")) {
 		cmn_err(CE_WARN,
-			"XFS: please mount with%s%s%s.",
+			"XFS: please mount with%s%s%s%s.",
 			(!quotaondisk ? "out quota" : ""),
 			(uquotaondisk ? " usrquota" : ""),
+			(pquotaondisk ? " prjquota" : ""),
 			(gquotaondisk ? " grpquota" : ""));
 		return XFS_ERROR(EPERM);
 	}
@@ -359,7 +386,7 @@ xfs_qm_dqrele_null(
 }
 
 
-struct xfs_qmops xfs_qmcore_xfs = {
+STATIC struct xfs_qmops xfs_qmcore_xfs = {
 	.xfs_qminit		= xfs_qm_newmount,
 	.xfs_qmdone		= xfs_qm_unmount_quotadestroy,
 	.xfs_qmmount		= xfs_qm_endmount,
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 229f5b5a2d25..68e98962dbef 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -118,40 +118,41 @@ xfs_qm_quotactl(
 	 * The following commands are valid even when quotaoff.
 	 */
 	switch (cmd) {
+	case Q_XQUOTARM:
 		/*
-		 * truncate quota files. quota must be off.
+		 * Truncate quota files. quota must be off.
 		 */
-	      case Q_XQUOTARM:
 		if (XFS_IS_QUOTA_ON(mp) || addr == NULL)
 			return XFS_ERROR(EINVAL);
 		if (vfsp->vfs_flag & VFS_RDONLY)
 			return XFS_ERROR(EROFS);
 		return (xfs_qm_scall_trunc_qfiles(mp,
 			       xfs_qm_import_qtype_flags(*(uint *)addr)));
+
+	case Q_XGETQSTAT:
 		/*
 		 * Get quota status information.
 		 */
-	      case Q_XGETQSTAT:
 		return (xfs_qm_scall_getqstat(mp, (fs_quota_stat_t *)addr));
 
+	case Q_XQUOTAON:
 		/*
-		 * QUOTAON for root f/s and quota enforcement on others..
-		 * Quota accounting for non-root f/s's must be turned on
-		 * at mount time.
+		 * QUOTAON - enabling quota enforcement.
+		 * Quota accounting must be turned on at mount time.
 		 */
-	      case Q_XQUOTAON:
 		if (addr == NULL)
 			return XFS_ERROR(EINVAL);
 		if (vfsp->vfs_flag & VFS_RDONLY)
 			return XFS_ERROR(EROFS);
 		return (xfs_qm_scall_quotaon(mp,
 					  xfs_qm_import_flags(*(uint *)addr)));
-	      case Q_XQUOTAOFF:
+
+	case Q_XQUOTAOFF:
 		if (vfsp->vfs_flag & VFS_RDONLY)
 			return XFS_ERROR(EROFS);
 		break;
 
-	      default:
+	default:
 		break;
 	}
 
@@ -159,7 +160,7 @@ xfs_qm_quotactl(
 		return XFS_ERROR(ESRCH);
 
 	switch (cmd) {
-	      case Q_XQUOTAOFF:
+	case Q_XQUOTAOFF:
 		if (vfsp->vfs_flag & VFS_RDONLY)
 			return XFS_ERROR(EROFS);
 		error = xfs_qm_scall_quotaoff(mp,
@@ -167,42 +168,39 @@ xfs_qm_quotactl(
 					    B_FALSE);
 		break;
 
-		/*
-		 * Defaults to XFS_GETUQUOTA.
-		 */
-	      case Q_XGETQUOTA:
+	case Q_XGETQUOTA:
 		error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_USER,
 					(fs_disk_quota_t *)addr);
 		break;
-		/*
-		 * Set limits, both hard and soft. Defaults to Q_SETUQLIM.
-		 */
-	      case Q_XSETQLIM:
+	case Q_XGETGQUOTA:
+		error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_GROUP,
+					(fs_disk_quota_t *)addr);
+		break;
+	case Q_XGETPQUOTA:
+		error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_PROJ,
+					(fs_disk_quota_t *)addr);
+		break;
+
+	case Q_XSETQLIM:
 		if (vfsp->vfs_flag & VFS_RDONLY)
 			return XFS_ERROR(EROFS);
 		error = xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, XFS_DQ_USER,
 					     (fs_disk_quota_t *)addr);
 		break;
-
-	       case Q_XSETGQLIM:
+	case Q_XSETGQLIM:
 		if (vfsp->vfs_flag & VFS_RDONLY)
 			return XFS_ERROR(EROFS);
 		error = xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, XFS_DQ_GROUP,
 					     (fs_disk_quota_t *)addr);
 		break;
-
-
-	      case Q_XGETGQUOTA:
-		error = xfs_qm_scall_getquota(mp, (xfs_dqid_t)id, XFS_DQ_GROUP,
-					(fs_disk_quota_t *)addr);
+	case Q_XSETPQLIM:
+		if (vfsp->vfs_flag & VFS_RDONLY)
+			return XFS_ERROR(EROFS);
+		error = xfs_qm_scall_setqlim(mp, (xfs_dqid_t)id, XFS_DQ_PROJ,
+					     (fs_disk_quota_t *)addr);
 		break;
 
-		/*
-		 * Quotas are entirely undefined after quotaoff in XFS quotas.
-		 * For instance, there's no way to set limits when quotaoff.
-		 */
-
-	      default:
+	default:
 		error = XFS_ERROR(EINVAL);
 		break;
 	}
@@ -286,8 +284,12 @@ xfs_qm_scall_quotaoff(
 	}
 	if (flags & XFS_GQUOTA_ACCT) {
 		dqtype |= XFS_QMOPT_GQUOTA;
-		flags |= (XFS_GQUOTA_CHKD | XFS_GQUOTA_ENFD);
+		flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD);
 		inactivate_flags |= XFS_GQUOTA_ACTIVE;
+	} else if (flags & XFS_PQUOTA_ACCT) {
+		dqtype |= XFS_QMOPT_PQUOTA;
+		flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD);
+		inactivate_flags |= XFS_PQUOTA_ACTIVE;
 	}
 
 	/*
@@ -364,7 +366,8 @@ xfs_qm_scall_quotaoff(
 	/*
 	 * If quotas is completely disabled, close shop.
 	 */
-	if ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_ALL) {
+	if (((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET1) ||
+	    ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET2)) {
 		mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
 		xfs_qm_destroy_quotainfo(mp);
 		return (0);
@@ -378,7 +381,7 @@ xfs_qm_scall_quotaoff(
 		XFS_PURGE_INODE(XFS_QI_UQIP(mp));
 		XFS_QI_UQIP(mp) = NULL;
 	}
-	if ((dqtype & XFS_QMOPT_GQUOTA) && XFS_QI_GQIP(mp)) {
+	if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && XFS_QI_GQIP(mp)) {
 		XFS_PURGE_INODE(XFS_QI_GQIP(mp));
 		XFS_QI_GQIP(mp) = NULL;
 	}
@@ -411,7 +414,8 @@ xfs_qm_scall_trunc_qfiles(
 		}
 	}
 
-	if ((flags & XFS_DQ_GROUP) && mp->m_sb.sb_gquotino != NULLFSINO) {
+	if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) &&
+	    mp->m_sb.sb_gquotino != NULLFSINO) {
 		error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip, 0);
 		if (! error) {
 			(void) xfs_truncate_file(mp, qip);
@@ -434,7 +438,7 @@ xfs_qm_scall_quotaon(
 	uint		flags)
 {
 	int		error;
-	unsigned long s;
+	unsigned long	s;
 	uint		qf;
 	uint		accflags;
 	__int64_t	sbflags;
@@ -468,9 +472,13 @@ xfs_qm_scall_quotaon(
 	    (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 &&
 	    (flags & XFS_UQUOTA_ENFD))
 	    ||
+	    ((flags & XFS_PQUOTA_ACCT) == 0 &&
+	    (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 &&
+	    (flags & XFS_OQUOTA_ENFD))
+	    ||
 	    ((flags & XFS_GQUOTA_ACCT) == 0 &&
 	    (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 &&
-	    (flags & XFS_GQUOTA_ENFD))) {
+	    (flags & XFS_OQUOTA_ENFD))) {
 		qdprintk("Can't enforce without acct, flags=%x sbflags=%x\n",
 			flags, mp->m_sb.sb_qflags);
 		return XFS_ERROR(EINVAL);
@@ -504,6 +512,10 @@ xfs_qm_scall_quotaon(
 	 */
 	if  (((mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) !=
 	     (mp->m_qflags & XFS_UQUOTA_ACCT)) ||
+	     ((mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) !=
+	     (mp->m_qflags & XFS_PQUOTA_ACCT)) ||
+	     ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) !=
+	     (mp->m_qflags & XFS_GQUOTA_ACCT)) ||
 	    (flags & XFS_ALL_QUOTA_ENFD) == 0)
 		return (0);
 
@@ -521,7 +533,6 @@ xfs_qm_scall_quotaon(
 }
 
 
-
 /*
  * Return quota status information, such as uquota-off, enforcements, etc.
  */
@@ -606,7 +617,8 @@ xfs_qm_scall_setqlim(
 	if (!capable(CAP_SYS_ADMIN))
 		return XFS_ERROR(EPERM);
 
-	if ((newlim->d_fieldmask & (FS_DQ_LIMIT_MASK|FS_DQ_TIMER_MASK)) == 0)
+	if ((newlim->d_fieldmask &
+	    (FS_DQ_LIMIT_MASK|FS_DQ_TIMER_MASK|FS_DQ_WARNS_MASK)) == 0)
 		return (0);
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
@@ -691,12 +703,23 @@ xfs_qm_scall_setqlim(
 		qdprintk("ihard %Ld < isoft %Ld\n", hard, soft);
 	}
 
+	/*
+	 * Update warnings counter(s) if requested
+	 */
+	if (newlim->d_fieldmask & FS_DQ_BWARNS)
+		INT_SET(ddq->d_bwarns, ARCH_CONVERT, newlim->d_bwarns);
+	if (newlim->d_fieldmask & FS_DQ_IWARNS)
+		INT_SET(ddq->d_iwarns, ARCH_CONVERT, newlim->d_iwarns);
+	if (newlim->d_fieldmask & FS_DQ_RTBWARNS)
+		INT_SET(ddq->d_rtbwarns, ARCH_CONVERT, newlim->d_rtbwarns);
+
 	if (id == 0) {
 		/*
 		 * Timelimits for the super user set the relative time
 		 * the other users can be over quota for this file system.
 		 * If it is zero a default is used.  Ditto for the default
-		 * soft and hard limit values (already done, above).
+		 * soft and hard limit values (already done, above), and
+		 * for warnings.
 		 */
 		if (newlim->d_fieldmask & FS_DQ_BTIMER) {
 			mp->m_quotainfo->qi_btimelimit = newlim->d_btimer;
@@ -710,7 +733,13 @@ xfs_qm_scall_setqlim(
 			mp->m_quotainfo->qi_rtbtimelimit = newlim->d_rtbtimer;
 			INT_SET(ddq->d_rtbtimer, ARCH_CONVERT, newlim->d_rtbtimer);
 		}
-	} else /* if (XFS_IS_QUOTA_ENFORCED(mp)) */ {
+		if (newlim->d_fieldmask & FS_DQ_BWARNS)
+			mp->m_quotainfo->qi_bwarnlimit = newlim->d_bwarns;
+		if (newlim->d_fieldmask & FS_DQ_IWARNS)
+			mp->m_quotainfo->qi_iwarnlimit = newlim->d_iwarns;
+		if (newlim->d_fieldmask & FS_DQ_RTBWARNS)
+			mp->m_quotainfo->qi_rtbwarnlimit = newlim->d_rtbwarns;
+	} else {
 		/*
 		 * If the user is now over quota, start the timelimit.
 		 * The user will not be 'warned'.
@@ -776,9 +805,9 @@ xfs_qm_log_quotaoff_end(
 	xfs_qoff_logitem_t	*startqoff,
 	uint			flags)
 {
-	xfs_trans_t	       *tp;
+	xfs_trans_t		*tp;
 	int			error;
-	xfs_qoff_logitem_t     *qoffi;
+	xfs_qoff_logitem_t	*qoffi;
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END);
 
@@ -928,18 +957,26 @@ xfs_qm_export_dquot(
 
 STATIC uint
 xfs_qm_import_qtype_flags(
-	uint uflags)
+	uint		uflags)
 {
+	uint		oflags = 0;
+
 	/*
-	 * Can't be both at the same time.
+	 * Can't be more than one, or none.
 	 */
 	if (((uflags & (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) ==
-	     (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) ||
-	    ((uflags & (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) == 0))
+			(XFS_GROUP_QUOTA | XFS_USER_QUOTA)) ||
+	    ((uflags & (XFS_GROUP_QUOTA | XFS_PROJ_QUOTA)) ==
+			(XFS_GROUP_QUOTA | XFS_PROJ_QUOTA)) ||
+	    ((uflags & (XFS_USER_QUOTA | XFS_PROJ_QUOTA)) ==
+			(XFS_USER_QUOTA | XFS_PROJ_QUOTA)) ||
+	    ((uflags & (XFS_GROUP_QUOTA|XFS_USER_QUOTA|XFS_PROJ_QUOTA)) == 0))
 		return (0);
 
-	return (uflags & XFS_USER_QUOTA) ?
-		XFS_DQ_USER : XFS_DQ_GROUP;
+	oflags |= (uflags & XFS_USER_QUOTA) ? XFS_DQ_USER : 0;
+	oflags |= (uflags & XFS_PROJ_QUOTA) ? XFS_DQ_PROJ : 0;
+	oflags |= (uflags & XFS_GROUP_QUOTA) ? XFS_DQ_GROUP: 0;
+	return oflags;
 }
 
 STATIC uint
@@ -947,14 +984,19 @@ xfs_qm_export_qtype_flags(
 	uint flags)
 {
 	/*
-	 * Can't be both at the same time.
+	 * Can't be more than one, or none.
 	 */
-	ASSERT((flags & (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) !=
-		(XFS_GROUP_QUOTA | XFS_USER_QUOTA));
-	ASSERT((flags & (XFS_GROUP_QUOTA | XFS_USER_QUOTA)) != 0);
+	ASSERT((flags & (XFS_PROJ_QUOTA | XFS_USER_QUOTA)) !=
+		(XFS_PROJ_QUOTA | XFS_USER_QUOTA));
+	ASSERT((flags & (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)) !=
+		(XFS_PROJ_QUOTA | XFS_GROUP_QUOTA));
+	ASSERT((flags & (XFS_USER_QUOTA | XFS_GROUP_QUOTA)) !=
+		(XFS_USER_QUOTA | XFS_GROUP_QUOTA));
+	ASSERT((flags & (XFS_PROJ_QUOTA|XFS_USER_QUOTA|XFS_GROUP_QUOTA)) != 0);
 
 	return (flags & XFS_DQ_USER) ?
-		XFS_USER_QUOTA : XFS_GROUP_QUOTA;
+		XFS_USER_QUOTA : (flags & XFS_DQ_PROJ) ?
+			XFS_PROJ_QUOTA : XFS_GROUP_QUOTA;
 }
 
 STATIC uint
@@ -965,12 +1007,14 @@ xfs_qm_import_flags(
 
 	if (uflags & XFS_QUOTA_UDQ_ACCT)
 		flags |= XFS_UQUOTA_ACCT;
+	if (uflags & XFS_QUOTA_PDQ_ACCT)
+		flags |= XFS_PQUOTA_ACCT;
 	if (uflags & XFS_QUOTA_GDQ_ACCT)
 		flags |= XFS_GQUOTA_ACCT;
 	if (uflags & XFS_QUOTA_UDQ_ENFD)
 		flags |= XFS_UQUOTA_ENFD;
-	if (uflags & XFS_QUOTA_GDQ_ENFD)
-		flags |= XFS_GQUOTA_ENFD;
+	if (uflags & (XFS_QUOTA_PDQ_ENFD|XFS_QUOTA_GDQ_ENFD))
+		flags |= XFS_OQUOTA_ENFD;
 	return (flags);
 }
 
@@ -984,12 +1028,16 @@ xfs_qm_export_flags(
 	uflags = 0;
 	if (flags & XFS_UQUOTA_ACCT)
 		uflags |= XFS_QUOTA_UDQ_ACCT;
+	if (flags & XFS_PQUOTA_ACCT)
+		uflags |= XFS_QUOTA_PDQ_ACCT;
 	if (flags & XFS_GQUOTA_ACCT)
 		uflags |= XFS_QUOTA_GDQ_ACCT;
 	if (flags & XFS_UQUOTA_ENFD)
 		uflags |= XFS_QUOTA_UDQ_ENFD;
-	if (flags & XFS_GQUOTA_ENFD)
-		uflags |= XFS_QUOTA_GDQ_ENFD;
+	if (flags & (XFS_OQUOTA_ENFD)) {
+		uflags |= (flags & XFS_GQUOTA_ACCT) ?
+			XFS_QUOTA_GDQ_ENFD : XFS_QUOTA_PDQ_ENFD;
+	}
 	return (uflags);
 }
 
@@ -1070,7 +1118,7 @@ again:
 			xfs_qm_dqrele(ip->i_udquot);
 			ip->i_udquot = NULL;
 		}
-		if ((flags & XFS_GQUOTA_ACCT) && ip->i_gdquot) {
+		if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) {
 			xfs_qm_dqrele(ip->i_gdquot);
 			ip->i_gdquot = NULL;
 		}
@@ -1160,7 +1208,6 @@ xfs_qm_dqtest_print(
 {
 	cmn_err(CE_DEBUG, "-----------DQTEST DQUOT----------------");
 	cmn_err(CE_DEBUG, "---- dquot ID = %d", d->d_id);
-	cmn_err(CE_DEBUG, "---- type     = %s", XFS_QM_ISUDQ(d)? "USR" : "GRP");
 	cmn_err(CE_DEBUG, "---- fs       = 0x%p", d->q_mount);
 	cmn_err(CE_DEBUG, "---- bcount   = %Lu (0x%x)",
 		d->d_bcount, (int)d->d_bcount);
@@ -1231,7 +1278,7 @@ xfs_dqtest_cmp2(
 #ifdef QUOTADEBUG
 	if (!err) {
 		cmn_err(CE_DEBUG, "%d [%s] [0x%p] qchecked",
-			d->d_id, XFS_QM_ISUDQ(d) ? "USR" : "GRP", d->q_mount);
+			d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount);
 	}
 #endif
 	return (err);
@@ -1287,6 +1334,7 @@ STATIC void
 xfs_qm_internalqcheck_get_dquots(
 	xfs_mount_t	*mp,
 	xfs_dqid_t	uid,
+	xfs_dqid_t	projid,
 	xfs_dqid_t	gid,
 	xfs_dqtest_t	**ud,
 	xfs_dqtest_t	**gd)
@@ -1295,6 +1343,8 @@ xfs_qm_internalqcheck_get_dquots(
 		xfs_qm_internalqcheck_dqget(mp, uid, XFS_DQ_USER, ud);
 	if (XFS_IS_GQUOTA_ON(mp))
 		xfs_qm_internalqcheck_dqget(mp, gid, XFS_DQ_GROUP, gd);
+	else if (XFS_IS_PQUOTA_ON(mp))
+		xfs_qm_internalqcheck_dqget(mp, projid, XFS_DQ_PROJ, gd);
 }
 
 
@@ -1362,13 +1412,14 @@ xfs_qm_internalqcheck_adjust(
 	}
 	xfs_qm_internalqcheck_get_dquots(mp,
 					(xfs_dqid_t) ip->i_d.di_uid,
+					(xfs_dqid_t) ip->i_d.di_projid,
 					(xfs_dqid_t) ip->i_d.di_gid,
 					&ud, &gd);
 	if (XFS_IS_UQUOTA_ON(mp)) {
 		ASSERT(ud);
 		xfs_qm_internalqcheck_dqadjust(ip, ud);
 	}
-	if (XFS_IS_GQUOTA_ON(mp)) {
+	if (XFS_IS_OQUOTA_ON(mp)) {
 		ASSERT(gd);
 		xfs_qm_internalqcheck_dqadjust(ip, gd);
 	}
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
index 414b6004af21..bf413e70ec07 100644
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ b/fs/xfs/quota/xfs_quota_priv.h
@@ -56,6 +56,7 @@
 #define XFS_QI_RTBTIMELIMIT(mp) ((mp)->m_quotainfo->qi_rtbtimelimit)
 #define XFS_QI_ITIMELIMIT(mp)	((mp)->m_quotainfo->qi_itimelimit)
 #define XFS_QI_BWARNLIMIT(mp)	((mp)->m_quotainfo->qi_bwarnlimit)
+#define XFS_QI_RTBWARNLIMIT(mp)	((mp)->m_quotainfo->qi_rtbwarnlimit)
 #define XFS_QI_IWARNLIMIT(mp)	((mp)->m_quotainfo->qi_iwarnlimit)
 #define XFS_QI_QOFFLOCK(mp)	((mp)->m_quotainfo->qi_quotaofflock)
 
@@ -102,7 +103,8 @@ static inline int XQMISLCKD(struct xfs_dqhash *h)
 				     (xfs_Gqm->qm_grp_dqhtable + \
 				      XFS_DQ_HASHVAL(mp, id)))
 #define XFS_IS_DQTYPE_ON(mp, type)   (type == XFS_DQ_USER ? \
-				      XFS_IS_UQUOTA_ON(mp):XFS_IS_GQUOTA_ON(mp))
+					XFS_IS_UQUOTA_ON(mp) : \
+					XFS_IS_OQUOTA_ON(mp))
 #define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \
 	!dqp->q_core.d_blk_hardlimit && \
 	!dqp->q_core.d_blk_softlimit && \
@@ -177,16 +179,11 @@ for ((dqp) = (qlist)->qh_next; (dqp) != (xfs_dquot_t *)(qlist); \
 	(!((dqp)->q_core.d_id))
 
 #define XFS_PURGE_INODE(ip)		\
-	{				\
-	  vmap_t dqvmap;		\
-	  vnode_t *dqvp;		\
-	  dqvp = XFS_ITOV(ip);		\
-	  VMAP(dqvp, dqvmap);		\
-	  VN_RELE(dqvp);		\
-	}
+	IRELE(ip);
 
 #define DQFLAGTO_TYPESTR(d)	(((d)->dq_flags & XFS_DQ_USER) ? "USR" : \
-				 (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : "???"))
+				 (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \
+				 (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???")))
 #define DQFLAGTO_DIRTYSTR(d)	(XFS_DQ_IS_DIRTY(d) ? "DIRTY" : "NOTDIRTY")
 
 #endif	/* __XFS_QUOTA_PRIV_H__ */
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 149b2a1fd949..3b99daf8a640 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -187,7 +187,7 @@ xfs_trans_dup_dqinfo(
 /*
  * Wrap around mod_dquot to account for both user and group quotas.
  */
-void
+STATIC void
 xfs_trans_mod_dquot_byino(
 	xfs_trans_t	*tp,
 	xfs_inode_t	*ip,
@@ -207,12 +207,10 @@ xfs_trans_mod_dquot_byino(
 	if (tp->t_dqinfo == NULL)
 		xfs_trans_alloc_dqinfo(tp);
 
-	if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot) {
+	if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot)
 		(void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta);
-	}
-	if (XFS_IS_GQUOTA_ON(mp) && ip->i_gdquot) {
+	if (XFS_IS_OQUOTA_ON(mp) && ip->i_gdquot)
 		(void) xfs_trans_mod_dquot(tp, ip->i_gdquot, field, delta);
-	}
 }
 
 STATIC xfs_dqtrx_t *
@@ -368,7 +366,7 @@ xfs_trans_dqlockedjoin(
  * Unreserve just the reservations done by this transaction.
  * dquot is still left locked at exit.
  */
-void
+STATIC void
 xfs_trans_apply_dquot_deltas(
 	xfs_trans_t		*tp)
 {
@@ -499,7 +497,7 @@ xfs_trans_apply_dquot_deltas(
 			 * Adjust the RT reservation.
 			 */
 			if (qtrx->qt_rtblk_res != 0) {
-				if (qtrx->qt_blk_res != qtrx->qt_blk_res_used) {
+				if (qtrx->qt_rtblk_res != qtrx->qt_rtblk_res_used) {
 					if (qtrx->qt_rtblk_res >
 					    qtrx->qt_rtblk_res_used)
 					       dqp->q_res_rtbcount -= (xfs_qcnt_t)
@@ -532,12 +530,6 @@ xfs_trans_apply_dquot_deltas(
 					    (xfs_qcnt_t)qtrx->qt_icount_delta;
 			}
 
-
-#ifdef QUOTADEBUG
-			if (qtrx->qt_rtblk_res != 0)
-				cmn_err(CE_DEBUG, "RT res %d for 0x%p\n",
-					(int) qtrx->qt_rtblk_res, dqp);
-#endif
 			ASSERT(dqp->q_res_bcount >=
 				INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT));
 			ASSERT(dqp->q_res_icount >=
@@ -638,7 +630,10 @@ xfs_trans_dqresv(
 	int		error;
 	xfs_qcnt_t	hardlimit;
 	xfs_qcnt_t	softlimit;
-	time_t		btimer;
+	time_t		timer;
+	xfs_qwarncnt_t	warns;
+	xfs_qwarncnt_t	warnlimit;
+	xfs_qcnt_t	count;
 	xfs_qcnt_t	*resbcountp;
 	xfs_quotainfo_t	*q = mp->m_quotainfo;
 
@@ -653,7 +648,9 @@ xfs_trans_dqresv(
 		softlimit = INT_GET(dqp->q_core.d_blk_softlimit, ARCH_CONVERT);
 		if (!softlimit)
 			softlimit = q->qi_bsoftlimit;
-		btimer = INT_GET(dqp->q_core.d_btimer, ARCH_CONVERT);
+		timer = INT_GET(dqp->q_core.d_btimer, ARCH_CONVERT);
+		warns = INT_GET(dqp->q_core.d_bwarns, ARCH_CONVERT);
+		warnlimit = XFS_QI_BWARNLIMIT(dqp->q_mount);
 		resbcountp = &dqp->q_res_bcount;
 	} else {
 		ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS);
@@ -663,7 +660,9 @@ xfs_trans_dqresv(
 		softlimit = INT_GET(dqp->q_core.d_rtb_softlimit, ARCH_CONVERT);
 		if (!softlimit)
 			softlimit = q->qi_rtbsoftlimit;
-		btimer = INT_GET(dqp->q_core.d_rtbtimer, ARCH_CONVERT);
+		timer = INT_GET(dqp->q_core.d_rtbtimer, ARCH_CONVERT);
+		warns = INT_GET(dqp->q_core.d_rtbwarns, ARCH_CONVERT);
+		warnlimit = XFS_QI_RTBWARNLIMIT(dqp->q_mount);
 		resbcountp = &dqp->q_res_rtbcount;
 	}
 	error = 0;
@@ -693,37 +692,36 @@ xfs_trans_dqresv(
 				 * If timer or warnings has expired,
 				 * return EDQUOT
 				 */
-				if ((btimer != 0 && get_seconds() > btimer) ||
-				    (dqp->q_core.d_bwarns &&
-				     INT_GET(dqp->q_core.d_bwarns, ARCH_CONVERT) >=
-				     XFS_QI_BWARNLIMIT(dqp->q_mount))) {
+				if ((timer != 0 && get_seconds() > timer) ||
+				    (warns != 0 && warns >= warnlimit)) {
 					error = EDQUOT;
 					goto error_return;
 				}
 			}
 		}
 		if (ninos > 0) {
-			hardlimit = INT_GET(dqp->q_core.d_ino_hardlimit, ARCH_CONVERT);
+			count = INT_GET(dqp->q_core.d_icount, ARCH_CONVERT);
+			timer = INT_GET(dqp->q_core.d_itimer, ARCH_CONVERT);
+			warns = INT_GET(dqp->q_core.d_iwarns, ARCH_CONVERT);
+			warnlimit = XFS_QI_IWARNLIMIT(dqp->q_mount);
+			hardlimit = INT_GET(dqp->q_core.d_ino_hardlimit,
+						ARCH_CONVERT);
 			if (!hardlimit)
 				hardlimit = q->qi_ihardlimit;
-			softlimit = INT_GET(dqp->q_core.d_ino_softlimit, ARCH_CONVERT);
+			softlimit = INT_GET(dqp->q_core.d_ino_softlimit,
+						ARCH_CONVERT);
 			if (!softlimit)
 				softlimit = q->qi_isoftlimit;
-			if (hardlimit > 0ULL &&
-			    INT_GET(dqp->q_core.d_icount, ARCH_CONVERT) >= hardlimit) {
+			if (hardlimit > 0ULL && count >= hardlimit) {
 				error = EDQUOT;
 				goto error_return;
-			} else if (softlimit > 0ULL &&
-				   INT_GET(dqp->q_core.d_icount, ARCH_CONVERT) >= softlimit) {
+			} else if (softlimit > 0ULL && count >= softlimit) {
 				/*
 				 * If timer or warnings has expired,
 				 * return EDQUOT
 				 */
-				if ((dqp->q_core.d_itimer &&
-				     get_seconds() > INT_GET(dqp->q_core.d_itimer, ARCH_CONVERT)) ||
-				    (dqp->q_core.d_iwarns &&
-				     INT_GET(dqp->q_core.d_iwarns, ARCH_CONVERT) >=
-				     XFS_QI_IWARNLIMIT(dqp->q_mount))) {
+				if ((timer != 0 && get_seconds() > timer) ||
+				     (warns != 0 && warns >= warnlimit)) {
 					error = EDQUOT;
 					goto error_return;
 				}
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index 7d6e1f37df10..4ed7b6928cd7 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -36,7 +36,6 @@
 #include <linux/sched.h>
 #include <linux/kernel.h>
 
-int			doass = 1;
 static char		message[256];	/* keep it off the stack */
 static DEFINE_SPINLOCK(xfs_err_lock);
 
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
index 40b0f4c54d9e..c5b9365a7e2a 100644
--- a/fs/xfs/support/debug.h
+++ b/fs/xfs/support/debug.h
@@ -50,16 +50,11 @@ extern void cmn_err(int, char *, ...);
 #endif
 
 #ifdef DEBUG
-# ifdef lint
-#  define ASSERT(EX)	((void)0) /* avoid "constant in conditional" babble */
-# else
-#  define ASSERT(EX) ((!doass||(EX))?((void)0):assfail(#EX, __FILE__, __LINE__))
-# endif	/* lint */
+# define ASSERT(EX)	((EX) ? ((void)0) : assfail(#EX, __FILE__, __LINE__))
 #else
 # define ASSERT(x)	((void)0)
 #endif
 
-extern int doass;		/* dynamically turn off asserts */
 extern void assfail(char *, char *, int);
 #ifdef DEBUG
 extern unsigned long random(void);
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 36603db10fe9..dcfe19703620 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -59,7 +59,7 @@
 #define	XFSA_FIXUP_BNO_OK	1
 #define	XFSA_FIXUP_CNT_OK	2
 
-int
+STATIC int
 xfs_alloc_search_busy(xfs_trans_t *tp,
 		    xfs_agnumber_t agno,
 		    xfs_agblock_t bno,
@@ -2562,7 +2562,7 @@ xfs_alloc_clear_busy(xfs_trans_t *tp,
 /*
  * returns non-zero if any of (agno,bno):len is in a busy list
  */
-int
+STATIC int
 xfs_alloc_search_busy(xfs_trans_t *tp,
 		    xfs_agnumber_t agno,
 		    xfs_agblock_t bno,
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index ee8b5904ec7c..a41ad3a5e554 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -71,6 +71,11 @@
  * Provide the external interfaces to manage attribute lists.
  */
 
+#define ATTR_SYSCOUNT	2
+STATIC struct attrnames posix_acl_access;
+STATIC struct attrnames posix_acl_default;
+STATIC struct attrnames *attr_system_names[ATTR_SYSCOUNT];
+
 /*========================================================================
  * Function prototypes for the kernel.
  *========================================================================*/
@@ -83,6 +88,7 @@ STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args);
 /*
  * Internal routines when attribute list is one block.
  */
+STATIC int xfs_attr_leaf_get(xfs_da_args_t *args);
 STATIC int xfs_attr_leaf_addname(xfs_da_args_t *args);
 STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args);
 STATIC int xfs_attr_leaf_list(xfs_attr_list_context_t *context);
@@ -90,6 +96,7 @@ STATIC int xfs_attr_leaf_list(xfs_attr_list_context_t *context);
 /*
  * Internal routines when attribute list is more than one block.
  */
+STATIC int xfs_attr_node_get(xfs_da_args_t *args);
 STATIC int xfs_attr_node_addname(xfs_da_args_t *args);
 STATIC int xfs_attr_node_removename(xfs_da_args_t *args);
 STATIC int xfs_attr_node_list(xfs_attr_list_context_t *context);
@@ -1102,7 +1109,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
  * This leaf block cannot have a "remote" value, we only call this routine
  * if bmap_one_block() says there is only one block (ie: no remote blks).
  */
-int
+STATIC int
 xfs_attr_leaf_get(xfs_da_args_t *args)
 {
 	xfs_dabuf_t *bp;
@@ -1707,7 +1714,7 @@ xfs_attr_refillstate(xfs_da_state_t *state)
  * block, ie: both true Btree attr lists and for single-leaf-blocks with
  * "remote" values taking up more blocks.
  */
-int
+STATIC int
 xfs_attr_node_get(xfs_da_args_t *args)
 {
 	xfs_da_state_t *state;
@@ -2398,7 +2405,7 @@ posix_acl_default_exists(
 	return xfs_acl_vhasacl_default(vp);
 }
 
-struct attrnames posix_acl_access = {
+STATIC struct attrnames posix_acl_access = {
 	.attr_name	= "posix_acl_access",
 	.attr_namelen	= sizeof("posix_acl_access") - 1,
 	.attr_get	= posix_acl_access_get,
@@ -2407,7 +2414,7 @@ struct attrnames posix_acl_access = {
 	.attr_exists	= posix_acl_access_exists,
 };
 
-struct attrnames posix_acl_default = {
+STATIC struct attrnames posix_acl_default = {
 	.attr_name	= "posix_acl_default",
 	.attr_namelen	= sizeof("posix_acl_default") - 1,
 	.attr_get	= posix_acl_default_get,
@@ -2416,7 +2423,7 @@ struct attrnames posix_acl_default = {
 	.attr_exists	= posix_acl_default_exists,
 };
 
-struct attrnames *attr_system_names[] =
+STATIC struct attrnames *attr_system_names[] =
 	{ &posix_acl_access, &posix_acl_default };
 
 
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index 67cd0f5ac1a7..45ab1c542baf 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -76,11 +76,6 @@ extern struct attrnames attr_system;
 extern struct attrnames attr_trusted;
 extern struct attrnames *attr_namespaces[ATTR_NAMECOUNT];
 
-#define ATTR_SYSCOUNT	2
-extern struct attrnames posix_acl_access;
-extern struct attrnames posix_acl_default;
-extern struct attrnames *attr_system_names[ATTR_SYSCOUNT];
-
 extern attrnames_t *attr_lookup_namespace(char *, attrnames_t **, int);
 extern int attr_generic_list(struct vnode *, void *, size_t, int, ssize_t *);
 
@@ -184,8 +179,6 @@ int xfs_attr_list(bhv_desc_t *, char *, int, int,
 			 struct attrlist_cursor_kern *, struct cred *);
 int xfs_attr_inactive(struct xfs_inode *dp);
 
-int xfs_attr_node_get(struct xfs_da_args *);
-int xfs_attr_leaf_get(struct xfs_da_args *);
 int xfs_attr_shortform_getvalue(struct xfs_da_args *);
 int xfs_attr_fetch(struct xfs_inode *, char *, int,
 			char *, int *, int, struct cred *);
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index b11256e58bf4..1cdd574c63a9 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -79,6 +79,8 @@
 /*
  * Routines used for growing the Btree.
  */
+STATIC int xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t which_block,
+				    xfs_dabuf_t **bpp);
 STATIC int xfs_attr_leaf_add_work(xfs_dabuf_t *leaf_buffer, xfs_da_args_t *args,
 					      int freemap_index);
 STATIC void xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *leaf_buffer);
@@ -92,6 +94,16 @@ STATIC int xfs_attr_leaf_figure_balance(xfs_da_state_t *state,
 					   int *number_usedbytes_in_blk1);
 
 /*
+ * Routines used for shrinking the Btree.
+ */
+STATIC int xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp,
+				  xfs_dabuf_t *bp, int level);
+STATIC int xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp,
+				  xfs_dabuf_t *bp);
+STATIC int xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
+				   xfs_dablk_t blkno, int blkcnt);
+
+/*
  * Utility routines.
  */
 STATIC void xfs_attr_leaf_moveents(xfs_attr_leafblock_t *src_leaf,
@@ -99,6 +111,10 @@ STATIC void xfs_attr_leaf_moveents(xfs_attr_leafblock_t *src_leaf,
 					 xfs_attr_leafblock_t *dst_leaf,
 					 int dst_start, int move_count,
 					 xfs_mount_t *mp);
+STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
+STATIC int xfs_attr_put_listent(xfs_attr_list_context_t *context,
+			     attrnames_t *, char *name, int namelen,
+			     int valuelen);
 
 
 /*========================================================================
@@ -774,7 +790,7 @@ out:
  * Create the initial contents of a leaf attribute list
  * or a leaf in a node attribute list.
  */
-int
+STATIC int
 xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
 {
 	xfs_attr_leafblock_t *leaf;
@@ -2209,7 +2225,7 @@ xfs_attr_leaf_lasthash(xfs_dabuf_t *bp, int *count)
  * Calculate the number of bytes used to store the indicated attribute
  * (whether local or remote only calculate bytes in this block).
  */
-int
+STATIC int
 xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index)
 {
 	xfs_attr_leaf_name_local_t *name_loc;
@@ -2380,7 +2396,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
  * we may be reading them directly out of a user buffer.
  */
 /*ARGSUSED*/
-int
+STATIC int
 xfs_attr_put_listent(xfs_attr_list_context_t *context,
 		     attrnames_t *namesp, char *name, int namelen, int valuelen)
 {
@@ -2740,7 +2756,7 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp)
  * Recurse (gasp!) through the attribute nodes until we find leaves.
  * We're doing a depth-first traversal in order to invalidate everything.
  */
-int
+STATIC int
 xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
 				   int level)
 {
@@ -2849,7 +2865,7 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
  * Note that we must release the lock on the buffer so that we are not
  * caught holding something that the logging code wants to flush to disk.
  */
-int
+STATIC int
 xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp)
 {
 	xfs_attr_leafblock_t *leaf;
@@ -2934,7 +2950,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp)
  * Look at all the extents for this logical region,
  * invalidate any buffers that are incore/in transactions.
  */
-int
+STATIC int
 xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
 				    xfs_dablk_t blkno, int blkcnt)
 {
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index b1480e0b3349..0a4cfad6df91 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -261,8 +261,6 @@ int	xfs_attr_leaf_flipflags(xfs_da_args_t *args);
 /*
  * Routines used for growing the Btree.
  */
-int	xfs_attr_leaf_create(struct xfs_da_args *args, xfs_dablk_t which_block,
-				    struct xfs_dabuf **bpp);
 int	xfs_attr_leaf_split(struct xfs_da_state *state,
 				   struct xfs_da_state_blk *oldblk,
 				   struct xfs_da_state_blk *newblk);
@@ -284,12 +282,6 @@ void	xfs_attr_leaf_unbalance(struct xfs_da_state *state,
 				       struct xfs_da_state_blk *drop_blk,
 				       struct xfs_da_state_blk *save_blk);
 int	xfs_attr_root_inactive(struct xfs_trans **trans, struct xfs_inode *dp);
-int	xfs_attr_node_inactive(struct xfs_trans **trans, struct xfs_inode *dp,
-				      struct xfs_dabuf *bp, int level);
-int	xfs_attr_leaf_inactive(struct xfs_trans **trans, struct xfs_inode *dp,
-				      struct xfs_dabuf *bp);
-int	xfs_attr_leaf_freextent(struct xfs_trans **trans, struct xfs_inode *dp,
-				       xfs_dablk_t blkno, int blkcnt);
 
 /*
  * Utility routines.
@@ -299,10 +291,6 @@ int	xfs_attr_leaf_order(struct xfs_dabuf *leaf1_bp,
 				   struct xfs_dabuf *leaf2_bp);
 int	xfs_attr_leaf_newentsize(struct xfs_da_args *args, int blocksize,
 					int *local);
-int	xfs_attr_leaf_entsize(struct xfs_attr_leafblock *leaf, int index);
-int	xfs_attr_put_listent(struct xfs_attr_list_context *context,
-			     struct attrnames *, char *name, int namelen,
-			     int valuelen);
 int	xfs_attr_rolltrans(struct xfs_trans **transp, struct xfs_inode *dp);
 
 #endif	/* __XFS_ATTR_LEAF_H__ */
diff --git a/fs/xfs/xfs_bit.c b/fs/xfs/xfs_bit.c
index a20a6c3dc13e..76c9ad3875ef 100644
--- a/fs/xfs/xfs_bit.c
+++ b/fs/xfs/xfs_bit.c
@@ -45,7 +45,7 @@
 /*
  * Index of high bit number in byte, -1 for none set, 0..7 otherwise.
  */
-const char xfs_highbit[256] = {
+STATIC const char xfs_highbit[256] = {
        -1, 0, 1, 1, 2, 2, 2, 2,			/* 00 .. 07 */
 	3, 3, 3, 3, 3, 3, 3, 3,			/* 08 .. 0f */
 	4, 4, 4, 4, 4, 4, 4, 4,			/* 10 .. 17 */
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index de3162418663..6f5d283888aa 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -301,6 +301,19 @@ xfs_bmap_search_extents(
 	xfs_bmbt_irec_t	*gotp,		/* out: extent entry found */
 	xfs_bmbt_irec_t	*prevp);	/* out: previous extent entry found */
 
+/*
+ * Check the last inode extent to determine whether this allocation will result
+ * in blocks being allocated at the end of the file. When we allocate new data
+ * blocks at the end of the file which do not start at the previous data block,
+ * we will try to align the new blocks at stripe unit boundaries.
+ */
+STATIC int				/* error */
+xfs_bmap_isaeof(
+	xfs_inode_t	*ip,		/* incore inode pointer */
+	xfs_fileoff_t   off,		/* file offset in fsblocks */
+	int             whichfork,	/* data or attribute fork */
+	char		*aeof);		/* return value */
+
 #ifdef XFS_BMAP_TRACE
 /*
  * Add a bmap trace buffer entry.  Base routine for the others.
@@ -4532,18 +4545,17 @@ xfs_bmapi(
 	xfs_extlen_t	alen;		/* allocated extent length */
 	xfs_fileoff_t	aoff;		/* allocated file offset */
 	xfs_bmalloca_t	bma;		/* args for xfs_bmap_alloc */
-	char		contig;		/* allocation must be one extent */
 	xfs_btree_cur_t	*cur;		/* bmap btree cursor */
-	char		delay;		/* this request is for delayed alloc */
 	xfs_fileoff_t	end;		/* end of mapped file region */
 	int		eof;		/* we've hit the end of extent list */
+	char		contig;		/* allocation must be one extent */
+	char		delay;		/* this request is for delayed alloc */
+	char		exact;		/* don't do all of wasdelayed extent */
 	xfs_bmbt_rec_t	*ep;		/* extent list entry pointer */
 	int		error;		/* error return */
-	char		exact;		/* don't do all of wasdelayed extent */
 	xfs_bmbt_irec_t	got;		/* current extent list record */
 	xfs_ifork_t	*ifp;		/* inode fork pointer */
 	xfs_extlen_t	indlen;		/* indirect blocks length */
-	char		inhole;		/* current location is hole in file */
 	xfs_extnum_t	lastx;		/* last useful extent number */
 	int		logflags;	/* flags for transaction logging */
 	xfs_extlen_t	minleft;	/* min blocks left after allocation */
@@ -4554,13 +4566,15 @@ xfs_bmapi(
 	xfs_extnum_t	nextents;	/* number of extents in file */
 	xfs_fileoff_t	obno;		/* old block number (offset) */
 	xfs_bmbt_irec_t	prev;		/* previous extent list record */
-	char		stateless;	/* ignore state flag set */
 	int		tmp_logflags;	/* temp flags holder */
+	int		whichfork;	/* data or attr fork */
+	char		inhole;		/* current location is hole in file */
+	char		stateless;	/* ignore state flag set */
 	char		trim;		/* output trimmed to match range */
 	char		userdata;	/* allocating non-metadata */
 	char		wasdelay;	/* old extent was delayed */
-	int		whichfork;	/* data or attr fork */
 	char		wr;		/* this is a write request */
+	char		rt;		/* this is a realtime file */
 	char		rsvd;		/* OK to allocate reserved blocks */
 #ifdef DEBUG
 	xfs_fileoff_t	orig_bno;	/* original block number value */
@@ -4590,6 +4604,7 @@ xfs_bmapi(
 	}
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
+	rt = XFS_IS_REALTIME_INODE(ip);
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	ASSERT(ifp->if_ext_max ==
 	       XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
@@ -4694,9 +4709,16 @@ xfs_bmapi(
 			}
 			minlen = contig ? alen : 1;
 			if (delay) {
-				indlen = (xfs_extlen_t)
-					xfs_bmap_worst_indlen(ip, alen);
-				ASSERT(indlen > 0);
+				xfs_extlen_t	extsz = 0;
+
+				/* Figure out the extent size, adjust alen */
+				if (rt) {
+					if (!(extsz = ip->i_d.di_extsize))
+						extsz = mp->m_sb.sb_rextsize;
+					alen = roundup(alen, extsz);
+					extsz = alen / mp->m_sb.sb_rextsize;
+				}
+
 				/*
 				 * Make a transaction-less quota reservation for
 				 * delayed allocation blocks. This number gets
@@ -4704,8 +4726,10 @@ xfs_bmapi(
 				 * We return EDQUOT if we haven't allocated
 				 * blks already inside this loop;
 				 */
-				if (XFS_TRANS_RESERVE_BLKQUOTA(
-						mp, NULL, ip, (long)alen)) {
+				if (XFS_TRANS_RESERVE_QUOTA_NBLKS(
+						mp, NULL, ip, (long)alen, 0,
+						rt ? XFS_QMOPT_RES_RTBLKS :
+						     XFS_QMOPT_RES_REGBLKS)) {
 					if (n == 0) {
 						*nmap = 0;
 						ASSERT(cur == NULL);
@@ -4718,40 +4742,34 @@ xfs_bmapi(
 				 * Split changing sb for alen and indlen since
 				 * they could be coming from different places.
 				 */
-				if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) {
-					xfs_extlen_t	extsz;
-					xfs_extlen_t	ralen;
-					if (!(extsz = ip->i_d.di_extsize))
-						extsz = mp->m_sb.sb_rextsize;
-					ralen = roundup(alen, extsz);
-					ralen = ralen / mp->m_sb.sb_rextsize;
-					if (xfs_mod_incore_sb(mp,
-						XFS_SBS_FREXTENTS,
-						-(ralen), rsvd)) {
-						if (XFS_IS_QUOTA_ON(ip->i_mount))
-							XFS_TRANS_UNRESERVE_BLKQUOTA(
-						     		mp, NULL, ip,
-								(long)alen);
-						break;
-					}
-				} else {
-					if (xfs_mod_incore_sb(mp,
-							      XFS_SBS_FDBLOCKS,
-							      -(alen), rsvd)) {
-						if (XFS_IS_QUOTA_ON(ip->i_mount))
-							XFS_TRANS_UNRESERVE_BLKQUOTA(
-								mp, NULL, ip,
-								(long)alen);
-						break;
-					}
-				}
+				indlen = (xfs_extlen_t)
+					xfs_bmap_worst_indlen(ip, alen);
+				ASSERT(indlen > 0);
 
-				if (xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,
-						-(indlen), rsvd)) {
-					XFS_TRANS_UNRESERVE_BLKQUOTA(
-						mp, NULL, ip, (long)alen);
+				if (rt)
+					error = xfs_mod_incore_sb(mp,
+							XFS_SBS_FREXTENTS,
+							-(extsz), rsvd);
+				else
+					error = xfs_mod_incore_sb(mp,
+							XFS_SBS_FDBLOCKS,
+							-(alen), rsvd);
+				if (!error)
+					error = xfs_mod_incore_sb(mp,
+							XFS_SBS_FDBLOCKS,
+							-(indlen), rsvd);
+
+				if (error) {
+					if (XFS_IS_QUOTA_ON(ip->i_mount))
+						/* unreserve the blocks now */
+						XFS_TRANS_UNRESERVE_QUOTA_NBLKS(
+							mp, NULL, ip,
+							(long)alen, 0, rt ?
+							XFS_QMOPT_RES_RTBLKS :
+							XFS_QMOPT_RES_REGBLKS);
 					break;
 				}
+
 				ip->i_delayed_blks += alen;
 				abno = NULLSTARTBLOCK(indlen);
 			} else {
@@ -5376,13 +5394,24 @@ xfs_bunmapi(
 		}
 		if (wasdel) {
 			ASSERT(STARTBLOCKVAL(del.br_startblock) > 0);
-			xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,
-				(int)del.br_blockcount, rsvd);
-			/* Unreserve our quota space */
-			XFS_TRANS_RESERVE_QUOTA_NBLKS(
-				mp, NULL, ip, -((long)del.br_blockcount), 0,
-				isrt ?	XFS_QMOPT_RES_RTBLKS :
+			/* Update realtim/data freespace, unreserve quota */
+			if (isrt) {
+				xfs_filblks_t rtexts;
+
+				rtexts = XFS_FSB_TO_B(mp, del.br_blockcount);
+				do_div(rtexts, mp->m_sb.sb_rextsize);
+				xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
+						(int)rtexts, rsvd);
+				XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, NULL, ip,
+					-((long)del.br_blockcount), 0,
+					XFS_QMOPT_RES_RTBLKS);
+			} else {
+				xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,
+						(int)del.br_blockcount, rsvd);
+				XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, NULL, ip,
+					-((long)del.br_blockcount), 0,
 					XFS_QMOPT_RES_REGBLKS);
+			}
 			ip->i_delayed_blks -= del.br_blockcount;
 			if (cur)
 				cur->bc_private.b.flags |=
@@ -5714,7 +5743,7 @@ unlock_and_return:
  * blocks at the end of the file which do not start at the previous data block,
  * we will try to align the new blocks at stripe unit boundaries.
  */
-int					/* error */
+STATIC int				/* error */
 xfs_bmap_isaeof(
 	xfs_inode_t	*ip,		/* incore inode pointer */
 	xfs_fileoff_t   off,		/* file offset in fsblocks */
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index f1bc22fb26ae..e6d22ec9b2e4 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -332,19 +332,6 @@ xfs_getbmap(
 	int			iflags);	/* interface flags */
 
 /*
- * Check the last inode extent to determine whether this allocation will result
- * in blocks being allocated at the end of the file. When we allocate new data
- * blocks at the end of the file which do not start at the previous data block,
- * we will try to align the new blocks at stripe unit boundaries.
- */
-int
-xfs_bmap_isaeof(
-	struct xfs_inode	*ip,
-	xfs_fileoff_t		off,
-	int			whichfork,
-	char			*aeof);
-
-/*
  * Check if the endoff is outside the last extent. If so the caller will grow
  * the allocation to a stripe unit boundary
  */
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 163305a79fcc..09c413576ba8 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -2331,20 +2331,6 @@ xfs_bmbt_lookup_ge(
 	return xfs_bmbt_lookup(cur, XFS_LOOKUP_GE, stat);
 }
 
-int					/* error */
-xfs_bmbt_lookup_le(
-	xfs_btree_cur_t	*cur,
-	xfs_fileoff_t	off,
-	xfs_fsblock_t	bno,
-	xfs_filblks_t	len,
-	int		*stat)		/* success/failure */
-{
-	cur->bc_rec.b.br_startoff = off;
-	cur->bc_rec.b.br_startblock = bno;
-	cur->bc_rec.b.br_blockcount = len;
-	return xfs_bmbt_lookup(cur, XFS_LOOKUP_LE, stat);
-}
-
 /*
  * Give the bmap btree a new root block.  Copy the old broot contents
  * down into a real block and make the broot point to it.
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index 843ff12b4bf2..0a40cf126c28 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -580,14 +580,6 @@ xfs_bmbt_lookup_ge(
 	xfs_filblks_t,
 	int *);
 
-int
-xfs_bmbt_lookup_le(
-	struct xfs_btree_cur *,
-	xfs_fileoff_t,
-	xfs_fsblock_t,
-	xfs_filblks_t,
-	int *);
-
 /*
  * Give the bmap btree a new root block.  Copy the old broot contents
  * down into a real block and make the broot point to it.
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 9dd22dd95487..0cc63d657a14 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -90,6 +90,16 @@ xfs_btree_maxrecs(
  */
 
 /*
+ * Retrieve the block pointer from the cursor at the given level.
+ * This may be a bmap btree root or from a buffer.
+ */
+STATIC xfs_btree_block_t *			/* generic btree block pointer */
+xfs_btree_get_block(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			level,	/* level in btree */
+	struct xfs_buf		**bpp);	/* buffer containing the block */
+
+/*
  * Checking routine: return maxrecs for the block.
  */
 STATIC int				/* number of records fitting in block */
@@ -497,7 +507,7 @@ xfs_btree_firstrec(
  * Retrieve the block pointer from the cursor at the given level.
  * This may be a bmap btree root or from a buffer.
  */
-xfs_btree_block_t *			/* generic btree block pointer */
+STATIC xfs_btree_block_t *		/* generic btree block pointer */
 xfs_btree_get_block(
 	xfs_btree_cur_t		*cur,	/* btree cursor */
 	int			level,	/* level in btree */
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 93872bba41f5..09b4e1532a35 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -325,16 +325,6 @@ xfs_btree_firstrec(
 	int			level);	/* level to change */
 
 /*
- * Retrieve the block pointer from the cursor at the given level.
- * This may be a bmap btree root or from a buffer.
- */
-xfs_btree_block_t *			/* generic btree block pointer */
-xfs_btree_get_block(
-	xfs_btree_cur_t		*cur,	/* btree cursor */
-	int			level,	/* level in btree */
-	struct xfs_buf		**bpp);	/* buffer containing the block */
-
-/*
  * Get a buffer for the block, return it with no data read.
  * Long-form addressing.
  */
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 9ab0039f07df..30b8285ad476 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -172,7 +172,7 @@ STATIC void	xfs_buf_do_callbacks(xfs_buf_t *bp, xfs_log_item_t *lip);
  *
  * If the XFS_BLI_STALE flag has been set, then log nothing.
  */
-uint
+STATIC uint
 xfs_buf_item_size(
 	xfs_buf_log_item_t	*bip)
 {
@@ -240,7 +240,7 @@ xfs_buf_item_size(
  * format structure, and the rest point to contiguous chunks
  * within the buffer.
  */
-void
+STATIC void
 xfs_buf_item_format(
 	xfs_buf_log_item_t	*bip,
 	xfs_log_iovec_t		*log_vector)
@@ -365,7 +365,7 @@ xfs_buf_item_format(
  * item in memory so it cannot be written out.  Simply call bpin()
  * on the buffer to do this.
  */
-void
+STATIC void
 xfs_buf_item_pin(
 	xfs_buf_log_item_t	*bip)
 {
@@ -391,7 +391,7 @@ xfs_buf_item_pin(
  * If the XFS_BLI_STALE flag is set and we are the last reference,
  * then free up the buf log item and unlock the buffer.
  */
-void
+STATIC void
 xfs_buf_item_unpin(
 	xfs_buf_log_item_t	*bip,
 	int			stale)
@@ -446,7 +446,7 @@ xfs_buf_item_unpin(
  * so we need to free the item's descriptor (that points to the item)
  * in the transaction.
  */
-void
+STATIC void
 xfs_buf_item_unpin_remove(
 	xfs_buf_log_item_t	*bip,
 	xfs_trans_t		*tp)
@@ -493,7 +493,7 @@ xfs_buf_item_unpin_remove(
  * the lock right away, return 0.  If we can get the lock, pull the
  * buffer from the free list, mark it busy, and return 1.
  */
-uint
+STATIC uint
 xfs_buf_item_trylock(
 	xfs_buf_log_item_t	*bip)
 {
@@ -537,7 +537,7 @@ xfs_buf_item_trylock(
  * This is for support of xfs_trans_bhold(). Make sure the
  * XFS_BLI_HOLD field is cleared if we don't free the item.
  */
-void
+STATIC void
 xfs_buf_item_unlock(
 	xfs_buf_log_item_t	*bip)
 {
@@ -635,7 +635,7 @@ xfs_buf_item_unlock(
  * by returning the original lsn of that transaction here rather than
  * the current one.
  */
-xfs_lsn_t
+STATIC xfs_lsn_t
 xfs_buf_item_committed(
 	xfs_buf_log_item_t	*bip,
 	xfs_lsn_t		lsn)
@@ -654,7 +654,7 @@ xfs_buf_item_committed(
  * and have aborted this transaction, we'll trap this buffer when it tries to
  * get written out.
  */
-void
+STATIC void
 xfs_buf_item_abort(
 	xfs_buf_log_item_t	*bip)
 {
@@ -674,7 +674,7 @@ xfs_buf_item_abort(
  * B_DELWRI set, then get it going out to disk with a call to bawrite().
  * If not, then just release the buffer.
  */
-void
+STATIC void
 xfs_buf_item_push(
 	xfs_buf_log_item_t	*bip)
 {
@@ -693,7 +693,7 @@ xfs_buf_item_push(
 }
 
 /* ARGSUSED */
-void
+STATIC void
 xfs_buf_item_committing(xfs_buf_log_item_t *bip, xfs_lsn_t commit_lsn)
 {
 }
@@ -701,7 +701,7 @@ xfs_buf_item_committing(xfs_buf_log_item_t *bip, xfs_lsn_t commit_lsn)
 /*
  * This is the ops vector shared by all buf log items.
  */
-struct xfs_item_ops xfs_buf_item_ops = {
+STATIC struct xfs_item_ops xfs_buf_item_ops = {
 	.iop_size	= (uint(*)(xfs_log_item_t*))xfs_buf_item_size,
 	.iop_format	= (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
 					xfs_buf_item_format,
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 5f1b0c9308f6..01aed5f2d579 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -80,7 +80,7 @@ typedef struct xfs_buf_log_format_t {
  * user or group dquots and may require special recovery handling.
  */
 #define	XFS_BLI_UDQUOT_BUF	0x4
-/* #define XFS_BLI_PDQUOT_BUF	0x8 */
+#define XFS_BLI_PDQUOT_BUF	0x8
 #define	XFS_BLI_GDQUOT_BUF	0x10
 
 #define	XFS_BLI_CHUNK		128
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index d7fe28866764..8b792ddf2164 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -113,7 +113,10 @@ STATIC void xfs_da_node_unbalance(xfs_da_state_t *state,
 STATIC uint	xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count);
 STATIC int	xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp);
 STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra);
-
+STATIC int	xfs_da_blk_unlink(xfs_da_state_t *state,
+				  xfs_da_state_blk_t *drop_blk,
+				  xfs_da_state_blk_t *save_blk);
+STATIC void	xfs_da_state_kill_altpath(xfs_da_state_t *state);
 
 /*========================================================================
  * Routines used for growing the Btree.
@@ -1424,7 +1427,7 @@ xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count)
 /*
  * Unlink a block from a doubly linked list of blocks.
  */
-int							/* error */
+STATIC int						/* error */
 xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 				 xfs_da_state_blk_t *save_blk)
 {
@@ -2381,7 +2384,7 @@ xfs_da_state_alloc(void)
 /*
  * Kill the altpath contents of a da-state structure.
  */
-void
+STATIC void
 xfs_da_state_kill_altpath(xfs_da_state_t *state)
 {
 	int	i;
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 9fc699d96995..3a9b9e809c60 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -296,8 +296,6 @@ int	xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
 /*
  * Utility routines.
  */
-int	xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
-					 xfs_da_state_blk_t *save_blk);
 int	xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
 				       xfs_da_state_blk_t *new_blk);
 
@@ -320,7 +318,6 @@ uint xfs_da_hashname(uchar_t *name_string, int name_length);
 uint xfs_da_log2_roundup(uint i);
 xfs_da_state_t *xfs_da_state_alloc(void);
 void xfs_da_state_free(xfs_da_state_t *state);
-void xfs_da_state_kill_altpath(xfs_da_state_t *state);
 
 void xfs_da_buf_done(xfs_dabuf_t *dabuf);
 void xfs_da_log_buf(struct xfs_trans *tp, xfs_dabuf_t *dabuf, uint first,
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 63abdc2ac7f4..681be5c93af5 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -180,9 +180,10 @@ xfs_swapext(
 		goto error0;
 	}
 
-	if (VN_CACHED(tvp) != 0)
-		xfs_inval_cached_pages(XFS_ITOV(tip), &(tip->i_iocore),
-						(xfs_off_t)0, 0, 0);
+	if (VN_CACHED(tvp) != 0) {
+		xfs_inval_cached_trace(&tip->i_iocore, 0, -1, 0, -1);
+		VOP_FLUSHINVAL_PAGES(tvp, 0, -1, FI_REMAPF_LOCKED);
+	}
 
 	/* Verify O_DIRECT for ftmp */
 	if (VN_CACHED(tvp) != 0) {
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index db9887a107de..a0aa0e44ff9d 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -304,7 +304,7 @@ xfs_dir2_data_freeinsert(
 /*
  * Remove a bestfree entry from the table.
  */
-void
+STATIC void
 xfs_dir2_data_freeremove(
 	xfs_dir2_data_t		*d,		/* data block pointer */
 	xfs_dir2_data_free_t	*dfp,		/* bestfree entry pointer */
diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h
index 3f02294ccff0..476cac920bf5 100644
--- a/fs/xfs/xfs_dir2_data.h
+++ b/fs/xfs/xfs_dir2_data.h
@@ -193,10 +193,6 @@ extern xfs_dir2_data_free_t *
 				 xfs_dir2_data_unused_t *dup, int *loghead);
 
 extern void
-	xfs_dir2_data_freeremove(xfs_dir2_data_t *d,
-				 xfs_dir2_data_free_t *dfp, int *loghead);
-
-extern void
 	xfs_dir2_data_freescan(struct xfs_mount *mp, xfs_dir2_data_t *d,
 			       int *loghead, char *aendp);
 
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index 262d1e86df30..056f5283904b 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -77,6 +77,10 @@ static void xfs_dir2_leaf_check(xfs_inode_t *dp, xfs_dabuf_t *bp);
 #endif
 static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, xfs_dabuf_t **lbpp,
 				    int *indexp, xfs_dabuf_t **dbpp);
+static void xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_dabuf *bp,
+				    int first, int last);
+static void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_dabuf *bp);
+
 
 /*
  * Convert a block form directory to a leaf form directory.
@@ -1214,7 +1218,7 @@ xfs_dir2_leaf_init(
 /*
  * Log the bests entries indicated from a leaf1 block.
  */
-void
+static void
 xfs_dir2_leaf_log_bests(
 	xfs_trans_t		*tp,		/* transaction pointer */
 	xfs_dabuf_t		*bp,		/* leaf buffer */
@@ -1278,7 +1282,7 @@ xfs_dir2_leaf_log_header(
 /*
  * Log the tail of the leaf1 block.
  */
-void
+STATIC void
 xfs_dir2_leaf_log_tail(
 	xfs_trans_t		*tp,		/* transaction pointer */
 	xfs_dabuf_t		*bp)		/* leaf buffer */
diff --git a/fs/xfs/xfs_dir2_leaf.h b/fs/xfs/xfs_dir2_leaf.h
index 7f20eee56a52..3303cd6f4c00 100644
--- a/fs/xfs/xfs_dir2_leaf.h
+++ b/fs/xfs/xfs_dir2_leaf.h
@@ -330,15 +330,8 @@ extern void
 			       int first, int last);
 
 extern void
-	xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_dabuf *bp,
-				int first, int last);
-
-extern void
 	xfs_dir2_leaf_log_header(struct xfs_trans *tp, struct xfs_dabuf *bp);
 
-extern void
-	xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_dabuf *bp);
-
 extern int
 	xfs_dir2_leaf_lookup(struct xfs_da_args *args);
 
diff --git a/fs/xfs/xfs_dir_leaf.c b/fs/xfs/xfs_dir_leaf.c
index 617018d6bbdc..c2ea6171fb0e 100644
--- a/fs/xfs/xfs_dir_leaf.c
+++ b/fs/xfs/xfs_dir_leaf.c
@@ -91,6 +91,10 @@ STATIC int xfs_dir_leaf_figure_balance(xfs_da_state_t *state,
 					  int *number_entries_in_blk1,
 					  int *number_namebytes_in_blk1);
 
+STATIC int xfs_dir_leaf_create(struct xfs_da_args *args,
+				xfs_dablk_t which_block,
+				struct xfs_dabuf **bpp);
+
 /*
  * Utility routines.
  */
@@ -781,7 +785,7 @@ xfs_dir_leaf_to_node(xfs_da_args_t *args)
  * Create the initial contents of a leaf directory
  * or a leaf in a node directory.
  */
-int
+STATIC int
 xfs_dir_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
 {
 	xfs_dir_leafblock_t *leaf;
diff --git a/fs/xfs/xfs_dir_leaf.h b/fs/xfs/xfs_dir_leaf.h
index 00d68d33cc7a..dd423ce1bc8d 100644
--- a/fs/xfs/xfs_dir_leaf.h
+++ b/fs/xfs/xfs_dir_leaf.h
@@ -202,8 +202,6 @@ int xfs_dir_leaf_to_shortform(struct xfs_da_args *args);
 /*
  * Routines used for growing the Btree.
  */
-int	xfs_dir_leaf_create(struct xfs_da_args *args, xfs_dablk_t which_block,
-				   struct xfs_dabuf **bpp);
 int	xfs_dir_leaf_split(struct xfs_da_state *state,
 				  struct xfs_da_state_blk *oldblk,
 				  struct xfs_da_state_blk *newblk);
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h
index 55ae3e67d245..55c17adaaa37 100644
--- a/fs/xfs/xfs_dmapi.h
+++ b/fs/xfs/xfs_dmapi.h
@@ -166,27 +166,32 @@ typedef enum {
 #define DM_FLAGS_NDELAY		0x001	/* return EAGAIN after dm_pending() */
 #define DM_FLAGS_UNWANTED	0x002	/* event not in fsys dm_eventset_t */
 #define DM_FLAGS_ISEM		0x004	/* thread holds i_sem */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,4,21)
-/* i_alloc_sem was added in 2.4.22-pre1 */
 #define DM_FLAGS_IALLOCSEM_RD	0x010	/* thread holds i_alloc_sem rd */
 #define DM_FLAGS_IALLOCSEM_WR	0x020	/* thread holds i_alloc_sem wr */
-#endif
-#endif
 
 /*
  *	Based on IO_ISDIRECT, decide which i_ flag is set.
  */
-#ifdef DM_FLAGS_IALLOCSEM_RD
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,0)
+#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
+			      DM_FLAGS_ISEM : 0)
+#define DM_SEM_FLAG_WR	(DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_ISEM)
+#endif
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) && \
+    (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,22))
 #define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
 			      DM_FLAGS_IALLOCSEM_RD : DM_FLAGS_ISEM)
 #define DM_SEM_FLAG_WR	(DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_ISEM)
-#else
+#endif
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,21)
 #define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
 			      0 : DM_FLAGS_ISEM)
 #define DM_SEM_FLAG_WR	(DM_FLAGS_ISEM)
 #endif
 
+
 /*
  *	Macros to turn caller specified delay/block flags into
  *	dm_send_xxxx_event flag DM_FLAGS_NDELAY.
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index bbe1dea11c08..dcd3fdd5c1f7 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -280,7 +280,7 @@ xfs_error_report(
 	}
 }
 
-void
+STATIC void
 xfs_hex_dump(void *p, int length)
 {
 	__uint8_t *uip = (__uint8_t*)p;
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 6bc0535c0a65..52ee2b90b5ed 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -73,9 +73,6 @@ xfs_corruption_error(
 	int		linenum,
 	inst_t		*ra);
 
-extern void
-xfs_hex_dump(void *p, int length);
-
 #define	XFS_ERROR_REPORT(e, lvl, mp)	\
 	xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address)
 #define	XFS_CORRUPTION_ERROR(e, lvl, mp, mem)	\
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 5eafd5b63211..db7cbd1bc857 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -59,6 +59,18 @@ STATIC void	xfs_efi_item_abort(xfs_efi_log_item_t *);
 STATIC void	xfs_efd_item_abort(xfs_efd_log_item_t *);
 
 
+void
+xfs_efi_item_free(xfs_efi_log_item_t *efip)
+{
+	int nexts = efip->efi_format.efi_nextents;
+
+	if (nexts > XFS_EFI_MAX_FAST_EXTENTS) {
+		kmem_free(efip, sizeof(xfs_efi_log_item_t) +
+				(nexts - 1) * sizeof(xfs_extent_t));
+	} else {
+		kmem_zone_free(xfs_efi_zone, efip);
+	}
+}
 
 /*
  * This returns the number of iovecs needed to log the given efi item.
@@ -120,8 +132,6 @@ xfs_efi_item_pin(xfs_efi_log_item_t *efip)
 STATIC void
 xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale)
 {
-	int		nexts;
-	int		size;
 	xfs_mount_t	*mp;
 	SPLDECL(s);
 
@@ -132,21 +142,11 @@ xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale)
 		 * xfs_trans_delete_ail() drops the AIL lock.
 		 */
 		xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s);
-
-		nexts = efip->efi_format.efi_nextents;
-		if (nexts > XFS_EFI_MAX_FAST_EXTENTS) {
-			size = sizeof(xfs_efi_log_item_t);
-			size += (nexts - 1) * sizeof(xfs_extent_t);
-			kmem_free(efip, size);
-		} else {
-			kmem_zone_free(xfs_efi_zone, efip);
-		}
+		xfs_efi_item_free(efip);
 	} else {
 		efip->efi_flags |= XFS_EFI_COMMITTED;
 		AIL_UNLOCK(mp, s);
 	}
-
-	return;
 }
 
 /*
@@ -159,8 +159,6 @@ xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale)
 STATIC void
 xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp)
 {
-	int		nexts;
-	int		size;
 	xfs_mount_t	*mp;
 	xfs_log_item_desc_t	*lidp;
 	SPLDECL(s);
@@ -178,23 +176,11 @@ xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp)
 		 * xfs_trans_delete_ail() drops the AIL lock.
 		 */
 		xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s);
-		/*
-		 * now free the item itself
-		 */
-		nexts = efip->efi_format.efi_nextents;
-		if (nexts > XFS_EFI_MAX_FAST_EXTENTS) {
-			size = sizeof(xfs_efi_log_item_t);
-			size += (nexts - 1) * sizeof(xfs_extent_t);
-			kmem_free(efip, size);
-		} else {
-			kmem_zone_free(xfs_efi_zone, efip);
-		}
+		xfs_efi_item_free(efip);
 	} else {
 		efip->efi_flags |= XFS_EFI_COMMITTED;
 		AIL_UNLOCK(mp, s);
 	}
-
-	return;
 }
 
 /*
@@ -245,18 +231,7 @@ xfs_efi_item_committed(xfs_efi_log_item_t *efip, xfs_lsn_t lsn)
 STATIC void
 xfs_efi_item_abort(xfs_efi_log_item_t *efip)
 {
-	int	nexts;
-	int	size;
-
-	nexts = efip->efi_format.efi_nextents;
-	if (nexts > XFS_EFI_MAX_FAST_EXTENTS) {
-		size = sizeof(xfs_efi_log_item_t);
-		size += (nexts - 1) * sizeof(xfs_extent_t);
-		kmem_free(efip, size);
-	} else {
-		kmem_zone_free(xfs_efi_zone, efip);
-	}
-	return;
+	xfs_efi_item_free(efip);
 }
 
 /*
@@ -288,7 +263,7 @@ xfs_efi_item_committing(xfs_efi_log_item_t *efip, xfs_lsn_t lsn)
 /*
  * This is the ops vector shared by all efi log items.
  */
-struct xfs_item_ops xfs_efi_item_ops = {
+STATIC struct xfs_item_ops xfs_efi_item_ops = {
 	.iop_size	= (uint(*)(xfs_log_item_t*))xfs_efi_item_size,
 	.iop_format	= (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
 					xfs_efi_item_format,
@@ -355,8 +330,6 @@ xfs_efi_release(xfs_efi_log_item_t	*efip,
 {
 	xfs_mount_t	*mp;
 	int		extents_left;
-	uint		size;
-	int		nexts;
 	SPLDECL(s);
 
 	mp = efip->efi_item.li_mountp;
@@ -372,20 +345,10 @@ xfs_efi_release(xfs_efi_log_item_t	*efip,
 		 * xfs_trans_delete_ail() drops the AIL lock.
 		 */
 		xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s);
+		xfs_efi_item_free(efip);
 	} else {
 		AIL_UNLOCK(mp, s);
 	}
-
-	if (extents_left == 0) {
-		nexts = efip->efi_format.efi_nextents;
-		if (nexts > XFS_EFI_MAX_FAST_EXTENTS) {
-			size = sizeof(xfs_efi_log_item_t);
-			size += (nexts - 1) * sizeof(xfs_extent_t);
-			kmem_free(efip, size);
-		} else {
-			kmem_zone_free(xfs_efi_zone, efip);
-		}
-	}
 }
 
 /*
@@ -398,8 +361,6 @@ STATIC void
 xfs_efi_cancel(
 	xfs_efi_log_item_t	*efip)
 {
-	int		nexts;
-	int		size;
 	xfs_mount_t	*mp;
 	SPLDECL(s);
 
@@ -410,26 +371,25 @@ xfs_efi_cancel(
 		 * xfs_trans_delete_ail() drops the AIL lock.
 		 */
 		xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s);
-
-		nexts = efip->efi_format.efi_nextents;
-		if (nexts > XFS_EFI_MAX_FAST_EXTENTS) {
-			size = sizeof(xfs_efi_log_item_t);
-			size += (nexts - 1) * sizeof(xfs_extent_t);
-			kmem_free(efip, size);
-		} else {
-			kmem_zone_free(xfs_efi_zone, efip);
-		}
+		xfs_efi_item_free(efip);
 	} else {
 		efip->efi_flags |= XFS_EFI_CANCELED;
 		AIL_UNLOCK(mp, s);
 	}
-
-	return;
 }
 
+STATIC void
+xfs_efd_item_free(xfs_efd_log_item_t *efdp)
+{
+	int nexts = efdp->efd_format.efd_nextents;
 
-
-
+	if (nexts > XFS_EFD_MAX_FAST_EXTENTS) {
+		kmem_free(efdp, sizeof(xfs_efd_log_item_t) +
+				(nexts - 1) * sizeof(xfs_extent_t));
+	} else {
+		kmem_zone_free(xfs_efd_zone, efdp);
+	}
+}
 
 /*
  * This returns the number of iovecs needed to log the given efd item.
@@ -533,9 +493,6 @@ xfs_efd_item_unlock(xfs_efd_log_item_t *efdp)
 STATIC xfs_lsn_t
 xfs_efd_item_committed(xfs_efd_log_item_t *efdp, xfs_lsn_t lsn)
 {
-	uint	size;
-	int	nexts;
-
 	/*
 	 * If we got a log I/O error, it's always the case that the LR with the
 	 * EFI got unpinned and freed before the EFD got aborted.
@@ -543,15 +500,7 @@ xfs_efd_item_committed(xfs_efd_log_item_t *efdp, xfs_lsn_t lsn)
 	if ((efdp->efd_item.li_flags & XFS_LI_ABORTED) == 0)
 		xfs_efi_release(efdp->efd_efip, efdp->efd_format.efd_nextents);
 
-	nexts = efdp->efd_format.efd_nextents;
-	if (nexts > XFS_EFD_MAX_FAST_EXTENTS) {
-		size = sizeof(xfs_efd_log_item_t);
-		size += (nexts - 1) * sizeof(xfs_extent_t);
-		kmem_free(efdp, size);
-	} else {
-		kmem_zone_free(xfs_efd_zone, efdp);
-	}
-
+	xfs_efd_item_free(efdp);
 	return (xfs_lsn_t)-1;
 }
 
@@ -565,9 +514,6 @@ xfs_efd_item_committed(xfs_efd_log_item_t *efdp, xfs_lsn_t lsn)
 STATIC void
 xfs_efd_item_abort(xfs_efd_log_item_t *efdp)
 {
-	int	nexts;
-	int	size;
-
 	/*
 	 * If we got a log I/O error, it's always the case that the LR with the
 	 * EFI got unpinned and freed before the EFD got aborted. So don't
@@ -576,15 +522,7 @@ xfs_efd_item_abort(xfs_efd_log_item_t *efdp)
 	if ((efdp->efd_item.li_flags & XFS_LI_ABORTED) == 0)
 		xfs_efi_cancel(efdp->efd_efip);
 
-	nexts = efdp->efd_format.efd_nextents;
-	if (nexts > XFS_EFD_MAX_FAST_EXTENTS) {
-		size = sizeof(xfs_efd_log_item_t);
-		size += (nexts - 1) * sizeof(xfs_extent_t);
-		kmem_free(efdp, size);
-	} else {
-		kmem_zone_free(xfs_efd_zone, efdp);
-	}
-	return;
+	xfs_efd_item_free(efdp);
 }
 
 /*
@@ -615,7 +553,7 @@ xfs_efd_item_committing(xfs_efd_log_item_t *efip, xfs_lsn_t lsn)
 /*
  * This is the ops vector shared by all efd log items.
  */
-struct xfs_item_ops xfs_efd_item_ops = {
+STATIC struct xfs_item_ops xfs_efd_item_ops = {
 	.iop_size	= (uint(*)(xfs_log_item_t*))xfs_efd_item_size,
 	.iop_format	= (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
 					xfs_efd_item_format,
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index 7122d6101d15..d433bac9f59d 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -118,6 +118,8 @@ xfs_efi_log_item_t	*xfs_efi_init(struct xfs_mount *, uint);
 xfs_efd_log_item_t	*xfs_efd_init(struct xfs_mount *, xfs_efi_log_item_t *,
 				      uint);
 
+void			xfs_efi_item_free(xfs_efi_log_item_t *);
+
 #endif	/* __KERNEL__ */
 
 #endif	/* __XFS_EXTFREE_ITEM_H__ */
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 6ee8443bf9d3..095af0a5cff3 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -60,7 +60,8 @@ struct fsxattr {
 	__u32		fsx_xflags;	/* xflags field value (get/set) */
 	__u32		fsx_extsize;	/* extsize field value (get/set)*/
 	__u32		fsx_nextents;	/* nextents field value (get)	*/
-	unsigned char	fsx_pad[16];
+	__u32		fsx_projid;	/* project identifier (get/set) */
+	unsigned char	fsx_pad[12];
 };
 #endif
 
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 21213057c27f..ca535d613190 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -559,32 +559,6 @@ xfs_reserve_blocks(
 	return(0);
 }
 
-void
-xfs_fs_log_dummy(xfs_mount_t *mp)
-{
-	xfs_trans_t *tp;
-	xfs_inode_t *ip;
-
-
-	tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
-	atomic_inc(&mp->m_active_trans);
-	if (xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0)) {
-		xfs_trans_cancel(tp, 0);
-		return;
-	}
-
-	ip = mp->m_rootip;
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
-	xfs_trans_ihold(tp, ip);
-	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-	xfs_trans_set_sync(tp);
-	xfs_trans_commit(tp, 0, NULL);
-
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-}
-
 int
 xfs_fs_goingdown(
 	xfs_mount_t	*mp,
diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h
index 803c4d17a057..44be188674a6 100644
--- a/fs/xfs/xfs_ialloc_btree.h
+++ b/fs/xfs/xfs_ialloc_btree.h
@@ -100,9 +100,13 @@ xfs_inofree_t xfs_inobt_mask(int i);
 #endif
 #if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_IS_FREE)
 int xfs_inobt_is_free(xfs_inobt_rec_t *rp, int i);
-#define	XFS_INOBT_IS_FREE(rp,i)	xfs_inobt_is_free(rp,i)
+#define	XFS_INOBT_IS_FREE(rp,i)		xfs_inobt_is_free(rp,i)
+#define	XFS_INOBT_IS_FREE_DISK(rp,i)	xfs_inobt_is_free_disk(rp,i)
 #else
-#define	XFS_INOBT_IS_FREE(rp,i)	(((rp)->ir_free & XFS_INOBT_MASK(i)) != 0)
+#define	XFS_INOBT_IS_FREE(rp,i)	\
+	(((rp)->ir_free & XFS_INOBT_MASK(i)) != 0)
+#define XFS_INOBT_IS_FREE_DISK(rp,i) \
+	((INT_GET((rp)->ir_free, ARCH_CONVERT) & XFS_INOBT_MASK(i)) != 0)
 #endif
 #if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_SET_FREE)
 void xfs_inobt_set_free(xfs_inobt_rec_t *rp, int i);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index bc8c8c7f9039..34bdf5909687 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -146,51 +146,6 @@ xfs_inobp_check(
 #endif
 
 /*
- * called from bwrite on xfs inode buffers
- */
-void
-xfs_inobp_bwcheck(xfs_buf_t *bp)
-{
-	xfs_mount_t	*mp;
-	int		i;
-	int		j;
-	xfs_dinode_t	*dip;
-
-	ASSERT(XFS_BUF_FSPRIVATE3(bp, void *) != NULL);
-
-	mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *);
-
-
-	j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
-
-	for (i = 0; i < j; i++)  {
-		dip = (xfs_dinode_t *) xfs_buf_offset(bp,
-						i * mp->m_sb.sb_inodesize);
-		if (INT_GET(dip->di_core.di_magic, ARCH_CONVERT) != XFS_DINODE_MAGIC) {
-			cmn_err(CE_WARN,
-"Bad magic # 0x%x in XFS inode buffer 0x%Lx, starting blockno %Ld, offset 0x%x",
-				INT_GET(dip->di_core.di_magic, ARCH_CONVERT),
-				(__uint64_t)(__psunsigned_t) bp,
-				(__int64_t) XFS_BUF_ADDR(bp),
-				xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize));
-			xfs_fs_cmn_err(CE_WARN, mp,
-				"corrupt, unmount and run xfs_repair");
-		}
-		if (!dip->di_next_unlinked)  {
-			cmn_err(CE_WARN,
-"Bad next_unlinked field (0) in XFS inode buffer 0x%p, starting blockno %Ld, offset 0x%x",
-				(__uint64_t)(__psunsigned_t) bp,
-				(__int64_t) XFS_BUF_ADDR(bp),
-				xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize));
-			xfs_fs_cmn_err(CE_WARN, mp,
-				"corrupt, unmount and run xfs_repair");
-		}
-	}
-
-	return;
-}
-
-/*
  * This routine is called to map an inode number within a file
  * system to the buffer containing the on-disk version of the
  * inode.  It returns a pointer to the buffer containing the
@@ -203,7 +158,7 @@ xfs_inobp_bwcheck(xfs_buf_t *bp)
  * Use xfs_imap() to determine the size and location of the
  * buffer to read from disk.
  */
-int
+STATIC int
 xfs_inotobp(
 	xfs_mount_t	*mp,
 	xfs_trans_t	*tp,
@@ -1247,26 +1202,32 @@ xfs_ialloc(
 	case S_IFREG:
 	case S_IFDIR:
 		if (unlikely(pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
-			if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) {
-				if ((mode & S_IFMT) == S_IFDIR) {
-					ip->i_d.di_flags |= XFS_DIFLAG_RTINHERIT;
-				} else {
-					ip->i_d.di_flags |= XFS_DIFLAG_REALTIME;
+			uint	di_flags = 0;
+
+			if ((mode & S_IFMT) == S_IFDIR) {
+				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
+					di_flags |= XFS_DIFLAG_RTINHERIT;
+			} else {
+				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) {
+					di_flags |= XFS_DIFLAG_REALTIME;
 					ip->i_iocore.io_flags |= XFS_IOCORE_RT;
 				}
 			}
 			if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) &&
 			    xfs_inherit_noatime)
-				ip->i_d.di_flags |= XFS_DIFLAG_NOATIME;
+				di_flags |= XFS_DIFLAG_NOATIME;
 			if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) &&
 			    xfs_inherit_nodump)
-				ip->i_d.di_flags |= XFS_DIFLAG_NODUMP;
+				di_flags |= XFS_DIFLAG_NODUMP;
 			if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) &&
 			    xfs_inherit_sync)
-				ip->i_d.di_flags |= XFS_DIFLAG_SYNC;
+				di_flags |= XFS_DIFLAG_SYNC;
 			if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) &&
 			    xfs_inherit_nosymlinks)
-				ip->i_d.di_flags |= XFS_DIFLAG_NOSYMLINKS;
+				di_flags |= XFS_DIFLAG_NOSYMLINKS;
+			if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
+				di_flags |= XFS_DIFLAG_PROJINHERIT;
+			ip->i_d.di_flags |= di_flags;
 		}
 		/* FALLTHROUGH */
 	case S_IFLNK:
@@ -2156,7 +2117,7 @@ static __inline__ int xfs_inode_clean(xfs_inode_t *ip)
 		(ip->i_update_core == 0));
 }
 
-void
+STATIC void
 xfs_ifree_cluster(
 	xfs_inode_t	*free_ip,
 	xfs_trans_t	*tp,
@@ -2875,7 +2836,7 @@ xfs_iunpin(
  * be subsequently pinned once someone is waiting for it to be
  * unpinned.
  */
-void
+STATIC void
 xfs_iunpin_wait(
 	xfs_inode_t	*ip)
 {
@@ -3601,107 +3562,43 @@ corrupt_out:
 
 
 /*
- * Flush all inactive inodes in mp.  Return true if no user references
- * were found, false otherwise.
+ * Flush all inactive inodes in mp.
  */
-int
+void
 xfs_iflush_all(
-	xfs_mount_t	*mp,
-	int		flag)
+	xfs_mount_t	*mp)
 {
-	int		busy;
-	int		done;
-	int		purged;
 	xfs_inode_t	*ip;
-	vmap_t		vmap;
 	vnode_t		*vp;
 
-	busy = done = 0;
-	while (!done) {
-		purged = 0;
-		XFS_MOUNT_ILOCK(mp);
-		ip = mp->m_inodes;
-		if (ip == NULL) {
-			break;
-		}
-		do {
-			/* Make sure we skip markers inserted by sync */
-			if (ip->i_mount == NULL) {
-				ip = ip->i_mnext;
-				continue;
-			}
-
-			/*
-			 * It's up to our caller to purge the root
-			 * and quota vnodes later.
-			 */
-			vp = XFS_ITOV_NULL(ip);
-
-			if (!vp) {
-				XFS_MOUNT_IUNLOCK(mp);
-				xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC);
-				purged = 1;
-				break;
-			}
+ again:
+	XFS_MOUNT_ILOCK(mp);
+	ip = mp->m_inodes;
+	if (ip == NULL)
+		goto out;
 
-			if (vn_count(vp) != 0) {
-				if (vn_count(vp) == 1 &&
-				    (ip == mp->m_rootip ||
-				     (mp->m_quotainfo &&
-				      (ip->i_ino == mp->m_sb.sb_uquotino ||
-				       ip->i_ino == mp->m_sb.sb_gquotino)))) {
+	do {
+		/* Make sure we skip markers inserted by sync */
+		if (ip->i_mount == NULL) {
+			ip = ip->i_mnext;
+			continue;
+		}
 
-					ip = ip->i_mnext;
-					continue;
-				}
-				if (!(flag & XFS_FLUSH_ALL)) {
-					busy = 1;
-					done = 1;
-					break;
-				}
-				/*
-				 * Ignore busy inodes but continue flushing
-				 * others.
-				 */
-				ip = ip->i_mnext;
-				continue;
-			}
-			/*
-			 * Sample vp mapping while holding mp locked on MP
-			 * systems, so we don't purge a reclaimed or
-			 * nonexistent vnode.  We break from the loop
-			 * since we know that we modify
-			 * it by pulling ourselves from it in xfs_reclaim()
-			 * called via vn_purge() below.  Set ip to the next
-			 * entry in the list anyway so we'll know below
-			 * whether we reached the end or not.
-			 */
-			VMAP(vp, vmap);
+		vp = XFS_ITOV_NULL(ip);
+		if (!vp) {
 			XFS_MOUNT_IUNLOCK(mp);
+			xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC);
+			goto again;
+		}
 
-			vn_purge(vp, &vmap);
+		ASSERT(vn_count(vp) == 0);
 
-			purged = 1;
-			break;
-		} while (ip != mp->m_inodes);
-		/*
-		 * We need to distinguish between when we exit the loop
-		 * after a purge and when we simply hit the end of the
-		 * list.  We can't use the (ip == mp->m_inodes) test,
-		 * because when we purge an inode at the start of the list
-		 * the next inode on the list becomes mp->m_inodes.  That
-		 * would cause such a test to bail out early.  The purged
-		 * variable tells us how we got out of the loop.
-		 */
-		if (!purged) {
-			done = 1;
-		}
-	}
+		ip = ip->i_mnext;
+	} while (ip != mp->m_inodes);
+ out:
 	XFS_MOUNT_IUNLOCK(mp);
-	return !busy;
 }
 
-
 /*
  * xfs_iaccess: check accessibility of inode for mode.
  */
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 37e1c316f3b6..54d9e54c7c95 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -412,11 +412,6 @@ void xfs_ifork_next_set(xfs_inode_t *ip, int w, int n);
 #define	XFS_IFLUSH_DELWRI		5
 
 /*
- * Flags for xfs_iflush_all.
- */
-#define	XFS_FLUSH_ALL		0x1
-
-/*
  * Flags for xfs_itruncate_start().
  */
 #define	XFS_ITRUNC_DEFINITE	0x1
@@ -487,8 +482,6 @@ int		xfs_finish_reclaim_all(struct xfs_mount *, int);
 /*
  * xfs_inode.c prototypes.
  */
-int		xfs_inotobp(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
-			    xfs_dinode_t **, struct xfs_buf **, int *);
 int		xfs_itobp(struct xfs_mount *, struct xfs_trans *,
 			  xfs_inode_t *, xfs_dinode_t **, struct xfs_buf **,
 			  xfs_daddr_t);
@@ -522,7 +515,7 @@ void		xfs_ipin(xfs_inode_t *);
 void		xfs_iunpin(xfs_inode_t *);
 int		xfs_iextents_copy(xfs_inode_t *, xfs_bmbt_rec_t *, int);
 int		xfs_iflush(xfs_inode_t *, uint);
-int		xfs_iflush_all(struct xfs_mount *, int);
+void		xfs_iflush_all(struct xfs_mount *);
 int		xfs_iaccess(xfs_inode_t *, mode_t, cred_t *);
 uint		xfs_iroundup(uint);
 void		xfs_ichgtime(xfs_inode_t *, int);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 768cb1816b8e..0eed30f5cb19 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -910,7 +910,7 @@ xfs_inode_item_committing(
 /*
  * This is the ops vector shared by all buf log items.
  */
-struct xfs_item_ops xfs_inode_item_ops = {
+STATIC struct xfs_item_ops xfs_inode_item_ops = {
 	.iop_size	= (uint(*)(xfs_log_item_t*))xfs_inode_item_size,
 	.iop_format	= (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
 					xfs_inode_item_format,
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 469e1a7939d4..2edd6769e5d3 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -385,15 +385,15 @@ xfs_iomap_write_direct(
 	int		nimaps, maps;
 	int		error;
 	int		bmapi_flag;
+	int		quota_flag;
 	int		rt;
 	xfs_trans_t	*tp;
 	xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS], *imapp;
 	xfs_bmap_free_t free_list;
 	int		aeof;
-	xfs_filblks_t	datablocks;
+	xfs_filblks_t	datablocks, qblocks, resblks;
 	int		committed;
 	int		numrtextents;
-	uint		resblks;
 
 	/*
 	 * Make sure that the dquots are there. This doesn't hold
@@ -419,7 +419,6 @@ xfs_iomap_write_direct(
 		xfs_fileoff_t	map_last_fsb;
 
 		map_last_fsb = ret_imap->br_blockcount + ret_imap->br_startoff;
-
 		if (map_last_fsb < last_fsb) {
 			last_fsb = map_last_fsb;
 			count_fsb = last_fsb - offset_fsb;
@@ -428,56 +427,47 @@ xfs_iomap_write_direct(
 	}
 
 	/*
-	 * determine if reserving space on
-	 * the data or realtime partition.
+	 * Determine if reserving space on the data or realtime partition.
 	 */
 	if ((rt = XFS_IS_REALTIME_INODE(ip))) {
-		int	sbrtextsize, iprtextsize;
+		xfs_extlen_t	extsz;
 
-		sbrtextsize = mp->m_sb.sb_rextsize;
-		iprtextsize =
-			ip->i_d.di_extsize ? ip->i_d.di_extsize : sbrtextsize;
-		numrtextents = (count_fsb + iprtextsize - 1);
-		do_div(numrtextents, sbrtextsize);
+		if (!(extsz = ip->i_d.di_extsize))
+			extsz = mp->m_sb.sb_rextsize;
+		numrtextents = qblocks = (count_fsb + extsz - 1);
+		do_div(numrtextents, mp->m_sb.sb_rextsize);
+		quota_flag = XFS_QMOPT_RES_RTBLKS;
 		datablocks = 0;
 	} else {
-		datablocks = count_fsb;
+		datablocks = qblocks = count_fsb;
+		quota_flag = XFS_QMOPT_RES_REGBLKS;
 		numrtextents = 0;
 	}
 
 	/*
-	 * allocate and setup the transaction
+	 * Allocate and setup the transaction
 	 */
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
-
 	resblks = XFS_DIOSTRAT_SPACE_RES(mp, datablocks);
-
 	error = xfs_trans_reserve(tp, resblks,
 			XFS_WRITE_LOG_RES(mp), numrtextents,
 			XFS_TRANS_PERM_LOG_RES,
 			XFS_WRITE_LOG_COUNT);
 
 	/*
-	 * check for running out of space
+	 * Check for running out of space, note: need lock to return
 	 */
 	if (error)
-		/*
-		 * Free the transaction structure.
-		 */
 		xfs_trans_cancel(tp, 0);
-
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
-
 	if (error)
-		goto error_out; /* Don't return in above if .. trans ..,
-					need lock to return */
+		goto error_out;
 
-	if (XFS_TRANS_RESERVE_BLKQUOTA(mp, tp, ip, resblks)) {
+	if (XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag)) {
 		error = (EDQUOT);
 		goto error1;
 	}
-	nimaps = 1;
 
 	bmapi_flag = XFS_BMAPI_WRITE;
 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
@@ -487,31 +477,29 @@ xfs_iomap_write_direct(
 		bmapi_flag |= XFS_BMAPI_PREALLOC;
 
 	/*
-	 * issue the bmapi() call to allocate the blocks
+	 * Issue the bmapi() call to allocate the blocks
 	 */
 	XFS_BMAP_INIT(&free_list, &firstfsb);
+	nimaps = 1;
 	imapp = &imap[0];
 	error = xfs_bmapi(tp, ip, offset_fsb, count_fsb,
 		bmapi_flag, &firstfsb, 0, imapp, &nimaps, &free_list);
-	if (error) {
+	if (error)
 		goto error0;
-	}
 
 	/*
-	 * complete the transaction
+	 * Complete the transaction
 	 */
-
 	error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed);
-	if (error) {
+	if (error)
 		goto error0;
-	}
-
 	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
-	if (error) {
+	if (error)
 		goto error_out;
-	}
 
-	/* copy any maps to caller's array and return any error. */
+	/*
+	 * Copy any maps to caller's array and return any error.
+	 */
 	if (nimaps == 0) {
 		error = (ENOSPC);
 		goto error_out;
@@ -530,10 +518,11 @@ xfs_iomap_write_direct(
         }
 	return 0;
 
- error0:	/* Cancel bmap, unlock inode, and cancel trans */
+error0:	/* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
 	xfs_bmap_cancel(&free_list);
+	XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag);
 
- error1:	/* Just cancel transaction */
+error1:	/* Just cancel transaction */
 	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
 	*nmaps = 0;	/* nothing set-up here */
 
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 092d5fb096b1..1cd2ac163877 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -134,7 +134,7 @@ STATIC void	xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
 #define xlog_verify_tail_lsn(a,b,c)
 #endif
 
-int		xlog_iclogs_empty(xlog_t *log);
+STATIC int	xlog_iclogs_empty(xlog_t *log);
 
 #ifdef DEBUG
 int xlog_do_error = 0;
@@ -1857,7 +1857,7 @@ xlog_write(xfs_mount_t *	mp,
  *
  * State Change: DIRTY -> ACTIVE
  */
-void
+STATIC void
 xlog_state_clean_log(xlog_t *log)
 {
 	xlog_in_core_t	*iclog;
@@ -3542,7 +3542,7 @@ xfs_log_force_umount(
 	return (retval);
 }
 
-int
+STATIC int
 xlog_iclogs_empty(xlog_t *log)
 {
 	xlog_in_core_t	*iclog;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index c31e3ce3be66..1a1d452f15f9 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -535,7 +535,6 @@ typedef struct log {
 
 /* common routines */
 extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
-extern int	 xlog_find_head(xlog_t *log, xfs_daddr_t *head_blk);
 extern int	 xlog_find_tail(xlog_t	*log,
 				xfs_daddr_t *head_blk,
 				xfs_daddr_t *tail_blk,
@@ -548,7 +547,6 @@ extern void	 xlog_recover_process_iunlinks(xlog_t *log);
 extern struct xfs_buf *xlog_get_bp(xlog_t *, int);
 extern void	 xlog_put_bp(struct xfs_buf *);
 extern int	 xlog_bread(xlog_t *, xfs_daddr_t, int, struct xfs_buf *);
-extern xfs_caddr_t xlog_align(xlog_t *, xfs_daddr_t, int, struct xfs_buf *);
 
 /* iclog tracing */
 #define XLOG_TRACE_GRAB_FLUSH  1
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 9824b5bf0ec0..0aac28ddb81c 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -148,7 +148,7 @@ xlog_bread(
  * The buffer is kept locked across the write and is returned locked.
  * This can only be used for synchronous log writes.
  */
-int
+STATIC int
 xlog_bwrite(
 	xlog_t		*log,
 	xfs_daddr_t	blk_no,
@@ -179,7 +179,7 @@ xlog_bwrite(
 	return error;
 }
 
-xfs_caddr_t
+STATIC xfs_caddr_t
 xlog_align(
 	xlog_t		*log,
 	xfs_daddr_t	blk_no,
@@ -528,7 +528,7 @@ out:
  *
  * Return: zero if normal, non-zero if error.
  */
-int
+STATIC int
 xlog_find_head(
 	xlog_t 		*log,
 	xfs_daddr_t	*return_head_blk)
@@ -1964,7 +1964,8 @@ xlog_recover_do_reg_buffer(
 		 * probably a good thing to do for other buf types also.
 		 */
 		error = 0;
-		if (buf_f->blf_flags & (XFS_BLI_UDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
+		if (buf_f->blf_flags &
+		   (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
 			error = xfs_qm_dqcheck((xfs_disk_dquot_t *)
 					       item->ri_buf[i].i_addr,
 					       -1, 0, XFS_QMOPT_DOWARN,
@@ -2030,6 +2031,7 @@ xfs_qm_dqcheck(
 	}
 
 	if (INT_GET(ddq->d_flags, ARCH_CONVERT) != XFS_DQ_USER &&
+	    INT_GET(ddq->d_flags, ARCH_CONVERT) != XFS_DQ_PROJ &&
 	    INT_GET(ddq->d_flags, ARCH_CONVERT) != XFS_DQ_GROUP) {
 		if (flags & XFS_QMOPT_DOWARN)
 			cmn_err(CE_ALERT,
@@ -2135,6 +2137,8 @@ xlog_recover_do_dquot_buffer(
 	type = 0;
 	if (buf_f->blf_flags & XFS_BLI_UDQUOT_BUF)
 		type |= XFS_DQ_USER;
+	if (buf_f->blf_flags & XFS_BLI_PDQUOT_BUF)
+		type |= XFS_DQ_PROJ;
 	if (buf_f->blf_flags & XFS_BLI_GDQUOT_BUF)
 		type |= XFS_DQ_GROUP;
 	/*
@@ -2247,7 +2251,8 @@ xlog_recover_do_buffer_trans(
 	error = 0;
 	if (flags & XFS_BLI_INODE_BUF) {
 		error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
-	} else if (flags & (XFS_BLI_UDQUOT_BUF | XFS_BLI_GDQUOT_BUF)) {
+	} else if (flags &
+		  (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
 		xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
 	} else {
 		xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
@@ -2619,7 +2624,7 @@ xlog_recover_do_dquot_trans(
 	 * This type of quotas was turned off, so ignore this record.
 	 */
 	type = INT_GET(recddq->d_flags, ARCH_CONVERT) &
-			(XFS_DQ_USER | XFS_DQ_GROUP);
+			(XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
 	ASSERT(type);
 	if (log->l_quotaoffs_flag & type)
 		return (0);
@@ -2742,7 +2747,6 @@ xlog_recover_do_efd_trans(
 	xfs_efi_log_item_t	*efip = NULL;
 	xfs_log_item_t		*lip;
 	int			gen;
-	int			nexts;
 	__uint64_t		efi_id;
 	SPLDECL(s);
 
@@ -2777,22 +2781,15 @@ xlog_recover_do_efd_trans(
 		}
 		lip = xfs_trans_next_ail(mp, lip, &gen, NULL);
 	}
-	if (lip == NULL) {
-		AIL_UNLOCK(mp, s);
-	}
 
 	/*
 	 * If we found it, then free it up.  If it wasn't there, it
 	 * must have been overwritten in the log.  Oh well.
 	 */
 	if (lip != NULL) {
-		nexts = efip->efi_format.efi_nextents;
-		if (nexts > XFS_EFI_MAX_FAST_EXTENTS) {
-			kmem_free(lip, sizeof(xfs_efi_log_item_t) +
-				  ((nexts - 1) * sizeof(xfs_extent_t)));
-		} else {
-			kmem_zone_free(xfs_efi_zone, efip);
-		}
+		xfs_efi_item_free(efip);
+	} else {
+		AIL_UNLOCK(mp, s);
 	}
 }
 
diff --git a/fs/xfs/xfs_macros.c b/fs/xfs/xfs_macros.c
index ce4f46c6b3ab..698c2cd62858 100644
--- a/fs/xfs/xfs_macros.c
+++ b/fs/xfs/xfs_macros.c
@@ -1658,6 +1658,11 @@ xfs_inobt_is_free(xfs_inobt_rec_t *rp, int i)
 {
 	return XFS_INOBT_IS_FREE(rp, i);
 }
+int
+xfs_inobt_is_free_disk(xfs_inobt_rec_t *rp, int i)
+{
+	return XFS_INOBT_IS_FREE_DISK(rp, i);
+}
 #endif
 
 #if XFS_WANT_FUNCS_C || (XFS_WANT_SPACE_C && XFSSO_XFS_INOBT_IS_LAST_REC)
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 2ec967d93e5a..82e1646e6243 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -64,6 +64,7 @@
 STATIC void	xfs_mount_log_sbunit(xfs_mount_t *, __int64_t);
 STATIC int	xfs_uuid_mount(xfs_mount_t *);
 STATIC void	xfs_uuid_unmount(xfs_mount_t *mp);
+STATIC void	xfs_unmountfs_wait(xfs_mount_t *);
 
 static struct {
     short offset;
@@ -555,7 +556,7 @@ xfs_readsb(xfs_mount_t *mp)
  * fields from the superblock associated with the given
  * mount structure
  */
-void
+STATIC void
 xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
 {
 	int	i;
@@ -1081,7 +1082,7 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
 	int64_t		fsid;
 #endif
 
-	xfs_iflush_all(mp, XFS_FLUSH_ALL);
+	xfs_iflush_all(mp);
 
 	XFS_QM_DQPURGEALL(mp,
 		XFS_QMOPT_UQUOTA | XFS_QMOPT_GQUOTA | XFS_QMOPT_UMOUNTING);
@@ -1111,15 +1112,6 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
 	 */
 	ASSERT(mp->m_inodes == NULL);
 
-	/*
-	 * We may have bufs that are in the process of getting written still.
-	 * We must wait for the I/O completion of those. The sync flag here
-	 * does a two pass iteration thru the bufcache.
-	 */
-	if (XFS_FORCED_SHUTDOWN(mp)) {
-		xfs_incore_relse(mp->m_ddev_targp, 0, 1); /* synchronous */
-	}
-
 	xfs_unmountfs_close(mp, cr);
 	if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0)
 		xfs_uuid_unmount(mp);
@@ -1146,7 +1138,7 @@ xfs_unmountfs_close(xfs_mount_t *mp, struct cred *cr)
 	xfs_free_buftarg(mp->m_ddev_targp, 0);
 }
 
-void
+STATIC void
 xfs_unmountfs_wait(xfs_mount_t *mp)
 {
 	if (mp->m_logdev_targp != mp->m_ddev_targp)
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 30dd08fb9f57..5affba38a577 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -141,7 +141,7 @@ typedef int	(*xfs_dqattach_t)(struct xfs_inode *, uint);
 typedef void	(*xfs_dqdetach_t)(struct xfs_inode *);
 typedef int	(*xfs_dqpurgeall_t)(struct xfs_mount *, uint);
 typedef int	(*xfs_dqvopalloc_t)(struct xfs_mount *,
-			struct xfs_inode *, uid_t, gid_t, uint,
+			struct xfs_inode *, uid_t, gid_t, prid_t, uint,
 			struct xfs_dquot **, struct xfs_dquot **);
 typedef void	(*xfs_dqvopcreate_t)(struct xfs_trans *, struct xfs_inode *,
 			struct xfs_dquot *, struct xfs_dquot *);
@@ -185,8 +185,8 @@ typedef struct xfs_qmops {
 	(*(mp)->m_qm_ops.xfs_dqdetach)(ip)
 #define XFS_QM_DQPURGEALL(mp, fl) \
 	(*(mp)->m_qm_ops.xfs_dqpurgeall)(mp, fl)
-#define XFS_QM_DQVOPALLOC(mp, ip, uid, gid, fl, dq1, dq2) \
-	(*(mp)->m_qm_ops.xfs_dqvopalloc)(mp, ip, uid, gid, fl, dq1, dq2)
+#define XFS_QM_DQVOPALLOC(mp, ip, uid, gid, prid, fl, dq1, dq2) \
+	(*(mp)->m_qm_ops.xfs_dqvopalloc)(mp, ip, uid, gid, prid, fl, dq1, dq2)
 #define XFS_QM_DQVOPCREATE(mp, tp, ip, dq1, dq2) \
 	(*(mp)->m_qm_ops.xfs_dqvopcreate)(tp, ip, dq1, dq2)
 #define XFS_QM_DQVOPRENAME(mp, ip) \
@@ -544,7 +544,6 @@ extern void	xfs_mount_free(xfs_mount_t *mp, int remove_bhv);
 extern int	xfs_mountfs(struct vfs *, xfs_mount_t *mp, int);
 
 extern int	xfs_unmountfs(xfs_mount_t *, struct cred *);
-extern void	xfs_unmountfs_wait(xfs_mount_t *);
 extern void	xfs_unmountfs_close(xfs_mount_t *, struct cred *);
 extern int	xfs_unmountfs_writesb(xfs_mount_t *);
 extern int	xfs_unmount_flush(xfs_mount_t *, int);
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 703ec4efcb41..7134576ae7fa 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -96,7 +96,7 @@ typedef struct xfs_dqblk {
  * flags for q_flags field in the dquot.
  */
 #define XFS_DQ_USER		0x0001		/* a user quota */
-/* #define XFS_DQ_PROJ		0x0002		-- project quota (IRIX) */
+#define XFS_DQ_PROJ		0x0002		/* project quota */
 #define XFS_DQ_GROUP		0x0004		/* a group quota */
 #define XFS_DQ_FLOCKED		0x0008		/* flush lock taken */
 #define XFS_DQ_DIRTY		0x0010		/* dquot is dirty */
@@ -104,6 +104,8 @@ typedef struct xfs_dqblk {
 #define XFS_DQ_INACTIVE		0x0040		/* dq off mplist & hashlist */
 #define XFS_DQ_MARKER		0x0080		/* sentinel */
 
+#define XFS_DQ_ALLTYPES		(XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP)
+
 /*
  * In the worst case, when both user and group quotas are on,
  * we can have a max of three dquots changing in a single transaction.
@@ -124,7 +126,7 @@ typedef struct xfs_dqblk {
 typedef struct xfs_dq_logformat {
 	__uint16_t		qlf_type;      /* dquot log item type */
 	__uint16_t		qlf_size;      /* size of this item */
-	xfs_dqid_t		qlf_id;	       /* usr/grp id number : 32 bits */
+	xfs_dqid_t		qlf_id;	       /* usr/grp/proj id : 32 bits */
 	__int64_t		qlf_blkno;     /* blkno of dquot buffer */
 	__int32_t		qlf_len;       /* len of dquot buffer */
 	__uint32_t		qlf_boffset;   /* off of dquot in buffer */
@@ -152,9 +154,9 @@ typedef struct xfs_qoff_logformat {
 #define XFS_UQUOTA_ACCT	0x0001  /* user quota accounting ON */
 #define XFS_UQUOTA_ENFD	0x0002  /* user quota limits enforced */
 #define XFS_UQUOTA_CHKD	0x0004  /* quotacheck run on usr quotas */
-#define XFS_PQUOTA_ACCT	0x0008  /* (IRIX) project quota accounting ON */
-#define XFS_GQUOTA_ENFD	0x0010  /* group quota limits enforced */
-#define XFS_GQUOTA_CHKD	0x0020  /* quotacheck run on grp quotas */
+#define XFS_PQUOTA_ACCT	0x0008  /* project quota accounting ON */
+#define XFS_OQUOTA_ENFD	0x0010  /* other (grp/prj) quota limits enforced */
+#define XFS_OQUOTA_CHKD	0x0020  /* quotacheck run on other (grp/prj) quotas */
 #define XFS_GQUOTA_ACCT	0x0040  /* group quota accounting ON */
 
 /*
@@ -162,17 +164,22 @@ typedef struct xfs_qoff_logformat {
  * are in the process of getting turned off. These flags are in m_qflags but
  * never in sb_qflags.
  */
-#define XFS_UQUOTA_ACTIVE	0x0080  /* uquotas are being turned off */
-#define XFS_GQUOTA_ACTIVE	0x0100  /* gquotas are being turned off */
+#define XFS_UQUOTA_ACTIVE	0x0100  /* uquotas are being turned off */
+#define XFS_PQUOTA_ACTIVE	0x0200  /* pquotas are being turned off */
+#define XFS_GQUOTA_ACTIVE	0x0400  /* gquotas are being turned off */
 
 /*
  * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees
  * quota will be not be switched off as long as that inode lock is held.
  */
 #define XFS_IS_QUOTA_ON(mp)	((mp)->m_qflags & (XFS_UQUOTA_ACTIVE | \
-						   XFS_GQUOTA_ACTIVE))
+						   XFS_GQUOTA_ACTIVE | \
+						   XFS_PQUOTA_ACTIVE))
+#define XFS_IS_OQUOTA_ON(mp)	((mp)->m_qflags & (XFS_GQUOTA_ACTIVE | \
+						   XFS_PQUOTA_ACTIVE))
 #define XFS_IS_UQUOTA_ON(mp)	((mp)->m_qflags & XFS_UQUOTA_ACTIVE)
 #define XFS_IS_GQUOTA_ON(mp)	((mp)->m_qflags & XFS_GQUOTA_ACTIVE)
+#define XFS_IS_PQUOTA_ON(mp)	((mp)->m_qflags & XFS_PQUOTA_ACTIVE)
 
 /*
  * Flags to tell various functions what to do. Not all of these are meaningful
@@ -182,7 +189,7 @@ typedef struct xfs_qoff_logformat {
 #define XFS_QMOPT_DQLOCK	0x0000001 /* dqlock */
 #define XFS_QMOPT_DQALLOC	0x0000002 /* alloc dquot ondisk if needed */
 #define XFS_QMOPT_UQUOTA	0x0000004 /* user dquot requested */
-#define XFS_QMOPT_GQUOTA	0x0000008 /* group dquot requested */
+#define XFS_QMOPT_PQUOTA	0x0000008 /* project dquot requested */
 #define XFS_QMOPT_FORCE_RES	0x0000010 /* ignore quota limits */
 #define XFS_QMOPT_DQSUSER	0x0000020 /* don't cache super users dquot */
 #define XFS_QMOPT_SBVERSION	0x0000040 /* change superblock version num */
@@ -192,6 +199,7 @@ typedef struct xfs_qoff_logformat {
 #define XFS_QMOPT_DOWARN        0x0000400 /* increase warning cnt if necessary */
 #define XFS_QMOPT_ILOCKED	0x0000800 /* inode is already locked (excl) */
 #define XFS_QMOPT_DQREPAIR	0x0001000 /* repair dquot, if damaged. */
+#define XFS_QMOPT_GQUOTA	0x0002000 /* group dquot requested */
 
 /*
  * flags to xfs_trans_mod_dquot to indicate which field needs to be
@@ -231,7 +239,8 @@ typedef struct xfs_qoff_logformat {
 #define XFS_TRANS_DQ_DELRTBCOUNT XFS_QMOPT_DELRTBCOUNT
 
 
-#define XFS_QMOPT_QUOTALL	(XFS_QMOPT_UQUOTA|XFS_QMOPT_GQUOTA)
+#define XFS_QMOPT_QUOTALL	\
+		(XFS_QMOPT_UQUOTA | XFS_QMOPT_PQUOTA | XFS_QMOPT_GQUOTA)
 #define XFS_QMOPT_RESBLK_MASK	(XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS)
 
 #ifdef __KERNEL__
@@ -246,21 +255,33 @@ typedef struct xfs_qoff_logformat {
  */
 #define XFS_NOT_DQATTACHED(mp, ip) ((XFS_IS_UQUOTA_ON(mp) &&\
 				     (ip)->i_udquot == NULL) || \
-				    (XFS_IS_GQUOTA_ON(mp) && \
+				    (XFS_IS_OQUOTA_ON(mp) && \
 				     (ip)->i_gdquot == NULL))
 
-#define XFS_QM_NEED_QUOTACHECK(mp) ((XFS_IS_UQUOTA_ON(mp) && \
-				     (mp->m_sb.sb_qflags & \
-				      XFS_UQUOTA_CHKD) == 0) || \
-				    (XFS_IS_GQUOTA_ON(mp) && \
-				     (mp->m_sb.sb_qflags & \
-				      XFS_GQUOTA_CHKD) == 0))
+#define XFS_QM_NEED_QUOTACHECK(mp) \
+	((XFS_IS_UQUOTA_ON(mp) && \
+		(mp->m_sb.sb_qflags & XFS_UQUOTA_CHKD) == 0) || \
+	 (XFS_IS_GQUOTA_ON(mp) && \
+		((mp->m_sb.sb_qflags & XFS_OQUOTA_CHKD) == 0 || \
+		 (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT))) || \
+	 (XFS_IS_PQUOTA_ON(mp) && \
+		((mp->m_sb.sb_qflags & XFS_OQUOTA_CHKD) == 0 || \
+		 (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT))))
+
+#define XFS_MOUNT_QUOTA_SET1	(XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\
+				 XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\
+				 XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD)
+
+#define XFS_MOUNT_QUOTA_SET2	(XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\
+				 XFS_UQUOTA_CHKD|XFS_GQUOTA_ACCT|\
+				 XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD)
 
 #define XFS_MOUNT_QUOTA_ALL	(XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\
-				 XFS_UQUOTA_CHKD|XFS_GQUOTA_ACCT|\
-				 XFS_GQUOTA_ENFD|XFS_GQUOTA_CHKD)
+				 XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\
+				 XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD|\
+				 XFS_GQUOTA_ACCT)
 #define XFS_MOUNT_QUOTA_MASK	(XFS_MOUNT_QUOTA_ALL | XFS_UQUOTA_ACTIVE | \
-				 XFS_GQUOTA_ACTIVE)
+				 XFS_GQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE)
 
 
 /*
@@ -331,15 +352,8 @@ typedef struct xfs_dqtrxops {
 #define XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp) \
 	XFS_DQTRXOP_VOID(mp, tp, qo_unreserve_and_mod_dquots)
 
-#define XFS_TRANS_RESERVE_BLKQUOTA(mp, tp, ip, nblks) \
-	XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, 0, \
-				XFS_QMOPT_RES_REGBLKS)
-#define XFS_TRANS_RESERVE_BLKQUOTA_FORCE(mp, tp, ip, nblks) \
-	XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, 0, \
-				XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES)
-#define XFS_TRANS_UNRESERVE_BLKQUOTA(mp, tp, ip, nblks) \
-	XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, -(nblks), 0, \
-				XFS_QMOPT_RES_REGBLKS)
+#define XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, ninos, flags) \
+	XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, -(nblks), -(ninos), flags)
 #define XFS_TRANS_RESERVE_QUOTA(mp, tp, ud, gd, nb, ni, f) \
 	XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, nb, ni, \
 				f | XFS_QMOPT_RES_REGBLKS)
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index cb13f9a1d45b..23b48ac1cb7e 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -234,9 +234,6 @@ xfs_lock_for_rename(
 	return 0;
 }
 
-
-int rename_which_error_return = 0;
-
 /*
  * xfs_rename
  */
@@ -316,7 +313,6 @@ xfs_rename(
 			&num_inodes);
 
 	if (error) {
-		rename_which_error_return = __LINE__;
 		/*
 		 * We have nothing locked, no inode references, and
 		 * no transaction, so just get out.
@@ -332,7 +328,6 @@ xfs_rename(
 		 */
 		if (target_ip == NULL && (src_dp != target_dp) &&
 		    target_dp->i_d.di_nlink >= XFS_MAXLINK) {
-			rename_which_error_return = __LINE__;
 			error = XFS_ERROR(EMLINK);
 			xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED);
 			goto rele_return;
@@ -359,7 +354,6 @@ xfs_rename(
 				XFS_TRANS_PERM_LOG_RES, XFS_RENAME_LOG_COUNT);
 	}
 	if (error) {
-		rename_which_error_return = __LINE__;
 		xfs_trans_cancel(tp, 0);
 		goto rele_return;
 	}
@@ -369,7 +363,6 @@ xfs_rename(
 	 */
 	if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) {
 		xfs_trans_cancel(tp, cancel_flags);
-		rename_which_error_return = __LINE__;
 		goto rele_return;
 	}
 
@@ -413,7 +406,6 @@ xfs_rename(
 		if (spaceres == 0 &&
 		    (error = XFS_DIR_CANENTER(mp, tp, target_dp, target_name,
 				target_namelen))) {
-			rename_which_error_return = __LINE__;
 			goto error_return;
 		}
 		/*
@@ -425,11 +417,9 @@ xfs_rename(
 					   target_namelen, src_ip->i_ino,
 					   &first_block, &free_list, spaceres);
 		if (error == ENOSPC) {
-			rename_which_error_return = __LINE__;
 			goto error_return;
 		}
 		if (error) {
-			rename_which_error_return = __LINE__;
 			goto abort_return;
 		}
 		xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -437,7 +427,6 @@ xfs_rename(
 		if (new_parent && src_is_directory) {
 			error = xfs_bumplink(tp, target_dp);
 			if (error) {
-				rename_which_error_return = __LINE__;
 				goto abort_return;
 			}
 		}
@@ -455,7 +444,6 @@ xfs_rename(
 			if (!(XFS_DIR_ISEMPTY(target_ip->i_mount, target_ip)) ||
 			    (target_ip->i_d.di_nlink > 2)) {
 				error = XFS_ERROR(EEXIST);
-				rename_which_error_return = __LINE__;
 				goto error_return;
 			}
 		}
@@ -473,7 +461,6 @@ xfs_rename(
 			target_namelen, src_ip->i_ino, &first_block,
 			&free_list, spaceres);
 		if (error) {
-			rename_which_error_return = __LINE__;
 			goto abort_return;
 		}
 		xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -484,7 +471,6 @@ xfs_rename(
 		 */
 		error = xfs_droplink(tp, target_ip);
 		if (error) {
-			rename_which_error_return = __LINE__;
 			goto abort_return;
 		}
 		target_ip_dropped = 1;
@@ -495,7 +481,6 @@ xfs_rename(
 			 */
 			error = xfs_droplink(tp, target_ip);
 			if (error) {
-				rename_which_error_return = __LINE__;
 				goto abort_return;
 			}
 		}
@@ -519,7 +504,6 @@ xfs_rename(
 					&free_list, spaceres);
 		ASSERT(error != EEXIST);
 		if (error) {
-			rename_which_error_return = __LINE__;
 			goto abort_return;
 		}
 		xfs_ichgtime(src_ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -550,7 +534,6 @@ xfs_rename(
 		 */
 		error = xfs_droplink(tp, src_dp);
 		if (error) {
-			rename_which_error_return = __LINE__;
 			goto abort_return;
 		}
 	}
@@ -558,7 +541,6 @@ xfs_rename(
 	error = XFS_DIR_REMOVENAME(mp, tp, src_dp, src_name, src_namelen,
 			src_ip->i_ino, &first_block, &free_list, spaceres);
 	if (error) {
-		rename_which_error_return = __LINE__;
 		goto abort_return;
 	}
 	xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 3db0e2200775..06dfca531f79 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -332,25 +332,6 @@ undo_blocks:
 
 
 /*
- * This is called to set the a callback to be called when the given
- * transaction is committed to disk.  The transaction pointer and the
- * argument pointer will be passed to the callback routine.
- *
- * Only one callback can be associated with any single transaction.
- */
-void
-xfs_trans_callback(
-	xfs_trans_t		*tp,
-	xfs_trans_callback_t	callback,
-	void			*arg)
-{
-	ASSERT(tp->t_callback == NULL);
-	tp->t_callback = callback;
-	tp->t_callarg = arg;
-}
-
-
-/*
  * Record the indicated change to the given field for application
  * to the file system's superblock when the transaction commits.
  * For now, just store the change in the transaction structure.
@@ -551,7 +532,7 @@ xfs_trans_apply_sb_deltas(
  *
  * This is done efficiently with a single call to xfs_mod_incore_sb_batch().
  */
-void
+STATIC void
 xfs_trans_unreserve_and_mod_sb(
 	xfs_trans_t	*tp)
 {
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index bd37ccb85e76..ec541d66fa2a 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -987,8 +987,6 @@ xfs_trans_t	*_xfs_trans_alloc(struct xfs_mount *, uint);
 xfs_trans_t	*xfs_trans_dup(xfs_trans_t *);
 int		xfs_trans_reserve(xfs_trans_t *, uint, uint, uint,
 				  uint, uint);
-void		xfs_trans_callback(xfs_trans_t *,
-				   void (*)(xfs_trans_t *, void *), void *);
 void		xfs_trans_mod_sb(xfs_trans_t *, uint, long);
 struct xfs_buf	*xfs_trans_get_buf(xfs_trans_t *, struct xfs_buftarg *, xfs_daddr_t,
 				   int, uint);
@@ -1010,7 +1008,6 @@ int		xfs_trans_iget(struct xfs_mount *, xfs_trans_t *,
 			       xfs_ino_t , uint, uint, struct xfs_inode **);
 void		xfs_trans_ijoin(xfs_trans_t *, struct xfs_inode *, uint);
 void		xfs_trans_ihold(xfs_trans_t *, struct xfs_inode *);
-void		xfs_trans_ihold_release(xfs_trans_t *, struct xfs_inode *);
 void		xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint);
 void		xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint);
 struct xfs_efi_log_item	*xfs_trans_get_efi(xfs_trans_t *, uint);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index a9682b9510c1..144da7a85466 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -976,6 +976,7 @@ xfs_trans_dquot_buf(
 	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
 	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
 	ASSERT(type == XFS_BLI_UDQUOT_BUF ||
+	       type == XFS_BLI_PDQUOT_BUF ||
 	       type == XFS_BLI_GDQUOT_BUF);
 
 	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index e2c3706f453d..7e7631ca4979 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -253,24 +253,6 @@ xfs_trans_ihold(
 	ip->i_itemp->ili_flags |= XFS_ILI_HOLD;
 }
 
-/*
- * Cancel the previous inode hold request made on this inode
- * for this transaction.
- */
-/*ARGSUSED*/
-void
-xfs_trans_ihold_release(
-	xfs_trans_t	*tp,
-	xfs_inode_t	*ip)
-{
-	ASSERT(ip->i_transp == tp);
-	ASSERT(ip->i_itemp != NULL);
-	ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
-	ASSERT(ip->i_itemp->ili_flags & XFS_ILI_HOLD);
-
-	ip->i_itemp->ili_flags &= ~XFS_ILI_HOLD;
-}
-
 
 /*
  * This is called to mark the fields indicated in fieldmask as needing
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index e4bf711e48ff..16f5371ce102 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -55,7 +55,7 @@ typedef signed long long int	__int64_t;
 typedef unsigned long long int	__uint64_t;
 
 typedef enum { B_FALSE,B_TRUE }	boolean_t;
-typedef __int64_t		prid_t;		/* project ID */
+typedef __uint32_t		prid_t;		/* project ID */
 typedef __uint32_t		inst_t;		/* an instruction */
 
 typedef __s64			xfs_off_t;	/* <file offset> type */
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index d1f8146a06ea..11351f08d438 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -428,7 +428,7 @@ xfs_truncate_file(
 		if (ip->i_ino != mp->m_sb.sb_uquotino)
 			ASSERT(ip->i_udquot);
 	}
-	if (XFS_IS_GQUOTA_ON(mp)) {
+	if (XFS_IS_OQUOTA_ON(mp)) {
 		if (ip->i_ino != mp->m_sb.sb_gquotino)
 			ASSERT(ip->i_gdquot);
 	}
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index b53736650100..42bcc0215203 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -368,16 +368,6 @@ xfs_finish_flags(
 	}
 
 	/*
-	 * disallow mount attempts with (IRIX) project quota enabled
-	 */
-	if (XFS_SB_VERSION_HASQUOTA(&mp->m_sb) &&
-	    (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT)) {
-		cmn_err(CE_WARN,
-	"XFS: cannot mount a filesystem with IRIX project quota enabled");
-		return XFS_ERROR(ENOSYS);
-	}
-
-	/*
 	 * check for shared mount.
 	 */
 	if (ap->flags & XFSMNT_SHARED) {
@@ -622,7 +612,34 @@ out:
 	return XFS_ERROR(error);
 }
 
-#define REMOUNT_READONLY_FLAGS	(SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT)
+STATIC int
+xfs_quiesce_fs(
+	xfs_mount_t		*mp)
+{
+	int			count = 0, pincount;
+		
+	xfs_refcache_purge_mp(mp);
+	xfs_flush_buftarg(mp->m_ddev_targp, 0);
+	xfs_finish_reclaim_all(mp, 0);
+
+	/* This loop must run at least twice.
+	 * The first instance of the loop will flush
+	 * most meta data but that will generate more
+	 * meta data (typically directory updates).
+	 * Which then must be flushed and logged before
+	 * we can write the unmount record.
+	 */ 
+	do {
+		xfs_syncsub(mp, SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT, 0, NULL);
+		pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
+		if (!pincount) {
+			delay(50);
+			count++;
+		}
+	} while (count < 2);
+
+	return 0;
+}
 
 STATIC int
 xfs_mntupdate(
@@ -632,8 +649,7 @@ xfs_mntupdate(
 {
 	struct vfs	*vfsp = bhvtovfs(bdp);
 	xfs_mount_t	*mp = XFS_BHVTOM(bdp);
-	int		pincount, error;
-	int		count = 0;
+	int		error;
 
 	if (args->flags & XFSMNT_NOATIME)
 		mp->m_flags |= XFS_MOUNT_NOATIME;
@@ -645,25 +661,7 @@ xfs_mntupdate(
 	}
 
 	if (*flags & MS_RDONLY) {
-		xfs_refcache_purge_mp(mp);
-		xfs_flush_buftarg(mp->m_ddev_targp, 0);
-		xfs_finish_reclaim_all(mp, 0);
-
-		/* This loop must run at least twice.
-		 * The first instance of the loop will flush
-		 * most meta data but that will generate more
-		 * meta data (typically directory updates).
-		 * Which then must be flushed and logged before
-		 * we can write the unmount record.
-		 */ 
-		do {
-			VFS_SYNC(vfsp, REMOUNT_READONLY_FLAGS, NULL, error);
-			pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
-			if (!pincount) {
-				delay(50);
-				count++;
-			}
-		} while (count < 2);
+		xfs_quiesce_fs(mp);
 
 		/* Ok now write out an unmount record */
 		xfs_log_unmount_write(mp);
@@ -879,10 +877,12 @@ xfs_sync(
 	int		flags,
 	cred_t		*credp)
 {
-	xfs_mount_t	*mp;
+	xfs_mount_t	*mp = XFS_BHVTOM(bdp);
 
-	mp = XFS_BHVTOM(bdp);
-	return (xfs_syncsub(mp, flags, 0, NULL));
+	if (unlikely(flags == SYNC_QUIESCE))
+		return xfs_quiesce_fs(mp);
+	else
+		return xfs_syncsub(mp, flags, 0, NULL);
 }
 
 /*
@@ -1681,7 +1681,7 @@ suffix_strtoul(const char *cp, char **endp, unsigned int base)
 	return simple_strtoul(cp, endp, base) << shift_left_factor;
 }
 
-int
+STATIC int
 xfs_parseargs(
 	struct bhv_desc		*bhv,
 	char			*options,
@@ -1867,7 +1867,7 @@ printk("XFS: irixsgid is now a sysctl(2) variable, option is deprecated.\n");
 	return 0;
 }
 
-int
+STATIC int
 xfs_showargs(
 	struct bhv_desc		*bhv,
 	struct seq_file		*m)
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 25a526629b12..1377c868f3f4 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -351,21 +351,28 @@ xfs_setattr(
 	 * If the IDs do change before we take the ilock, we're covered
 	 * because the i_*dquot fields will get updated anyway.
 	 */
-	if (XFS_IS_QUOTA_ON(mp) && (mask & (XFS_AT_UID|XFS_AT_GID))) {
+	if (XFS_IS_QUOTA_ON(mp) &&
+	    (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID))) {
 		uint	qflags = 0;
 
-		if (mask & XFS_AT_UID) {
+		if ((mask & XFS_AT_UID) && XFS_IS_UQUOTA_ON(mp)) {
 			uid = vap->va_uid;
 			qflags |= XFS_QMOPT_UQUOTA;
 		} else {
 			uid = ip->i_d.di_uid;
 		}
-		if (mask & XFS_AT_GID) {
+		if ((mask & XFS_AT_GID) && XFS_IS_GQUOTA_ON(mp)) {
 			gid = vap->va_gid;
 			qflags |= XFS_QMOPT_GQUOTA;
 		}  else {
 			gid = ip->i_d.di_gid;
 		}
+		if ((mask & XFS_AT_PROJID) && XFS_IS_PQUOTA_ON(mp)) {
+			projid = vap->va_projid;
+			qflags |= XFS_QMOPT_PQUOTA;
+		}  else {
+			projid = ip->i_d.di_projid;
+		}
 		/*
 		 * We take a reference when we initialize udqp and gdqp,
 		 * so it is important that we never blindly double trip on
@@ -373,7 +380,8 @@ xfs_setattr(
 		 */
 		ASSERT(udqp == NULL);
 		ASSERT(gdqp == NULL);
-		code = XFS_QM_DQVOPALLOC(mp, ip, uid,gid, qflags, &udqp, &gdqp);
+		code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, projid, qflags,
+					 &udqp, &gdqp);
 		if (code)
 			return (code);
 	}
@@ -499,8 +507,6 @@ xfs_setattr(
 		 * that the group ID supplied to the chown() function
 		 * shall be equal to either the group ID or one of the
 		 * supplementary group IDs of the calling process.
-		 *
-		 * XXX: How does restricted_chown affect projid?
 		 */
 		if (restricted_chown &&
 		    (iuid != uid || (igid != gid &&
@@ -510,10 +516,11 @@ xfs_setattr(
 			goto error_return;
 		}
 		/*
-		 * Do a quota reservation only if uid or gid is actually
+		 * Do a quota reservation only if uid/projid/gid is actually
 		 * going to change.
 		 */
 		if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
+		    (XFS_IS_PQUOTA_ON(mp) && iprojid != projid) ||
 		    (XFS_IS_GQUOTA_ON(mp) && igid != gid)) {
 			ASSERT(tp);
 			code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
@@ -774,6 +781,7 @@ xfs_setattr(
 		}
 		if (igid != gid) {
 			if (XFS_IS_GQUOTA_ON(mp)) {
+				ASSERT(!XFS_IS_PQUOTA_ON(mp));
 				ASSERT(mask & XFS_AT_GID);
 				ASSERT(gdqp);
 				olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
@@ -782,6 +790,13 @@ xfs_setattr(
 			ip->i_d.di_gid = gid;
 		}
 		if (iprojid != projid) {
+			if (XFS_IS_PQUOTA_ON(mp)) {
+				ASSERT(!XFS_IS_GQUOTA_ON(mp));
+				ASSERT(mask & XFS_AT_PROJID);
+				ASSERT(gdqp);
+				olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
+							&ip->i_gdquot, gdqp);
+			}
 			ip->i_d.di_projid = projid;
 			/*
 			 * We may have to rev the inode as well as
@@ -843,6 +858,8 @@ xfs_setattr(
 				di_flags |= XFS_DIFLAG_NOATIME;
 			if (vap->va_xflags & XFS_XFLAG_NODUMP)
 				di_flags |= XFS_DIFLAG_NODUMP;
+			if (vap->va_xflags & XFS_XFLAG_PROJINHERIT)
+				di_flags |= XFS_DIFLAG_PROJINHERIT;
 			if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
 				if (vap->va_xflags & XFS_XFLAG_RTINHERIT)
 					di_flags |= XFS_DIFLAG_RTINHERIT;
@@ -1898,7 +1915,9 @@ xfs_create(
 	/* Return through std_return after this point. */
 
 	udqp = gdqp = NULL;
-	if (vap->va_mask & XFS_AT_PROJID)
+	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
+		prid = dp->i_d.di_projid;
+	else if (vap->va_mask & XFS_AT_PROJID)
 		prid = (xfs_prid_t)vap->va_projid;
 	else
 		prid = (xfs_prid_t)dfltprid;
@@ -1907,7 +1926,7 @@ xfs_create(
 	 * Make sure that we have allocated dquot(s) on disk.
 	 */
 	error = XFS_QM_DQVOPALLOC(mp, dp,
-			current_fsuid(credp), current_fsgid(credp),
+			current_fsuid(credp), current_fsgid(credp), prid,
 			XFS_QMOPT_QUOTALL|XFS_QMOPT_INHERIT, &udqp, &gdqp);
 	if (error)
 		goto std_return;
@@ -2604,17 +2623,7 @@ xfs_link(
 	if (src_vp->v_type == VDIR)
 		return XFS_ERROR(EPERM);
 
-	/*
-	 * For now, manually find the XFS behavior descriptor for
-	 * the source vnode.  If it doesn't exist then something
-	 * is wrong and we should just return an error.
-	 * Eventually we need to figure out how link is going to
-	 * work in the face of stacked vnodes.
-	 */
 	src_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(src_vp), &xfs_vnodeops);
-	if (src_bdp == NULL) {
-		return XFS_ERROR(EXDEV);
-	}
 	sip = XFS_BHVTOI(src_bdp);
 	tdp = XFS_BHVTOI(target_dir_bdp);
 	mp = tdp->i_mount;
@@ -2681,6 +2690,17 @@ xfs_link(
 		goto error_return;
 	}
 
+	/*
+	 * If we are using project inheritance, we only allow hard link
+	 * creation in our tree when the project IDs are the same; else
+	 * the tree quota mechanism could be circumvented.
+	 */
+	if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
+		     (tdp->i_d.di_projid != sip->i_d.di_projid))) {
+		error = XFS_ERROR(EPERM);
+		goto error_return;
+	}
+
 	if (resblks == 0 &&
 	    (error = XFS_DIR_CANENTER(mp, tp, tdp, target_name,
 			target_namelen)))
@@ -2803,7 +2823,9 @@ xfs_mkdir(
 
 	mp = dp->i_mount;
 	udqp = gdqp = NULL;
-	if (vap->va_mask & XFS_AT_PROJID)
+	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
+		prid = dp->i_d.di_projid;
+	else if (vap->va_mask & XFS_AT_PROJID)
 		prid = (xfs_prid_t)vap->va_projid;
 	else
 		prid = (xfs_prid_t)dfltprid;
@@ -2812,7 +2834,7 @@ xfs_mkdir(
 	 * Make sure that we have allocated dquot(s) on disk.
 	 */
 	error = XFS_QM_DQVOPALLOC(mp, dp,
-			current_fsuid(credp), current_fsgid(credp),
+			current_fsuid(credp), current_fsgid(credp), prid,
 			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
 	if (error)
 		goto std_return;
@@ -3357,7 +3379,9 @@ xfs_symlink(
 	/* Return through std_return after this point. */
 
 	udqp = gdqp = NULL;
-	if (vap->va_mask & XFS_AT_PROJID)
+	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
+		prid = dp->i_d.di_projid;
+	else if (vap->va_mask & XFS_AT_PROJID)
 		prid = (xfs_prid_t)vap->va_projid;
 	else
 		prid = (xfs_prid_t)dfltprid;
@@ -3366,7 +3390,7 @@ xfs_symlink(
 	 * Make sure that we have allocated dquot(s) on disk.
 	 */
 	error = XFS_QM_DQVOPALLOC(mp, dp,
-			current_fsuid(credp), current_fsgid(credp),
+			current_fsuid(credp), current_fsgid(credp), prid,
 			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
 	if (error)
 		goto std_return;
@@ -4028,7 +4052,7 @@ xfs_finish_reclaim_all(xfs_mount_t *mp, int noblock)
  *      errno on error
  *
  */
-int
+STATIC int
 xfs_alloc_file_space(
 	xfs_inode_t		*ip,
 	xfs_off_t		offset,
@@ -4151,9 +4175,8 @@ retry:
 			break;
 		}
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		error = XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp,
-				ip->i_udquot, ip->i_gdquot, resblks, 0, rt ?
-				XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
+		error = XFS_TRANS_RESERVE_QUOTA(mp, tp,
+				ip->i_udquot, ip->i_gdquot, resblks, 0, 0);
 		if (error)
 			goto error1;
 
@@ -4305,6 +4328,7 @@ xfs_free_file_space(
 	xfs_off_t		len,
 	int			attr_flags)
 {
+	vnode_t			*vp;
 	int			committed;
 	int			done;
 	xfs_off_t		end_dmi_offset;
@@ -4325,9 +4349,11 @@ xfs_free_file_space(
 	xfs_trans_t		*tp;
 	int			need_iolock = 1;
 
-	vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address);
+	vp = XFS_ITOV(ip);
 	mp = ip->i_mount;
 
+	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
+
 	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
 		return error;
 
@@ -4344,7 +4370,7 @@ xfs_free_file_space(
 	    DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) {
 		if (end_dmi_offset > ip->i_d.di_size)
 			end_dmi_offset = ip->i_d.di_size;
-		error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, XFS_ITOV(ip),
+		error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp,
 				offset, end_dmi_offset - offset,
 				AT_DELAY_FLAG(attr_flags), NULL);
 		if (error)
@@ -4363,7 +4389,14 @@ xfs_free_file_space(
 	ioffset = offset & ~(rounding - 1);
 	if (ilen & (rounding - 1))
 		ilen = (ilen + rounding) & ~(rounding - 1);
-	xfs_inval_cached_pages(XFS_ITOV(ip), &(ip->i_iocore), ioffset, 0, 0);
+
+	if (VN_CACHED(vp) != 0) {
+		xfs_inval_cached_trace(&ip->i_iocore, ioffset, -1,
+				ctooff(offtoct(ioffset)), -1);
+		VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(ioffset)),
+				-1, FI_REMAPF_LOCKED);
+	}
+
 	/*
 	 * Need to zero the stuff we're not freeing, on disk.
 	 * If its a realtime file & can't use unwritten extents then we
author	David Woodhouse <dwmw2@shinybook.infradead.org>	2005-07-02 13:39:09 +0100
committer	David Woodhouse <dwmw2@shinybook.infradead.org>	2005-07-02 13:39:09 +0100
commit	d2f6409584e2c62ffad81690562330ff3bf4a458 (patch)
tree	3bdfb97d0b51be2f7f414f2107e97603c1206abb /fs
parent	e1b09eba2686eca94a3a188042b518df6044a3c1 (diff)
parent	4a89a04f1ee21a7c1f4413f1ad7dcfac50ff9b63 (diff)