summaryrefslogtreecommitdiff
path: root/fs/ext2
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext2')
-rw-r--r--fs/ext2/CHANGES157
-rw-r--r--fs/ext2/Makefile12
-rw-r--r--fs/ext2/acl.c518
-rw-r--r--fs/ext2/acl.h82
-rw-r--r--fs/ext2/balloc.c699
-rw-r--r--fs/ext2/bitmap.c25
-rw-r--r--fs/ext2/dir.c673
-rw-r--r--fs/ext2/ext2.h160
-rw-r--r--fs/ext2/file.c68
-rw-r--r--fs/ext2/fsync.c51
-rw-r--r--fs/ext2/ialloc.c735
-rw-r--r--fs/ext2/inode.c1276
-rw-r--r--fs/ext2/ioctl.c81
-rw-r--r--fs/ext2/namei.c418
-rw-r--r--fs/ext2/super.c1161
-rw-r--r--fs/ext2/symlink.c52
-rw-r--r--fs/ext2/xattr.c1043
-rw-r--r--fs/ext2/xattr.h118
-rw-r--r--fs/ext2/xattr_security.c53
-rw-r--r--fs/ext2/xattr_trusted.c64
-rw-r--r--fs/ext2/xattr_user.c77
21 files changed, 7523 insertions, 0 deletions
diff --git a/fs/ext2/CHANGES b/fs/ext2/CHANGES
new file mode 100644
index 000000000000..aa5aaf0e5911
--- /dev/null
+++ b/fs/ext2/CHANGES
@@ -0,0 +1,157 @@
+Changes from version 0.5a to version 0.5b
+=========================================
+ - Now that we have sysctl(), the immutable flag cannot be changed when
+ the system is running at security level > 0.
+ - Some cleanups in the code.
+ - More consistency checks on directories.
+ - The ext2.diff patch from Tom May <ftom@netcom.com> has been
+ integrated. This patch replaces expensive "/" and "%" with
+ cheap ">>" and "&" where possible.
+
+Changes from version 0.5 to version 0.5a
+========================================
+ - Zero the partial block following the end of the file when a file
+ is truncated.
+ - Dates updated in the copyright.
+ - More checks when the filesystem is mounted: the count of blocks,
+ fragments, and inodes per group is checked against the block size.
+ - The buffers used by the error routines are now static variables, to
+ avoid using space on the kernel stack, as requested by Linus.
+ - Some cleanups in the error messages (some versions of syslog contain
+ a bug which truncates an error message if it contains '\n').
+ - Check that no data can be written to a file past the 2GB limit.
+ - The famous readdir() bug has been fixed by Stephen Tweedie.
+ - Added a revision level in the superblock.
+ - Full support for O_SYNC flag of the open system call.
+ - New mount options: `resuid=#uid' and `resgid=#gid'. `resuid' causes
+ ext2fs to consider user #uid like root for the reserved blocks.
+ `resgid' acts the same way with group #gid. New fields in the
+ superblock contain default values for resuid and resgid and can
+ be modified by tune2fs.
+ Idea comes from Rene Cougnenc <cougnenc@renux.frmug.fr.net>.
+ - New mount options: `bsddf' and `minixdf'. `bsddf' causes ext2fs
+ to remove the blocks used for FS structures from the total block
+ count in statfs. With `minixdf', ext2fs mimics Minix behavior
+ in statfs (i.e. it returns the total number of blocks on the
+ partition). This is intended to make bde happy :-)
+ - New file attributes:
+ - Immutable files cannot be modified. Data cannot be written to
+ these files. They cannot be removed, renamed and new links cannot
+ be created. Even root cannot modify the files. He has to remove
+ the immutable attribute first.
+ - Append-only files: can only be written in append-mode when writing.
+ They cannot be removed, renamed and new links cannot be created.
+ Note: files may only be added to an append-only directory.
+ - No-dump files: the attribute is not used by the kernel. My port
+ of dump uses it to avoid backing up files which are not important.
+ - New check in ext2_check_dir_entry: the inode number is checked.
+ - Support for big file systems: the copy of the FS descriptor is now
+ dynamically allocated (previous versions used a fixed size array).
+ This allows mounting file systems of 2GB and more.
+ - Reorganization of the ext2_inode structure to allow other operating
+ systems to create specific fields if they use ext2fs as their native
+ file system. Currently, ext2fs is only implemented in Linux but
+ will soon be part of Gnu Hurd and of Masix.
+
+Changes from version 0.4b to version 0.5
+========================================
+ - New superblock fields: s_lastcheck and s_checkinterval added
+ by Uwe Ohse <uwe@tirka.gun.de> to implement time-dependent checks
+ of the file system
+ - Real random numbers for secure rm added by Pierre del Perugia
+ <delperug@gla.ecoledoc.ibp.fr>
+ - The mount warnings related to the state of a fs are not printed
+ if the fs is mounted read-only, idea by Nick Holloway
+ <alfie@dcs.warwick.ac.uk>
+
+Changes from version 0.4a to version 0.4b
+=========================================
+ - Copyrights changed to include the name of my laboratory.
+ - Clean up of balloc.c and ialloc.c.
+ - More consistency checks.
+ - Block preallocation added by Stephen Tweedie.
+ - Direct reads of directories disallowed.
+ - Readahead implemented in readdir by Stephen Tweedie.
+ - Bugs in block and inodes allocation fixed.
+ - Readahead implemented in ext2_find_entry by Chip Salzenberg.
+ - New mount options:
+ `check=none|normal|strict'
+ `debug'
+ `errors=continue|remount-ro|panic'
+ `grpid', `bsdgroups'
+ `nocheck'
+ `nogrpid', `sysvgroups'
+ - truncate() now tries to deallocate contiguous blocks in a single call
+ to ext2_free_blocks().
+ - lots of cosmetic changes.
+
+Changes from version 0.4 to version 0.4a
+========================================
+ - the `sync' option support is now complete. Version 0.4 was not
+ supporting it when truncating a file. I have tested the synchronous
+ writes and they work but they make the system very slow :-( I have
+ to work again on this to make it faster.
+ - when detecting an error on a mounted filesystem, version 0.4 used
+ to try to write a flag in the super block even if the filesystem had
+ been mounted read-only. This is fixed.
+ - the `sb=#' option now causes the kernel code to use the filesystem
+ descriptors located at block #+1. Version 0.4 used the superblock
+ backup located at block # but used the main copy of the descriptors.
+ - a new file attribute `S' is supported. This attribute causes
+ synchronous writes but is applied to a file not to the entire file
+ system (thanks to Michael Kraehe <kraehe@bakunin.north.de> for
+ suggesting it).
+ - the directory cache is inhibited by default. The cache management
+ code seems to be buggy and I have to look at it carefully before
+ using it again.
+ - deleting a file with the `s' attribute (secure deletion) causes its
+ blocks to be overwritten with random values not with zeros (thanks to
+ Michael A. Griffith <grif@cs.ucr.edu> for suggesting it).
+ - lots of cosmetic changes have been made.
+
+Changes from version 0.3 to version 0.4
+=======================================
+ - Three new mount options are supported: `check', `sync' and `sb=#'.
+ `check' tells the kernel code to make more consistency checks
+ when the file system is mounted. Currently, the kernel code checks
+ that the blocks and inodes bitmaps are consistent with the free
+ blocks and inodes counts. More checks will be added in future
+ releases.
+ `sync' tells the kernel code to use synchronous writes when updating
+ an inode, a bitmap, a directory entry or an indirect block. This
+ can make the file system much slower but can be a big win for files
+ recovery in case of a crash (and we can now say to the BSD folks
+ that Linux also supports synchronous updates :-).
+ `sb=#' tells the kernel code to use an alternate super block instead
+ of its master copy. `#' is the number of the block (counted in
+ 1024 bytes blocks) which contains the alternate super block.
+ An ext2 file system typically contains backups of the super block
+ at blocks 8193, 16385, and so on.
+ - I have changed the meaning of the valid flag used by e2fsck. It
+ now contains the state of the file system. If the kernel code
+ detects an inconsistency while the file system is mounted, it flags
+ it as erroneous and e2fsck will detect that on next run.
+ - The super block now contains a mount counter. This counter is
+ incremented each time the file system is mounted read/write. When
+ this counter becomes bigger than the maximal mount count (also stored
+ in the super block), e2fsck checks the file system, even if it had
+ been unmounted cleanly, and resets this counter to 0.
+ - File attributes are now supported. One can associate a set of
+ attributes to a file. Three attributes are defined:
+ `c': the file is marked for automatic compression,
+ `s': the file is marked for secure deletion: when the file is
+ deleted, its blocks are zeroed and written back to the disk,
+ `u': the file is marked for undeletion: when the file is deleted,
+ its contents are saved to allow a future undeletion.
+ Currently, only the `s' attribute is implemented in the kernel
+ code. Support for the other attributes will be added in a future
+ release.
+ - a few bugs related to times updates have been fixed by Bruce
+ Evans and me.
+ - a bug related to the links count of deleted inodes has been fixed.
+ Previous versions used to keep the links count set to 1 when a file
+ was deleted. The new version now sets links_count to 0 when deleting
+ the last link.
+ - a race condition when deallocating an inode has been fixed by
+ Stephen Tweedie.
+
diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile
new file mode 100644
index 000000000000..ee240a14e70f
--- /dev/null
+++ b/fs/ext2/Makefile
@@ -0,0 +1,12 @@
+#
+# Makefile for the linux ext2-filesystem routines.
+#
+
+obj-$(CONFIG_EXT2_FS) += ext2.o
+
+ext2-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+ ioctl.o namei.o super.o symlink.o
+
+ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
+ext2-$(CONFIG_EXT2_FS_POSIX_ACL) += acl.o
+ext2-$(CONFIG_EXT2_FS_SECURITY) += xattr_security.o
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
new file mode 100644
index 000000000000..8369ee8d28c4
--- /dev/null
+++ b/fs/ext2/acl.c
@@ -0,0 +1,518 @@
+/*
+ * linux/fs/ext2/acl.c
+ *
+ * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
+ */
+
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include "ext2.h"
+#include "xattr.h"
+#include "acl.h"
+
+/*
+ * Decode an on-disk ACL (version header followed by packed entries) into
+ * a freshly allocated in-memory posix_acl.
+ *
+ * Returns NULL if @value is NULL or the blob describes zero entries,
+ * ERR_PTR(-EINVAL) if the blob is malformed, ERR_PTR(-ENOMEM) if the
+ * allocation fails, and the new ACL otherwise.
+ */
+static struct posix_acl *
+ext2_acl_from_disk(const void *value, size_t size)
+{
+	const char *cursor, *limit;
+	struct posix_acl *acl;
+	int nr_entries, i;
+
+	if (value == NULL)
+		return NULL;
+	if (size < sizeof(ext2_acl_header))
+		return ERR_PTR(-EINVAL);
+	if (((ext2_acl_header *)value)->a_version !=
+	    cpu_to_le32(EXT2_ACL_VERSION))
+		return ERR_PTR(-EINVAL);
+
+	limit = (const char *)value + size;
+	cursor = (const char *)value + sizeof(ext2_acl_header);
+
+	nr_entries = ext2_acl_count(size);
+	if (nr_entries < 0)
+		return ERR_PTR(-EINVAL);
+	if (nr_entries == 0)
+		return NULL;
+
+	acl = posix_acl_alloc(nr_entries, GFP_KERNEL);
+	if (acl == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < nr_entries; i++) {
+		const ext2_acl_entry *disk = (const ext2_acl_entry *)cursor;
+
+		/* Each entry holds at least the short (tag + perm) form. */
+		if (cursor + sizeof(ext2_acl_entry_short) > limit)
+			goto fail;
+		acl->a_entries[i].e_tag = le16_to_cpu(disk->e_tag);
+		acl->a_entries[i].e_perm = le16_to_cpu(disk->e_perm);
+
+		switch (acl->a_entries[i].e_tag) {
+		case ACL_USER_OBJ:
+		case ACL_GROUP_OBJ:
+		case ACL_MASK:
+		case ACL_OTHER:
+			/* Short entry: no id is stored on disk. */
+			acl->a_entries[i].e_id = ACL_UNDEFINED_ID;
+			cursor += sizeof(ext2_acl_entry_short);
+			break;
+
+		case ACL_USER:
+		case ACL_GROUP:
+			/* Long entry: make sure the id lies inside the blob. */
+			cursor += sizeof(ext2_acl_entry);
+			if (cursor > limit)
+				goto fail;
+			acl->a_entries[i].e_id = le32_to_cpu(disk->e_id);
+			break;
+
+		default:
+			goto fail;
+		}
+	}
+
+	/* The entries must consume the blob exactly. */
+	if (cursor != limit)
+		goto fail;
+	return acl;
+
+fail:
+	posix_acl_release(acl);
+	return ERR_PTR(-EINVAL);
+}
+
+/*
+ * Encode an in-memory posix_acl into the on-disk layout.
+ *
+ * *size is set to ext2_acl_size(acl->a_count) before anything else is
+ * attempted.  Returns a kmalloc()ed buffer (the caller frees it with
+ * kfree()), ERR_PTR(-ENOMEM) if the allocation fails, or ERR_PTR(-EINVAL)
+ * if an entry carries an unknown tag.
+ */
+static void *
+ext2_acl_to_disk(const struct posix_acl *acl, size_t *size)
+{
+	ext2_acl_header *hdr;
+	char *out;
+	size_t i;
+
+	*size = ext2_acl_size(acl->a_count);
+
+	/* Allocate for the worst case: every entry in its long form. */
+	hdr = kmalloc(sizeof(ext2_acl_header) +
+		      acl->a_count * sizeof(ext2_acl_entry), GFP_KERNEL);
+	if (hdr == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	hdr->a_version = cpu_to_le32(EXT2_ACL_VERSION);
+	out = (char *)hdr + sizeof(ext2_acl_header);
+
+	for (i = 0; i < acl->a_count; i++) {
+		ext2_acl_entry *disk = (ext2_acl_entry *)out;
+
+		disk->e_tag = cpu_to_le16(acl->a_entries[i].e_tag);
+		disk->e_perm = cpu_to_le16(acl->a_entries[i].e_perm);
+
+		switch (acl->a_entries[i].e_tag) {
+		case ACL_USER_OBJ:
+		case ACL_GROUP_OBJ:
+		case ACL_MASK:
+		case ACL_OTHER:
+			/* Short form: tag and permissions only. */
+			out += sizeof(ext2_acl_entry_short);
+			break;
+
+		case ACL_USER:
+		case ACL_GROUP:
+			/* Long form: also store the id. */
+			disk->e_id = cpu_to_le32(acl->a_entries[i].e_id);
+			out += sizeof(ext2_acl_entry);
+			break;
+
+		default:
+			goto fail;
+		}
+	}
+	return (char *)hdr;
+
+fail:
+	kfree(hdr);
+	return ERR_PTR(-EINVAL);
+}
+
+/*
+ * Fetch the ACL cached in *i_acl under inode->i_lock.
+ *
+ * Returns EXT2_ACL_NOT_CACHED if nothing has been cached yet; otherwise
+ * returns the result of posix_acl_dup() on the cached pointer.
+ */
+static inline struct posix_acl *
+ext2_iget_acl(struct inode *inode, struct posix_acl **i_acl)
+{
+	struct posix_acl *cached;
+
+	spin_lock(&inode->i_lock);
+	cached = *i_acl;
+	if (cached != EXT2_ACL_NOT_CACHED)
+		cached = posix_acl_dup(cached);
+	spin_unlock(&inode->i_lock);
+
+	return cached;
+}
+
+/*
+ * Replace the ACL cached in *i_acl with @acl, under inode->i_lock.
+ * A previously cached ACL is released first; the new value stored is
+ * the result of posix_acl_dup(acl).
+ */
+static inline void
+ext2_iset_acl(struct inode *inode, struct posix_acl **i_acl,
+	      struct posix_acl *acl)
+{
+	struct posix_acl *previous;
+
+	spin_lock(&inode->i_lock);
+	previous = *i_acl;
+	if (previous != EXT2_ACL_NOT_CACHED)
+		posix_acl_release(previous);
+	*i_acl = posix_acl_dup(acl);
+	spin_unlock(&inode->i_lock);
+}
+
+/*
+ * Return the access or default ACL of @inode, consulting the in-core
+ * cache first and falling back to the extended attribute.
+ *
+ * Returns NULL when ACLs are disabled by mount option or the inode has
+ * no such ACL, an ERR_PTR() on failure (-EINVAL for an unknown @type,
+ * -ENOMEM, or the error from the xattr lookup / decoding), and the ACL
+ * otherwise.  Any non-error result is stored back into the cache.
+ *
+ * inode->i_sem: don't care
+ */
+static struct posix_acl *
+ext2_get_acl(struct inode *inode, int type)
+{
+	struct ext2_inode_info *ei = EXT2_I(inode);
+	struct posix_acl **slot;
+	struct posix_acl *acl;
+	char *buf = NULL;
+	int name_index;
+	int len;
+
+	if (!test_opt(inode->i_sb, POSIX_ACL))
+		return NULL;
+
+	/* Pick the cache slot and xattr index that belong to this type. */
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		slot = &ei->i_acl;
+		name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS;
+		break;
+
+	case ACL_TYPE_DEFAULT:
+		slot = &ei->i_default_acl;
+		name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT;
+		break;
+
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+
+	acl = ext2_iget_acl(inode, slot);
+	if (acl != EXT2_ACL_NOT_CACHED)
+		return acl;
+
+	/* First call asks for the length only, second one fetches the data. */
+	len = ext2_xattr_get(inode, name_index, "", NULL, 0);
+	if (len > 0) {
+		buf = kmalloc(len, GFP_KERNEL);
+		if (!buf)
+			return ERR_PTR(-ENOMEM);
+		len = ext2_xattr_get(inode, name_index, "", buf, len);
+	}
+
+	if (len > 0)
+		acl = ext2_acl_from_disk(buf, len);
+	else if (len == -ENODATA || len == -ENOSYS)
+		acl = NULL;
+	else
+		acl = ERR_PTR(len);
+	if (buf)
+		kfree(buf);
+
+	if (!IS_ERR(acl))
+		ext2_iset_acl(inode, slot, acl);
+	return acl;
+}
+
+/*
+ * Store @acl as the access or default ACL of @inode and update the
+ * in-core cache on success.
+ *
+ * For ACL_TYPE_ACCESS the inode mode is updated from the ACL via
+ * posix_acl_equiv_mode(); when that reports the ACL is fully expressed
+ * by the mode (return 0), @acl is treated as NULL from then on.
+ * For ACL_TYPE_DEFAULT on a non-directory, a non-NULL @acl yields
+ * -EACCES and a NULL @acl is a successful no-op.
+ *
+ * Returns 0 on success (also when ACLs are disabled by mount option),
+ * -EOPNOTSUPP for symlinks, -EINVAL for an unknown @type, or the error
+ * from encoding / ext2_xattr_set().
+ *
+ * inode->i_sem: down
+ */
+static int
+ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
+{
+	struct ext2_inode_info *ei = EXT2_I(inode);
+	int name_index;
+	void *value = NULL;
+	/*
+	 * Must start at 0: when acl is NULL nothing is encoded, yet size is
+	 * still handed to ext2_xattr_set() below.
+	 */
+	size_t size = 0;
+	int error;
+
+	if (S_ISLNK(inode->i_mode))
+		return -EOPNOTSUPP;
+	if (!test_opt(inode->i_sb, POSIX_ACL))
+		return 0;
+
+	switch(type) {
+		case ACL_TYPE_ACCESS:
+			name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS;
+			if (acl) {
+				mode_t mode = inode->i_mode;
+				error = posix_acl_equiv_mode(acl, &mode);
+				if (error < 0)
+					return error;
+				else {
+					inode->i_mode = mode;
+					mark_inode_dirty(inode);
+					/* Mode bits alone express the ACL. */
+					if (error == 0)
+						acl = NULL;
+				}
+			}
+			break;
+
+		case ACL_TYPE_DEFAULT:
+			name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT;
+			if (!S_ISDIR(inode->i_mode))
+				return acl ? -EACCES : 0;
+			break;
+
+		default:
+			return -EINVAL;
+	}
+	if (acl) {
+		value = ext2_acl_to_disk(acl, &size);
+		if (IS_ERR(value))
+			return (int)PTR_ERR(value);
+	}
+
+	error = ext2_xattr_set(inode, name_index, "", value, size, 0);
+
+	if (value)
+		kfree(value);
+	if (!error) {
+		switch(type) {
+			case ACL_TYPE_ACCESS:
+				ext2_iset_acl(inode, &ei->i_acl, acl);
+				break;
+
+			case ACL_TYPE_DEFAULT:
+				ext2_iset_acl(inode, &ei->i_default_acl, acl);
+				break;
+		}
+	}
+	return error;
+}
+
+/*
+ * ACL callback handed to generic_permission() by ext2_permission().
+ *
+ * Returns the error from ext2_get_acl() if the access ACL cannot be
+ * obtained, the result of posix_acl_permission() if the inode has an
+ * access ACL, and -EAGAIN if it has none.
+ */
+static int
+ext2_check_acl(struct inode *inode, int mask)
+{
+	struct posix_acl *acl = ext2_get_acl(inode, ACL_TYPE_ACCESS);
+
+	/*
+	 * ext2_get_acl() reports failures as ERR_PTR(); such a value is
+	 * non-NULL and must not be dereferenced or released as an ACL.
+	 */
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (acl) {
+		int error = posix_acl_permission(inode, acl, mask);
+		posix_acl_release(acl);
+		return error;
+	}
+
+	return -EAGAIN;
+}
+
+/*
+ * Permission check entry point: delegates to generic_permission(),
+ * passing ext2_check_acl as the ACL callback. The nameidata argument
+ * is not used here.
+ */
+int
+ext2_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+ return generic_permission(inode, mask, ext2_check_acl);
+}
+
+/*
+ * Initialize the ACLs of a new inode. Called from ext2_new_inode.
+ *
+ * For non-symlinks the default ACL of the parent directory is looked up
+ * (when ACLs are enabled). Without one, the process umask is applied to
+ * the new inode's mode. With one, it is inherited as the default ACL of
+ * new directories, and a clone adjusted by posix_acl_create_masq() sets
+ * the new mode and, if it is an extended ACL, the access ACL.
+ *
+ * Returns 0 on success or a negative error number.
+ *
+ * dir->i_sem: down
+ * inode->i_sem: up (access to inode is still exclusive)
+ */
+int
+ext2_init_acl(struct inode *inode, struct inode *dir)
+{
+ struct posix_acl *acl = NULL;
+ int error = 0;
+
+ if (!S_ISLNK(inode->i_mode)) {
+ if (test_opt(dir->i_sb, POSIX_ACL)) {
+ acl = ext2_get_acl(dir, ACL_TYPE_DEFAULT);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ }
+ /* No default ACL to inherit: fall back to the umask. */
+ if (!acl)
+ inode->i_mode &= ~current->fs->umask;
+ }
+ if (test_opt(inode->i_sb, POSIX_ACL) && acl) {
+ struct posix_acl *clone;
+ mode_t mode;
+
+ /* New directories inherit the parent's default ACL as their own. */
+ if (S_ISDIR(inode->i_mode)) {
+ error = ext2_set_acl(inode, ACL_TYPE_DEFAULT, acl);
+ if (error)
+ goto cleanup;
+ }
+ /* Work on a private copy; posix_acl_create_masq() is given the clone. */
+ clone = posix_acl_clone(acl, GFP_KERNEL);
+ error = -ENOMEM;
+ if (!clone)
+ goto cleanup;
+ mode = inode->i_mode;
+ error = posix_acl_create_masq(clone, &mode);
+ if (error >= 0) {
+ inode->i_mode = mode;
+ if (error > 0) {
+ /* This is an extended ACL */
+ error = ext2_set_acl(inode,
+ ACL_TYPE_ACCESS, clone);
+ }
+ }
+ posix_acl_release(clone);
+ }
+cleanup:
+ /* acl may still be NULL here (symlink, ACLs disabled, or no default ACL). */
+ posix_acl_release(acl);
+ return error;
+}
+
+/*
+ * Bring the access ACL of an inode in line with a mode change. The
+ * caller must already have stored the new mode in inode->i_mode.
+ * Returns 0 on success (including when ACLs are disabled or the inode
+ * has no access ACL), or a negative error number.
+ *
+ * The ACL itself is rewritten instead of keeping some ACL entries in the
+ * file mode permission bits (which would be more efficient), because that
+ * would break once additional permissions (like ACL_APPEND, ACL_DELETE
+ * for directories) are added. There are no more bits available in the
+ * file mode.
+ *
+ * inode->i_sem: down
+ */
+int
+ext2_acl_chmod(struct inode *inode)
+{
+	struct posix_acl *acl;
+	struct posix_acl *copy;
+	int error;
+
+	if (!test_opt(inode->i_sb, POSIX_ACL))
+		return 0;
+	if (S_ISLNK(inode->i_mode))
+		return -EOPNOTSUPP;
+
+	acl = ext2_get_acl(inode, ACL_TYPE_ACCESS);
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (!acl)
+		return 0;
+
+	/* Modify a private copy, then drop our reference to the original. */
+	copy = posix_acl_clone(acl, GFP_KERNEL);
+	posix_acl_release(acl);
+	if (!copy)
+		return -ENOMEM;
+
+	error = posix_acl_chmod_masq(copy, inode->i_mode);
+	if (error == 0)
+		error = ext2_set_acl(inode, ACL_TYPE_ACCESS, copy);
+	posix_acl_release(copy);
+	return error;
+}
+
+/*
+ * Extended attribute handlers
+ */
+/*
+ * List callback for the access ACL attribute. Copies the attribute name
+ * (including its terminating NUL) into @list when a buffer is supplied
+ * and large enough. Returns the number of bytes the name needs, or 0
+ * when ACLs are disabled by mount option.
+ */
+static size_t
+ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size,
+			   const char *name, size_t name_len)
+{
+	const size_t needed = sizeof(XATTR_NAME_ACL_ACCESS);
+
+	if (!test_opt(inode->i_sb, POSIX_ACL))
+		return 0;
+	if (list != NULL && list_size >= needed)
+		memcpy(list, XATTR_NAME_ACL_ACCESS, needed);
+	return needed;
+}
+
+/*
+ * List callback for the default ACL attribute. Copies the attribute name
+ * (including its terminating NUL) into @list when a buffer is supplied
+ * and large enough. Returns the number of bytes the name needs, or 0
+ * when ACLs are disabled by mount option.
+ */
+static size_t
+ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size,
+			    const char *name, size_t name_len)
+{
+	const size_t needed = sizeof(XATTR_NAME_ACL_DEFAULT);
+
+	if (!test_opt(inode->i_sb, POSIX_ACL))
+		return 0;
+	if (list != NULL && list_size >= needed)
+		memcpy(list, XATTR_NAME_ACL_DEFAULT, needed);
+	return needed;
+}
+
+/*
+ * Common "get" path for both ACL attributes: look up the ACL of the
+ * given @type and convert it with posix_acl_to_xattr() into @buffer.
+ *
+ * Returns -EOPNOTSUPP when ACLs are disabled by mount option, the error
+ * from ext2_get_acl(), -ENODATA when the inode has no such ACL, or
+ * whatever posix_acl_to_xattr() returns.
+ */
+static int
+ext2_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
+{
+	struct posix_acl *acl;
+	int ret;
+
+	if (!test_opt(inode->i_sb, POSIX_ACL))
+		return -EOPNOTSUPP;
+
+	acl = ext2_get_acl(inode, type);
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (!acl)
+		return -ENODATA;
+
+	ret = posix_acl_to_xattr(acl, buffer, size);
+	posix_acl_release(acl);
+	return ret;
+}
+
+/*
+ * "get" handler for the access ACL attribute. Only the empty name
+ * suffix is accepted; anything else yields -EINVAL.
+ */
+static int
+ext2_xattr_get_acl_access(struct inode *inode, const char *name,
+			  void *buffer, size_t size)
+{
+	if (strcmp(name, "") == 0)
+		return ext2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
+	return -EINVAL;
+}
+
+/*
+ * "get" handler for the default ACL attribute. Only the empty name
+ * suffix is accepted; anything else yields -EINVAL.
+ */
+static int
+ext2_xattr_get_acl_default(struct inode *inode, const char *name,
+			   void *buffer, size_t size)
+{
+	if (strcmp(name, "") == 0)
+		return ext2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
+	return -EINVAL;
+}
+
+/*
+ * Common "set" path for both ACL attributes: parse the xattr @value,
+ * validate the resulting ACL and store it with ext2_set_acl(). A NULL
+ * @value is passed on as a NULL ACL.
+ *
+ * Returns -EOPNOTSUPP when ACLs are disabled by mount option, -EPERM
+ * when the caller is neither the owner nor has CAP_FOWNER, the error
+ * from parsing or validation, or the result of ext2_set_acl().
+ */
+static int
+ext2_xattr_set_acl(struct inode *inode, int type, const void *value,
+		   size_t size)
+{
+	struct posix_acl *acl = NULL;
+	int error;
+
+	if (!test_opt(inode->i_sb, POSIX_ACL))
+		return -EOPNOTSUPP;
+	if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
+		return -EPERM;
+
+	if (value) {
+		acl = posix_acl_from_xattr(value, size);
+		if (IS_ERR(acl))
+			return PTR_ERR(acl);
+		if (acl) {
+			error = posix_acl_valid(acl);
+			if (error)
+				goto release_and_out;
+		}
+	}
+
+	error = ext2_set_acl(inode, type, acl);
+
+release_and_out:
+	posix_acl_release(acl);
+	return error;
+}
+
+/*
+ * "set" handler for the access ACL attribute. Only the empty name
+ * suffix is accepted; anything else yields -EINVAL. @flags is unused.
+ */
+static int
+ext2_xattr_set_acl_access(struct inode *inode, const char *name,
+			  const void *value, size_t size, int flags)
+{
+	if (strcmp(name, "") == 0)
+		return ext2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
+	return -EINVAL;
+}
+
+/*
+ * "set" handler for the default ACL attribute. Only the empty name
+ * suffix is accepted; anything else yields -EINVAL. @flags is unused.
+ */
+static int
+ext2_xattr_set_acl_default(struct inode *inode, const char *name,
+			   const void *value, size_t size, int flags)
+{
+	if (strcmp(name, "") == 0)
+		return ext2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
+	return -EINVAL;
+}
+
+/*
+ * Handler table for the access ACL extended attribute: ties the
+ * XATTR_NAME_ACL_ACCESS prefix to the list/get/set callbacks above.
+ */
+struct xattr_handler ext2_xattr_acl_access_handler = {
+ .prefix = XATTR_NAME_ACL_ACCESS,
+ .list = ext2_xattr_list_acl_access,
+ .get = ext2_xattr_get_acl_access,
+ .set = ext2_xattr_set_acl_access,
+};
+
+/*
+ * Handler table for the default ACL extended attribute: ties the
+ * XATTR_NAME_ACL_DEFAULT prefix to the list/get/set callbacks above.
+ */
+struct xattr_handler ext2_xattr_acl_default_handler = {
+ .prefix = XATTR_NAME_ACL_DEFAULT,
+ .list = ext2_xattr_list_acl_default,
+ .get = ext2_xattr_get_acl_default,
+ .set = ext2_xattr_set_acl_default,
+};
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
new file mode 100644
index 000000000000..fed96ae81a7d
--- /dev/null
+++ b/fs/ext2/acl.h
@@ -0,0 +1,82 @@
+/*
+ File: fs/ext2/acl.h
+
+ (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
+*/
+
+#include <linux/xattr_acl.h>
+
+#define EXT2_ACL_VERSION 0x0001
+
+/* On-disk ACL entry, long form: tag, permissions and id (little-endian). */
+typedef struct {
+ __le16 e_tag;
+ __le16 e_perm;
+ __le32 e_id;
+} ext2_acl_entry;
+
+/* On-disk ACL entry, short form: tag and permissions only, no id. */
+typedef struct {
+ __le16 e_tag;
+ __le16 e_perm;
+} ext2_acl_entry_short;
+
+/* On-disk ACL header: a little-endian version field (see EXT2_ACL_VERSION). */
+typedef struct {
+ __le32 a_version;
+} ext2_acl_header;
+
+/*
+ * Number of bytes an on-disk ACL with @count entries occupies: the
+ * header, up to four entries in short form, and every entry beyond the
+ * fourth in long form.
+ */
+static inline size_t ext2_acl_size(int count)
+{
+	size_t bytes = sizeof(ext2_acl_header);
+
+	if (count <= 4)
+		bytes += count * sizeof(ext2_acl_entry_short);
+	else
+		bytes += 4 * sizeof(ext2_acl_entry_short) +
+			 (count - 4) * sizeof(ext2_acl_entry);
+	return bytes;
+}
+
+/*
+ * Inverse of ext2_acl_size(): derive the entry count from the byte size
+ * of an on-disk ACL (header included). Returns -1 when the size does
+ * not correspond to a whole number of entries.
+ */
+static inline int ext2_acl_count(size_t size)
+{
+	ssize_t beyond_short;
+
+	size -= sizeof(ext2_acl_header);
+	/* Bytes left once four short entries are accounted for. */
+	beyond_short = size - 4 * sizeof(ext2_acl_entry_short);
+
+	if (beyond_short >= 0) {
+		if (beyond_short % sizeof(ext2_acl_entry))
+			return -1;
+		return beyond_short / sizeof(ext2_acl_entry) + 4;
+	}
+
+	/* Fewer than four entries: all of them are short. */
+	if (size % sizeof(ext2_acl_entry_short))
+		return -1;
+	return size / sizeof(ext2_acl_entry_short);
+}
+
+#ifdef CONFIG_EXT2_FS_POSIX_ACL
+
+/* Value for EXT2_I(inode)->i_acl and EXT2_I(inode)->i_default_acl
+ if the ACL has not been cached */
+#define EXT2_ACL_NOT_CACHED ((void *)-1)
+
+/* acl.c */
+extern int ext2_permission (struct inode *, int, struct nameidata *);
+extern int ext2_acl_chmod (struct inode *);
+extern int ext2_init_acl (struct inode *, struct inode *);
+
+#else
+#include <linux/sched.h>
+#define ext2_permission NULL
+#define ext2_get_acl NULL
+#define ext2_set_acl NULL
+
+/* Stub used when CONFIG_EXT2_FS_POSIX_ACL is not set: does nothing, returns 0. */
+static inline int
+ext2_acl_chmod (struct inode *inode)
+{
+ return 0;
+}
+
+/* Stub used when CONFIG_EXT2_FS_POSIX_ACL is not set: does nothing, returns 0. */
+static inline int ext2_init_acl (struct inode *inode, struct inode *dir)
+{
+ return 0;
+}
+#endif
+
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
new file mode 100644
index 000000000000..6591abef64d0
--- /dev/null
+++ b/fs/ext2/balloc.c
@@ -0,0 +1,699 @@
+/*
+ * linux/fs/ext2/balloc.c
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ * Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
+ * Big-endian to little-endian byte-swapping/bitmaps by
+ * David S. Miller (davem@caip.rutgers.edu), 1995
+ */
+
+#include <linux/config.h>
+#include "ext2.h"
+#include <linux/quotaops.h>
+#include <linux/sched.h>
+#include <linux/buffer_head.h>
+
+/*
+ * balloc.c contains the blocks allocation and deallocation routines
+ */
+
+/*
+ * The free blocks are managed by bitmaps. A file system contains several
+ * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap
+ * block for inodes, N blocks for the inode table and data blocks.
+ *
+ * The file system contains group descriptors which are located after the
+ * super block. Each descriptor contains the number of the bitmap block and
+ * the free blocks count in the block. The descriptors are loaded in memory
+ * when a file system is mounted (see ext2_read_super).
+ */
+
+
+/*
+ * True when b lies in the closed interval [first, first + len - 1].
+ * Note: b and first are each evaluated twice.
+ */
+#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
+
+/*
+ * Look up the in-memory group descriptor of @block_group.
+ *
+ * On success returns a pointer into the data of the descriptor buffer
+ * and, if @bh is non-NULL, stores that buffer head in *bh. Returns NULL
+ * (after reporting through ext2_error()) when @block_group is out of
+ * range or the descriptor buffer is not loaded.
+ */
+struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb,
+					     unsigned int block_group,
+					     struct buffer_head ** bh)
+{
+	struct ext2_sb_info *sbi = EXT2_SB(sb);
+	struct buffer_head *desc_bh;
+	unsigned long desc_block;	/* index into sbi->s_group_desc[] */
+	unsigned long slot;		/* descriptor index inside that buffer */
+
+	if (block_group >= sbi->s_groups_count) {
+		ext2_error (sb, "ext2_get_group_desc",
+			    "block_group >= groups_count - "
+			    "block_group = %d, groups_count = %lu",
+			    block_group, sbi->s_groups_count);
+		return NULL;
+	}
+
+	desc_block = block_group >> EXT2_DESC_PER_BLOCK_BITS(sb);
+	slot = block_group & (EXT2_DESC_PER_BLOCK(sb) - 1);
+
+	desc_bh = sbi->s_group_desc[desc_block];
+	if (!desc_bh) {
+		ext2_error (sb, "ext2_get_group_desc",
+			    "Group descriptor not loaded - "
+			    "block_group = %d, group_desc = %lu, desc = %lu",
+			    block_group, desc_block, slot);
+		return NULL;
+	}
+
+	if (bh)
+		*bh = desc_bh;
+	return (struct ext2_group_desc *) desc_bh->b_data + slot;
+}
+
+/*
+ * Read the block bitmap of @block_group with sb_bread(), using the
+ * bitmap block number recorded in the group descriptor.
+ *
+ * Returns the buffer_head on success, or NULL if the descriptor cannot
+ * be obtained or the read fails (the latter is reported via ext2_error()).
+ */
+static struct buffer_head *
+read_block_bitmap(struct super_block *sb, unsigned int block_group)
+{
+	struct ext2_group_desc *gdp;
+	struct buffer_head *bitmap_bh;
+
+	gdp = ext2_get_group_desc (sb, block_group, NULL);
+	if (!gdp)
+		return NULL;
+
+	bitmap_bh = sb_bread(sb, le32_to_cpu(gdp->bg_block_bitmap));
+	if (!bitmap_bh)
+		ext2_error (sb, "read_block_bitmap",
+			    "Cannot read block bitmap - "
+			    "block_group = %d, block_bitmap = %u",
+			    block_group, le32_to_cpu(gdp->bg_block_bitmap));
+	return bitmap_bh;
+}
+
+/*
+ * Reserve up to count blocks from the filesystem-wide free-block counter
+ * and return how many were actually reserved (possibly 0).
+ *
+ * Callers that lack CAP_SYS_RESOURCE and match neither s_resuid nor
+ * s_resgid may not dip into the s_r_blocks_count reserve.
+ *
+ * Set sb->s_dirt here because the superblock was "logically" altered. We
+ * need to recalculate its free blocks count and flush it out.
+ */
+static int reserve_blocks(struct super_block *sb, int count)
+{
+ struct ext2_sb_info *sbi = EXT2_SB(sb);
+ struct ext2_super_block *es = sbi->s_es;
+ unsigned free_blocks;
+ unsigned root_blocks;
+
+ free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
+ root_blocks = le32_to_cpu(es->s_r_blocks_count);
+
+ /* Never hand out more than the counter currently reports as free. */
+ if (free_blocks < count)
+ count = free_blocks;
+
+ if (free_blocks < root_blocks + count && !capable(CAP_SYS_RESOURCE) &&
+ sbi->s_resuid != current->fsuid &&
+ (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
+ /*
+ * We are too close to reserve and we are not privileged.
+ * Can we allocate anything at all?
+ */
+ if (free_blocks > root_blocks)
+ count = free_blocks - root_blocks;
+ else
+ return 0;
+ }
+
+ percpu_counter_mod(&sbi->s_freeblocks_counter, -count);
+ sb->s_dirt = 1;
+ return count;
+}
+
+/*
+ * Give @count blocks back to the filesystem-wide free-block counter and
+ * mark the superblock dirty. A zero @count is a no-op.
+ */
+static void release_blocks(struct super_block *sb, int count)
+{
+	struct ext2_sb_info *sbi;
+
+	if (!count)
+		return;
+
+	sbi = EXT2_SB(sb);
+	percpu_counter_mod(&sbi->s_freeblocks_counter, count);
+	sb->s_dirt = 1;
+}
+
+/*
+ * Reserve up to @count blocks from the free count of one block group.
+ *
+ * Returns the number of blocks actually reserved; 0 if the descriptor
+ * already shows no free blocks (checked without taking the lock). The
+ * descriptor update itself is done under the group's lock, after which
+ * the descriptor buffer @bh is marked dirty.
+ */
+static int group_reserve_blocks(struct ext2_sb_info *sbi, int group_no,
+	struct ext2_group_desc *desc, struct buffer_head *bh, int count)
+{
+	unsigned avail;
+
+	if (!desc->bg_free_blocks_count)
+		return 0;
+
+	spin_lock(sb_bgl_lock(sbi, group_no));
+	avail = le16_to_cpu(desc->bg_free_blocks_count);
+	if (avail < count)
+		count = avail;
+	desc->bg_free_blocks_count = cpu_to_le16(avail - count);
+	spin_unlock(sb_bgl_lock(sbi, group_no));
+
+	mark_buffer_dirty(bh);
+	return count;
+}
+
+/*
+ * Add @count blocks back to the free count of one block group, under
+ * the group's lock, then mark the superblock and the descriptor buffer
+ * @bh dirty. A zero @count is a no-op.
+ */
+static void group_release_blocks(struct super_block *sb, int group_no,
+	struct ext2_group_desc *desc, struct buffer_head *bh, int count)
+{
+	struct ext2_sb_info *sbi;
+	unsigned avail;
+
+	if (!count)
+		return;
+
+	sbi = EXT2_SB(sb);
+	spin_lock(sb_bgl_lock(sbi, group_no));
+	avail = le16_to_cpu(desc->bg_free_blocks_count);
+	desc->bg_free_blocks_count = cpu_to_le16(avail + count);
+	spin_unlock(sb_bgl_lock(sbi, group_no));
+
+	sb->s_dirt = 1;
+	mark_buffer_dirty(bh);
+}
+
+/*
+ * Free given blocks, update quota and i_blocks field
+ *
+ * Frees count blocks starting at block. The range is rejected (with an
+ * ext2_error) if it starts before s_first_data_block, wraps around, or
+ * ends past s_blocks_count. A range that crosses a block group boundary
+ * is processed one group at a time via the do_more loop. Whatever was
+ * actually freed is credited to the filesystem-wide counter and to the
+ * quota, also when leaving early through error_return.
+ */
+void ext2_free_blocks (struct inode * inode, unsigned long block,
+ unsigned long count)
+{
+ struct buffer_head *bitmap_bh = NULL;
+ struct buffer_head * bh2;
+ unsigned long block_group;
+ unsigned long bit;
+ unsigned long i;
+ unsigned long overflow;
+ struct super_block * sb = inode->i_sb;
+ struct ext2_sb_info * sbi = EXT2_SB(sb);
+ struct ext2_group_desc * desc;
+ struct ext2_super_block * es = sbi->s_es;
+ unsigned freed = 0, group_freed;
+
+ /* The middle test catches unsigned wrap-around of block + count. */
+ if (block < le32_to_cpu(es->s_first_data_block) ||
+ block + count < block ||
+ block + count > le32_to_cpu(es->s_blocks_count)) {
+ ext2_error (sb, "ext2_free_blocks",
+ "Freeing blocks not in datazone - "
+ "block = %lu, count = %lu", block, count);
+ goto error_return;
+ }
+
+ ext2_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1);
+
+do_more:
+ overflow = 0;
+ /* Group number and bit offset of block within that group's bitmap. */
+ block_group = (block - le32_to_cpu(es->s_first_data_block)) /
+ EXT2_BLOCKS_PER_GROUP(sb);
+ bit = (block - le32_to_cpu(es->s_first_data_block)) %
+ EXT2_BLOCKS_PER_GROUP(sb);
+ /*
+ * Check to see if we are freeing blocks across a group
+ * boundary.
+ */
+ if (bit + count > EXT2_BLOCKS_PER_GROUP(sb)) {
+ overflow = bit + count - EXT2_BLOCKS_PER_GROUP(sb);
+ count -= overflow;
+ }
+ /* Drop the previous group's bitmap (NULL on the first pass). */
+ brelse(bitmap_bh);
+ bitmap_bh = read_block_bitmap(sb, block_group);
+ if (!bitmap_bh)
+ goto error_return;
+
+ desc = ext2_get_group_desc (sb, block_group, &bh2);
+ if (!desc)
+ goto error_return;
+
+ /*
+ * Report (but do not abort) when the range touches the group's
+ * block bitmap, inode bitmap or inode table.
+ */
+ if (in_range (le32_to_cpu(desc->bg_block_bitmap), block, count) ||
+ in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) ||
+ in_range (block, le32_to_cpu(desc->bg_inode_table),
+ sbi->s_itb_per_group) ||
+ in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table),
+ sbi->s_itb_per_group))
+ ext2_error (sb, "ext2_free_blocks",
+ "Freeing blocks in system zones - "
+ "Block = %lu, count = %lu",
+ block, count);
+
+ /* Only bits that were actually set count as freed. */
+ for (i = 0, group_freed = 0; i < count; i++) {
+ if (!ext2_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
+ bit + i, bitmap_bh->b_data)) {
+ ext2_error(sb, __FUNCTION__,
+ "bit already cleared for block %lu", block + i);
+ } else {
+ group_freed++;
+ }
+ }
+
+ mark_buffer_dirty(bitmap_bh);
+ if (sb->s_flags & MS_SYNCHRONOUS)
+ sync_dirty_buffer(bitmap_bh);
+
+ group_release_blocks(sb, block_group, desc, bh2, group_freed);
+ freed += group_freed;
+
+ /* Continue with the part of the range that lies in the next group. */
+ if (overflow) {
+ block += count;
+ count = overflow;
+ goto do_more;
+ }
+error_return:
+ brelse(bitmap_bh);
+ release_blocks(sb, freed);
+ DQUOT_FREE_BLOCK(inode, freed);
+}
+
+/*
+ * Find a clear bit in the bitmap map (size bits), preferring goal, and
+ * set it atomically under lock.
+ *
+ * Returns the bit number that was claimed, or -1 if no clear bit was
+ * found. If the chosen bit turns out to be set already when claiming it,
+ * the search is restarted from that position.
+ */
+static int grab_block(spinlock_t *lock, char *map, unsigned size, int goal)
+{
+ int k;
+ char *p, *r;
+
+ /* Fast path: the goal bit itself is clear. */
+ if (!ext2_test_bit(goal, map))
+ goto got_it;
+
+repeat:
+ if (goal) {
+ /*
+ * The goal was occupied; search forward for a free
+ * block within the next XX blocks.
+ *
+ * end_goal is more or less random, but it has to be
+ * less than EXT2_BLOCKS_PER_GROUP. Aligning up to the
+ * next 64-bit boundary is simple..
+ */
+ /* k is goal rounded up to a multiple of 64. */
+ k = (goal + 63) & ~63;
+ goal = ext2_find_next_zero_bit(map, k, goal);
+ if (goal < k)
+ goto got_it;
+ /*
+ * Search in the remainder of the current group.
+ */
+ }
+
+ /* NOTE(review): memscan() presumably returns the first zero byte at or after p - confirm. */
+ p = map + (goal >> 3);
+ r = memscan(p, 0, (size - goal + 7) >> 3);
+ k = (r - map) << 3;
+ if (k < size) {
+ /*
+ * We have succeeded in finding a free byte in the block
+ * bitmap. Now search backwards to find the start of this
+ * group of free blocks - won't take more than 7 iterations.
+ */
+ for (goal = k; goal && !ext2_test_bit (goal - 1, map); goal--)
+ ;
+ goto got_it;
+ }
+
+ /* No wholly free byte: settle for any clear bit from goal up to size. */
+ k = ext2_find_next_zero_bit ((u32 *)map, size, goal);
+ if (k < size) {
+ goal = k;
+ goto got_it;
+ }
+ return -1;
+got_it:
+ /* A non-zero return means the bit was already set: search again. */
+ if (ext2_set_bit_atomic(lock, goal, (void *) map))
+ goto repeat;
+ return goal;
+}
+
+/*
+ * ext2_new_block uses a goal block to assist allocation. If the goal is
+ * free, or there is a free block within 32 blocks of the goal, that block
+ * is allocated. Otherwise a forward search is made for a free block; within
+ * each block group the search first looks for an entire free byte in the block
+ * bitmap, and then for any free bit if that fails.
+ * This function also updates quota and i_blocks field.
+ *
+ * NOTE(review): grab_block() searches up to the next multiple of 64 bits,
+ * not a fixed 32 blocks - confirm which figure is intended.
+ *
+ * @inode: inode the block is allocated for (supplies the superblock and
+ * is the subject of the quota calls)
+ * @goal: preferred filesystem block number; replaced by the first data
+ * block if it lies outside the filesystem
+ * @prealloc_count: in/out count of preallocated blocks. May be NULL, and
+ * if it is NULL or points to a non-zero value no
+ * preallocation is requested.
+ * @prealloc_block: out, first preallocated block (written only when
+ * preallocation is performed)
+ * @err: out, 0 on success, otherwise -EDQUOT, -ENOSPC or -EIO
+ *
+ * Returns the allocated block number, or 0 on failure.
+ */
+int ext2_new_block(struct inode *inode, unsigned long goal,
+ u32 *prealloc_count, u32 *prealloc_block, int *err)
+{
+ struct buffer_head *bitmap_bh = NULL;
+ struct buffer_head *gdp_bh; /* bh2 */
+ struct ext2_group_desc *desc;
+ int group_no; /* i */
+ int ret_block; /* j */
+ int group_idx; /* k */
+ int target_block; /* tmp */
+ int block = 0;
+ struct super_block *sb = inode->i_sb;
+ struct ext2_sb_info *sbi = EXT2_SB(sb);
+ struct ext2_super_block *es = sbi->s_es;
+ unsigned group_size = EXT2_BLOCKS_PER_GROUP(sb);
+ unsigned prealloc_goal = es->s_prealloc_blocks;
+ unsigned group_alloc = 0, es_alloc, dq_alloc;
+ int nr_scanned_groups;
+
+ /*
+ * prealloc_goal counts blocks wanted beyond the one being returned:
+ * the superblock value minus one, or the default minus one if the
+ * superblock value is zero.
+ */
+ if (!prealloc_goal--)
+ prealloc_goal = EXT2_DEFAULT_PREALLOC_BLOCKS - 1;
+ if (!prealloc_count || *prealloc_count)
+ prealloc_goal = 0;
+
+ if (DQUOT_ALLOC_BLOCK(inode, 1)) {
+ *err = -EDQUOT;
+ goto out;
+ }
+
+ /* Shrink the preallocation request until quota accepts it. */
+ while (prealloc_goal && DQUOT_PREALLOC_BLOCK(inode, prealloc_goal))
+ prealloc_goal--;
+
+ /*
+ * dq_alloc, es_alloc and group_alloc track what is currently charged
+ * to quota, reserved filesystem-wide and reserved in the group.
+ * Whatever is left in them at the end is given back.
+ */
+ dq_alloc = prealloc_goal + 1;
+ es_alloc = reserve_blocks(sb, dq_alloc);
+ if (!es_alloc) {
+ *err = -ENOSPC;
+ goto out_dquot;
+ }
+
+ ext2_debug ("goal=%lu.\n", goal);
+
+ if (goal < le32_to_cpu(es->s_first_data_block) ||
+ goal >= le32_to_cpu(es->s_blocks_count))
+ goal = le32_to_cpu(es->s_first_data_block);
+ group_no = (goal - le32_to_cpu(es->s_first_data_block)) / group_size;
+ desc = ext2_get_group_desc (sb, group_no, &gdp_bh);
+ if (!desc) {
+ /*
+ * gdp_bh may still be uninitialised. But group_release_blocks
+ * will not touch it because group_alloc is zero.
+ */
+ goto io_error;
+ }
+
+ /* First attempt: the group that contains the goal block. */
+ group_alloc = group_reserve_blocks(sbi, group_no, desc,
+ gdp_bh, es_alloc);
+ if (group_alloc) {
+ ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) %
+ group_size);
+ brelse(bitmap_bh);
+ bitmap_bh = read_block_bitmap(sb, group_no);
+ if (!bitmap_bh)
+ goto io_error;
+
+ ext2_debug("goal is at %d:%d.\n", group_no, ret_block);
+
+ ret_block = grab_block(sb_bgl_lock(sbi, group_no),
+ bitmap_bh->b_data, group_size, ret_block);
+ if (ret_block >= 0)
+ goto got_block;
+ group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
+ group_alloc = 0;
+ }
+
+ ext2_debug ("Bit not found in block group %d.\n", group_no);
+
+ /*
+ * Now search the rest of the groups. We assume that
+ * group_no and desc correctly point to the last group visited.
+ */
+ nr_scanned_groups = 0;
+retry:
+ for (group_idx = 0; !group_alloc &&
+ group_idx < sbi->s_groups_count; group_idx++) {
+ group_no++;
+ if (group_no >= sbi->s_groups_count)
+ group_no = 0;
+ desc = ext2_get_group_desc(sb, group_no, &gdp_bh);
+ if (!desc)
+ goto io_error;
+ group_alloc = group_reserve_blocks(sbi, group_no, desc,
+ gdp_bh, es_alloc);
+ }
+ if (!group_alloc) {
+ *err = -ENOSPC;
+ goto out_release;
+ }
+ brelse(bitmap_bh);
+ bitmap_bh = read_block_bitmap(sb, group_no);
+ if (!bitmap_bh)
+ goto io_error;
+
+ ret_block = grab_block(sb_bgl_lock(sbi, group_no), bitmap_bh->b_data,
+ group_size, 0);
+ if (ret_block < 0) {
+ /*
+ * If a free block counter is corrupted we can loop infinitely.
+ * Detect that here.
+ */
+ nr_scanned_groups++;
+ if (nr_scanned_groups > 2 * sbi->s_groups_count) {
+ ext2_error(sb, "ext2_new_block",
+ "corrupted free blocks counters");
+ goto io_error;
+ }
+ /*
+ * Someone else grabbed the last free block in this blockgroup
+ * before us. Retry the scan.
+ */
+ group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
+ group_alloc = 0;
+ goto retry;
+ }
+
+got_block:
+ ext2_debug("using block group %d(%d)\n",
+ group_no, desc->bg_free_blocks_count);
+
+ /* Convert the group-relative bit number into a filesystem block. */
+ target_block = ret_block + group_no * group_size +
+ le32_to_cpu(es->s_first_data_block);
+
+ if (target_block == le32_to_cpu(desc->bg_block_bitmap) ||
+ target_block == le32_to_cpu(desc->bg_inode_bitmap) ||
+ in_range(target_block, le32_to_cpu(desc->bg_inode_table),
+ sbi->s_itb_per_group))
+ ext2_error (sb, "ext2_new_block",
+ "Allocating block in system zone - "
+ "block = %u", target_block);
+
+ if (target_block >= le32_to_cpu(es->s_blocks_count)) {
+ ext2_error (sb, "ext2_new_block",
+ "block(%d) >= blocks count(%d) - "
+ "block_group = %d, es == %p ", ret_block,
+ le32_to_cpu(es->s_blocks_count), group_no, es);
+ goto io_error;
+ }
+ block = target_block;
+
+ /* OK, we _had_ allocated something */
+ ext2_debug("found bit %d\n", ret_block);
+
+ /* One unit of each reservation is consumed by the returned block. */
+ dq_alloc--;
+ es_alloc--;
+ group_alloc--;
+
+ /*
+ * Do block preallocation now if required.
+ */
+ write_lock(&EXT2_I(inode)->i_meta_lock);
+ if (group_alloc && !*prealloc_count) {
+ unsigned n;
+
+ /*
+ * Claim the bits directly after the allocated one, stopping at
+ * the first bit that is already set or at the end of the group.
+ */
+ for (n = 0; n < group_alloc && ++ret_block < group_size; n++) {
+ if (ext2_set_bit_atomic(sb_bgl_lock(sbi, group_no),
+ ret_block,
+ (void*) bitmap_bh->b_data))
+ break;
+ }
+ *prealloc_block = block + 1;
+ *prealloc_count = n;
+ es_alloc -= n;
+ dq_alloc -= n;
+ group_alloc -= n;
+ }
+ write_unlock(&EXT2_I(inode)->i_meta_lock);
+
+ mark_buffer_dirty(bitmap_bh);
+ if (sb->s_flags & MS_SYNCHRONOUS)
+ sync_dirty_buffer(bitmap_bh);
+
+ ext2_debug ("allocating block %d. ", block);
+
+ *err = 0;
+ /* Give back every reservation that was not turned into a block. */
+out_release:
+ group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
+ release_blocks(sb, es_alloc);
+out_dquot:
+ DQUOT_FREE_BLOCK(inode, dq_alloc);
+out:
+ brelse(bitmap_bh);
+ return block;
+
+io_error:
+ *err = -EIO;
+ goto out_release;
+}
+
+/*
+ * ext2_count_free_blocks - total number of free blocks in the filesystem.
+ *
+ * Without EXT2FS_DEBUG this is the sum of bg_free_blocks_count over all
+ * group descriptors; groups whose descriptor cannot be obtained are skipped.
+ *
+ * With EXT2FS_DEBUG the function takes lock_super(), also counts the clear
+ * bits of every block bitmap, prints both figures, and returns the
+ * bitmap-derived count instead.
+ */
+unsigned long ext2_count_free_blocks (struct super_block * sb)
+{
+ struct ext2_group_desc * desc;
+ unsigned long desc_count = 0;
+ int i;
+#ifdef EXT2FS_DEBUG
+ unsigned long bitmap_count, x;
+ struct ext2_super_block *es;
+
+ lock_super (sb);
+ es = EXT2_SB(sb)->s_es;
+ desc_count = 0;
+ bitmap_count = 0;
+ desc = NULL;
+ for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
+ struct buffer_head *bitmap_bh;
+ desc = ext2_get_group_desc (sb, i, NULL);
+ if (!desc)
+ continue;
+ desc_count += le16_to_cpu(desc->bg_free_blocks_count);
+ bitmap_bh = read_block_bitmap(sb, i);
+ if (!bitmap_bh)
+ continue;
+
+ /* x = number of clear bits in this group's bitmap */
+ x = ext2_count_free(bitmap_bh, sb->s_blocksize);
+ printk ("group %d: stored = %d, counted = %lu\n",
+ i, le16_to_cpu(desc->bg_free_blocks_count), x);
+ bitmap_count += x;
+ brelse(bitmap_bh);
+ }
+ printk("ext2_count_free_blocks: stored = %lu, computed = %lu, %lu\n",
+ (long)le32_to_cpu(es->s_free_blocks_count),
+ desc_count, bitmap_count);
+ unlock_super (sb);
+ return bitmap_count;
+#else
+ for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
+ desc = ext2_get_group_desc (sb, i, NULL);
+ if (!desc)
+ continue;
+ desc_count += le16_to_cpu(desc->bg_free_blocks_count);
+ }
+ return desc_count;
+#endif
+}
+
+/*
+ * Test the bitmap bit that corresponds to filesystem block @block. The bit
+ * index is the block's offset from the first data block, taken modulo the
+ * number of blocks per group. Returns the result of ext2_test_bit().
+ */
+static inline int
+block_in_use(unsigned long block, struct super_block *sb, unsigned char *map)
+{
+	unsigned long rel;
+
+	rel = block - le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block);
+	rel %= EXT2_BLOCKS_PER_GROUP(sb);
+	return ext2_test_bit (rel, map);
+}
+
+/*
+ * test_root - is @a a positive integral power of @b (b, b*b, b*b*b, ...)?
+ * @a: value to test
+ * @b: base; must be greater than 1
+ *
+ * Returns 1 if @a equals @b raised to some power >= 1, otherwise 0.
+ *
+ * The check divides @a down instead of multiplying a running power up, so
+ * no intermediate value can exceed @a and signed overflow is impossible.
+ */
+static inline int test_root(int a, int b)
+{
+	while (a > b) {
+		if (a % b)
+			return 0;
+		a /= b;
+	}
+	return a == b;
+}
+
+/*
+ * Returns 1 for groups 0 and 1 and for any group number that is a power
+ * of 3, 5 or 7; returns 0 for every other group.
+ */
+static int ext2_group_sparse(int group)
+{
+	if (group <= 1)
+		return 1;
+	if (test_root(group, 3))
+		return 1;
+	if (test_root(group, 5))
+		return 1;
+	return test_root(group, 7) ? 1 : 0;
+}
+
+/**
+ * ext2_bg_has_super - number of blocks used by the superblock in group
+ * @sb: superblock for filesystem
+ * @group: group number to check
+ *
+ * Return the number of blocks used by the superblock (primary or backup)
+ * in this group. Currently this will be only 0 or 1: without the
+ * SPARSE_SUPER feature every group counts 1, with it only the groups
+ * accepted by ext2_group_sparse() do.
+ */
+int ext2_bg_has_super(struct super_block *sb, int group)
+{
+	if (!EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER))
+		return 1;
+	return ext2_group_sparse(group) ? 1 : 0;
+}
+
+/**
+ * ext2_bg_num_gdb - number of blocks used by the group table in group
+ * @sb: superblock for filesystem
+ * @group: group number to check
+ *
+ * Return the number of blocks used by the group descriptor table
+ * (primary or backup) in this group: s_gdb_count, or 0 when the
+ * SPARSE_SUPER feature is set and ext2_group_sparse() rejects @group.
+ * In the future there may be a different number of descriptor blocks
+ * in each group.
+ */
+unsigned long ext2_bg_num_gdb(struct super_block *sb, int group)
+{
+	int sparse_fs_skips_group =
+		EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER) &&
+		!ext2_group_sparse(group);
+
+	if (sparse_fs_skips_group)
+		return 0;
+	return EXT2_SB(sb)->s_gdb_count;
+}
+
+#ifdef CONFIG_EXT2_CHECK
+/*
+ * Called at mount-time, super-block is locked.
+ *
+ * Consistency check of every group's block bitmap. For each group it
+ * reports via ext2_error() if the superblock copy, a descriptor block, the
+ * block bitmap, the inode bitmap or any inode table block is marked free,
+ * and if the number of clear bits differs from the descriptor's
+ * bg_free_blocks_count. Finally the sum of clear bits is compared with the
+ * superblock's s_free_blocks_count. Groups whose descriptor or bitmap
+ * cannot be read are skipped.
+ */
+void ext2_check_blocks_bitmap (struct super_block * sb)
+{
+ struct buffer_head *bitmap_bh = NULL;
+ struct ext2_super_block * es;
+ unsigned long desc_count, bitmap_count, x, j;
+ unsigned long desc_blocks;
+ struct ext2_group_desc * desc;
+ int i;
+
+ es = EXT2_SB(sb)->s_es;
+ desc_count = 0;
+ bitmap_count = 0;
+ desc = NULL;
+ for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
+ desc = ext2_get_group_desc (sb, i, NULL);
+ if (!desc)
+ continue;
+ desc_count += le16_to_cpu(desc->bg_free_blocks_count);
+ /* drop the previous group's bitmap before reading this one */
+ brelse(bitmap_bh);
+ bitmap_bh = read_block_bitmap(sb, i);
+ if (!bitmap_bh)
+ continue;
+
+ /* bit 0 is checked for the superblock copy */
+ if (ext2_bg_has_super(sb, i) &&
+ !ext2_test_bit(0, bitmap_bh->b_data))
+ ext2_error(sb, __FUNCTION__,
+ "Superblock in group %d is marked free", i);
+
+ /* bits 1..desc_blocks are checked for the descriptor blocks */
+ desc_blocks = ext2_bg_num_gdb(sb, i);
+ for (j = 0; j < desc_blocks; j++)
+ if (!ext2_test_bit(j + 1, bitmap_bh->b_data))
+ ext2_error(sb, __FUNCTION__,
+ "Descriptor block #%ld in group "
+ "%d is marked free", j, i);
+
+ if (!block_in_use(le32_to_cpu(desc->bg_block_bitmap),
+ sb, bitmap_bh->b_data))
+ ext2_error(sb, "ext2_check_blocks_bitmap",
+ "Block bitmap for group %d is marked free",
+ i);
+
+ if (!block_in_use(le32_to_cpu(desc->bg_inode_bitmap),
+ sb, bitmap_bh->b_data))
+ ext2_error(sb, "ext2_check_blocks_bitmap",
+ "Inode bitmap for group %d is marked free",
+ i);
+
+ for (j = 0; j < EXT2_SB(sb)->s_itb_per_group; j++)
+ if (!block_in_use(le32_to_cpu(desc->bg_inode_table) + j,
+ sb, bitmap_bh->b_data))
+ ext2_error (sb, "ext2_check_blocks_bitmap",
+ "Block #%ld of the inode table in "
+ "group %d is marked free", j, i);
+
+ /* x = clear bits in this group's bitmap */
+ x = ext2_count_free(bitmap_bh, sb->s_blocksize);
+ if (le16_to_cpu(desc->bg_free_blocks_count) != x)
+ ext2_error (sb, "ext2_check_blocks_bitmap",
+ "Wrong free blocks count for group %d, "
+ "stored = %d, counted = %lu", i,
+ le16_to_cpu(desc->bg_free_blocks_count), x);
+ bitmap_count += x;
+ }
+ if (le32_to_cpu(es->s_free_blocks_count) != bitmap_count)
+ ext2_error (sb, "ext2_check_blocks_bitmap",
+ "Wrong free blocks count in super block, "
+ "stored = %lu, counted = %lu",
+ (unsigned long)le32_to_cpu(es->s_free_blocks_count),
+ bitmap_count);
+ brelse(bitmap_bh);
+}
+#endif
diff --git a/fs/ext2/bitmap.c b/fs/ext2/bitmap.c
new file mode 100644
index 000000000000..20145b74623f
--- /dev/null
+++ b/fs/ext2/bitmap.c
@@ -0,0 +1,25 @@
+/*
+ * linux/fs/ext2/bitmap.c
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ */
+
+#include <linux/buffer_head.h>
+
+/* nibblemap[v] is the number of zero bits in the 4-bit value v. */
+static int nibblemap[] = {
+	4, 3, 3, 2, 3, 2, 2, 1,
+	3, 2, 2, 1, 2, 1, 1, 0
+};
+
+/*
+ * ext2_count_free - count the zero bits in the first @numchars bytes of
+ * the buffer's data. Returns 0 if @map is NULL.
+ */
+unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars)
+{
+	unsigned long zero_bits = 0;
+	unsigned int pos = 0;
+
+	if (!map)
+		return 0;
+
+	while (pos < numchars) {
+		/* low nibble, then high nibble of the same byte */
+		zero_bits += nibblemap[map->b_data[pos] & 0xf] +
+			     nibblemap[(map->b_data[pos] >> 4) & 0xf];
+		pos++;
+	}
+	return zero_bits;
+}
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
new file mode 100644
index 000000000000..5b5f52876b42
--- /dev/null
+++ b/fs/ext2/dir.c
@@ -0,0 +1,673 @@
+/*
+ * linux/fs/ext2/dir.c
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ * from
+ *
+ * linux/fs/minix/dir.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * ext2 directory handling functions
+ *
+ * Big-endian to little-endian byte-swapping/bitmaps by
+ * David S. Miller (davem@caip.rutgers.edu), 1995
+ *
+ * All code that works with directory layout had been switched to pagecache
+ * and moved here. AV
+ */
+
+#include "ext2.h"
+#include <linux/pagemap.h>
+#include <linux/smp_lock.h>
+
+typedef struct ext2_dir_entry_2 ext2_dirent;
+
+/*
+ * ext2 uses block-sized chunks. Arguably, sector-sized ones would be
+ * more robust, but we have what we have.
+ *
+ * Returns the chunk size for @inode's directory data, which is the block
+ * size of the inode's superblock.
+ */
+static inline unsigned ext2_chunk_size(struct inode *inode)
+{
+ return inode->i_sb->s_blocksize;
+}
+
+/*
+ * Undo ext2_get_page(): unmap the page and drop the page-cache reference.
+ */
+static inline void ext2_put_page(struct page *page)
+{
+ kunmap(page);
+ page_cache_release(page);
+}
+
+/*
+ * Number of page-cache pages needed to cover i_size bytes of @inode,
+ * rounding a partial last page up.
+ */
+static inline unsigned long dir_pages(struct inode *inode)
+{
+ return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT;
+}
+
+/*
+ * Return the offset into page `page_nr' of the last valid
+ * byte in that page, plus one. The result is the number of bytes of
+ * i_size that lie at or beyond the start of the page, capped at
+ * PAGE_CACHE_SIZE.
+ */
+static unsigned
+ext2_last_byte(struct inode *inode, unsigned long page_nr)
+{
+	unsigned remaining = inode->i_size;
+
+	remaining -= page_nr << PAGE_CACHE_SHIFT;
+	return remaining > PAGE_CACHE_SIZE ? PAGE_CACHE_SIZE : remaining;
+}
+
+/*
+ * Commit the byte range [from, to) of a locked directory page.
+ *
+ * Bumps the directory's i_version and calls the mapping's commit_write.
+ * For a DIRSYNC directory the page is then passed to write_one_page() and
+ * its result is returned; otherwise the page is unlocked and 0 is returned.
+ */
+static int ext2_commit_chunk(struct page *page, unsigned from, unsigned to)
+{
+	struct address_space *mapping = page->mapping;
+	struct inode *dir = mapping->host;
+
+	dir->i_version++;
+	mapping->a_ops->commit_write(NULL, page, from, to);
+
+	if (!IS_DIRSYNC(dir)) {
+		unlock_page(page);
+		return 0;
+	}
+	return write_one_page(page, 1);
+}
+
+/*
+ * ext2_check_page - validate the directory entries held in @page.
+ *
+ * Walks the entries up to the end of the page (or up to i_size when this
+ * is the page containing it) and checks each one: rec_len is at least the
+ * minimum, is a multiple of 4, is large enough for name_len, does not
+ * cross a chunk boundary, and the inode number does not exceed
+ * s_inodes_count. The walk must also end exactly at the limit.
+ *
+ * The page is always marked Checked; on any failure the problem is
+ * reported through ext2_error() and the page is also marked Error.
+ */
+static void ext2_check_page(struct page *page)
+{
+	struct inode *dir = page->mapping->host;
+	struct super_block *sb = dir->i_sb;
+	unsigned chunk_size = ext2_chunk_size(dir);
+	char *kaddr = page_address(page);
+	u32 max_inumber = le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count);
+	unsigned offs, rec_len;
+	unsigned limit = PAGE_CACHE_SIZE;
+	ext2_dirent *p;
+	char *error;
+
+	/* The page that contains i_size is only valid up to i_size. */
+	if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) {
+		limit = dir->i_size & ~PAGE_CACHE_MASK;
+		if (limit & (chunk_size - 1))
+			goto Ebadsize;
+		if (!limit)
+			goto out;
+	}
+	for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) {
+		p = (ext2_dirent *)(kaddr + offs);
+		rec_len = le16_to_cpu(p->rec_len);
+
+		if (rec_len < EXT2_DIR_REC_LEN(1))
+			goto Eshort;
+		if (rec_len & 3)
+			goto Ealign;
+		if (rec_len < EXT2_DIR_REC_LEN(p->name_len))
+			goto Enamelen;
+		/* first and last byte of the entry must share a chunk */
+		if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1))
+			goto Espan;
+		if (le32_to_cpu(p->inode) > max_inumber)
+			goto Einumber;
+	}
+	if (offs != limit)
+		goto Eend;
+out:
+	SetPageChecked(page);
+	return;
+
+	/* Too bad, we had an error */
+
+Ebadsize:
+	ext2_error(sb, "ext2_check_page",
+		"size of directory #%lu is not a multiple of chunk size",
+		dir->i_ino
+	);
+	goto fail;
+Eshort:
+	error = "rec_len is smaller than minimal";
+	goto bad_entry;
+Ealign:
+	error = "unaligned directory entry";
+	goto bad_entry;
+Enamelen:
+	error = "rec_len is too small for name_len";
+	goto bad_entry;
+Espan:
+	error = "directory entry across blocks";
+	goto bad_entry;
+Einumber:
+	error = "inode out of bounds";
+bad_entry:
+	ext2_error (sb, "ext2_check_page", "bad entry in directory #%lu: %s - "
+		"offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
+		dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs,
+		(unsigned long) le32_to_cpu(p->inode),
+		rec_len, p->name_len);
+	goto fail;
+Eend:
+	p = (ext2_dirent *)(kaddr + offs);
+	/*
+	 * The two string pieces are concatenated by the compiler, so the
+	 * separator has to be spelled out to keep the fields apart.
+	 */
+	ext2_error (sb, "ext2_check_page",
+		"entry in directory #%lu spans the page boundary - "
+		"offset=%lu, inode=%lu",
+		dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs,
+		(unsigned long) le32_to_cpu(p->inode));
+fail:
+	SetPageChecked(page);
+	SetPageError(page);
+}
+
+/*
+ * Read page @n of directory @dir through the page cache and return it
+ * kmapped. A page that has not been checked yet is run through
+ * ext2_check_page() first.
+ *
+ * Returns the page, the ERR_PTR from read_cache_page() if the read itself
+ * failed, or ERR_PTR(-EIO) if the page is not up to date or is flagged
+ * with an error (the page is released in those two cases).
+ */
+static struct page * ext2_get_page(struct inode *dir, unsigned long n)
+{
+	struct address_space *mapping = dir->i_mapping;
+	struct page *page;
+
+	page = read_cache_page(mapping, n,
+				(filler_t*)mapping->a_ops->readpage, NULL);
+	if (IS_ERR(page))
+		return page;
+
+	wait_on_page_locked(page);
+	kmap(page);
+	if (PageUptodate(page)) {
+		if (!PageChecked(page))
+			ext2_check_page(page);
+		if (!PageError(page))
+			return page;
+	}
+
+	ext2_put_page(page);
+	return ERR_PTR(-EIO);
+}
+
+/*
+ * NOTE! unlike strncmp, ext2_match returns 1 for success, 0 for failure.
+ *
+ * An entry matches when it is in use (non-zero inode), its name_len equals
+ * len, and the first len bytes of its name equal name.
+ *
+ * len <= EXT2_NAME_LEN and de != NULL are guaranteed by caller.
+ */
+static inline int ext2_match (int len, const char * const name,
+					struct ext2_dir_entry_2 * de)
+{
+	if (!de->inode || de->name_len != len)
+		return 0;
+	return memcmp(name, de->name, len) == 0;
+}
+
+/*
+ * p is at least 6 bytes before the end of page
+ *
+ * Returns the entry that starts rec_len bytes after @p. A rec_len of zero
+ * yields @p itself, so callers that loop must check for it.
+ */
+static inline ext2_dirent *ext2_next_entry(ext2_dirent *p)
+{
+ return (ext2_dirent *)((char*)p + le16_to_cpu(p->rec_len));
+}
+
+/*
+ * Re-synchronise @offset with an entry boundary. Starting at the offset
+ * obtained by masking @offset with @mask, step from entry to entry until
+ * @offset is reached or passed, or an entry with a zero rec_len is met.
+ * Returns the offset (relative to @base) at which the walk stopped.
+ */
+static inline unsigned
+ext2_validate_entry(char *base, unsigned offset, unsigned mask)
+{
+	char *target = base + offset;
+	char *cur = base + (offset&mask);
+
+	while (cur < target) {
+		ext2_dirent *ent = (ext2_dirent *)cur;
+
+		if (ent->rec_len == 0)
+			break;
+		cur += le16_to_cpu(ent->rec_len);
+	}
+	return cur - base;
+}
+
+/* Maps an on-disk EXT2_FT_* directory entry type to the DT_* readdir type. */
+static unsigned char ext2_filetype_table[EXT2_FT_MAX] = {
+ [EXT2_FT_UNKNOWN] = DT_UNKNOWN,
+ [EXT2_FT_REG_FILE] = DT_REG,
+ [EXT2_FT_DIR] = DT_DIR,
+ [EXT2_FT_CHRDEV] = DT_CHR,
+ [EXT2_FT_BLKDEV] = DT_BLK,
+ [EXT2_FT_FIFO] = DT_FIFO,
+ [EXT2_FT_SOCK] = DT_SOCK,
+ [EXT2_FT_SYMLINK] = DT_LNK,
+};
+
+/*
+ * Maps the file-type bits of i_mode, shifted right by S_SHIFT, to the
+ * EXT2_FT_* value stored in a directory entry. Slots without an explicit
+ * initializer are zero.
+ */
+#define S_SHIFT 12
+static unsigned char ext2_type_by_mode[S_IFMT >> S_SHIFT] = {
+ [S_IFREG >> S_SHIFT] = EXT2_FT_REG_FILE,
+ [S_IFDIR >> S_SHIFT] = EXT2_FT_DIR,
+ [S_IFCHR >> S_SHIFT] = EXT2_FT_CHRDEV,
+ [S_IFBLK >> S_SHIFT] = EXT2_FT_BLKDEV,
+ [S_IFIFO >> S_SHIFT] = EXT2_FT_FIFO,
+ [S_IFSOCK >> S_SHIFT] = EXT2_FT_SOCK,
+ [S_IFLNK >> S_SHIFT] = EXT2_FT_SYMLINK,
+};
+
+/*
+ * Fill in de->file_type from @inode's mode. The type is only recorded when
+ * the filesystem has the FILETYPE incompat feature; otherwise it is set
+ * to 0.
+ */
+static inline void ext2_set_de_type(ext2_dirent *de, struct inode *inode)
+{
+	mode_t mode = inode->i_mode;
+
+	if (!EXT2_HAS_INCOMPAT_FEATURE(inode->i_sb, EXT2_FEATURE_INCOMPAT_FILETYPE)) {
+		de->file_type = 0;
+		return;
+	}
+	de->file_type = ext2_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
+}
+
+/*
+ * ext2_readdir - feed directory entries to @filldir starting at f_pos.
+ * @filp: open directory; f_pos is advanced past every entry consumed
+ * @dirent: opaque cookie passed through to @filldir
+ * @filldir: callback invoked for each in-use entry
+ *
+ * Returns 0 when the end of the directory is reached or @filldir asks to
+ * stop, and -EIO if a page cannot be obtained or a zero-length entry is
+ * found. f_version is refreshed from the inode on every exit.
+ */
+static int
+ext2_readdir (struct file * filp, void * dirent, filldir_t filldir)
+{
+ loff_t pos = filp->f_pos;
+ struct inode *inode = filp->f_dentry->d_inode;
+ struct super_block *sb = inode->i_sb;
+ unsigned int offset = pos & ~PAGE_CACHE_MASK;
+ unsigned long n = pos >> PAGE_CACHE_SHIFT;
+ unsigned long npages = dir_pages(inode);
+ unsigned chunk_mask = ~(ext2_chunk_size(inode)-1);
+ unsigned char *types = NULL;
+ /* the directory changed since f_pos was recorded */
+ int need_revalidate = (filp->f_version != inode->i_version);
+ int ret;
+
+ /* no room left for even a minimal entry: nothing more to return */
+ if (pos > inode->i_size - EXT2_DIR_REC_LEN(1))
+ goto success;
+
+ if (EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_FILETYPE))
+ types = ext2_filetype_table;
+
+ for ( ; n < npages; n++, offset = 0) {
+ char *kaddr, *limit;
+ ext2_dirent *de;
+ struct page *page = ext2_get_page(inode, n);
+
+ if (IS_ERR(page)) {
+ ext2_error(sb, __FUNCTION__,
+ "bad page in #%lu",
+ inode->i_ino);
+ /* move f_pos to the start of the next page */
+ filp->f_pos += PAGE_CACHE_SIZE - offset;
+ ret = -EIO;
+ goto done;
+ }
+ kaddr = page_address(page);
+ if (need_revalidate) {
+ /* re-align the saved offset with an entry boundary */
+ offset = ext2_validate_entry(kaddr, offset, chunk_mask);
+ need_revalidate = 0;
+ }
+ de = (ext2_dirent *)(kaddr+offset);
+ limit = kaddr + ext2_last_byte(inode, n) - EXT2_DIR_REC_LEN(1);
+ for ( ;(char*)de <= limit; de = ext2_next_entry(de)) {
+ if (de->rec_len == 0) {
+ ext2_error(sb, __FUNCTION__,
+ "zero-length directory entry");
+ ret = -EIO;
+ ext2_put_page(page);
+ goto done;
+ }
+ if (de->inode) {
+ int over;
+ unsigned char d_type = DT_UNKNOWN;
+
+ if (types && de->file_type < EXT2_FT_MAX)
+ d_type = types[de->file_type];
+
+ offset = (char *)de - kaddr;
+ over = filldir(dirent, de->name, de->name_len,
+ (n<<PAGE_CACHE_SHIFT) | offset,
+ le32_to_cpu(de->inode), d_type);
+ /* non-zero: stop here without advancing f_pos */
+ if (over) {
+ ext2_put_page(page);
+ goto success;
+ }
+ }
+ filp->f_pos += le16_to_cpu(de->rec_len);
+ }
+ ext2_put_page(page);
+ }
+
+success:
+ ret = 0;
+done:
+ filp->f_version = inode->i_version;
+ return ret;
+}
+
+/*
+ *	ext2_find_entry()
+ *
+ * finds an entry in the specified directory with the wanted name. It
+ * returns the page in which the entry was found, and the entry itself
+ * (as a parameter - res_dir). Page is returned mapped and unlocked.
+ * Entry is guaranteed to be valid.
+ *
+ * @dir: directory to search
+ * @dentry: supplies the name to look for
+ * @res_page: out, page holding the entry; always written, and NULL
+ *            whenever no entry is returned
+ *
+ * The scan starts at the page remembered in i_dir_start_lookup and wraps
+ * around; pages that cannot be read are skipped. Returns the entry, or
+ * NULL if the name is absent or a zero-length entry is encountered.
+ */
+struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir,
+			struct dentry *dentry, struct page ** res_page)
+{
+	const char *name = dentry->d_name.name;
+	int namelen = dentry->d_name.len;
+	unsigned reclen = EXT2_DIR_REC_LEN(namelen);
+	unsigned long start, n;
+	unsigned long npages = dir_pages(dir);
+	struct page *page = NULL;
+	struct ext2_inode_info *ei = EXT2_I(dir);
+	ext2_dirent * de;
+
+	/*
+	 * Initialise the out parameter before any exit so that the
+	 * empty-directory path does not leave it undefined.
+	 */
+	/* OFFSET_CACHE */
+	*res_page = NULL;
+
+	if (npages == 0)
+		goto out;
+
+	start = ei->i_dir_start_lookup;
+	if (start >= npages)
+		start = 0;
+	n = start;
+	do {
+		char *kaddr;
+		page = ext2_get_page(dir, n);
+		if (!IS_ERR(page)) {
+			kaddr = page_address(page);
+			de = (ext2_dirent *) kaddr;
+			/* last position where an entry of reclen bytes fits */
+			kaddr += ext2_last_byte(dir, n) - reclen;
+			while ((char *) de <= kaddr) {
+				if (de->rec_len == 0) {
+					ext2_error(dir->i_sb, __FUNCTION__,
+						"zero-length directory entry");
+					ext2_put_page(page);
+					goto out;
+				}
+				if (ext2_match (namelen, name, de))
+					goto found;
+				de = ext2_next_entry(de);
+			}
+			ext2_put_page(page);
+		}
+		if (++n >= npages)
+			n = 0;
+	} while (n != start);
+out:
+	return NULL;
+
+found:
+	*res_page = page;
+	/* remember where we succeeded to speed up the next lookup */
+	ei->i_dir_start_lookup = n;
+	return de;
+}
+
+/*
+ * Return the second entry of @dir's first page, handing the mapped page
+ * back through @p. Returns NULL, leaving @p untouched, if the page cannot
+ * be obtained.
+ */
+struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p)
+{
+	struct page *first = ext2_get_page(dir, 0);
+
+	if (IS_ERR(first))
+		return NULL;
+
+	*p = first;
+	return ext2_next_entry((ext2_dirent *) page_address(first));
+}
+
+/*
+ * Look up @dentry's name in @dir and return the inode number stored in
+ * the matching entry, or 0 if there is no such entry. The page returned by
+ * ext2_find_entry() is released before returning.
+ */
+ino_t ext2_inode_by_name(struct inode * dir, struct dentry *dentry)
+{
+	struct page *page;
+	struct ext2_dir_entry_2 *de = ext2_find_entry (dir, dentry, &page);
+	ino_t ino;
+
+	if (!de)
+		return 0;
+
+	ino = le32_to_cpu(de->inode);
+	ext2_put_page(page);
+	return ino;
+}
+
+/*
+ * ext2_set_link - point an existing directory entry at another inode.
+ * @dir: directory containing the entry
+ * @de: entry to rewrite, located inside @page
+ * @page: mapped page holding @de
+ * @inode: inode the entry should now refer to
+ *
+ * Rewrites the entry's inode number and file type, commits the change and
+ * updates the directory's mtime/ctime. Releases the page. A prepare_write
+ * failure triggers BUG(); the result of the commit is not reported to the
+ * caller.
+ */
+void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
+ struct page *page, struct inode *inode)
+{
+ /* byte range of the entry within the page */
+ unsigned from = (char *) de - (char *) page_address(page);
+ unsigned to = from + le16_to_cpu(de->rec_len);
+ int err;
+
+ lock_page(page);
+ err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
+ if (err)
+ BUG();
+ de->inode = cpu_to_le32(inode->i_ino);
+ ext2_set_de_type (de, inode);
+ /* ext2_commit_chunk() unlocks the page or writes it out */
+ err = ext2_commit_chunk(page, from, to);
+ ext2_put_page(page);
+ dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
+ EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;
+ mark_inode_dirty(dir);
+}
+
+/*
+ * Parent is locked.
+ *
+ * ext2_add_link - add an entry for @dentry's name, referring to @inode, to
+ * the parent directory of @dentry.
+ *
+ * The entry goes into the first suitable place: an unused entry that is
+ * large enough, the slack behind an in-use entry, or a fresh chunk at
+ * i_size. Returns 0 on success, -EEXIST if the name is already present,
+ * -EIO on a zero-length entry, or the error from ext2_get_page(),
+ * prepare_write or ext2_commit_chunk().
+ */
+int ext2_add_link (struct dentry *dentry, struct inode *inode)
+{
+ struct inode *dir = dentry->d_parent->d_inode;
+ const char *name = dentry->d_name.name;
+ int namelen = dentry->d_name.len;
+ unsigned chunk_size = ext2_chunk_size(dir);
+ /* space the new entry needs */
+ unsigned reclen = EXT2_DIR_REC_LEN(namelen);
+ unsigned short rec_len, name_len;
+ struct page *page = NULL;
+ ext2_dirent * de;
+ unsigned long npages = dir_pages(dir);
+ unsigned long n;
+ char *kaddr;
+ unsigned from, to;
+ int err;
+
+ /*
+ * We take care of directory expansion in the same loop.
+ * This code plays outside i_size, so it locks the page
+ * to protect that region.
+ */
+ for (n = 0; n <= npages; n++) {
+ char *dir_end;
+
+ page = ext2_get_page(dir, n);
+ err = PTR_ERR(page);
+ if (IS_ERR(page))
+ goto out;
+ lock_page(page);
+ kaddr = page_address(page);
+ dir_end = kaddr + ext2_last_byte(dir, n);
+ de = (ext2_dirent *)kaddr;
+ /* last position in the page where the new entry still fits */
+ kaddr += PAGE_CACHE_SIZE - reclen;
+ while ((char *)de <= kaddr) {
+ if ((char *)de == dir_end) {
+ /* We hit i_size */
+ name_len = 0;
+ rec_len = chunk_size;
+ de->rec_len = cpu_to_le16(chunk_size);
+ de->inode = 0;
+ goto got_it;
+ }
+ if (de->rec_len == 0) {
+ ext2_error(dir->i_sb, __FUNCTION__,
+ "zero-length directory entry");
+ err = -EIO;
+ goto out_unlock;
+ }
+ err = -EEXIST;
+ if (ext2_match (namelen, name, de))
+ goto out_unlock;
+ /* name_len: space the existing entry itself needs */
+ name_len = EXT2_DIR_REC_LEN(de->name_len);
+ rec_len = le16_to_cpu(de->rec_len);
+ /* unused entry that is big enough */
+ if (!de->inode && rec_len >= reclen)
+ goto got_it;
+ /* in-use entry with enough slack behind it */
+ if (rec_len >= name_len + reclen)
+ goto got_it;
+ de = (ext2_dirent *) ((char *) de + rec_len);
+ }
+ unlock_page(page);
+ ext2_put_page(page);
+ }
+ BUG();
+ return -EINVAL;
+
+got_it:
+ from = (char*)de - (char*)page_address(page);
+ to = from + rec_len;
+ err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
+ if (err)
+ goto out_unlock;
+ if (de->inode) {
+ /* split: shrink the in-use entry, new one takes the rest */
+ ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
+ de1->rec_len = cpu_to_le16(rec_len - name_len);
+ de->rec_len = cpu_to_le16(name_len);
+ de = de1;
+ }
+ de->name_len = namelen;
+ memcpy (de->name, name, namelen);
+ de->inode = cpu_to_le32(inode->i_ino);
+ ext2_set_de_type (de, inode);
+ /* ext2_commit_chunk() unlocks the page or writes it out */
+ err = ext2_commit_chunk(page, from, to);
+ dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
+ EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;
+ mark_inode_dirty(dir);
+ /* OFFSET_CACHE */
+out_put:
+ ext2_put_page(page);
+out:
+ return err;
+out_unlock:
+ unlock_page(page);
+ goto out_put;
+}
+
+/*
+ * ext2_delete_entry deletes a directory entry by merging it with the
+ * previous entry. Page is up-to-date. Releases the page.
+ *
+ * @dir: the entry to delete (despite the name, not a directory inode)
+ * @page: mapped page containing the entry
+ *
+ * If the entry is the first one in its chunk there is nothing to merge
+ * with and only its inode field is cleared. Returns the result of
+ * ext2_commit_chunk(), or -EIO if a zero-length entry is met while
+ * locating the predecessor. A prepare_write failure triggers BUG().
+ */
+int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
+{
+ struct address_space *mapping = page->mapping;
+ struct inode *inode = mapping->host;
+ char *kaddr = page_address(page);
+ /* from: start of the chunk holding the entry; to: end of the entry */
+ unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1);
+ unsigned to = ((char*)dir - kaddr) + le16_to_cpu(dir->rec_len);
+ ext2_dirent * pde = NULL;
+ ext2_dirent * de = (ext2_dirent *) (kaddr + from);
+ int err;
+
+ /* walk the chunk to find the entry directly before the victim */
+ while ((char*)de < (char*)dir) {
+ if (de->rec_len == 0) {
+ ext2_error(inode->i_sb, __FUNCTION__,
+ "zero-length directory entry");
+ err = -EIO;
+ goto out;
+ }
+ pde = de;
+ de = ext2_next_entry(de);
+ }
+ if (pde)
+ from = (char*)pde - (char*)page_address(page);
+ lock_page(page);
+ err = mapping->a_ops->prepare_write(NULL, page, from, to);
+ if (err)
+ BUG();
+ /* the predecessor absorbs the deleted entry's space */
+ if (pde)
+ pde->rec_len = cpu_to_le16(to-from);
+ dir->inode = 0;
+ err = ext2_commit_chunk(page, from, to);
+ inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
+ EXT2_I(inode)->i_flags &= ~EXT2_BTREE_FL;
+ mark_inode_dirty(inode);
+out:
+ ext2_put_page(page);
+ return err;
+}
+
+/*
+ * Set the first fragment of directory.
+ *
+ * Writes the initial chunk of a new directory @inode: a "." entry that
+ * refers to @inode followed by a ".." entry that refers to @parent and
+ * takes up the rest of the chunk. Returns 0 on success, -ENOMEM if the
+ * page cannot be grabbed, or the error from prepare_write or
+ * ext2_commit_chunk().
+ */
+int ext2_make_empty(struct inode *inode, struct inode *parent)
+{
+ struct address_space *mapping = inode->i_mapping;
+ struct page *page = grab_cache_page(mapping, 0);
+ unsigned chunk_size = ext2_chunk_size(inode);
+ struct ext2_dir_entry_2 * de;
+ int err;
+ void *kaddr;
+
+ if (!page)
+ return -ENOMEM;
+ err = mapping->a_ops->prepare_write(NULL, page, 0, chunk_size);
+ if (err) {
+ unlock_page(page);
+ goto fail;
+ }
+ kaddr = kmap_atomic(page, KM_USER0);
+ memset(kaddr, 0, chunk_size);
+ /* "." entry, minimal size */
+ de = (struct ext2_dir_entry_2 *)kaddr;
+ de->name_len = 1;
+ de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1));
+ memcpy (de->name, ".\0\0", 4);
+ de->inode = cpu_to_le32(inode->i_ino);
+ ext2_set_de_type (de, inode);
+
+ /* ".." entry, covering the remainder of the chunk */
+ de = (struct ext2_dir_entry_2 *)(kaddr + EXT2_DIR_REC_LEN(1));
+ de->name_len = 2;
+ de->rec_len = cpu_to_le16(chunk_size - EXT2_DIR_REC_LEN(1));
+ de->inode = cpu_to_le32(parent->i_ino);
+ memcpy (de->name, "..\0", 4);
+ /*
+ * NOTE(review): the type is taken from inode rather than parent;
+ * presumably harmless because both are directories - confirm.
+ */
+ ext2_set_de_type (de, inode);
+ kunmap_atomic(kaddr, KM_USER0);
+ /* ext2_commit_chunk() unlocks the page or writes it out */
+ err = ext2_commit_chunk(page, 0, chunk_size);
+fail:
+ page_cache_release(page);
+ return err;
+}
+
+/*
+ * routine to check that the specified directory is empty (for rmdir)
+ *
+ * Returns 1 if every in-use entry is "." (referring to the directory
+ * itself) or "..", and 0 otherwise. A zero-length entry also yields 0.
+ * Pages that cannot be read are skipped.
+ */
+int ext2_empty_dir (struct inode * inode)
+{
+	unsigned long nr_pages = dir_pages(inode);
+	struct page *page = NULL;
+	unsigned long idx;
+
+	for (idx = 0; idx < nr_pages; idx++) {
+		char *last;
+		ext2_dirent *ent;
+
+		page = ext2_get_page(inode, idx);
+		if (IS_ERR(page))
+			continue;
+
+		ent = (ext2_dirent *)page_address(page);
+		/* last position at which a minimal entry may start */
+		last = (char *)ent;
+		last += ext2_last_byte(inode, idx) - EXT2_DIR_REC_LEN(1);
+
+		for ( ; (char *)ent <= last; ent = ext2_next_entry(ent)) {
+			if (ent->rec_len == 0) {
+				ext2_error(inode->i_sb, __FUNCTION__,
+					"zero-length directory entry");
+				printk("kaddr=%p, de=%p\n", last, ent);
+				goto not_empty;
+			}
+			if (ent->inode == 0)
+				continue;
+
+			/* check for . and .. */
+			if (ent->name[0] != '.' || ent->name_len > 2)
+				goto not_empty;
+			if (ent->name_len == 2) {
+				if (ent->name[1] != '.')
+					goto not_empty;
+			} else if (ent->inode != cpu_to_le32(inode->i_ino)) {
+				goto not_empty;
+			}
+		}
+		ext2_put_page(page);
+	}
+	return 1;
+
+not_empty:
+	ext2_put_page(page);
+	return 0;
+}
+
+/*
+ * File operations for ext2 directories. Only readdir is implemented in
+ * this file; the other slots use generic helpers or ext2 functions
+ * declared in ext2.h.
+ */
+struct file_operations ext2_dir_operations = {
+ .llseek = generic_file_llseek,
+ .read = generic_read_dir,
+ .readdir = ext2_readdir,
+ .ioctl = ext2_ioctl,
+ .fsync = ext2_sync_file,
+};
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
new file mode 100644
index 000000000000..9f1a40e7945c
--- /dev/null
+++ b/fs/ext2/ext2.h
@@ -0,0 +1,160 @@
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+
+/*
+ * second extended file system inode data in memory
+ */
+struct ext2_inode_info {
+ __le32 i_data[15];
+ __u32 i_flags;
+ __u32 i_faddr;
+ __u8 i_frag_no;
+ __u8 i_frag_size;
+ __u16 i_state;
+ __u32 i_file_acl;
+ __u32 i_dir_acl;
+ __u32 i_dtime;
+
+ /*
+ * i_block_group is the number of the block group which contains
+ * this file's inode. Constant across the lifetime of the inode,
+ * it is used for making block allocation decisions - we try to
+ * place a file's data blocks near its inode block, and new inodes
+ * near to their parent directory's inode.
+ */
+ __u32 i_block_group;
+
+ /*
+ * i_next_alloc_block is the logical (file-relative) number of the
+ * most-recently-allocated block in this file. Yes, it is misnamed.
+ * We use this for detecting linearly ascending allocation requests.
+ */
+ __u32 i_next_alloc_block;
+
+ /*
+ * i_next_alloc_goal is the *physical* companion to i_next_alloc_block.
+ * It is the physical block number of the block which was most-recently
+ * allocated to this file. This gives us the goal (target) for the next
+ * allocation when we detect linearly ascending requests.
+ */
+ __u32 i_next_alloc_goal;
+ __u32 i_prealloc_block;
+ __u32 i_prealloc_count;
+ __u32 i_dir_start_lookup;
+#ifdef CONFIG_EXT2_FS_XATTR
+ /*
+ * Extended attributes can be read independently of the main file
+ * data. Taking i_sem even when reading would cause contention
+ * between readers of EAs and writers of regular file data, so
+ * instead we synchronize on xattr_sem when reading or changing
+ * EAs.
+ */
+ struct rw_semaphore xattr_sem;
+#endif
+#ifdef CONFIG_EXT2_FS_POSIX_ACL
+ struct posix_acl *i_acl;
+ struct posix_acl *i_default_acl;
+#endif
+ rwlock_t i_meta_lock;
+ /* embedded VFS inode; EXT2_I() maps it back to this structure */
+ struct inode vfs_inode;
+};
+
+/*
+ * Inode dynamic state flags
+ */
+#define EXT2_STATE_NEW 0x00000001 /* inode is newly created */
+
+
+/*
+ * Function prototypes
+ */
+
+/*
+ * Ok, these declarations are also in <linux/kernel.h> but none of the
+ * ext2 source programs needs to include it so they are duplicated here.
+ */
+
+/*
+ * Return the ext2_inode_info whose vfs_inode member is @inode.
+ */
+static inline struct ext2_inode_info *EXT2_I(struct inode *inode)
+{
+ return container_of(inode, struct ext2_inode_info, vfs_inode);
+}
+
+/* balloc.c */
+extern int ext2_bg_has_super(struct super_block *sb, int group);
+extern unsigned long ext2_bg_num_gdb(struct super_block *sb, int group);
+extern int ext2_new_block (struct inode *, unsigned long,
+ __u32 *, __u32 *, int *);
+extern void ext2_free_blocks (struct inode *, unsigned long,
+ unsigned long);
+extern unsigned long ext2_count_free_blocks (struct super_block *);
+extern unsigned long ext2_count_dirs (struct super_block *);
+extern void ext2_check_blocks_bitmap (struct super_block *);
+extern struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb,
+ unsigned int block_group,
+ struct buffer_head ** bh);
+
+/* dir.c */
+extern int ext2_add_link (struct dentry *, struct inode *);
+extern ino_t ext2_inode_by_name(struct inode *, struct dentry *);
+extern int ext2_make_empty(struct inode *, struct inode *);
+extern struct ext2_dir_entry_2 * ext2_find_entry (struct inode *,struct dentry *, struct page **);
+extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *);
+extern int ext2_empty_dir (struct inode *);
+extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **);
+extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *);
+
+/* fsync.c */
+extern int ext2_sync_file (struct file *, struct dentry *, int);
+
+/* ialloc.c */
+extern struct inode * ext2_new_inode (struct inode *, int);
+extern void ext2_free_inode (struct inode *);
+extern unsigned long ext2_count_free_inodes (struct super_block *);
+extern void ext2_check_inodes_bitmap (struct super_block *);
+extern unsigned long ext2_count_free (struct buffer_head *, unsigned);
+
+/* inode.c */
+extern void ext2_read_inode (struct inode *);
+extern int ext2_write_inode (struct inode *, int);
+extern void ext2_delete_inode (struct inode *);
+extern int ext2_sync_inode (struct inode *);
+extern void ext2_discard_prealloc (struct inode *);
+extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int);
+extern void ext2_truncate (struct inode *);
+extern int ext2_setattr (struct dentry *, struct iattr *);
+extern void ext2_set_inode_flags(struct inode *inode);
+
+/* ioctl.c */
+extern int ext2_ioctl (struct inode *, struct file *, unsigned int,
+ unsigned long);
+
+/* super.c */
+extern void ext2_error (struct super_block *, const char *, const char *, ...)
+ __attribute__ ((format (printf, 3, 4)));
+extern void ext2_warning (struct super_block *, const char *, const char *, ...)
+ __attribute__ ((format (printf, 3, 4)));
+extern void ext2_update_dynamic_rev (struct super_block *sb);
+extern void ext2_write_super (struct super_block *);
+
+/*
+ * Inodes and files operations
+ */
+
+/* dir.c */
+extern struct file_operations ext2_dir_operations;
+
+/* file.c */
+extern struct inode_operations ext2_file_inode_operations;
+extern struct file_operations ext2_file_operations;
+
+/* inode.c */
+extern struct address_space_operations ext2_aops;
+extern struct address_space_operations ext2_nobh_aops;
+
+/* namei.c */
+extern struct inode_operations ext2_dir_inode_operations;
+extern struct inode_operations ext2_special_inode_operations;
+
+/* symlink.c */
+extern struct inode_operations ext2_fast_symlink_inode_operations;
+extern struct inode_operations ext2_symlink_inode_operations;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
new file mode 100644
index 000000000000..f5e86141ec54
--- /dev/null
+++ b/fs/ext2/file.c
@@ -0,0 +1,68 @@
+/*
+ * linux/fs/ext2/file.c
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ * from
+ *
+ * linux/fs/minix/file.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * ext2 fs regular file handling primitives
+ *
+ * 64-bit file support on 64-bit platforms by Jakub Jelinek
+ * (jj@sunsite.ms.mff.cuni.cz)
+ */
+
+#include <linux/time.h>
+#include "ext2.h"
+#include "xattr.h"
+#include "acl.h"
+
+/*
+ * Release hook for regular files.  Unlike open, which is invoked on
+ * every open, release is invoked only once /all/ the files are closed.
+ * If the file was opened for writing, ext2_discard_prealloc() is called
+ * on the inode.  Always returns 0.
+ */
+static int ext2_release_file (struct inode * inode, struct file * filp)
+{
+	const int opened_for_write = filp->f_mode & FMODE_WRITE;
+
+	if (opened_for_write)
+		ext2_discard_prealloc (inode);
+
+	return 0;
+}
+
+/*
+ * File operations for regular ext2 files.  Only ioctl, release and
+ * fsync are ext2-specific; everything else is handed to the
+ * generic_file_*() helpers.
+ */
+struct file_operations ext2_file_operations = {
+	/* ext2-specific entry points */
+	.ioctl		= ext2_ioctl,
+	.release	= ext2_release_file,
+	.fsync		= ext2_sync_file,
+
+	/* generic implementations */
+	.open		= generic_file_open,
+	.llseek		= generic_file_llseek,
+	.mmap		= generic_file_mmap,
+	.read		= generic_file_read,
+	.readv		= generic_file_readv,
+	.aio_read	= generic_file_aio_read,
+	.write		= generic_file_write,
+	.writev		= generic_file_writev,
+	.aio_write	= generic_file_aio_write,
+	.sendfile	= generic_file_sendfile,
+};
+
+/*
+ * Inode operations for regular ext2 files.  The extended-attribute
+ * hooks are only wired up when CONFIG_EXT2_FS_XATTR is enabled.
+ */
+struct inode_operations ext2_file_inode_operations = {
+	.truncate	= ext2_truncate,
+	.setattr	= ext2_setattr,
+	.permission	= ext2_permission,
+#ifdef CONFIG_EXT2_FS_XATTR
+	.getxattr	= generic_getxattr,
+	.setxattr	= generic_setxattr,
+	.listxattr	= ext2_listxattr,
+	.removexattr	= generic_removexattr,
+#endif
+};
diff --git a/fs/ext2/fsync.c b/fs/ext2/fsync.c
new file mode 100644
index 000000000000..c9c2e5ffa48e
--- /dev/null
+++ b/fs/ext2/fsync.c
@@ -0,0 +1,51 @@
+/*
+ * linux/fs/ext2/fsync.c
+ *
+ * Copyright (C) 1993 Stephen Tweedie (sct@dcs.ed.ac.uk)
+ * from
+ * Copyright (C) 1992 Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ * from
+ * linux/fs/minix/truncate.c Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * ext2fs fsync primitive
+ *
+ * Big-endian to little-endian byte-swapping/bitmaps by
+ * David S. Miller (davem@caip.rutgers.edu), 1995
+ *
+ * Removed unnecessary code duplication for little endian machines
+ * and excessive __inline__s.
+ * Andi Kleen, 1997
+ *
+ * Major simplifications and cleanup - we only need to do the metadata, because
+ * we can depend on generic_block_fdatasync() to sync the data blocks.
+ */
+
+#include "ext2.h"
+#include <linux/smp_lock.h>
+#include <linux/buffer_head.h> /* for fsync_inode_buffers() */
+
+
+/*
+ * fsync entry point for ext2.
+ *
+ * @file may be NULL when we are called (perhaps it should not be passed
+ * to fsync at all), so it is never dereferenced; the inode is taken
+ * from @dentry instead.
+ *
+ * sync_mapping_buffers() is run on the inode's mapping first.  The inode
+ * itself is then synced via ext2_sync_inode() only if it is dirty and,
+ * for a @datasync request, only if I_DIRTY_DATASYNC is set.  The first
+ * error encountered is the one returned.
+ */
+
+int ext2_sync_file(struct file *file, struct dentry *dentry, int datasync)
+{
+	struct inode *inode = dentry->d_inode;
+	int buffers_err = sync_mapping_buffers(inode->i_mapping);
+	int inode_err;
+
+	if ((inode->i_state & I_DIRTY) == 0)
+		return buffers_err;
+	if (datasync && (inode->i_state & I_DIRTY_DATASYNC) == 0)
+		return buffers_err;
+
+	inode_err = ext2_sync_inode(inode);
+	return buffers_err ? buffers_err : inode_err;
+}
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
new file mode 100644
index 000000000000..77e059149212
--- /dev/null
+++ b/fs/ext2/ialloc.c
@@ -0,0 +1,735 @@
+/*
+ * linux/fs/ext2/ialloc.c
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ * BSD ufs-inspired inode and directory allocation by
+ * Stephen Tweedie (sct@dcs.ed.ac.uk), 1993
+ * Big-endian to little-endian byte-swapping/bitmaps by
+ * David S. Miller (davem@caip.rutgers.edu), 1995
+ */
+
+#include <linux/config.h>
+#include <linux/quotaops.h>
+#include <linux/sched.h>
+#include <linux/backing-dev.h>
+#include <linux/buffer_head.h>
+#include <linux/random.h>
+#include "ext2.h"
+#include "xattr.h"
+#include "acl.h"
+
+/*
+ * ialloc.c contains the inodes allocation and deallocation routines
+ */
+
+/*
+ * The free inodes are managed by bitmaps. A file system contains several
+ * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap
+ * block for inodes, N blocks for the inode table and data blocks.
+ *
+ * The file system contains group descriptors which are located after the
+ * super block. Each descriptor contains the number of the bitmap block and
+ * the free blocks count in the group.
+ */
+
+
+/*
+ * Read the inode allocation bitmap block of the given block_group.
+ *
+ * Returns the buffer_head of the bitmap on success.  Returns NULL if the
+ * group descriptor cannot be obtained, or if the bitmap block cannot be
+ * read (the latter is also reported through ext2_error()).  Callers in
+ * this file release the returned buffer with brelse().
+ */
+static struct buffer_head *
+read_inode_bitmap(struct super_block * sb, unsigned long block_group)
+{
+	struct ext2_group_desc *gdp;
+	struct buffer_head *bitmap_bh;
+
+	gdp = ext2_get_group_desc(sb, block_group, NULL);
+	if (gdp == NULL)
+		return NULL;
+
+	bitmap_bh = sb_bread(sb, le32_to_cpu(gdp->bg_inode_bitmap));
+	if (bitmap_bh == NULL)
+		ext2_error(sb, "read_inode_bitmap",
+			    "Cannot read inode bitmap - "
+			    "block_group = %lu, inode_bitmap = %u",
+			    block_group, le32_to_cpu(gdp->bg_inode_bitmap));
+
+	return bitmap_bh;
+}
+
+/*
+ * Account for one inode of block group @group having been freed.
+ *
+ * Under the group's lock the descriptor's free-inode count is raised by
+ * one and, when the inode was a directory (@dir != 0), its used-dirs
+ * count is lowered by one.  The filesystem-wide counters are updated to
+ * match: s_dirs_counter for directories, and s_freeinodes_counter for
+ * every inode.  The latter balances the decrement ext2_new_inode()
+ * performs on allocation; without it the counter only ever shrinks and
+ * the averages find_group_orlov() derives from it drift towards zero.
+ *
+ * If the group descriptor cannot be obtained the problem is reported via
+ * ext2_error() and nothing is changed.
+ */
+static void ext2_release_inode(struct super_block *sb, int group, int dir)
+{
+	struct ext2_sb_info *sbi = EXT2_SB(sb);
+	struct ext2_group_desc * desc;
+	struct buffer_head *bh;
+
+	desc = ext2_get_group_desc(sb, group, &bh);
+	if (!desc) {
+		ext2_error(sb, "ext2_release_inode",
+			"can't get descriptor for group %d", group);
+		return;
+	}
+
+	spin_lock(sb_bgl_lock(sbi, group));
+	desc->bg_free_inodes_count =
+		cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) + 1);
+	if (dir)
+		desc->bg_used_dirs_count =
+			cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) - 1);
+	spin_unlock(sb_bgl_lock(sbi, group));
+
+	/* Keep the global counters in step with the descriptor. */
+	percpu_counter_inc(&sbi->s_freeinodes_counter);
+	if (dir)
+		percpu_counter_dec(&sbi->s_dirs_counter);
+
+	sb->s_dirt = 1;
+	mark_buffer_dirty(bh);
+}
+
+/*
+ * NOTE! When we get the inode, we're the only people
+ * that have access to it, and as such there are no
+ * race conditions we have to worry about. The inode
+ * is not on the hash-lists, and it cannot be reached
+ * through the filesystem because the directory entry
+ * has been deleted earlier.
+ *
+ * HOWEVER: we must make sure that we get no aliases,
+ * which means that we have to call "clear_inode()"
+ * _before_ we mark the inode not in use in the inode
+ * bitmaps. Otherwise a newly created file might use
+ * the same inode number (not actually the same pointer
+ * though), and then we'd have two inodes sharing the
+ * same inode number and space on the harddisk.
+ */
+void ext2_free_inode (struct inode * inode)
+{
+	struct super_block *sb = inode->i_sb;
+	struct ext2_sb_info *sbi = EXT2_SB(sb);
+	const unsigned long ino = inode->i_ino;
+	struct ext2_super_block *es;
+	struct buffer_head *bitmap_bh;
+	unsigned long block_group;
+	unsigned long bit;
+	int was_dir;
+
+	ext2_debug ("freeing inode %lu\n", ino);
+
+	/*
+	 * Note: we must free any quota before locking the superblock,
+	 * as writing the quota to disk may need the lock as well.
+	 */
+	if (!is_bad_inode(inode)) {
+		/* Quota is already initialized in iput() */
+		ext2_xattr_delete_inode(inode);
+		DQUOT_FREE_INODE(inode);
+		DQUOT_DROP(inode);
+	}
+
+	/* Remember the file type: the inode is about to be cleared. */
+	es = sbi->s_es;
+	was_dir = S_ISDIR(inode->i_mode);
+
+	/* Do this BEFORE marking the inode not in use or returning an error */
+	clear_inode (inode);
+
+	if (ino < EXT2_FIRST_INO(sb) ||
+	    ino > le32_to_cpu(es->s_inodes_count)) {
+		ext2_error (sb, "ext2_free_inode",
+			    "reserved or nonexistent inode %lu", ino);
+		return;
+	}
+
+	/* Locate the inode's bit inside its group's bitmap. */
+	block_group = (ino - 1) / EXT2_INODES_PER_GROUP(sb);
+	bit = (ino - 1) % EXT2_INODES_PER_GROUP(sb);
+	bitmap_bh = read_inode_bitmap(sb, block_group);
+	if (bitmap_bh == NULL)
+		return;
+
+	/* Ok, now we can actually update the inode bitmaps.. */
+	if (ext2_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
+				  bit, (void *) bitmap_bh->b_data))
+		ext2_release_inode(sb, block_group, was_dir);
+	else
+		ext2_error (sb, "ext2_free_inode",
+			    "bit already cleared for inode %lu", ino);
+
+	mark_buffer_dirty(bitmap_bh);
+	if (sb->s_flags & MS_SYNCHRONOUS)
+		sync_dirty_buffer(bitmap_bh);
+	brelse(bitmap_bh);
+}
+
+/*
+ * We perform asynchronous prereading of the new inode's inode block when
+ * we create the inode, in the expectation that the inode will be written
+ * back soon. There are two reasons:
+ *
+ * - When creating a large number of files, the async prereads will be
+ * nicely merged into large reads
+ * - When writing out a large number of inodes, we don't need to keep on
+ * stalling the writes while we read the inode block.
+ *
+ * FIXME: ext2_get_group_desc() needs to be simplified.
+ */
+static void ext2_preread_inode(struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+	struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
+	struct ext2_group_desc *gdp;
+	struct buffer_head *gd_bh;
+	unsigned long group;
+	unsigned long byte_off;
+	unsigned long target;
+
+	/* Do not issue the readahead while the device reports congestion. */
+	if (bdi_read_congested(bdi) || bdi_write_congested(bdi))
+		return;
+
+	group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(sb);
+	gdp = ext2_get_group_desc(sb, group, &gd_bh);
+	if (!gdp)
+		return;
+
+	/* Byte offset of this inode within the group's inode table ... */
+	byte_off = ((inode->i_ino - 1) % EXT2_INODES_PER_GROUP(sb)) *
+		   EXT2_INODE_SIZE(sb);
+
+	/* ... and the filesystem block which holds that offset. */
+	target = le32_to_cpu(gdp->bg_inode_table) +
+		 (byte_off >> EXT2_BLOCK_SIZE_BITS(sb));
+	sb_breadahead(sb, target);
+}
+
+/*
+ * There are two policies for allocating an inode. If the new inode is
+ * a directory, then a forward search is made for a block group with both
+ * free space and a low directory-to-inode ratio; if that fails, then of
+ * the groups with above-average free space, that group with the fewest
+ * directories already is chosen.
+ *
+ * For other inodes, search forward from the parent directory's block
+ * group to find a free inode.
+ */
+static int find_group_dir(struct super_block *sb, struct inode *parent)
+{
+	const int ngroups = EXT2_SB(sb)->s_groups_count;
+	const int avefreei = ext2_count_free_inodes(sb) / ngroups;
+	struct ext2_group_desc *best = NULL;
+	int best_group = -1;
+	int g;
+
+	for (g = 0; g < ngroups; g++) {
+		struct buffer_head *bh;
+		struct ext2_group_desc *gd = ext2_get_group_desc (sb, g, &bh);
+
+		/* Skip groups we cannot look at or that have no free inode. */
+		if (gd == NULL || gd->bg_free_inodes_count == 0)
+			continue;
+		/* Only groups with at least the average free-inode count. */
+		if (le16_to_cpu(gd->bg_free_inodes_count) < avefreei)
+			continue;
+		/* Keep the current best unless this one has more free blocks. */
+		if (best != NULL &&
+		    le16_to_cpu(gd->bg_free_blocks_count) <=
+		    le16_to_cpu(best->bg_free_blocks_count))
+			continue;
+
+		best = gd;
+		best_group = g;
+	}
+
+	/* Still -1 when no group qualified. */
+	return best_group;
+}
+
+/*
+ * Orlov's allocator for directories.
+ *
+ * We always try to spread first-level directories.
+ *
+ * If there are blockgroups with both free inodes and free blocks counts
+ * not worse than average we return one with smallest directory count.
+ * Otherwise we simply return a random group.
+ *
+ * For all other directories the rules are as follows:
+ *
+ * It's OK to put directory into a group unless
+ * it has too many directories already (max_dirs) or
+ * it has too few free inodes left (min_inodes) or
+ * it has too few free blocks left (min_blocks) or
+ * it's already running too large debt (max_debt).
+ * Parent's group is preferred, if it doesn't satisfy these
+ * conditions we search cyclically through the rest. If none
+ * of the groups look good we just look for a group with more
+ * free inodes than average (starting at parent's group).
+ *
+ * Debt is incremented each time we allocate a directory and decremented
+ * when we allocate an inode, within 0--255.
+ */
+
+#define INODE_COST 64
+#define BLOCK_COST 256
+
+static int find_group_orlov(struct super_block *sb, struct inode *parent)
+{
+	struct ext2_sb_info *sbi = EXT2_SB(sb);
+	struct ext2_super_block *es = sbi->s_es;
+	const int ngroups = sbi->s_groups_count;
+	const int inodes_per_group = EXT2_INODES_PER_GROUP(sb);
+	int start = EXT2_I(parent)->i_block_group;
+	int freei, avefreei;
+	int free_blocks, avefreeb;
+	int ndirs, blocks_per_dir;
+	int max_debt, max_dirs, min_blocks, min_inodes;
+	int group = -1, i;
+	struct ext2_group_desc *desc;
+	struct buffer_head *bh;
+
+	freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
+	avefreei = freei / ngroups;
+	free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
+	avefreeb = free_blocks / ngroups;
+	ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
+
+	if ((parent == sb->s_root->d_inode) ||
+	    (EXT2_I(parent)->i_flags & EXT2_TOPDIR_FL)) {
+		/*
+		 * Top-level directory: starting from a random group, pick
+		 * the group with the fewest directories among those whose
+		 * free inode and free block counts are not below average.
+		 */
+		int fewest_dirs = inodes_per_group;
+		int best_group = -1;
+
+		get_random_bytes(&group, sizeof(group));
+		start = (unsigned)group % ngroups;
+		for (i = 0; i < ngroups; i++) {
+			group = (start + i) % ngroups;
+			desc = ext2_get_group_desc (sb, group, &bh);
+			if (!desc || !desc->bg_free_inodes_count)
+				continue;
+			if (le16_to_cpu(desc->bg_used_dirs_count) >= fewest_dirs ||
+			    le16_to_cpu(desc->bg_free_inodes_count) < avefreei ||
+			    le16_to_cpu(desc->bg_free_blocks_count) < avefreeb)
+				continue;
+			best_group = group;
+			fewest_dirs = le16_to_cpu(desc->bg_used_dirs_count);
+		}
+		if (best_group >= 0)
+			return best_group;
+		/* Nothing suitable: use the fallback scan from the random start. */
+	} else {
+		if (ndirs == 0)
+			ndirs = 1;	/* percpu_counters are approximate... */
+
+		blocks_per_dir = (le32_to_cpu(es->s_blocks_count)-free_blocks) / ndirs;
+
+		max_dirs = ndirs / ngroups + inodes_per_group / 16;
+		min_inodes = avefreei - inodes_per_group / 4;
+		min_blocks = avefreeb - EXT2_BLOCKS_PER_GROUP(sb) / 4;
+
+		/* Clamp the tolerated debt to the range 1..255. */
+		max_debt = EXT2_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, BLOCK_COST);
+		if (max_debt * INODE_COST > inodes_per_group)
+			max_debt = inodes_per_group / INODE_COST;
+		if (max_debt > 255)
+			max_debt = 255;
+		if (max_debt == 0)
+			max_debt = 1;
+
+		/* Parent's group first, then cyclically through the rest. */
+		for (i = 0; i < ngroups; i++) {
+			group = (start + i) % ngroups;
+			desc = ext2_get_group_desc (sb, group, &bh);
+			if (!desc || !desc->bg_free_inodes_count)
+				continue;
+			if (sbi->s_debts[group] >= max_debt ||
+			    le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs ||
+			    le16_to_cpu(desc->bg_free_inodes_count) < min_inodes ||
+			    le16_to_cpu(desc->bg_free_blocks_count) < min_blocks)
+				continue;
+			return group;
+		}
+	}
+
+	/*
+	 * Fallback: take the first group, scanning from the start group,
+	 * that has at least the average number of free inodes.
+	 */
+	for (;;) {
+		for (i = 0; i < ngroups; i++) {
+			group = (start + i) % ngroups;
+			desc = ext2_get_group_desc (sb, group, &bh);
+			if (!desc || !desc->bg_free_inodes_count)
+				continue;
+			if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei)
+				return group;
+		}
+
+		if (!avefreei)
+			return -1;
+
+		/*
+		 * The free-inodes counter is approximate, and for really small
+		 * filesystems the above test can fail to find any blockgroups.
+		 * Retry once, accepting any group with a free inode.
+		 */
+		avefreei = 0;
+	}
+}
+
+static int find_group_other(struct super_block *sb, struct inode *parent)
+{
+	const int parent_group = EXT2_I(parent)->i_block_group;
+	const int ngroups = EXT2_SB(sb)->s_groups_count;
+	struct ext2_group_desc *gd;
+	struct buffer_head *bh;
+	int group, step, tries;
+
+	/*
+	 * First choice: the parent directory's own group, provided it has
+	 * both a free inode and free blocks.
+	 */
+	group = parent_group;
+	gd = ext2_get_group_desc (sb, group, &bh);
+	if (gd && le16_to_cpu(gd->bg_free_inodes_count) &&
+	    le16_to_cpu(gd->bg_free_blocks_count))
+		return group;
+
+	/*
+	 * The inode has to go to a different blockgroup from its parent.
+	 * Files of one directory should still all land in the same
+	 * blockgroup, while files of another directory that shares our
+	 * parent's blockgroup should land somewhere else.
+	 *
+	 * So mix the directory's i_ino into the starting point of the hash.
+	 */
+	group = (group + parent->i_ino) % ngroups;
+
+	/*
+	 * Quadratic hash: look for a group with a free inode and some
+	 * free blocks.
+	 */
+	for (step = 1; step < ngroups; step <<= 1) {
+		group += step;
+		if (group >= ngroups)
+			group -= ngroups;
+		gd = ext2_get_group_desc (sb, group, &bh);
+		if (gd && le16_to_cpu(gd->bg_free_inodes_count) &&
+		    le16_to_cpu(gd->bg_free_blocks_count))
+			return group;
+	}
+
+	/*
+	 * Last resort: linear search for any free inode, even in a group
+	 * that has no free blocks.
+	 */
+	group = parent_group;
+	for (tries = 0; tries < ngroups; tries++) {
+		group++;
+		if (group >= ngroups)
+			group = 0;
+		gd = ext2_get_group_desc (sb, group, &bh);
+		if (gd && le16_to_cpu(gd->bg_free_inodes_count))
+			return group;
+	}
+
+	return -1;
+}
+
+/*
+ * Allocate a new inode for an object of type @mode created in @dir.
+ *
+ * A starting block group is chosen by one of the find_group_*() policies.
+ * From there each group is tried in turn until a free bit can be claimed
+ * in an inode bitmap.  The group descriptor, the global counters and the
+ * in-core inode are then initialised.
+ *
+ * Returns the new inode, or an ERR_PTR():
+ *   -ENOMEM  new_inode() failed;
+ *   -ENOSPC  no group or free inode was found, or DQUOT_ALLOC_INODE()
+ *            refused the allocation;
+ *   -EIO     an inode bitmap could not be read, or the resulting inode
+ *            number is reserved or out of range;
+ *   or whatever ext2_init_acl() returned.
+ */
+struct inode *ext2_new_inode(struct inode *dir, int mode)
+{
+	struct super_block *sb;
+	struct buffer_head *bitmap_bh = NULL;
+	struct buffer_head *bh2;
+	int group, i;
+	ino_t ino = 0;
+	struct inode * inode;
+	struct ext2_group_desc *gdp;
+	struct ext2_super_block *es;
+	struct ext2_inode_info *ei;
+	struct ext2_sb_info *sbi;
+	int err;
+
+	sb = dir->i_sb;
+	inode = new_inode(sb);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+	ei = EXT2_I(inode);
+	sbi = EXT2_SB(sb);
+	es = sbi->s_es;
+	if (S_ISDIR(mode)) {
+		if (test_opt(sb, OLDALLOC))
+			group = find_group_dir(sb, dir);
+		else
+			group = find_group_orlov(sb, dir);
+	} else
+		group = find_group_other(sb, dir);
+
+	if (group == -1) {
+		err = -ENOSPC;
+		goto fail;
+	}
+
+	for (i = 0; i < sbi->s_groups_count; i++) {
+		gdp = ext2_get_group_desc(sb, group, &bh2);
+		if (!gdp) {
+			/*
+			 * No descriptor for this group: move on instead of
+			 * dereferencing a NULL gdp after "got:".
+			 */
+			if (++group == sbi->s_groups_count)
+				group = 0;
+			continue;
+		}
+		brelse(bitmap_bh);
+		bitmap_bh = read_inode_bitmap(sb, group);
+		if (!bitmap_bh) {
+			err = -EIO;
+			goto fail;
+		}
+		ino = 0;
+
+repeat_in_this_group:
+		ino = ext2_find_next_zero_bit((unsigned long *)bitmap_bh->b_data,
+					      EXT2_INODES_PER_GROUP(sb), ino);
+		if (ino >= EXT2_INODES_PER_GROUP(sb)) {
+			/*
+			 * Rare race: find_group_xx() decided that there were
+			 * free inodes in this group, but by the time we tried
+			 * to allocate one, they're all gone.  This can also
+			 * occur because the counters which find_group_orlov()
+			 * uses are approximate.  So just go and search the
+			 * next block group.
+			 */
+			if (++group == sbi->s_groups_count)
+				group = 0;
+			continue;
+		}
+		if (ext2_set_bit_atomic(sb_bgl_lock(sbi, group),
+						ino, bitmap_bh->b_data)) {
+			/* we lost this inode */
+			if (++ino >= EXT2_INODES_PER_GROUP(sb)) {
+				/* this group is exhausted, try next group */
+				if (++group == sbi->s_groups_count)
+					group = 0;
+				continue;
+			}
+			/* try to find free inode in the same group */
+			goto repeat_in_this_group;
+		}
+		goto got;
+	}
+
+	/*
+	 * Scanned all blockgroups.  The bitmap of the last group examined
+	 * is still referenced here; drop it before bailing out.
+	 */
+	brelse(bitmap_bh);
+	err = -ENOSPC;
+	goto fail;
+got:
+	mark_buffer_dirty(bitmap_bh);
+	if (sb->s_flags & MS_SYNCHRONOUS)
+		sync_dirty_buffer(bitmap_bh);
+	brelse(bitmap_bh);
+
+	/* Convert the bit index within the group into an inode number. */
+	ino += group * EXT2_INODES_PER_GROUP(sb) + 1;
+	if (ino < EXT2_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
+		ext2_error (sb, "ext2_new_inode",
+			    "reserved inode or inode > inodes count - "
+			    "block_group = %d,inode=%lu", group,
+			    (unsigned long) ino);
+		err = -EIO;
+		goto fail;
+	}
+
+	percpu_counter_mod(&sbi->s_freeinodes_counter, -1);
+	if (S_ISDIR(mode))
+		percpu_counter_inc(&sbi->s_dirs_counter);
+
+	/* Per-group accounting, including the Orlov "debt" (0..255). */
+	spin_lock(sb_bgl_lock(sbi, group));
+	gdp->bg_free_inodes_count =
+                cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
+	if (S_ISDIR(mode)) {
+		if (sbi->s_debts[group] < 255)
+			sbi->s_debts[group]++;
+		gdp->bg_used_dirs_count =
+			cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
+	} else {
+		if (sbi->s_debts[group])
+			sbi->s_debts[group]--;
+	}
+	spin_unlock(sb_bgl_lock(sbi, group));
+
+	sb->s_dirt = 1;
+	mark_buffer_dirty(bh2);
+	inode->i_uid = current->fsuid;
+	if (test_opt (sb, GRPID))
+		inode->i_gid = dir->i_gid;
+	else if (dir->i_mode & S_ISGID) {
+		/* setgid directory: inherit the group, and the bit for subdirs */
+		inode->i_gid = dir->i_gid;
+		if (S_ISDIR(mode))
+			mode |= S_ISGID;
+	} else
+		inode->i_gid = current->fsgid;
+	inode->i_mode = mode;
+
+	inode->i_ino = ino;
+	inode->i_blksize = PAGE_SIZE;	/* This is the optimal IO size (for stat), not the fs block size */
+	inode->i_blocks = 0;
+	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
+	memset(ei->i_data, 0, sizeof(ei->i_data));
+	/* Flags are inherited from the directory, minus EXT2_BTREE_FL. */
+	ei->i_flags = EXT2_I(dir)->i_flags & ~EXT2_BTREE_FL;
+	if (S_ISLNK(mode))
+		ei->i_flags &= ~(EXT2_IMMUTABLE_FL|EXT2_APPEND_FL);
+	/* dirsync is only applied to directories */
+	if (!S_ISDIR(mode))
+		ei->i_flags &= ~EXT2_DIRSYNC_FL;
+	ei->i_faddr = 0;
+	ei->i_frag_no = 0;
+	ei->i_frag_size = 0;
+	ei->i_file_acl = 0;
+	ei->i_dir_acl = 0;
+	ei->i_dtime = 0;
+	ei->i_block_group = group;
+	ei->i_next_alloc_block = 0;
+	ei->i_next_alloc_goal = 0;
+	ei->i_prealloc_block = 0;
+	ei->i_prealloc_count = 0;
+	ei->i_dir_start_lookup = 0;
+	ei->i_state = EXT2_STATE_NEW;
+	ext2_set_inode_flags(inode);
+	spin_lock(&sbi->s_next_gen_lock);
+	inode->i_generation = sbi->s_next_generation++;
+	spin_unlock(&sbi->s_next_gen_lock);
+	insert_inode_hash(inode);
+
+	if (DQUOT_ALLOC_INODE(inode)) {
+		DQUOT_DROP(inode);
+		err = -ENOSPC;
+		goto fail2;
+	}
+	err = ext2_init_acl(inode, dir);
+	if (err) {
+		DQUOT_FREE_INODE(inode);
+		goto fail2;
+	}
+	mark_inode_dirty(inode);
+	ext2_debug("allocating inode %lu\n", inode->i_ino);
+	ext2_preread_inode(inode);
+	return inode;
+
+fail2:
+	/* The inode is already hashed: zero i_nlink before the final iput(). */
+	inode->i_flags |= S_NOQUOTA;
+	inode->i_nlink = 0;
+	iput(inode);
+	return ERR_PTR(err);
+
+fail:
+	make_bad_inode(inode);
+	iput(inode);
+	return ERR_PTR(err);
+}
+
+/*
+ * Sum the free-inode counts stored in all group descriptors and return
+ * the total.  Groups whose descriptor cannot be obtained are skipped.
+ *
+ * With EXT2FS_DEBUG defined, the superblock is locked for the duration,
+ * each group's bitmap is counted as well, and the stored and counted
+ * figures are printed per group and in total.
+ */
+unsigned long ext2_count_free_inodes (struct super_block * sb)
+{
+	struct ext2_sb_info *sbi = EXT2_SB(sb);
+	struct ext2_group_desc *gd;
+	unsigned long stored_total = 0;
+	int grp;
+
+#ifdef EXT2FS_DEBUG
+	struct ext2_super_block *es;
+	unsigned long counted_total = 0;
+	struct buffer_head *bitmap_bh = NULL;
+
+	lock_super (sb);
+	es = sbi->s_es;
+	for (grp = 0; grp < sbi->s_groups_count; grp++) {
+		unsigned counted;
+
+		gd = ext2_get_group_desc (sb, grp, NULL);
+		if (gd == NULL)
+			continue;
+		stored_total += le16_to_cpu(gd->bg_free_inodes_count);
+
+		/* Drop the previous group's bitmap before reading the next. */
+		brelse(bitmap_bh);
+		bitmap_bh = read_inode_bitmap(sb, grp);
+		if (bitmap_bh == NULL)
+			continue;
+
+		counted = ext2_count_free(bitmap_bh, EXT2_INODES_PER_GROUP(sb) / 8);
+		printk("group %d: stored = %d, counted = %u\n",
+			grp, le16_to_cpu(gd->bg_free_inodes_count), counted);
+		counted_total += counted;
+	}
+	brelse(bitmap_bh);
+	printk("ext2_count_free_inodes: stored = %lu, computed = %lu, %lu\n",
+		percpu_counter_read(&sbi->s_freeinodes_counter),
+		stored_total, counted_total);
+	unlock_super(sb);
+	return stored_total;
+#else
+	for (grp = 0; grp < sbi->s_groups_count; grp++) {
+		gd = ext2_get_group_desc (sb, grp, NULL);
+		if (gd == NULL)
+			continue;
+		stored_total += le16_to_cpu(gd->bg_free_inodes_count);
+	}
+	return stored_total;
+#endif
+}
+
+/*
+ * Called at mount-time, super-block is locked.
+ *
+ * Returns the sum of the used-directory counts of all group descriptors;
+ * groups whose descriptor cannot be obtained contribute nothing.
+ */
+unsigned long ext2_count_dirs (struct super_block * sb)
+{
+	struct ext2_sb_info *sbi = EXT2_SB(sb);
+	struct ext2_group_desc *gdp;
+	unsigned long total = 0;
+	int grp;
+
+	for (grp = 0; grp < sbi->s_groups_count; grp++) {
+		gdp = ext2_get_group_desc (sb, grp, NULL);
+		if (gdp != NULL)
+			total += le16_to_cpu(gdp->bg_used_dirs_count);
+	}
+	return total;
+}
+
+#ifdef CONFIG_EXT2_CHECK
+/*
+ * Called at mount-time, super-block is locked.
+ *
+ * Cross-checks the free-inode accounting: for every group the count
+ * stored in the descriptor is compared with the number of free bits in
+ * the inode bitmap, and the bitmap total is compared with the
+ * s_freeinodes_counter.  Every mismatch is reported via ext2_error().
+ * Groups whose descriptor or bitmap cannot be read are skipped.
+ */
+void ext2_check_inodes_bitmap (struct super_block * sb)
+{
+	struct ext2_super_block * es = EXT2_SB(sb)->s_es;
+	unsigned long desc_count = 0, bitmap_count = 0;
+	struct buffer_head *bitmap_bh = NULL;
+	int i;
+
+	for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
+		struct ext2_group_desc *desc;
+		unsigned x;
+
+		desc = ext2_get_group_desc(sb, i, NULL);
+		if (!desc)
+			continue;
+		desc_count += le16_to_cpu(desc->bg_free_inodes_count);
+		/* Drop the previous group's bitmap before reading the next. */
+		brelse(bitmap_bh);
+		bitmap_bh = read_inode_bitmap(sb, i);
+		if (!bitmap_bh)
+			continue;
+
+		x = ext2_count_free(bitmap_bh, EXT2_INODES_PER_GROUP(sb) / 8);
+		/* x is a plain unsigned, so it must be printed with %u. */
+		if (le16_to_cpu(desc->bg_free_inodes_count) != x)
+			ext2_error (sb, "ext2_check_inodes_bitmap",
+				    "Wrong free inodes count in group %d, "
+				    "stored = %d, counted = %u", i,
+				    le16_to_cpu(desc->bg_free_inodes_count), x);
+		bitmap_count += x;
+	}
+	brelse(bitmap_bh);
+	if (percpu_counter_read(&EXT2_SB(sb)->s_freeinodes_counter) !=
+				bitmap_count)
+		ext2_error(sb, "ext2_check_inodes_bitmap",
+			    "Wrong free inodes count in super block, "
+			    "stored = %lu, counted = %lu",
+			    (unsigned long)le32_to_cpu(es->s_free_inodes_count),
+			    bitmap_count);
+}
+#endif
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
new file mode 100644
index 000000000000..b890be022496
--- /dev/null
+++ b/fs/ext2/inode.c
@@ -0,0 +1,1276 @@
+/*
+ * linux/fs/ext2/inode.c
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ * from
+ *
+ * linux/fs/minix/inode.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * Goal-directed block allocation by Stephen Tweedie
+ * (sct@dcs.ed.ac.uk), 1993, 1998
+ * Big-endian to little-endian byte-swapping/bitmaps by
+ * David S. Miller (davem@caip.rutgers.edu), 1995
+ * 64-bit file support on 64-bit platforms by Jakub Jelinek
+ * (jj@sunsite.ms.mff.cuni.cz)
+ *
+ * Assorted race fixes, rewrite of ext2_get_block() by Al Viro, 2000
+ */
+
+#include <linux/smp_lock.h>
+#include <linux/time.h>
+#include <linux/highuid.h>
+#include <linux/pagemap.h>
+#include <linux/quotaops.h>
+#include <linux/module.h>
+#include <linux/writeback.h>
+#include <linux/buffer_head.h>
+#include <linux/mpage.h>
+#include "ext2.h"
+#include "acl.h"
+
+MODULE_AUTHOR("Remy Card and others");
+MODULE_DESCRIPTION("Second Extended Filesystem");
+MODULE_LICENSE("GPL");
+
+static int ext2_update_inode(struct inode * inode, int do_sync);
+
+/*
+ * Test whether an inode is a fast symlink.
+ *
+ * A symlink counts as "fast" when i_blocks is zero once the block
+ * referenced by i_file_acl (if any) has been discounted.  The discount is
+ * s_blocksize >> 9, i.e. one filesystem block expressed in 512-byte units.
+ */
+static inline int ext2_inode_is_fast_symlink(struct inode *inode)
+{
+	int xattr_sectors = 0;
+
+	if (EXT2_I(inode)->i_file_acl)
+		xattr_sectors = inode->i_sb->s_blocksize >> 9;
+
+	if (!S_ISLNK(inode->i_mode))
+		return 0;
+	return inode->i_blocks - xattr_sectors == 0;
+}
+
+/*
+ * Called at the last iput() if i_nlink is zero.
+ *
+ * Stamps the deletion time and writes the inode out through
+ * ext2_update_inode(), then sets the size to zero, truncates if any
+ * blocks are still accounted, and hands the inode to ext2_free_inode().
+ * A bad inode skips all of that and is only cleared.
+ */
+void ext2_delete_inode (struct inode * inode)
+{
+ if (is_bad_inode(inode))
+ goto no_delete;
+ /* Record the deletion time and write it out before freeing anything. */
+ EXT2_I(inode)->i_dtime = get_seconds();
+ mark_inode_dirty(inode);
+ ext2_update_inode(inode, inode_needs_sync(inode));
+
+ /* With i_size at 0, ext2_truncate() works from block 0 onwards. */
+ inode->i_size = 0;
+ if (inode->i_blocks)
+ ext2_truncate (inode);
+ ext2_free_inode (inode);
+
+ return;
+no_delete:
+ clear_inode(inode); /* We must guarantee clearing of inode... */
+}
+
+/*
+ * Give back any blocks preallocated for @inode.
+ *
+ * The preallocation window is read and reset under i_meta_lock; the
+ * blocks themselves are freed after the lock has been dropped.  Compiles
+ * to an empty function unless EXT2_PREALLOCATE is defined.
+ */
+void ext2_discard_prealloc (struct inode * inode)
+{
+#ifdef EXT2_PREALLOCATE
+	struct ext2_inode_info *ei = EXT2_I(inode);
+	unsigned short nr_reserved;
+	unsigned long first_reserved;
+
+	write_lock(&ei->i_meta_lock);
+	if (!ei->i_prealloc_count) {
+		/* Nothing reserved, nothing to release. */
+		write_unlock(&ei->i_meta_lock);
+		return;
+	}
+	nr_reserved = ei->i_prealloc_count;
+	first_reserved = ei->i_prealloc_block;
+	ei->i_prealloc_count = 0;
+	ei->i_prealloc_block = 0;
+	write_unlock(&ei->i_meta_lock);
+
+	ext2_free_blocks (inode, first_reserved, nr_reserved);
+#endif
+}
+
+/*
+ * Allocate one block for @inode, preferably at @goal.
+ *
+ * With EXT2_PREALLOCATE, a block is taken from the inode's preallocation
+ * window when @goal is the first preallocated block or the block just
+ * before it.  Otherwise the window is discarded and ext2_new_block() is
+ * asked for a block; only regular files pass in their window so that it
+ * can be refilled.  Without EXT2_PREALLOCATE the request goes straight to
+ * ext2_new_block().
+ *
+ * Returns the block number; the caller in this file treats 0 as failure.
+ * *@err is only written by ext2_new_block(), so it is left untouched on a
+ * preallocation hit.
+ *
+ * The return type is unsigned long, matching ext2_new_block()'s result
+ * as stored here: squeezing the block number through an int would turn
+ * blocks at or above 2^31 into negative values.
+ */
+static unsigned long ext2_alloc_block (struct inode * inode,
+				       unsigned long goal, int *err)
+{
+#ifdef EXT2FS_DEBUG
+	static unsigned long alloc_hits, alloc_attempts;
+#endif
+	unsigned long result;
+
+
+#ifdef EXT2_PREALLOCATE
+	struct ext2_inode_info *ei = EXT2_I(inode);
+	write_lock(&ei->i_meta_lock);
+	if (ei->i_prealloc_count &&
+	    (goal == ei->i_prealloc_block || goal + 1 == ei->i_prealloc_block))
+	{
+		/* Hit: consume the first block of the window. */
+		result = ei->i_prealloc_block++;
+		ei->i_prealloc_count--;
+		write_unlock(&ei->i_meta_lock);
+		ext2_debug ("preallocation hit (%lu/%lu).\n",
+			    ++alloc_hits, ++alloc_attempts);
+	} else {
+		/* Miss: drop the stale window and allocate afresh. */
+		write_unlock(&ei->i_meta_lock);
+		ext2_discard_prealloc (inode);
+		ext2_debug ("preallocation miss (%lu/%lu).\n",
+			    alloc_hits, ++alloc_attempts);
+		if (S_ISREG(inode->i_mode))
+			result = ext2_new_block (inode, goal,
+				 &ei->i_prealloc_count,
+				 &ei->i_prealloc_block, err);
+		else
+			result = ext2_new_block(inode, goal, NULL, NULL, err);
+	}
+#else
+	result = ext2_new_block (inode, goal, NULL, NULL, err);
+#endif
+	return result;
+}
+
+/*
+ * One step on the path from the inode to a block: the slot holding a
+ * block number, the value seen in that slot, and the buffer hosting it.
+ */
+typedef struct {
+ __le32 *p; /* address of the slot the block number was read from */
+ __le32 key; /* block number as found in *p (kept little-endian) */
+ struct buffer_head *bh; /* buffer hosting the slot; NULL when the slot is in the inode */
+} Indirect;
+
+/*
+ * Fill in one chain element: remember the slot address @v, snapshot the
+ * value currently stored there, and record the buffer @bh hosting it.
+ */
+static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v)
+{
+	p->p = v;
+	p->key = *v;
+	p->bh = bh;
+}
+
+/*
+ * Check that every element in [@from, @to] still matches the slot it was
+ * read from.  Returns 1 if all snapshots are current (or the range is
+ * empty), 0 as soon as one slot is found to have changed.
+ */
+static inline int verify_chain(Indirect *from, Indirect *to)
+{
+	Indirect *link;
+
+	for (link = from; link <= to; link++)
+		if (link->key != *link->p)
+			return 0;
+	return 1;
+}
+
+/**
+ * ext2_block_to_path - parse the block number into array of offsets
+ * @inode: inode in question (we are only interested in its superblock)
+ * @i_block: block number to be parsed
+ * @offsets: array to store the offsets in
+ * @boundary: set this non-zero if the referred-to block is likely to be
+ * followed (on disk) by an indirect block.
+ * To store the locations of file's data ext2 uses a data structure common
+ * for UNIX filesystems - tree of pointers anchored in the inode, with
+ * data blocks at leaves and indirect blocks in intermediate nodes.
+ * This function translates the block number into path in that tree -
+ * return value is the path length and @offsets[n] is the offset of
+ * pointer to (n+1)th node in the nth one. If @i_block is out of range
+ * (negative or too large) warning is printed and zero returned.
+ *
+ * Note: function doesn't find node addresses, so no IO is needed. All
+ * we need to know is the capacity of indirect blocks (taken from the
+ * inode->i_sb).
+ */
+
+/*
+ * Portability note: the last comparison (check that we fit into triple
+ * indirect block) is spelled differently, because otherwise on an
+ * architecture with 32-bit longs and 8Kb pages we might get into trouble
+ * if our filesystem had 8Kb blocks. We might use long long, but that would
+ * kill us on x86. Oh, well, at least the sign propagation does not matter -
+ * i_block would have to be negative in the very beginning, so we would not
+ * get there at all.
+ */
+
+static int ext2_block_to_path(struct inode *inode,
+ long i_block, int offsets[4], int *boundary)
+{
+ int ptrs = EXT2_ADDR_PER_BLOCK(inode->i_sb);
+ int ptrs_bits = EXT2_ADDR_PER_BLOCK_BITS(inode->i_sb);
+ /* Number of data blocks reachable through each addressing level. */
+ const long direct_blocks = EXT2_NDIR_BLOCKS,
+ indirect_blocks = ptrs,
+ double_blocks = (1 << (ptrs_bits * 2));
+ int n = 0; /* path length so far; stays 0 when i_block is out of range */
+ int final = 0; /* slot count of the container holding the last pointer */
+
+ /*
+ * Each test below first subtracts the capacity of the previous level,
+ * so i_block becomes relative to the level being tried.
+ */
+ if (i_block < 0) {
+ ext2_warning (inode->i_sb, "ext2_block_to_path", "block < 0");
+ } else if (i_block < direct_blocks) {
+ offsets[n++] = i_block;
+ final = direct_blocks;
+ } else if ( (i_block -= direct_blocks) < indirect_blocks) {
+ offsets[n++] = EXT2_IND_BLOCK;
+ offsets[n++] = i_block;
+ final = ptrs;
+ } else if ((i_block -= indirect_blocks) < double_blocks) {
+ offsets[n++] = EXT2_DIND_BLOCK;
+ offsets[n++] = i_block >> ptrs_bits;
+ offsets[n++] = i_block & (ptrs - 1);
+ final = ptrs;
+ } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) {
+ offsets[n++] = EXT2_TIND_BLOCK;
+ offsets[n++] = i_block >> (ptrs_bits * 2);
+ offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1);
+ offsets[n++] = i_block & (ptrs - 1);
+ final = ptrs;
+ } else {
+ ext2_warning (inode->i_sb, "ext2_block_to_path", "block > big");
+ }
+ /* Boundary is set when the pointer sits in the last slot of its container. */
+ if (boundary)
+ *boundary = (i_block & (ptrs - 1)) == (final - 1);
+ return n;
+}
+
+/**
+ * ext2_get_branch - read the chain of indirect blocks leading to data
+ * @inode: inode in question
+ * @depth: depth of the chain (1 - direct pointer, etc.)
+ * @offsets: offsets of pointers in inode/indirect blocks
+ * @chain: place to store the result
+ * @err: here we store the error value
+ *
+ * Function fills the array of triples <key, p, bh> and returns %NULL
+ * if everything went OK or the pointer to the last filled triple
+ * (incomplete one) otherwise. Upon the return chain[i].key contains
+ * the number of (i+1)-th block in the chain (as it is stored in memory,
+ * i.e. little-endian 32-bit), chain[i].p contains the address of that
+ * number (it points into struct inode for i==0 and into the bh->b_data
+ * for i>0) and chain[i].bh points to the buffer_head of i-th indirect
+ * block for i>0 and NULL for i==0. In other words, it holds the block
+ * numbers of the chain, addresses they were taken from (and where we can
+ * verify that chain did not change) and buffer_heads hosting these
+ * numbers.
+ *
+ * Function stops when it stumbles upon zero pointer (absent block)
+ * (pointer to last triple returned, *@err == 0)
+ * or when it gets an IO error reading an indirect block
+ * (ditto, *@err == -EIO)
+ * or when it notices that chain had been changed while it was reading
+ * (ditto, *@err == -EAGAIN)
+ * or when it reads all @depth-1 indirect blocks successfully and finds
+ * the whole chain, all way to the data (returns %NULL, *err == 0).
+ */
+static Indirect *ext2_get_branch(struct inode *inode,
+ int depth,
+ int *offsets,
+ Indirect chain[4],
+ int *err)
+{
+ struct super_block *sb = inode->i_sb;
+ Indirect *p = chain;
+ struct buffer_head *bh;
+
+ *err = 0;
+ /* i_data is not going away, no lock needed */
+ add_chain (chain, NULL, EXT2_I(inode)->i_data + *offsets);
+ if (!p->key)
+ goto no_block;
+ /* Follow the remaining depth-1 pointers, one indirect block per step. */
+ while (--depth) {
+ bh = sb_bread(sb, le32_to_cpu(p->key));
+ if (!bh)
+ goto failure;
+ /* The read was done unlocked: recheck the chain before extending it. */
+ read_lock(&EXT2_I(inode)->i_meta_lock);
+ if (!verify_chain(chain, p))
+ goto changed;
+ add_chain(++p, bh, (__le32*)bh->b_data + *++offsets);
+ read_unlock(&EXT2_I(inode)->i_meta_lock);
+ if (!p->key)
+ goto no_block;
+ }
+ return NULL;
+
+changed:
+ /* bh was never added to the chain, so it is released here. */
+ read_unlock(&EXT2_I(inode)->i_meta_lock);
+ brelse(bh);
+ *err = -EAGAIN;
+ goto no_block;
+failure:
+ *err = -EIO;
+no_block:
+ return p;
+}
+
+/**
+ * ext2_find_near - find a place for allocation with sufficient locality
+ * @inode: owner
+ * @ind: descriptor of indirect block.
+ *
+ * This function returns the preferred place for block allocation.
+ * It is used when heuristic for sequential allocation fails.
+ * Rules are:
+ * + if there is a block to the left of our position - allocate near it.
+ * + if pointer will live in indirect block - allocate near that block.
+ * + if pointer will live in inode - allocate in the same cylinder group.
+ *
+ * In the latter case we colour the starting block by the callers PID to
+ * prevent it from clashing with concurrent allocations for a different inode
+ * in the same block group. The PID is used here so that functionally related
+ * files will be close-by on-disk.
+ *
+ * Caller must make sure that @ind is valid and will stay that way.
+ */
+
+static unsigned long ext2_find_near(struct inode *inode, Indirect *ind)
+{
+ struct ext2_inode_info *ei = EXT2_I(inode);
+ /* First slot of the array @ind->p lives in: the indirect block or i_data. */
+ __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data;
+ __le32 *p;
+ unsigned long bg_start;
+ unsigned long colour;
+
+ /* Try to find previous block */
+ for (p = ind->p - 1; p >= start; p--)
+ if (*p)
+ return le32_to_cpu(*p);
+
+ /* No such thing, so let's try location of indirect block */
+ if (ind->bh)
+ return ind->bh->b_blocknr;
+
+ /*
+ * It is going to be referred from inode itself? OK, just put it into
+ * the same cylinder group then.
+ */
+ bg_start = (ei->i_block_group * EXT2_BLOCKS_PER_GROUP(inode->i_sb)) +
+ le32_to_cpu(EXT2_SB(inode->i_sb)->s_es->s_first_data_block);
+ /* Offset into the group by one of 16 equal slices, chosen by PID. */
+ colour = (current->pid % 16) *
+ (EXT2_BLOCKS_PER_GROUP(inode->i_sb) / 16);
+ return bg_start + colour;
+}
+
+/**
+ * ext2_find_goal - find a preferred place for allocation.
+ * @inode: owner
+ * @block: block we want
+ * @chain: chain of indirect blocks
+ * @partial: pointer to the last triple within a chain
+ * @goal: place to store the result.
+ *
+ * Normally this function finds the preferred place for block allocation,
+ * stores it in *@goal and returns zero. If the branch had been changed
+ * under us we return -EAGAIN.
+ */
+
+static inline int ext2_find_goal(struct inode *inode,
+ long block,
+ Indirect chain[4],
+ Indirect *partial,
+ unsigned long *goal)
+{
+ struct ext2_inode_info *ei = EXT2_I(inode);
+ write_lock(&ei->i_meta_lock);
+ /* Sequential access: advance the remembered (block, goal) pair by one. */
+ if ((block == ei->i_next_alloc_block + 1) && ei->i_next_alloc_goal) {
+ ei->i_next_alloc_block++;
+ ei->i_next_alloc_goal++;
+ }
+ if (verify_chain(chain, partial)) {
+ /*
+ * try the heuristic for sequential allocation,
+ * failing that at least try to get decent locality.
+ */
+ if (block == ei->i_next_alloc_block)
+ *goal = ei->i_next_alloc_goal;
+ /* *goal is only replaced when it is still zero at this point. */
+ if (!*goal)
+ *goal = ext2_find_near(inode, partial);
+ write_unlock(&ei->i_meta_lock);
+ return 0;
+ }
+ write_unlock(&ei->i_meta_lock);
+ return -EAGAIN;
+}
+
+/**
+ * ext2_alloc_branch - allocate and set up a chain of blocks.
+ * @inode: owner
+ * @num: depth of the chain (number of blocks to allocate)
+ * @offsets: offsets (in the blocks) to store the pointers to next.
+ * @branch: place to store the chain in.
+ *
+ * This function allocates @num blocks, zeroes out all but the last one,
+ * links them into chain and (if we are synchronous) writes them to disk.
+ * In other words, it prepares a branch that can be spliced onto the
+ * inode. It stores the information about that chain in the branch[], in
+ * the same format as ext2_get_branch() would do. We are calling it after
+ * we had read the existing part of chain and partial points to the last
+ * triple of that (one with zero ->key). Upon the exit we have the same
+ * picture as after the successful ext2_get_block(), except that in one
+ * place chain is disconnected - *branch->p is still zero (we did not
+ * set the last link), but branch->key contains the number that should
+ * be placed into *branch->p to fill that gap.
+ *
+ * If allocation fails we free all blocks we've allocated (and forget
+ * their buffer_heads) and return the error value from the failed
+ * ext2_alloc_block() (normally -ENOSPC). Otherwise we set the chain
+ * as described above and return 0.
+ */
+
+/*
+ * Allocate @num blocks and link them into a chain described by @branch.
+ *
+ * branch[0].key receives the number of the first block; for n >= 1,
+ * branch[n] describes the slot inside block n-1 that points at block n.
+ * Every block except the last is zeroed, given its single pointer and
+ * marked dirty.  Returns 0 on success.  On failure the blocks allocated
+ * so far are freed, their buffers forgotten, and the error reported by
+ * ext2_alloc_block() is returned.
+ *
+ * Block numbers are kept in unsigned long and narrowed through __u32, so
+ * a block at or above 2^31 is never sign-extended when it is handed to
+ * sb_getblk() or passed on as the next allocation goal.
+ */
+static int ext2_alloc_branch(struct inode *inode,
+			     int num,
+			     unsigned long goal,
+			     int *offsets,
+			     Indirect *branch)
+{
+	int blocksize = inode->i_sb->s_blocksize;
+	int n = 0;
+	int err;
+	int i;
+	/* ext2 block numbers are 32-bit; the cast undoes any int narrowing. */
+	unsigned long parent = (__u32) ext2_alloc_block(inode, goal, &err);
+
+	branch[0].key = cpu_to_le32(parent);
+	if (parent) for (n = 1; n < num; n++) {
+		struct buffer_head *bh;
+		/* Allocate the next block */
+		unsigned long nr = (__u32) ext2_alloc_block(inode, parent, &err);
+		if (!nr)
+			break;
+		branch[n].key = cpu_to_le32(nr);
+		/*
+		 * Get buffer_head for parent block, zero it out and set
+		 * the pointer to new one, then send parent to disk.
+		 */
+		bh = sb_getblk(inode->i_sb, parent);
+		lock_buffer(bh);
+		memset(bh->b_data, 0, blocksize);
+		branch[n].bh = bh;
+		branch[n].p = (__le32 *) bh->b_data + offsets[n];
+		*branch[n].p = branch[n].key;
+		set_buffer_uptodate(bh);
+		unlock_buffer(bh);
+		mark_buffer_dirty_inode(bh, inode);
+		/* We used to sync bh here if IS_SYNC(inode).
+		 * But we now rely upon generic_osync_inode()
+		 * and b_inode_buffers.  But not for directories.
+		 */
+		if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode))
+			sync_dirty_buffer(bh);
+		parent = nr;
+	}
+	if (n == num)
+		return 0;
+
+	/* Allocation failed, free what we already allocated */
+	for (i = 1; i < n; i++)
+		bforget(branch[i].bh);
+	for (i = 0; i < n; i++)
+		ext2_free_blocks(inode, le32_to_cpu(branch[i].key), 1);
+	return err;
+}
+
+/**
+ * ext2_splice_branch - splice the allocated branch onto inode.
+ * @inode: owner
+ * @block: (logical) number of block we are adding
+ * @chain: chain of indirect blocks (with a missing link - see
+ * ext2_alloc_branch)
+ * @where: location of missing link
+ * @num: number of blocks we are adding
+ *
+ * This function verifies that chain (up to the missing link) had not
+ * changed, fills the missing link and does all housekeeping needed in
+ * inode (->i_blocks, etc.). In case of success we end up with the full
+ * chain to new block and return 0. Otherwise (== chain had been changed)
+ * we free the new blocks (forgetting their buffer_heads, indeed) and
+ * return -EAGAIN.
+ */
+
+static inline int ext2_splice_branch(struct inode *inode,
+ long block,
+ Indirect chain[4],
+ Indirect *where,
+ int num)
+{
+ struct ext2_inode_info *ei = EXT2_I(inode);
+ int i;
+
+ /* Verify that place we are splicing to is still there and vacant */
+
+ write_lock(&ei->i_meta_lock);
+ if (!verify_chain(chain, where-1) || *where->p)
+ goto changed;
+
+ /* That's it */
+
+ *where->p = where->key;
+ /* Remember the new leaf so that ext2_find_goal() can continue from it. */
+ ei->i_next_alloc_block = block;
+ ei->i_next_alloc_goal = le32_to_cpu(where[num-1].key);
+
+ write_unlock(&ei->i_meta_lock);
+
+ /* We are done with atomic stuff, now do the rest of housekeeping */
+
+ inode->i_ctime = CURRENT_TIME_SEC;
+
+ /* had we spliced it onto indirect block? */
+ if (where->bh)
+ mark_buffer_dirty_inode(where->bh, inode);
+
+ mark_inode_dirty(inode);
+ return 0;
+
+changed:
+ write_unlock(&ei->i_meta_lock);
+ /* where[0].bh belongs to the existing tree; only the new buffers are forgotten. */
+ for (i = 1; i < num; i++)
+ bforget(where[i].bh);
+ for (i = 0; i < num; i++)
+ ext2_free_blocks(inode, le32_to_cpu(where[i].key), 1);
+ return -EAGAIN;
+}
+
+/*
+ * Allocation strategy is simple: if we have to allocate something, we will
+ * have to go the whole way to leaf. So let's do it before attaching anything
+ * to tree, set linkage between the newborn blocks, write them if sync is
+ * required, recheck the path, free and repeat if check fails, otherwise
+ * set the last missing link (that will protect us from any truncate-generated
+ * removals - all blocks on the path are immune now) and possibly force the
+ * write on the parent block.
+ * That has a nice additional property: no special recovery from the failed
+ * allocations is needed - we simply release blocks and do not touch anything
+ * reachable from inode.
+ */
+
+/*
+ * Map logical block @iblock of @inode into @bh_result, allocating the
+ * block and any missing indirect blocks when @create is non-zero.
+ * Returns 0 or a negative errno; a plain lookup of an absent block
+ * returns 0 and leaves @bh_result unmapped.
+ */
+int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create)
+{
+ int err = -EIO;
+ int offsets[4];
+ Indirect chain[4];
+ Indirect *partial;
+ unsigned long goal;
+ int left;
+ int boundary = 0;
+ int depth = ext2_block_to_path(inode, iblock, offsets, &boundary);
+
+ /* Block number out of range: return the initial -EIO. */
+ if (depth == 0)
+ goto out;
+
+reread:
+ partial = ext2_get_branch(inode, depth, offsets, chain, &err);
+
+ /* Simplest case - block found, no allocation needed */
+ if (!partial) {
+got_it:
+ map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
+ if (boundary)
+ set_buffer_boundary(bh_result);
+ /* Clean up and exit */
+ partial = chain+depth-1; /* the whole chain */
+ goto cleanup;
+ }
+
+ /* Next simple case - plain lookup or failed read of indirect block */
+ if (!create || err == -EIO) {
+cleanup:
+ /* chain[0] lives in the inode and has no buffer to release. */
+ while (partial > chain) {
+ brelse(partial->bh);
+ partial--;
+ }
+out:
+ return err;
+ }
+
+ /*
+ * Indirect block might be removed by truncate while we were
+ * reading it. Handling of that case (forget what we've got and
+ * reread) is taken out of the main path.
+ */
+ if (err == -EAGAIN)
+ goto changed;
+
+ goal = 0;
+ if (ext2_find_goal(inode, iblock, chain, partial, &goal) < 0)
+ goto changed;
+
+ /* Number of blocks still missing between partial and the leaf. */
+ left = (chain + depth) - partial;
+ err = ext2_alloc_branch(inode, left, goal,
+ offsets+(partial-chain), partial);
+ if (err)
+ goto cleanup;
+
+ if (ext2_splice_branch(inode, iblock, chain, partial, left) < 0)
+ goto changed;
+
+ set_buffer_new(bh_result);
+ goto got_it;
+
+changed:
+ /* Drop the buffers we hold and walk the tree again from the inode. */
+ while (partial > chain) {
+ brelse(partial->bh);
+ partial--;
+ }
+ goto reread;
+}
+
+/* ->writepage: forwards to block_write_full_page() with ext2_get_block as the mapper. */
+static int ext2_writepage(struct page *page, struct writeback_control *wbc)
+{
+ return block_write_full_page(page, ext2_get_block, wbc);
+}
+
+/* ->readpage: forwards to mpage_readpage(); @file is not used. */
+static int ext2_readpage(struct file *file, struct page *page)
+{
+ return mpage_readpage(page, ext2_get_block);
+}
+
+/* ->readpages: forwards to mpage_readpages(); @file is not used. */
+static int
+ext2_readpages(struct file *file, struct address_space *mapping,
+ struct list_head *pages, unsigned nr_pages)
+{
+ return mpage_readpages(mapping, pages, nr_pages, ext2_get_block);
+}
+
+/* ->prepare_write: forwards to block_prepare_write(); @file is not used. */
+static int
+ext2_prepare_write(struct file *file, struct page *page,
+ unsigned from, unsigned to)
+{
+ return block_prepare_write(page,from,to,ext2_get_block);
+}
+
+/* ->prepare_write for ext2_nobh_aops: forwards to nobh_prepare_write(); @file is not used. */
+static int
+ext2_nobh_prepare_write(struct file *file, struct page *page,
+ unsigned from, unsigned to)
+{
+ return nobh_prepare_write(page,from,to,ext2_get_block);
+}
+
+/* ->writepage for ext2_nobh_aops: forwards to nobh_writepage(). */
+static int ext2_nobh_writepage(struct page *page,
+ struct writeback_control *wbc)
+{
+ return nobh_writepage(page, ext2_get_block, wbc);
+}
+
+/* ->bmap: forwards to generic_block_bmap() with ext2_get_block as the mapper. */
+static sector_t ext2_bmap(struct address_space *mapping, sector_t block)
+{
+ return generic_block_bmap(mapping,block,ext2_get_block);
+}
+
+/*
+ * Multi-block mapping callback used by ext2_direct_IO().
+ *
+ * Maps exactly one block through ext2_get_block() and, on success,
+ * reports a mapping of one filesystem block (1 << i_blkbits bytes) in
+ * @bh_result->b_size.  @max_blocks is not used.
+ */
+static int
+ext2_get_blocks(struct inode *inode, sector_t iblock, unsigned long max_blocks,
+		struct buffer_head *bh_result, int create)
+{
+	int status = ext2_get_block(inode, iblock, bh_result, create);
+
+	if (status != 0)
+		return status;
+
+	bh_result->b_size = (1 << inode->i_blkbits);
+	return 0;
+}
+
+/*
+ * ->direct_IO: looks up the inode behind the kiocb's file and forwards
+ * the request to blockdev_direct_IO() on the superblock's block device,
+ * with ext2_get_blocks as the mapper and a NULL final argument.
+ */
+static ssize_t
+ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
+ loff_t offset, unsigned long nr_segs)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file->f_mapping->host;
+
+ return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
+ offset, nr_segs, ext2_get_blocks, NULL);
+}
+
+/* ->writepages: forwards to mpage_writepages() with ext2_get_block as the mapper. */
+static int
+ext2_writepages(struct address_space *mapping, struct writeback_control *wbc)
+{
+ return mpage_writepages(mapping, wbc, ext2_get_block);
+}
+
+/*
+ * Address-space operations installed by ext2_read_inode() when the NOBH
+ * mount option is not set.
+ */
+struct address_space_operations ext2_aops = {
+ .readpage = ext2_readpage,
+ .readpages = ext2_readpages,
+ .writepage = ext2_writepage,
+ .sync_page = block_sync_page,
+ .prepare_write = ext2_prepare_write,
+ .commit_write = generic_commit_write,
+ .bmap = ext2_bmap,
+ .direct_IO = ext2_direct_IO,
+ .writepages = ext2_writepages,
+};
+
+/*
+ * Address-space operations installed by ext2_read_inode() when the NOBH
+ * mount option is set.  Differs from ext2_aops only in the writepage,
+ * prepare_write and commit_write entries.
+ */
+struct address_space_operations ext2_nobh_aops = {
+ .readpage = ext2_readpage,
+ .readpages = ext2_readpages,
+ .writepage = ext2_nobh_writepage,
+ .sync_page = block_sync_page,
+ .prepare_write = ext2_nobh_prepare_write,
+ .commit_write = nobh_commit_write,
+ .bmap = ext2_bmap,
+ .direct_IO = ext2_direct_IO,
+ .writepages = ext2_writepages,
+};
+
+/*
+ * Return 1 when every 32-bit word in [p, q) is zero (an empty range
+ * counts as all zero), 0 as soon as a non-zero word is found.
+ *
+ * Probably it should be a library function... search for first non-zero word
+ * or memcmp with zero_page, whatever is better for particular architecture.
+ * Linus?
+ */
+static inline int all_zeroes(__le32 *p, __le32 *q)
+{
+	__le32 *word;
+
+	for (word = p; word < q; word++) {
+		if (*word)
+			return 0;
+	}
+	return 1;
+}
+
+/**
+ * ext2_find_shared - find the indirect blocks for partial truncation.
+ * @inode: inode in question
+ * @depth: depth of the affected branch
+ * @offsets: offsets of pointers in that branch (see ext2_block_to_path)
+ * @chain: place to store the pointers to partial indirect blocks
+ * @top: place to store the (detached) top of branch
+ *
+ * This is a helper function used by ext2_truncate().
+ *
+ * When we do truncate() we may have to clean the ends of several indirect
+ * blocks but leave the blocks themselves alive. Block is partially
+ * truncated if some data below the new i_size is referred from it (and
+ * it is on the path to the first completely truncated data block, indeed).
+ * We have to free the top of that path along with everything to the right
+ * of the path. Since no allocation past the truncation point is possible
+ * until ext2_truncate() finishes, we may safely do the latter, but top
+ * of branch may require special attention - pageout below the truncation
+ * point might try to populate it.
+ *
+ * We atomically detach the top of branch from the tree, store the block
+ * number of its root in *@top, pointers to buffer_heads of partially
+ * truncated blocks - in @chain[].bh and pointers to their last elements
+ * that should not be removed - in @chain[].p. Return value is the pointer
+ * to last filled element of @chain.
+ *
+ * The work left to caller to do the actual freeing of subtrees:
+ * a) free the subtree starting from *@top
+ * b) free the subtrees whose roots are stored in
+ * (@chain[i].p+1 .. end of @chain[i].bh->b_data)
+ * c) free the subtrees growing from the inode past the @chain[0].p
+ * (no partially truncated stuff there).
+ */
+
+static Indirect *ext2_find_shared(struct inode *inode,
+ int depth,
+ int offsets[4],
+ Indirect chain[4],
+ __le32 *top)
+{
+ Indirect *partial, *p;
+ int k, err;
+
+ *top = 0;
+ /* Trim trailing zero offsets, but always keep at least one level. */
+ for (k = depth; k > 1 && !offsets[k-1]; k--)
+ ;
+ partial = ext2_get_branch(inode, k, offsets, chain, &err);
+ /* NULL means the whole k-level branch exists; point at its last element. */
+ if (!partial)
+ partial = chain + k-1;
+ /*
+ * If the branch acquired continuation since we've looked at it -
+ * fine, it should all survive and (new) top doesn't belong to us.
+ */
+ write_lock(&EXT2_I(inode)->i_meta_lock);
+ if (!partial->key && *partial->p) {
+ write_unlock(&EXT2_I(inode)->i_meta_lock);
+ goto no_top;
+ }
+ /* Step back over blocks that hold nothing but zeroes before our slot. */
+ for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--)
+ ;
+ /*
+ * OK, we've found the last block that must survive. The rest of our
+ * branch should be detached before unlocking. However, if that rest
+ * of branch is all ours and does not grow immediately from the inode
+ * it's easier to cheat and just decrement partial->p.
+ */
+ if (p == chain + k - 1 && p > chain) {
+ p->p--;
+ } else {
+ *top = *p->p;
+ *p->p = 0;
+ }
+ write_unlock(&EXT2_I(inode)->i_meta_lock);
+
+ /* Release the buffers below the surviving block; they are not returned. */
+ while(partial > p)
+ {
+ brelse(partial->bh);
+ partial--;
+ }
+no_top:
+ return partial;
+}
+
+/**
+ * ext2_free_data - free a list of data blocks
+ * @inode: inode we are dealing with
+ * @p: array of block numbers
+ * @q: points immediately past the end of array
+ *
+ * We are freeing all blocks referred from that array (numbers are
+ * stored as little-endian 32-bit) and updating @inode->i_blocks
+ * appropriately.
+ *
+ * Each non-zero slot is cleared as it is visited. Runs of consecutive
+ * block numbers are collected and released with a single
+ * ext2_free_blocks() call per run.
+ */
+static inline void ext2_free_data(struct inode *inode, __le32 *p, __le32 *q)
+{
+ /* Current run: starts at block_to_free and is count blocks long. */
+ unsigned long block_to_free = 0, count = 0;
+ unsigned long nr;
+
+ for ( ; p < q ; p++) {
+ nr = le32_to_cpu(*p);
+ if (nr) {
+ *p = 0;
+ /* accumulate blocks to free if they're contiguous */
+ if (count == 0)
+ goto free_this;
+ else if (block_to_free == nr - count)
+ count++;
+ else {
+ /* Run broken: release it, then start a new one at nr. */
+ mark_inode_dirty(inode);
+ ext2_free_blocks (inode, block_to_free, count);
+ free_this:
+ block_to_free = nr;
+ count = 1;
+ }
+ }
+ }
+ /* Release whatever run is still pending at the end of the array. */
+ if (count > 0) {
+ mark_inode_dirty(inode);
+ ext2_free_blocks (inode, block_to_free, count);
+ }
+}
+
+/**
+ *	ext2_free_branches - free an array of branches
+ *	@inode:	inode we are dealing with
+ *	@p:	array of block numbers
+ *	@q:	pointer immediately past the end of array
+ *	@depth:	depth of the branches to free
+ *
+ *	We are freeing all blocks referred from these branches (numbers are
+ *	stored as little-endian 32-bit) and updating @inode->i_blocks
+ *	appropriately.
+ *
+ *	With @depth == 0 the array holds data block numbers and is passed
+ *	to ext2_free_data().  Otherwise each non-zero slot is cleared, the
+ *	indirect block it named is read and emptied recursively at
+ *	@depth - 1, and the indirect block itself is freed afterwards.  An
+ *	indirect block that cannot be read is reported and skipped; its
+ *	slot has already been cleared by then.
+ */
+static void ext2_free_branches(struct inode *inode, __le32 *p, __le32 *q, int depth)
+{
+	struct buffer_head * bh;
+	unsigned long nr;
+
+	if (depth--) {
+		int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
+		for ( ; p < q ; p++) {
+			nr = le32_to_cpu(*p);
+			if (!nr)
+				continue;
+			*p = 0;
+			bh = sb_bread(inode->i_sb, nr);
+			/*
+			 * A read failure? Report error and clear slot
+			 * (should be rare).
+			 */ 
+			if (!bh) {
+				/* Both values are unsigned long: print with %lu. */
+				ext2_error(inode->i_sb, "ext2_free_branches",
+					"Read failure, inode=%lu, block=%lu",
+					inode->i_ino, nr);
+				continue;
+			}
+			ext2_free_branches(inode,
+					   (__le32*)bh->b_data,
+					   (__le32*)bh->b_data + addr_per_block,
+					   depth);
+			bforget(bh);
+			ext2_free_blocks(inode, nr, 1);
+			mark_inode_dirty(inode);
+		}
+	} else
+		ext2_free_data(inode, p, q);
+}
+
+/*
+ * Release every block of @inode that lies beyond its current i_size.
+ *
+ * Does nothing for inodes that are not regular files, directories or
+ * symlinks, for fast symlinks, and for append-only or immutable inodes.
+ */
+void ext2_truncate (struct inode * inode)
+{
+ __le32 *i_data = EXT2_I(inode)->i_data;
+ int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
+ int offsets[4];
+ Indirect chain[4];
+ Indirect *partial;
+ __le32 nr = 0;
+ int n;
+ long iblock;
+ unsigned blocksize;
+
+ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+ S_ISLNK(inode->i_mode)))
+ return;
+ if (ext2_inode_is_fast_symlink(inode))
+ return;
+ if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+ return;
+
+ ext2_discard_prealloc(inode);
+
+ /* First logical block that lies entirely beyond the new size. */
+ blocksize = inode->i_sb->s_blocksize;
+ iblock = (inode->i_size + blocksize-1)
+ >> EXT2_BLOCK_SIZE_BITS(inode->i_sb);
+
+ if (test_opt(inode->i_sb, NOBH))
+ nobh_truncate_page(inode->i_mapping, inode->i_size);
+ else
+ block_truncate_page(inode->i_mapping,
+ inode->i_size, ext2_get_block);
+
+ n = ext2_block_to_path(inode, iblock, offsets, NULL);
+ if (n == 0)
+ return;
+
+ /* Cut point is among the direct blocks: free the rest of them. */
+ if (n == 1) {
+ ext2_free_data(inode, i_data+offsets[0],
+ i_data + EXT2_NDIR_BLOCKS);
+ goto do_indirects;
+ }
+
+ partial = ext2_find_shared(inode, n, offsets, chain, &nr);
+ /* Kill the top of shared branch (already detached) */
+ if (nr) {
+ if (partial == chain)
+ mark_inode_dirty(inode);
+ else
+ mark_buffer_dirty_inode(partial->bh, inode);
+ ext2_free_branches(inode, &nr, &nr+1, (chain+n-1) - partial);
+ }
+ /* Clear the ends of indirect blocks on the shared branch */
+ while (partial > chain) {
+ ext2_free_branches(inode,
+ partial->p + 1,
+ (__le32*)partial->bh->b_data+addr_per_block,
+ (chain+n-1) - partial);
+ mark_buffer_dirty_inode(partial->bh, inode);
+ brelse (partial->bh);
+ partial--;
+ }
+do_indirects:
+ /* Kill the remaining (whole) subtrees */
+ /*
+ * offsets[0] names the top-level slot holding the cut point; every
+ * tree rooted in a later slot is freed whole. The cases fall through
+ * on purpose.
+ */
+ switch (offsets[0]) {
+ default:
+ nr = i_data[EXT2_IND_BLOCK];
+ if (nr) {
+ i_data[EXT2_IND_BLOCK] = 0;
+ mark_inode_dirty(inode);
+ ext2_free_branches(inode, &nr, &nr+1, 1);
+ }
+ /* fall through */
+ case EXT2_IND_BLOCK:
+ nr = i_data[EXT2_DIND_BLOCK];
+ if (nr) {
+ i_data[EXT2_DIND_BLOCK] = 0;
+ mark_inode_dirty(inode);
+ ext2_free_branches(inode, &nr, &nr+1, 2);
+ }
+ /* fall through */
+ case EXT2_DIND_BLOCK:
+ nr = i_data[EXT2_TIND_BLOCK];
+ if (nr) {
+ i_data[EXT2_TIND_BLOCK] = 0;
+ mark_inode_dirty(inode);
+ ext2_free_branches(inode, &nr, &nr+1, 3);
+ }
+ /* fall through */
+ case EXT2_TIND_BLOCK:
+ ;
+ }
+ inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
+ if (inode_needs_sync(inode)) {
+ sync_mapping_buffers(inode->i_mapping);
+ ext2_sync_inode (inode);
+ } else {
+ mark_inode_dirty(inode);
+ }
+}
+
+/*
+ * Read the inode-table block holding inode @ino and return a pointer to
+ * the raw inode inside it.
+ *
+ * On success *@p is the buffer_head of that block. On failure *@p is
+ * NULL and an ERR_PTR is returned: -EINVAL for an inode number outside
+ * the accepted range, -EIO when the group descriptor or the inode block
+ * cannot be obtained.
+ */
+static struct ext2_inode *ext2_get_inode(struct super_block *sb, ino_t ino,
+ struct buffer_head **p)
+{
+ struct buffer_head * bh;
+ unsigned long block_group;
+ unsigned long block;
+ unsigned long offset;
+ struct ext2_group_desc * gdp;
+
+ *p = NULL;
+ /* Accept the root inode and anything from EXT2_FIRST_INO up to s_inodes_count. */
+ if ((ino != EXT2_ROOT_INO && ino < EXT2_FIRST_INO(sb)) ||
+ ino > le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count))
+ goto Einval;
+
+ /* Inode numbers start at 1, hence the ino - 1. */
+ block_group = (ino - 1) / EXT2_INODES_PER_GROUP(sb);
+ gdp = ext2_get_group_desc(sb, block_group, &bh);
+ if (!gdp)
+ goto Egdp;
+ /*
+ * Figure out the offset within the block group inode table
+ */
+ offset = ((ino - 1) % EXT2_INODES_PER_GROUP(sb)) * EXT2_INODE_SIZE(sb);
+ block = le32_to_cpu(gdp->bg_inode_table) +
+ (offset >> EXT2_BLOCK_SIZE_BITS(sb));
+ if (!(bh = sb_bread(sb, block)))
+ goto Eio;
+
+ *p = bh;
+ /* Reduce the table offset to an offset within the block just read. */
+ offset &= (EXT2_BLOCK_SIZE(sb) - 1);
+ return (struct ext2_inode *) (bh->b_data + offset);
+
+Einval:
+ ext2_error(sb, "ext2_get_inode", "bad inode number: %lu",
+ (unsigned long) ino);
+ return ERR_PTR(-EINVAL);
+Eio:
+ ext2_error(sb, "ext2_get_inode",
+ "unable to read inode block - inode=%lu, block=%lu",
+ (unsigned long) ino, block);
+Egdp:
+ return ERR_PTR(-EIO);
+}
+
+/*
+ * Mirror the ext2 per-inode flags (sync, append, immutable, noatime,
+ * dirsync) into the generic inode->i_flags.  The five S_* bits are
+ * cleared first and then set again for every matching EXT2_*_FL bit
+ * found in the ext2 inode info.
+ */
+void ext2_set_inode_flags(struct inode *inode)
+{
+	static const struct {
+		unsigned int on_disk;
+		unsigned int in_core;
+	} flag_map[] = {
+		{ EXT2_SYNC_FL,		S_SYNC },
+		{ EXT2_APPEND_FL,	S_APPEND },
+		{ EXT2_IMMUTABLE_FL,	S_IMMUTABLE },
+		{ EXT2_NOATIME_FL,	S_NOATIME },
+		{ EXT2_DIRSYNC_FL,	S_DIRSYNC },
+	};
+	unsigned int ext2_flags = EXT2_I(inode)->i_flags;
+	unsigned int idx;
+
+	inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
+	for (idx = 0; idx < sizeof(flag_map) / sizeof(flag_map[0]); idx++) {
+		if (ext2_flags & flag_map[idx].on_disk)
+			inode->i_flags |= flag_map[idx].in_core;
+	}
+}
+
+/*
+ * ext2_read_inode - fill an in-core inode from its on-disk ext2 inode.
+ *
+ * Fetches the raw inode for inode->i_ino via ext2_get_inode(), converts the
+ * little-endian on-disk fields to CPU byte order, and installs the inode,
+ * file and address-space operations that match the file type. When the raw
+ * inode cannot be obtained, or it describes a deleted inode, the in-core
+ * inode is turned into a bad inode with make_bad_inode().
+ */
+void ext2_read_inode (struct inode * inode)
+{
+ struct ext2_inode_info *ei = EXT2_I(inode);
+ ino_t ino = inode->i_ino;
+ struct buffer_head * bh;
+ struct ext2_inode * raw_inode = ext2_get_inode(inode->i_sb, ino, &bh);
+ int n;
+
+#ifdef CONFIG_EXT2_FS_POSIX_ACL
+ ei->i_acl = EXT2_ACL_NOT_CACHED;
+ ei->i_default_acl = EXT2_ACL_NOT_CACHED;
+#endif
+ /* NOTE(review): no brelse() on this path - presumably bh is not held
+ * when ext2_get_inode() fails; confirm against ext2_get_inode(). */
+ if (IS_ERR(raw_inode))
+ goto bad_inode;
+
+ inode->i_mode = le16_to_cpu(raw_inode->i_mode);
+ inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
+ inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
+ /* Unless mounted with NO_UID32, merge in the upper 16 bits of uid/gid. */
+ if (!(test_opt (inode->i_sb, NO_UID32))) {
+ inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
+ inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
+ }
+ inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
+ inode->i_size = le32_to_cpu(raw_inode->i_size);
+ inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime);
+ inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->i_ctime);
+ inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->i_mtime);
+ /* Only whole seconds are read from disk here; zero the nanoseconds. */
+ inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0;
+ ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
+ /* We now have enough fields to check if the inode was active or not.
+ * This is needed because nfsd might try to access dead inodes;
+ * the test is the same one that e2fsck uses
+ * NeilBrown 1999oct15
+ */
+ if (inode->i_nlink == 0 && (inode->i_mode == 0 || ei->i_dtime)) {
+ /* this inode is deleted */
+ brelse (bh);
+ goto bad_inode;
+ }
+ inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size (for stat), not the fs block size */
+ inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
+ ei->i_flags = le32_to_cpu(raw_inode->i_flags);
+ ei->i_faddr = le32_to_cpu(raw_inode->i_faddr);
+ ei->i_frag_no = raw_inode->i_frag;
+ ei->i_frag_size = raw_inode->i_fsize;
+ ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
+ ei->i_dir_acl = 0;
+ /* Regular files take the upper 32 bits of the size from i_size_high;
+ * every other file type reads i_dir_acl instead. */
+ if (S_ISREG(inode->i_mode))
+ inode->i_size |= ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32;
+ else
+ ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl);
+ /* The inode passed the deleted-inode test above; reset the in-core dtime. */
+ ei->i_dtime = 0;
+ inode->i_generation = le32_to_cpu(raw_inode->i_generation);
+ ei->i_state = 0;
+ ei->i_next_alloc_block = 0;
+ ei->i_next_alloc_goal = 0;
+ ei->i_prealloc_count = 0;
+ ei->i_block_group = (ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb);
+ ei->i_dir_start_lookup = 0;
+
+ /*
+ * NOTE! The in-memory inode i_data array is in little-endian order
+ * even on big-endian machines: we do NOT byteswap the block numbers!
+ */
+ for (n = 0; n < EXT2_N_BLOCKS; n++)
+ ei->i_data[n] = raw_inode->i_block[n];
+
+ /* Pick operations by file type; the NOBH mount option selects the
+ * ext2_nobh_aops address-space operations instead of ext2_aops. */
+ if (S_ISREG(inode->i_mode)) {
+ inode->i_op = &ext2_file_inode_operations;
+ inode->i_fop = &ext2_file_operations;
+ if (test_opt(inode->i_sb, NOBH))
+ inode->i_mapping->a_ops = &ext2_nobh_aops;
+ else
+ inode->i_mapping->a_ops = &ext2_aops;
+ } else if (S_ISDIR(inode->i_mode)) {
+ inode->i_op = &ext2_dir_inode_operations;
+ inode->i_fop = &ext2_dir_operations;
+ if (test_opt(inode->i_sb, NOBH))
+ inode->i_mapping->a_ops = &ext2_nobh_aops;
+ else
+ inode->i_mapping->a_ops = &ext2_aops;
+ } else if (S_ISLNK(inode->i_mode)) {
+ /* Only non-fast symlinks get address-space operations here. */
+ if (ext2_inode_is_fast_symlink(inode))
+ inode->i_op = &ext2_fast_symlink_inode_operations;
+ else {
+ inode->i_op = &ext2_symlink_inode_operations;
+ if (test_opt(inode->i_sb, NOBH))
+ inode->i_mapping->a_ops = &ext2_nobh_aops;
+ else
+ inode->i_mapping->a_ops = &ext2_aops;
+ }
+ } else {
+ /* Remaining file types: a non-zero i_block[0] is decoded with
+ * old_decode_dev(), otherwise i_block[1] with new_decode_dev(). */
+ inode->i_op = &ext2_special_inode_operations;
+ if (raw_inode->i_block[0])
+ init_special_inode(inode, inode->i_mode,
+ old_decode_dev(le32_to_cpu(raw_inode->i_block[0])));
+ else
+ init_special_inode(inode, inode->i_mode,
+ new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
+ }
+ brelse (bh);
+ /* Mirror the ext2 flags just loaded into ei->i_flags onto inode->i_flags. */
+ ext2_set_inode_flags(inode);
+ return;
+
+bad_inode:
+ make_bad_inode(inode);
+ return;
+}
+
+/*
+ * ext2_update_inode - copy an in-core inode into its on-disk ext2 inode.
+ *
+ * Converts the in-core fields to little-endian, stores them in the raw
+ * inode obtained from ext2_get_inode() and marks the buffer dirty. When
+ * do_sync is non-zero the buffer is also written out synchronously.
+ *
+ * Returns 0 on success, or -EIO if the raw inode cannot be obtained or the
+ * synchronous write fails.
+ */
+static int ext2_update_inode(struct inode * inode, int do_sync)
+{
+ struct ext2_inode_info *ei = EXT2_I(inode);
+ struct super_block *sb = inode->i_sb;
+ ino_t ino = inode->i_ino;
+ uid_t uid = inode->i_uid;
+ gid_t gid = inode->i_gid;
+ struct buffer_head * bh;
+ struct ext2_inode * raw_inode = ext2_get_inode(sb, ino, &bh);
+ int n;
+ int err = 0;
+
+ if (IS_ERR(raw_inode))
+ return -EIO;
+
+ /* For fields not tracked in the in-memory inode,
+ * initialise them to zero for new inodes. */
+ if (ei->i_state & EXT2_STATE_NEW)
+ memset(raw_inode, 0, EXT2_SB(sb)->s_inode_size);
+
+ raw_inode->i_mode = cpu_to_le16(inode->i_mode);
+ if (!(test_opt(sb, NO_UID32))) {
+ raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid));
+ raw_inode->i_gid_low = cpu_to_le16(low_16_bits(gid));
+/*
+ * Fix up interoperability with old kernels. Otherwise, old inodes get
+ * re-used with the upper 16 bits of the uid/gid intact
+ */
+ if (!ei->i_dtime) {
+ raw_inode->i_uid_high = cpu_to_le16(high_16_bits(uid));
+ raw_inode->i_gid_high = cpu_to_le16(high_16_bits(gid));
+ } else {
+ raw_inode->i_uid_high = 0;
+ raw_inode->i_gid_high = 0;
+ }
+ } else {
+ /* NO_UID32 mount option: store only 16-bit ids, high halves zero. */
+ raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(uid));
+ raw_inode->i_gid_low = cpu_to_le16(fs_high2lowgid(gid));
+ raw_inode->i_uid_high = 0;
+ raw_inode->i_gid_high = 0;
+ }
+ raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
+ raw_inode->i_size = cpu_to_le32(inode->i_size);
+ raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
+ raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
+ raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
+
+ raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
+ raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
+ raw_inode->i_flags = cpu_to_le32(ei->i_flags);
+ raw_inode->i_faddr = cpu_to_le32(ei->i_faddr);
+ raw_inode->i_frag = ei->i_frag_no;
+ raw_inode->i_fsize = ei->i_frag_size;
+ raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl);
+ /* Non-regular files write i_dir_acl; regular files write the upper
+ * 32 bits of the size to i_size_high instead. */
+ if (!S_ISREG(inode->i_mode))
+ raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl);
+ else {
+ raw_inode->i_size_high = cpu_to_le32(inode->i_size >> 32);
+ if (inode->i_size > 0x7fffffffULL) {
+ if (!EXT2_HAS_RO_COMPAT_FEATURE(sb,
+ EXT2_FEATURE_RO_COMPAT_LARGE_FILE) ||
+ EXT2_SB(sb)->s_es->s_rev_level ==
+ cpu_to_le32(EXT2_GOOD_OLD_REV)) {
+ /* If this is the first large file
+ * created, add a flag to the superblock.
+ */
+ lock_kernel();
+ ext2_update_dynamic_rev(sb);
+ EXT2_SET_RO_COMPAT_FEATURE(sb,
+ EXT2_FEATURE_RO_COMPAT_LARGE_FILE);
+ unlock_kernel();
+ ext2_write_super(sb);
+ }
+ }
+ }
+
+ raw_inode->i_generation = cpu_to_le32(inode->i_generation);
+ /* Device nodes keep their device number in i_block[]: old-style
+ * encoding in slot 0 when it fits, otherwise new-style in slot 1. */
+ if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
+ if (old_valid_dev(inode->i_rdev)) {
+ raw_inode->i_block[0] =
+ cpu_to_le32(old_encode_dev(inode->i_rdev));
+ raw_inode->i_block[1] = 0;
+ } else {
+ raw_inode->i_block[0] = 0;
+ raw_inode->i_block[1] =
+ cpu_to_le32(new_encode_dev(inode->i_rdev));
+ raw_inode->i_block[2] = 0;
+ }
+ } else for (n = 0; n < EXT2_N_BLOCKS; n++)
+ raw_inode->i_block[n] = ei->i_data[n];
+ mark_buffer_dirty(bh);
+ if (do_sync) {
+ sync_dirty_buffer(bh);
+ if (buffer_req(bh) && !buffer_uptodate(bh)) {
+ printk ("IO error syncing ext2 inode [%s:%08lx]\n",
+ sb->s_id, (unsigned long) ino);
+ err = -EIO;
+ }
+ }
+ /* The inode has now been written at least once; it is no longer new. */
+ ei->i_state &= ~EXT2_STATE_NEW;
+ brelse (bh);
+ return err;
+}
+
+/*
+ * Write the in-core inode back to its on-disk copy; a non-zero @wait asks
+ * for the write to be synchronous. Thin wrapper around ext2_update_inode().
+ */
+int ext2_write_inode(struct inode *inode, int wait)
+{
+	int ret = ext2_update_inode(inode, wait);
+
+	return ret;
+}
+
+/*
+ * Synchronously write out @inode through sync_inode() using a
+ * WB_SYNC_ALL writeback control. Returns whatever sync_inode() returns.
+ */
+int ext2_sync_inode(struct inode *inode)
+{
+	struct writeback_control wbc = {
+		.nr_to_write = 0,	/* sys_fsync did this */
+		.sync_mode = WB_SYNC_ALL,
+	};
+	int ret;
+
+	ret = sync_inode(inode, &wbc);
+	return ret;
+}
+
+/*
+ * ext2_setattr - apply attribute changes to the inode behind @dentry.
+ *
+ * The change is validated with inode_change_ok(); an owner or group change
+ * first transfers quota (-EDQUOT if that fails); a mode change is followed
+ * by ext2_acl_chmod(). Returns 0 or a negative errno.
+ */
+int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
+{
+	struct inode *inode = dentry->d_inode;
+	int owner_changes;
+	int error;
+
+	error = inode_change_ok(inode, iattr);
+	if (error)
+		return error;
+
+	owner_changes =
+		((iattr->ia_valid & ATTR_UID) && iattr->ia_uid != inode->i_uid) ||
+		((iattr->ia_valid & ATTR_GID) && iattr->ia_gid != inode->i_gid);
+	if (owner_changes) {
+		if (DQUOT_TRANSFER(inode, iattr))
+			return -EDQUOT;
+	}
+
+	error = inode_setattr(inode, iattr);
+	if (error)
+		return error;
+
+	/* Let the ACL code follow a mode change. */
+	if (iattr->ia_valid & ATTR_MODE)
+		error = ext2_acl_chmod(inode);
+	return error;
+}
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
new file mode 100644
index 000000000000..709d8676b962
--- /dev/null
+++ b/fs/ext2/ioctl.c
@@ -0,0 +1,81 @@
+/*
+ * linux/fs/ext2/ioctl.c
+ *
+ * Copyright (C) 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ */
+
+#include "ext2.h"
+#include <linux/time.h>
+#include <linux/sched.h>
+#include <asm/current.h>
+#include <asm/uaccess.h>
+
+
+/*
+ * ext2_ioctl - ext2-specific ioctls: get/set the inode flags and the inode
+ * generation ("version") number.
+ *
+ * @arg is a user-space pointer to an int for every supported command.
+ * Returns 0 or the put_user()/get_user() result on success paths, -EROFS on
+ * a read-only inode, -EACCES/-EPERM when the caller lacks permission,
+ * -EFAULT on a bad user pointer and -ENOTTY for unknown commands.
+ */
+int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
+		unsigned long arg)
+{
+	struct ext2_inode_info *ei = EXT2_I(inode);
+	int __user *uarg = (int __user *) arg;
+	unsigned int flags;
+
+	ext2_debug ("cmd = %u, arg = %lu\n", cmd, arg);
+
+	if (cmd == EXT2_IOC_GETFLAGS) {
+		flags = ei->i_flags & EXT2_FL_USER_VISIBLE;
+		return put_user(flags, uarg);
+	}
+
+	if (cmd == EXT2_IOC_SETFLAGS) {
+		unsigned int oldflags;
+		unsigned int changed;
+
+		if (IS_RDONLY(inode))
+			return -EROFS;
+
+		if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))
+			return -EACCES;
+
+		if (get_user(flags, uarg))
+			return -EFAULT;
+
+		/* DIRSYNC is only meaningful on directories. */
+		if (!S_ISDIR(inode->i_mode))
+			flags &= ~EXT2_DIRSYNC_FL;
+
+		oldflags = ei->i_flags;
+		changed = flags ^ oldflags;
+
+		/*
+		 * Toggling IMMUTABLE or APPEND_ONLY requires
+		 * CAP_LINUX_IMMUTABLE. (XOR test courtesy of
+		 * Pauline Middelink.)
+		 */
+		if ((changed & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) &&
+		    !capable(CAP_LINUX_IMMUTABLE))
+			return -EPERM;
+
+		/* Take user-modifiable bits from the request, keep the rest. */
+		ei->i_flags = (flags & EXT2_FL_USER_MODIFIABLE) |
+			      (oldflags & ~EXT2_FL_USER_MODIFIABLE);
+
+		ext2_set_inode_flags(inode);
+		inode->i_ctime = CURRENT_TIME_SEC;
+		mark_inode_dirty(inode);
+		return 0;
+	}
+
+	if (cmd == EXT2_IOC_GETVERSION)
+		return put_user(inode->i_generation, uarg);
+
+	if (cmd == EXT2_IOC_SETVERSION) {
+		if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))
+			return -EPERM;
+		if (IS_RDONLY(inode))
+			return -EROFS;
+		if (get_user(inode->i_generation, uarg))
+			return -EFAULT;
+		inode->i_ctime = CURRENT_TIME_SEC;
+		mark_inode_dirty(inode);
+		return 0;
+	}
+
+	return -ENOTTY;
+}
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
new file mode 100644
index 000000000000..3176b3d3ffa8
--- /dev/null
+++ b/fs/ext2/namei.c
@@ -0,0 +1,418 @@
+/*
+ * linux/fs/ext2/namei.c
+ *
+ * Rewrite to pagecache. Almost all code had been changed, so blame me
+ * if the things go wrong. Please, send bug reports to
+ * viro@parcelfarce.linux.theplanet.co.uk
+ *
+ * Stuff here is basically a glue between the VFS and generic UNIXish
+ * filesystem that keeps everything in pagecache. All knowledge of the
+ * directory layout is in fs/ext2/dir.c - it turned out to be easily separatable
+ * and it's easier to debug that way. In principle we might want to
+ * generalize that a bit and turn it into a library. Or not.
+ *
+ * The only non-static object here is ext2_dir_inode_operations.
+ *
+ * TODO: get rid of kmap() use, add readahead.
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ * from
+ *
+ * linux/fs/minix/namei.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * Big-endian to little-endian byte-swapping/bitmaps by
+ * David S. Miller (davem@caip.rutgers.edu), 1995
+ */
+
+#include <linux/pagemap.h>
+#include "ext2.h"
+#include "xattr.h"
+#include "acl.h"
+
+/*
+ * Couple of helper functions - make the code slightly cleaner.
+ */
+
+/* Take one more link on @inode and mark it dirty. */
+static inline void ext2_inc_count(struct inode *inode)
+{
+	inode->i_nlink += 1;
+	mark_inode_dirty(inode);
+}
+
+/* Drop one link from @inode and mark it dirty. */
+static inline void ext2_dec_count(struct inode *inode)
+{
+	inode->i_nlink -= 1;
+	mark_inode_dirty(inode);
+}
+
+/*
+ * Link @inode into the directory under @dentry's name and instantiate the
+ * dentry. On failure the inode loses one link and the caller's reference
+ * is dropped with iput(). Returns 0 or the ext2_add_link() error.
+ */
+static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
+{
+	int err = ext2_add_link(dentry, inode);
+
+	if (err) {
+		ext2_dec_count(inode);
+		iput(inode);
+		return err;
+	}
+
+	d_instantiate(dentry, inode);
+	return 0;
+}
+
+/*
+ * Methods themselves.
+ */
+
+/*
+ * Look @dentry's name up in directory @dir.
+ *
+ * A missing name yields a negative dentry (d_add() with a NULL inode) and a
+ * NULL return. A found name returns the result of d_splice_alias().
+ * Errors: -ENAMETOOLONG for over-long names, -EACCES when iget() fails.
+ */
+static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
+{
+	struct inode *inode;
+	ino_t ino;
+
+	if (dentry->d_name.len > EXT2_NAME_LEN)
+		return ERR_PTR(-ENAMETOOLONG);
+
+	ino = ext2_inode_by_name(dir, dentry);
+	if (!ino) {
+		/* No such entry: cache a negative dentry. */
+		d_add(dentry, NULL);
+		return NULL;
+	}
+
+	inode = iget(dir->i_sb, ino);
+	if (!inode)
+		return ERR_PTR(-EACCES);
+
+	return d_splice_alias(inode, dentry);
+}
+
+/*
+ * Find the parent directory of @child by looking up ".." in it.
+ *
+ * Returns an anonymous dentry for the parent, or ERR_PTR(-ENOENT) when
+ * ".." is missing, ERR_PTR(-EACCES) when iget() fails, and
+ * ERR_PTR(-ENOMEM) when no dentry can be allocated.
+ */
+struct dentry *ext2_get_parent(struct dentry *child)
+{
+	struct dentry dotdot;
+	struct dentry *parent;
+	struct inode *inode;
+	unsigned long ino;
+
+	/* Only the name of this on-stack dentry is filled in for the lookup. */
+	dotdot.d_name.len = 2;
+	dotdot.d_name.name = "..";
+
+	ino = ext2_inode_by_name(child->d_inode, &dotdot);
+	if (!ino)
+		return ERR_PTR(-ENOENT);
+
+	inode = iget(child->d_inode->i_sb, ino);
+	if (!inode)
+		return ERR_PTR(-EACCES);
+
+	parent = d_alloc_anon(inode);
+	if (parent)
+		return parent;
+
+	iput(inode);
+	return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Create a regular file.
+ *
+ * When we get here the dentry for the new name already exists but is
+ * still negative (it has no inode). On success ext2_add_nondir() attaches
+ * the freshly allocated inode to it with d_instantiate().
+ *
+ * Returns 0 or a negative errno.
+ */
+static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, struct nameidata *nd)
+{
+	struct inode *inode = ext2_new_inode (dir, mode);
+
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	inode->i_op = &ext2_file_inode_operations;
+	inode->i_fop = &ext2_file_operations;
+	inode->i_mapping->a_ops = test_opt(inode->i_sb, NOBH) ?
+				  &ext2_nobh_aops : &ext2_aops;
+	mark_inode_dirty(inode);
+
+	return ext2_add_nondir(dentry, inode);
+}
+
+/*
+ * Create a special file (device node, FIFO, socket) named by @dentry.
+ * Returns -EINVAL when @rdev fails new_valid_dev(), otherwise 0 or the
+ * error from inode allocation / directory insertion.
+ */
+static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t rdev)
+{
+	struct inode *inode;
+
+	if (!new_valid_dev(rdev))
+		return -EINVAL;
+
+	inode = ext2_new_inode (dir, mode);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	init_special_inode(inode, inode->i_mode, rdev);
+#ifdef CONFIG_EXT2_FS_XATTR
+	inode->i_op = &ext2_special_inode_operations;
+#endif
+	mark_inode_dirty(inode);
+
+	return ext2_add_nondir(dentry, inode);
+}
+
+/*
+ * Create a symbolic link named by @dentry that points at @symname.
+ *
+ * A target that fits (terminating NUL included) into the inode's i_data
+ * array is stored there as a "fast" symlink; longer targets are written
+ * with page_symlink(). Targets longer than one filesystem block are
+ * rejected with -ENAMETOOLONG.
+ */
+static int ext2_symlink (struct inode * dir, struct dentry * dentry,
+			 const char * symname)
+{
+	struct super_block *sb = dir->i_sb;
+	unsigned l = strlen(symname)+1;
+	struct inode *inode;
+	int err;
+
+	if (l > sb->s_blocksize)
+		return -ENAMETOOLONG;
+
+	inode = ext2_new_inode (dir, S_IFLNK | S_IRWXUGO);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	if (l <= sizeof (EXT2_I(inode)->i_data)) {
+		/* fast symlink */
+		inode->i_op = &ext2_fast_symlink_inode_operations;
+		memcpy((char*)(EXT2_I(inode)->i_data),symname,l);
+		inode->i_size = l-1;
+	} else {
+		/* slow symlink */
+		inode->i_op = &ext2_symlink_inode_operations;
+		inode->i_mapping->a_ops = test_opt(inode->i_sb, NOBH) ?
+					  &ext2_nobh_aops : &ext2_aops;
+		err = page_symlink(inode, symname, l);
+		if (err) {
+			/* Undo the allocation: drop the link and our reference. */
+			ext2_dec_count(inode);
+			iput (inode);
+			return err;
+		}
+	}
+	mark_inode_dirty(inode);
+
+	return ext2_add_nondir(dentry, inode);
+}
+
+/*
+ * Create a hard link @dentry (in @dir) to the inode behind @old_dentry.
+ * Fails with -EMLINK once the inode already has EXT2_LINK_MAX links.
+ */
+static int ext2_link (struct dentry * old_dentry, struct inode * dir,
+		      struct dentry *dentry)
+{
+	struct inode *inode = old_dentry->d_inode;
+	int err;
+
+	if (inode->i_nlink >= EXT2_LINK_MAX)
+		return -EMLINK;
+
+	inode->i_ctime = CURRENT_TIME_SEC;
+	ext2_inc_count(inode);
+	/* Extra reference for the new dentry; ext2_add_nondir() drops it on error. */
+	atomic_inc(&inode->i_count);
+
+	err = ext2_add_nondir(dentry, inode);
+	return err;
+}
+
+/*
+ * Create directory @dentry inside @dir.
+ *
+ * The parent gains a link before the child is allocated, and the child
+ * takes a second link before it is populated with ext2_make_empty(). Every
+ * failure path undoes the link-count changes made so far.
+ * Returns 0, -EMLINK when the parent is at EXT2_LINK_MAX, or another
+ * negative errno.
+ */
+static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode)
+{
+	struct inode *inode;
+	int err;
+
+	if (dir->i_nlink >= EXT2_LINK_MAX)
+		return -EMLINK;
+
+	ext2_inc_count(dir);
+
+	inode = ext2_new_inode (dir, S_IFDIR | mode);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		goto out_dir;
+	}
+
+	inode->i_op = &ext2_dir_inode_operations;
+	inode->i_fop = &ext2_dir_operations;
+	inode->i_mapping->a_ops = test_opt(inode->i_sb, NOBH) ?
+				  &ext2_nobh_aops : &ext2_aops;
+
+	ext2_inc_count(inode);
+
+	err = ext2_make_empty(inode, dir);
+	if (!err)
+		err = ext2_add_link(dentry, inode);
+	if (err)
+		goto out_fail;
+
+	d_instantiate(dentry, inode);
+	return 0;
+
+out_fail:
+	/* Drop both links held by the new directory, then our reference. */
+	ext2_dec_count(inode);
+	ext2_dec_count(inode);
+	iput(inode);
+out_dir:
+	ext2_dec_count(dir);
+	return err;
+}
+
+/*
+ * Remove the directory entry for @dentry from @dir and drop one link from
+ * its inode. Returns -ENOENT when the entry is not found, the
+ * ext2_delete_entry() error if removal fails, and 0 otherwise.
+ */
+static int ext2_unlink(struct inode * dir, struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+	struct ext2_dir_entry_2 *de;
+	struct page *page;
+	int err;
+
+	de = ext2_find_entry (dir, dentry, &page);
+	if (!de)
+		return -ENOENT;
+
+	err = ext2_delete_entry (de, page);
+	if (err)
+		return err;
+
+	inode->i_ctime = dir->i_ctime;
+	ext2_dec_count(inode);
+	return 0;
+}
+
+/*
+ * Remove the empty directory @dentry from @dir.
+ *
+ * Returns -ENOTEMPTY when the directory still has entries. After a
+ * successful unlink the directory's size is zeroed, its second link is
+ * dropped, and the parent loses one link as well.
+ */
+static int ext2_rmdir (struct inode * dir, struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+	int err;
+
+	if (!ext2_empty_dir(inode))
+		return -ENOTEMPTY;
+
+	err = ext2_unlink(dir, dentry);
+	if (err)
+		return err;
+
+	inode->i_size = 0;
+	ext2_dec_count(inode);
+	ext2_dec_count(dir);
+	return 0;
+}
+
+/*
+ * ext2_rename - move old_dentry (in old_dir) to new_dentry (in new_dir).
+ *
+ * When the target name already exists its directory entry is redirected to
+ * the source inode and the previous target inode loses its link(s);
+ * otherwise a new entry is added. When a directory is moved, its ".." entry
+ * is repointed at new_dir and the parents' link counts are adjusted.
+ *
+ * Returns 0 on success or a negative errno (-ENOENT, -EIO, -ENOTEMPTY,
+ * -EMLINK, or the ext2_add_link() error).
+ */
+static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
+ struct inode * new_dir, struct dentry * new_dentry )
+{
+ struct inode * old_inode = old_dentry->d_inode;
+ struct inode * new_inode = new_dentry->d_inode;
+ struct page * dir_page = NULL;
+ struct ext2_dir_entry_2 * dir_de = NULL;
+ struct page * old_page;
+ struct ext2_dir_entry_2 * old_de;
+ int err = -ENOENT;
+
+ old_de = ext2_find_entry (old_dir, old_dentry, &old_page);
+ if (!old_de)
+ goto out;
+
+ /* Moving a directory: locate its ".." entry so it can be repointed. */
+ if (S_ISDIR(old_inode->i_mode)) {
+ err = -EIO;
+ dir_de = ext2_dotdot(old_inode, &dir_page);
+ if (!dir_de)
+ goto out_old;
+ }
+
+ if (new_inode) {
+ /* Target exists: replace it. */
+ struct page *new_page;
+ struct ext2_dir_entry_2 *new_de;
+
+ err = -ENOTEMPTY;
+ if (dir_de && !ext2_empty_dir (new_inode))
+ goto out_dir;
+
+ err = -ENOENT;
+ new_de = ext2_find_entry (new_dir, new_dentry, &new_page);
+ if (!new_de)
+ goto out_dir;
+ /* Hold an extra link on old_inode while both names refer to it. */
+ ext2_inc_count(old_inode);
+ ext2_set_link(new_dir, new_de, new_page, old_inode);
+ new_inode->i_ctime = CURRENT_TIME_SEC;
+ /* A replaced directory loses two links in total. */
+ if (dir_de)
+ new_inode->i_nlink--;
+ ext2_dec_count(new_inode);
+ } else {
+ /* Target does not exist: add a new entry. */
+ if (dir_de) {
+ err = -EMLINK;
+ if (new_dir->i_nlink >= EXT2_LINK_MAX)
+ goto out_dir;
+ }
+ ext2_inc_count(old_inode);
+ err = ext2_add_link(new_dentry, old_inode);
+ if (err) {
+ ext2_dec_count(old_inode);
+ goto out_dir;
+ }
+ /* new_dir gains a link for the moved directory's "..". */
+ if (dir_de)
+ ext2_inc_count(new_dir);
+ }
+
+ /*
+ * Like most other Unix systems, set the ctime for inodes on a
+ * rename.
+ * ext2_dec_count() will mark the inode dirty.
+ */
+ old_inode->i_ctime = CURRENT_TIME_SEC;
+
+ /* NOTE(review): the return value of ext2_delete_entry() is ignored here. */
+ ext2_delete_entry (old_de, old_page);
+ ext2_dec_count(old_inode);
+
+ if (dir_de) {
+ ext2_set_link(old_inode, dir_de, dir_page, new_dir);
+ ext2_dec_count(old_dir);
+ }
+ return 0;
+
+
+ /* Error unwinding: unmap and release the pages still held. */
+out_dir:
+ if (dir_de) {
+ kunmap(dir_page);
+ page_cache_release(dir_page);
+ }
+out_old:
+ kunmap(old_page);
+ page_cache_release(old_page);
+out:
+ return err;
+}
+
+/*
+ * Inode operations installed on ext2 directories. The extended-attribute
+ * hooks are only present when CONFIG_EXT2_FS_XATTR is enabled.
+ */
+struct inode_operations ext2_dir_inode_operations = {
+ .create = ext2_create,
+ .lookup = ext2_lookup,
+ .link = ext2_link,
+ .unlink = ext2_unlink,
+ .symlink = ext2_symlink,
+ .mkdir = ext2_mkdir,
+ .rmdir = ext2_rmdir,
+ .mknod = ext2_mknod,
+ .rename = ext2_rename,
+#ifdef CONFIG_EXT2_FS_XATTR
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .listxattr = ext2_listxattr,
+ .removexattr = generic_removexattr,
+#endif
+ .setattr = ext2_setattr,
+ .permission = ext2_permission,
+};
+
+/*
+ * Inode operations for special files (set by ext2_mknod() when
+ * CONFIG_EXT2_FS_XATTR is enabled): only attribute, permission and,
+ * optionally, extended-attribute handling.
+ */
+struct inode_operations ext2_special_inode_operations = {
+#ifdef CONFIG_EXT2_FS_XATTR
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .listxattr = ext2_listxattr,
+ .removexattr = generic_removexattr,
+#endif
+ .setattr = ext2_setattr,
+ .permission = ext2_permission,
+};
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
new file mode 100644
index 000000000000..37ca77a157ba
--- /dev/null
+++ b/fs/ext2/super.c
@@ -0,0 +1,1161 @@
+/*
+ * linux/fs/ext2/super.c
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ * from
+ *
+ * linux/fs/minix/inode.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * Big-endian to little-endian byte-swapping/bitmaps by
+ * David S. Miller (davem@caip.rutgers.edu), 1995
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/blkdev.h>
+#include <linux/parser.h>
+#include <linux/random.h>
+#include <linux/buffer_head.h>
+#include <linux/smp_lock.h>
+#include <linux/vfs.h>
+#include <asm/uaccess.h>
+#include "ext2.h"
+#include "xattr.h"
+#include "acl.h"
+
+static void ext2_sync_super(struct super_block *sb,
+ struct ext2_super_block *es);
+static int ext2_remount (struct super_block * sb, int * flags, char * data);
+static int ext2_statfs (struct super_block * sb, struct kstatfs * buf);
+
+/*
+ * ext2_error - report a filesystem error.
+ *
+ * On a writable mount the error state is recorded both in memory and in
+ * the superblock, which is synced immediately. The message is then logged
+ * at KERN_CRIT, after which the errors=panic / errors=remount-ro mount
+ * options are honoured.
+ */
+void ext2_error (struct super_block * sb, const char * function,
+		 const char * fmt, ...)
+{
+	struct ext2_sb_info *sbi = EXT2_SB(sb);
+	struct ext2_super_block *es = sbi->s_es;
+	va_list ap;
+
+	if ((sb->s_flags & MS_RDONLY) == 0) {
+		es->s_state =
+			cpu_to_le16(le16_to_cpu(es->s_state) | EXT2_ERROR_FS);
+		sbi->s_mount_state |= EXT2_ERROR_FS;
+		ext2_sync_super(sb, es);
+	}
+
+	printk(KERN_CRIT "EXT2-fs error (device %s): %s: ",sb->s_id, function);
+	va_start(ap, fmt);
+	vprintk(fmt, ap);
+	va_end(ap);
+	printk("\n");
+
+	if (test_opt(sb, ERRORS_PANIC))
+		panic("EXT2-fs panic from previous error\n");
+
+	if (!test_opt(sb, ERRORS_RO))
+		return;
+
+	printk("Remounting filesystem read-only\n");
+	sb->s_flags |= MS_RDONLY;
+}
+
+/*
+ * ext2_warning - log a printf-style warning at KERN_WARNING, prefixed with
+ * the device id and the reporting function's name. No state is changed.
+ */
+void ext2_warning (struct super_block * sb, const char * function,
+		   const char * fmt, ...)
+{
+	va_list ap;
+
+	printk(KERN_WARNING "EXT2-fs warning (device %s): %s: ",
+	       sb->s_id, function);
+
+	va_start(ap, fmt);
+	vprintk(fmt, ap);
+	va_end(ap);
+
+	printk("\n");
+}
+
+/*
+ * ext2_update_dynamic_rev - upgrade an original-revision superblock to
+ * EXT2_DYNAMIC_REV so that feature flags may be used.
+ *
+ * Does nothing when the revision is already above EXT2_GOOD_OLD_REV. Only
+ * the in-memory superblock is modified here; a warning is logged.
+ */
+void ext2_update_dynamic_rev(struct super_block *sb)
+{
+	struct ext2_super_block *es = EXT2_SB(sb)->s_es;
+
+	if (le32_to_cpu(es->s_rev_level) <= EXT2_GOOD_OLD_REV) {
+		ext2_warning(sb, __FUNCTION__,
+			     "updating to rev %d because of new feature flag, "
+			     "running e2fsck is recommended",
+			     EXT2_DYNAMIC_REV);
+
+		es->s_rev_level = cpu_to_le32(EXT2_DYNAMIC_REV);
+		es->s_inode_size = cpu_to_le16(EXT2_GOOD_OLD_INODE_SIZE);
+		es->s_first_ino = cpu_to_le32(EXT2_GOOD_OLD_FIRST_INO);
+		/* leave es->s_feature_*compat flags alone */
+		/* es->s_uuid will be set by e2fsck if empty */
+
+		/*
+		 * Every other superblock field should be zero; a non-zero
+		 * one is probably already in use, so it is left untouched
+		 * and any inconsistency is left for e2fsck to clean up.
+		 */
+	}
+}
+
+/*
+ * ext2_put_super - release everything hanging off the superblock.
+ *
+ * On a writable mount the saved mount state is written back to the
+ * on-disk superblock first. Then the group descriptor buffers, the debts
+ * array, the per-cpu counters, the superblock buffer and the ext2_sb_info
+ * itself are released.
+ */
+static void ext2_put_super (struct super_block * sb)
+{
+	struct ext2_sb_info *sbi = EXT2_SB(sb);
+	int nr_desc_blocks;
+	int n;
+
+	ext2_xattr_put_super(sb);
+
+	if (!(sb->s_flags & MS_RDONLY)) {
+		struct ext2_super_block *es = sbi->s_es;
+
+		es->s_state = cpu_to_le16(sbi->s_mount_state);
+		ext2_sync_super(sb, es);
+	}
+
+	nr_desc_blocks = sbi->s_gdb_count;
+	for (n = 0; n < nr_desc_blocks; n++) {
+		if (sbi->s_group_desc[n])
+			brelse (sbi->s_group_desc[n]);
+	}
+	kfree(sbi->s_group_desc);
+	kfree(sbi->s_debts);
+
+	percpu_counter_destroy(&sbi->s_freeblocks_counter);
+	percpu_counter_destroy(&sbi->s_freeinodes_counter);
+	percpu_counter_destroy(&sbi->s_dirs_counter);
+
+	brelse (sbi->s_sbh);
+	sb->s_fs_info = NULL;
+	kfree(sbi);
+}
+
+/* Slab cache from which struct ext2_inode_info objects are allocated. */
+static kmem_cache_t * ext2_inode_cachep;
+
+/*
+ * Allocate an ext2_inode_info from the inode slab cache and hand back the
+ * VFS inode embedded in it. Returns NULL when the allocation fails.
+ */
+static struct inode *ext2_alloc_inode(struct super_block *sb)
+{
+	struct ext2_inode_info *ei;
+
+	ei = kmem_cache_alloc(ext2_inode_cachep, SLAB_KERNEL);
+	if (ei == NULL)
+		return NULL;
+
+#ifdef CONFIG_EXT2_FS_POSIX_ACL
+	/* No ACLs are cached for a freshly allocated inode. */
+	ei->i_default_acl = EXT2_ACL_NOT_CACHED;
+	ei->i_acl = EXT2_ACL_NOT_CACHED;
+#endif
+	ei->vfs_inode.i_version = 1;
+
+	return &ei->vfs_inode;
+}
+
+/* Return the ext2_inode_info that contains @inode to the slab cache. */
+static void ext2_destroy_inode(struct inode *inode)
+{
+	struct ext2_inode_info *ei = EXT2_I(inode);
+
+	kmem_cache_free(ext2_inode_cachep, ei);
+}
+
+/*
+ * Slab constructor for ext2_inode_info objects: sets up the locks and the
+ * embedded VFS inode. Only acts on a real construction call, i.e. when
+ * SLAB_CTOR_CONSTRUCTOR is set and SLAB_CTOR_VERIFY is not.
+ */
+static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+{
+	struct ext2_inode_info *ei = foo;
+	unsigned long ctor_bits;
+
+	ctor_bits = flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR);
+	if (ctor_bits != SLAB_CTOR_CONSTRUCTOR)
+		return;
+
+	rwlock_init(&ei->i_meta_lock);
+#ifdef CONFIG_EXT2_FS_XATTR
+	init_rwsem(&ei->xattr_sem);
+#endif
+	inode_init_once(&ei->vfs_inode);
+}
+
+/*
+ * Create the slab cache for ext2 in-core inodes.
+ * Returns 0 on success or -ENOMEM when the cache cannot be created.
+ */
+static int init_inodecache(void)
+{
+	ext2_inode_cachep = kmem_cache_create("ext2_inode_cache",
+					      sizeof(struct ext2_inode_info),
+					      0, SLAB_RECLAIM_ACCOUNT,
+					      init_once, NULL);
+
+	return ext2_inode_cachep ? 0 : -ENOMEM;
+}
+
+/*
+ * Destroy the ext2 inode slab cache, logging a notice when
+ * kmem_cache_destroy() returns non-zero.
+ */
+static void destroy_inodecache(void)
+{
+	int failed = kmem_cache_destroy(ext2_inode_cachep);
+
+	if (failed)
+		printk(KERN_INFO "ext2_inode_cache: not all structures were freed\n");
+}
+
+/*
+ * Release per-inode resources: any cached POSIX ACLs (when ACL support is
+ * configured) and, for inodes that are not bad, preallocated blocks.
+ */
+static void ext2_clear_inode(struct inode *inode)
+{
+#ifdef CONFIG_EXT2_FS_POSIX_ACL
+	struct ext2_inode_info *ei = EXT2_I(inode);
+
+	if (ei->i_acl != NULL && ei->i_acl != EXT2_ACL_NOT_CACHED) {
+		posix_acl_release(ei->i_acl);
+		ei->i_acl = EXT2_ACL_NOT_CACHED;
+	}
+	if (ei->i_default_acl != NULL &&
+	    ei->i_default_acl != EXT2_ACL_NOT_CACHED) {
+		posix_acl_release(ei->i_default_acl);
+		ei->i_default_acl = EXT2_ACL_NOT_CACHED;
+	}
+#endif
+	if (is_bad_inode(inode))
+		return;
+
+	ext2_discard_prealloc(inode);
+}
+
+
+#ifdef CONFIG_QUOTA
+static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off);
+static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off);
+#endif
+
+/*
+ * Superblock operations for ext2. The quota read/write hooks are only
+ * provided when CONFIG_QUOTA is enabled.
+ */
+static struct super_operations ext2_sops = {
+ .alloc_inode = ext2_alloc_inode,
+ .destroy_inode = ext2_destroy_inode,
+ .read_inode = ext2_read_inode,
+ .write_inode = ext2_write_inode,
+ .delete_inode = ext2_delete_inode,
+ .put_super = ext2_put_super,
+ .write_super = ext2_write_super,
+ .statfs = ext2_statfs,
+ .remount_fs = ext2_remount,
+ .clear_inode = ext2_clear_inode,
+#ifdef CONFIG_QUOTA
+ .quota_read = ext2_quota_read,
+ .quota_write = ext2_quota_write,
+#endif
+};
+
+/*
+ * Export operations. Yes, most of these are left as NULL!!
+ * A NULL value implies the default, which works with ext2-like file
+ * systems, but can be improved upon.
+ * Currently only get_parent is required.
+ */
+struct dentry *ext2_get_parent(struct dentry *child);
+static struct export_operations ext2_export_ops = {
+ .get_parent = ext2_get_parent,
+};
+
+/*
+ * get_sb_block - extract a leading "sb=<block>" mount option.
+ *
+ * When *data starts with a well-formed "sb=N" the number is returned and
+ * *data is advanced past it (and past a following comma). Without the
+ * option, or when it is malformed, the default location 1 is returned and
+ * *data is left unchanged.
+ */
+static unsigned long get_sb_block(void **data)
+{
+	char *start = (char *) *data;
+	char *end;
+	unsigned long sb_block;
+
+	if (!start || strncmp(start, "sb=", 3) != 0)
+		return 1;	/* Default location */
+
+	sb_block = simple_strtoul(start + 3, &end, 0);
+
+	switch (*end) {
+	case ',':
+		end++;
+		/* fall through */
+	case '\0':
+		*data = (void *) end;
+		return sb_block;
+	default:
+		printk("EXT2-fs: Invalid sb specification: %s\n",
+		       (char *) *data);
+		return 1;
+	}
+}
+
+/*
+ * Mount option identifiers returned by match_token() in parse_options().
+ * Opt_ignore marks options that are accepted but have no effect; Opt_err
+ * terminates the token table.
+ */
+enum {
+ Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
+ Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
+ Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_nobh,
+ Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
+ Opt_ignore, Opt_err,
+};
+
+/*
+ * Mount option patterns and the identifiers they map to. Several spellings
+ * share one identifier (e.g. "grpid"/"bsdgroups"); the quota-related
+ * options map to Opt_ignore. The table ends with the {Opt_err, NULL} entry.
+ */
+static match_table_t tokens = {
+ {Opt_bsd_df, "bsddf"},
+ {Opt_minix_df, "minixdf"},
+ {Opt_grpid, "grpid"},
+ {Opt_grpid, "bsdgroups"},
+ {Opt_nogrpid, "nogrpid"},
+ {Opt_nogrpid, "sysvgroups"},
+ {Opt_resgid, "resgid=%u"},
+ {Opt_resuid, "resuid=%u"},
+ {Opt_sb, "sb=%u"},
+ {Opt_err_cont, "errors=continue"},
+ {Opt_err_panic, "errors=panic"},
+ {Opt_err_ro, "errors=remount-ro"},
+ {Opt_nouid32, "nouid32"},
+ {Opt_nocheck, "check=none"},
+ {Opt_nocheck, "nocheck"},
+ {Opt_check, "check"},
+ {Opt_debug, "debug"},
+ {Opt_oldalloc, "oldalloc"},
+ {Opt_orlov, "orlov"},
+ {Opt_nobh, "nobh"},
+ {Opt_user_xattr, "user_xattr"},
+ {Opt_nouser_xattr, "nouser_xattr"},
+ {Opt_acl, "acl"},
+ {Opt_noacl, "noacl"},
+ {Opt_ignore, "grpquota"},
+ {Opt_ignore, "noquota"},
+ {Opt_ignore, "quota"},
+ {Opt_ignore, "usrquota"},
+ {Opt_err, NULL}
+};
+
+/*
+ * parse_options - apply a comma-separated mount option string to @sbi.
+ *
+ * Each option is matched against the tokens table and the corresponding
+ * bit in sbi->s_mount_opt is set or cleared (or s_resuid/s_resgid is
+ * assigned). The selected errors= behaviour is OR-ed into s_mount_opt at
+ * the end.
+ *
+ * Returns 1 on success (including a NULL option string) and 0 when an
+ * option is unknown or its integer argument cannot be parsed. The string
+ * is consumed with strsep().
+ */
+static int parse_options (char * options,
+ struct ext2_sb_info *sbi)
+{
+ char * p;
+ substring_t args[MAX_OPT_ARGS];
+ unsigned long kind = EXT2_MOUNT_ERRORS_CONT;
+ int option;
+
+ if (!options)
+ return 1;
+
+ while ((p = strsep (&options, ",")) != NULL) {
+ int token;
+ /* Skip empty fields such as those produced by ",,". */
+ if (!*p)
+ continue;
+
+ token = match_token(p, tokens, args);
+ switch (token) {
+ case Opt_bsd_df:
+ clear_opt (sbi->s_mount_opt, MINIX_DF);
+ break;
+ case Opt_minix_df:
+ set_opt (sbi->s_mount_opt, MINIX_DF);
+ break;
+ case Opt_grpid:
+ set_opt (sbi->s_mount_opt, GRPID);
+ break;
+ case Opt_nogrpid:
+ clear_opt (sbi->s_mount_opt, GRPID);
+ break;
+ case Opt_resuid:
+ if (match_int(&args[0], &option))
+ return 0;
+ sbi->s_resuid = option;
+ break;
+ case Opt_resgid:
+ if (match_int(&args[0], &option))
+ return 0;
+ sbi->s_resgid = option;
+ break;
+ case Opt_sb:
+ /* handled by get_sb_block() instead of here */
+ /* *sb_block = match_int(&args[0]); */
+ break;
+ case Opt_err_panic:
+ kind = EXT2_MOUNT_ERRORS_PANIC;
+ break;
+ case Opt_err_ro:
+ kind = EXT2_MOUNT_ERRORS_RO;
+ break;
+ case Opt_err_cont:
+ kind = EXT2_MOUNT_ERRORS_CONT;
+ break;
+ case Opt_nouid32:
+ set_opt (sbi->s_mount_opt, NO_UID32);
+ break;
+ case Opt_check:
+#ifdef CONFIG_EXT2_CHECK
+ set_opt (sbi->s_mount_opt, CHECK);
+#else
+ printk("EXT2 Check option not supported\n");
+#endif
+ break;
+ case Opt_nocheck:
+ clear_opt (sbi->s_mount_opt, CHECK);
+ break;
+ case Opt_debug:
+ set_opt (sbi->s_mount_opt, DEBUG);
+ break;
+ case Opt_oldalloc:
+ set_opt (sbi->s_mount_opt, OLDALLOC);
+ break;
+ case Opt_orlov:
+ clear_opt (sbi->s_mount_opt, OLDALLOC);
+ break;
+ case Opt_nobh:
+ set_opt (sbi->s_mount_opt, NOBH);
+ break;
+ /* Without xattr/ACL support configured, the corresponding options
+ * are accepted but only produce a message. */
+#ifdef CONFIG_EXT2_FS_XATTR
+ case Opt_user_xattr:
+ set_opt (sbi->s_mount_opt, XATTR_USER);
+ break;
+ case Opt_nouser_xattr:
+ clear_opt (sbi->s_mount_opt, XATTR_USER);
+ break;
+#else
+ case Opt_user_xattr:
+ case Opt_nouser_xattr:
+ printk("EXT2 (no)user_xattr options not supported\n");
+ break;
+#endif
+#ifdef CONFIG_EXT2_FS_POSIX_ACL
+ case Opt_acl:
+ set_opt(sbi->s_mount_opt, POSIX_ACL);
+ break;
+ case Opt_noacl:
+ clear_opt(sbi->s_mount_opt, POSIX_ACL);
+ break;
+#else
+ case Opt_acl:
+ case Opt_noacl:
+ printk("EXT2 (no)acl options not supported\n");
+ break;
+#endif
+ case Opt_ignore:
+ break;
+ default:
+ /* Unrecognised option: reject the whole option string. */
+ return 0;
+ }
+ }
+ /* The last errors= option seen (or the ERRORS_CONT default) is OR-ed in. */
+ sbi->s_mount_opt |= kind;
+ return 1;
+}
+
+/*
+ * ext2_setup_super - mount-time checks and bookkeeping on the superblock.
+ *
+ * Returns MS_RDONLY when the revision level is higher than this code
+ * supports, otherwise 0. On a read-only mount nothing else is done. On a
+ * writable mount at most one "running e2fsck is recommended" warning is
+ * printed, the mount count is incremented and the superblock is written.
+ */
+static int ext2_setup_super (struct super_block * sb,
+ struct ext2_super_block * es,
+ int read_only)
+{
+ int res = 0;
+ struct ext2_sb_info *sbi = EXT2_SB(sb);
+
+ if (le32_to_cpu(es->s_rev_level) > EXT2_MAX_SUPP_REV) {
+ printk ("EXT2-fs warning: revision level too high, "
+ "forcing read-only mode\n");
+ res = MS_RDONLY;
+ }
+ if (read_only)
+ return res;
+ /* Only the first matching condition in this chain is reported. */
+ if (!(sbi->s_mount_state & EXT2_VALID_FS))
+ printk ("EXT2-fs warning: mounting unchecked fs, "
+ "running e2fsck is recommended\n");
+ else if ((sbi->s_mount_state & EXT2_ERROR_FS))
+ printk ("EXT2-fs warning: mounting fs with errors, "
+ "running e2fsck is recommended\n");
+ /* A negative s_max_mnt_count (as a signed 16-bit value) skips this check. */
+ else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
+ le16_to_cpu(es->s_mnt_count) >=
+ (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
+ printk ("EXT2-fs warning: maximal mount count reached, "
+ "running e2fsck is recommended\n");
+ else if (le32_to_cpu(es->s_checkinterval) &&
+ (le32_to_cpu(es->s_lastcheck) + le32_to_cpu(es->s_checkinterval) <= get_seconds()))
+ printk ("EXT2-fs warning: checktime reached, "
+ "running e2fsck is recommended\n");
+ /* A zero maximum mount count is replaced by the default. */
+ if (!le16_to_cpu(es->s_max_mnt_count))
+ es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT);
+ es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1);
+ ext2_write_super(sb);
+ if (test_opt (sb, DEBUG))
+ printk ("[EXT II FS %s, %s, bs=%lu, fs=%lu, gc=%lu, "
+ "bpg=%lu, ipg=%lu, mo=%04lx]\n",
+ EXT2FS_VERSION, EXT2FS_DATE, sb->s_blocksize,
+ sbi->s_frag_size,
+ sbi->s_groups_count,
+ EXT2_BLOCKS_PER_GROUP(sb),
+ EXT2_INODES_PER_GROUP(sb),
+ sbi->s_mount_opt);
+#ifdef CONFIG_EXT2_CHECK
+ if (test_opt (sb, CHECK)) {
+ ext2_check_blocks_bitmap (sb);
+ ext2_check_inodes_bitmap (sb);
+ }
+#endif
+ return res;
+}
+
+/*
+ * ext2_check_descriptors - sanity-check every block group descriptor.
+ *
+ * For each group, the block bitmap, the inode bitmap and the whole inode
+ * table must lie inside the group's block range
+ * [block, block + EXT2_BLOCKS_PER_GROUP(sb) - 1].
+ *
+ * Returns 1 when all descriptors pass, or 0 after reporting the first
+ * inconsistency through ext2_error().
+ */
+static int ext2_check_descriptors (struct super_block * sb)
+{
+	int i;
+	int desc_block = 0;
+	struct ext2_sb_info *sbi = EXT2_SB(sb);
+	unsigned long block = le32_to_cpu(sbi->s_es->s_first_data_block);
+	struct ext2_group_desc * gdp = NULL;
+
+	ext2_debug ("Checking group descriptors");
+
+	for (i = 0; i < sbi->s_groups_count; i++)
+	{
+		/* Step to the next descriptor block when the current one is used up. */
+		if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0)
+			gdp = (struct ext2_group_desc *) sbi->s_group_desc[desc_block++]->b_data;
+		if (le32_to_cpu(gdp->bg_block_bitmap) < block ||
+		    le32_to_cpu(gdp->bg_block_bitmap) >= block + EXT2_BLOCKS_PER_GROUP(sb))
+		{
+			ext2_error (sb, "ext2_check_descriptors",
+				    "Block bitmap for group %d"
+				    " not in group (block %lu)!",
+				    i, (unsigned long) le32_to_cpu(gdp->bg_block_bitmap));
+			return 0;
+		}
+		if (le32_to_cpu(gdp->bg_inode_bitmap) < block ||
+		    le32_to_cpu(gdp->bg_inode_bitmap) >= block + EXT2_BLOCKS_PER_GROUP(sb))
+		{
+			ext2_error (sb, "ext2_check_descriptors",
+				    "Inode bitmap for group %d"
+				    " not in group (block %lu)!",
+				    i, (unsigned long) le32_to_cpu(gdp->bg_inode_bitmap));
+			return 0;
+		}
+		/*
+		 * The inode table occupies blocks
+		 * [bg_inode_table, bg_inode_table + s_itb_per_group - 1].
+		 * Its exclusive end may equal the exclusive end of the
+		 * group, so the upper bound test is '>' rather than '>=':
+		 * a table ending on the last block of the group is valid.
+		 */
+		if (le32_to_cpu(gdp->bg_inode_table) < block ||
+		    le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group >
+		    block + EXT2_BLOCKS_PER_GROUP(sb))
+		{
+			ext2_error (sb, "ext2_check_descriptors",
+				    "Inode table for group %d"
+				    " not in group (block %lu)!",
+				    i, (unsigned long) le32_to_cpu(gdp->bg_inode_table));
+			return 0;
+		}
+		block += EXT2_BLOCKS_PER_GROUP(sb);
+		gdp++;
+	}
+	return 1;
+}
+
+/*
+ * Used below on EXT2_ADDR_PER_BLOCK() and EXT2_DESC_PER_BLOCK().
+ * NOTE(review): ffz() presumably returns the index of the first zero bit,
+ * which makes ffz(~n) the index of the lowest set bit of n -- a true log2
+ * only when n is a power of two; confirm against the ffz() definition.
+ */
+#define log2(n) ffz(~(n))
+
+/*
+ * Maximal file size for a block size of (1 << bits) bytes.
+ *
+ * Two limits apply: what the direct plus single-, double- and
+ * triple-indirect blocks can address, and the (2^32 - 1) 512-byte sector
+ * limit of i_blocks.  We need to be 1 filesystem block less than the
+ * 2^32 sector limit.
+ */
+static loff_t ext2_max_size(int bits)
+{
+	/*
+	 * Largest size of a dense, 4k-blocksize file whose total sector
+	 * count (data plus all indirect blocks) does not exceed 2^32.
+	 */
+	const loff_t upper_limit = 0x1ff7fffd000LL;
+	/* log2 of (block size / 4): entries held by one indirect block */
+	const int ind_bits = bits - 2;
+	loff_t size;
+
+	/* Addressable blocks: direct + indirect + double + triple. */
+	size = EXT2_NDIR_BLOCKS;
+	size += 1LL << ind_bits;
+	size += 1LL << (2 * ind_bits);
+	size += 1LL << (3 * ind_bits);
+
+	/* Blocks -> bytes, then clamp to the sector-count limit. */
+	size <<= bits;
+	return size > upper_limit ? upper_limit : size;
+}
+
+/*
+ * Return the block number that holds group descriptor block 'nr'.
+ *
+ * Without the META_BG feature, or for descriptor blocks below
+ * s_first_meta_bg, the descriptor blocks follow the superblock block
+ * directly.  Otherwise the location is derived from the first group that
+ * descriptor block describes, skipping one block when that group carries
+ * a superblock copy.
+ */
+static unsigned long descriptor_loc(struct super_block *sb,
+				    unsigned long logic_sb_block,
+				    int nr)
+{
+	struct ext2_sb_info *sbi = EXT2_SB(sb);
+	unsigned long first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
+	unsigned long group;
+	unsigned long loc;
+
+	if (!EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_META_BG) ||
+	    nr < first_meta_bg)
+		return logic_sb_block + nr + 1;
+
+	/* First group described by descriptor block 'nr'. */
+	group = sbi->s_desc_per_block * nr;
+
+	loc = le32_to_cpu(sbi->s_es->s_first_data_block);
+	if (ext2_bg_has_super(sb, group))
+		loc += 1;
+	return loc + group * sbi->s_blocks_per_group;
+}
+
+/*
+ * ext2_fill_super()
+ *
+ * Read and validate the on-disk superblock and group descriptors and set
+ * up 'sb' (and a freshly allocated ext2_sb_info) for a mount.  It is passed
+ * to get_sb_bdev() by ext2_get_sb().
+ *
+ * sb - superblock being filled in
+ * data - mount option string; get_sb_block() is given first look at it
+ * and the rest goes to parse_options()
+ * silent - when non-zero, the "VFS: ..." diagnostics are suppressed
+ *
+ * Returns 0 on success, -ENOMEM if the ext2_sb_info cannot be allocated,
+ * and -EINVAL for every other failure (including the later kmalloc()
+ * failures and I/O errors).
+ *
+ * Cleanup labels cascade: failed_mount2 -> failed_mount_group_desc ->
+ * failed_mount -> failed_sbi, each undoing one more stage of setup.
+ */
+static int ext2_fill_super(struct super_block *sb, void *data, int silent)
+{
+ struct buffer_head * bh;
+ struct ext2_sb_info * sbi;
+ struct ext2_super_block * es;
+ struct inode *root;
+ unsigned long block;
+ unsigned long sb_block = get_sb_block(&data);
+ unsigned long logic_sb_block;
+ unsigned long offset = 0;
+ unsigned long def_mount_opts;
+ int blocksize = BLOCK_SIZE;
+ int db_count;
+ int i, j;
+ __le32 features;
+
+ sbi = kmalloc(sizeof(*sbi), GFP_KERNEL);
+ if (!sbi)
+ return -ENOMEM;
+ sb->s_fs_info = sbi;
+ /* Zeroing also leaves s_group_desc and s_debts NULL for the error paths. */
+ memset(sbi, 0, sizeof(*sbi));
+
+ /*
+ * See what the current blocksize for the device is, and
+ * use that as the blocksize. Otherwise (or if the blocksize
+ * is smaller than the default) use the default.
+ * This is important for devices that have a hardware
+ * sectorsize that is larger than the default.
+ */
+ blocksize = sb_min_blocksize(sb, BLOCK_SIZE);
+ if (!blocksize) {
+ printk ("EXT2-fs: unable to set blocksize\n");
+ goto failed_sbi;
+ }
+
+ /*
+ * If the superblock doesn't start on a hardware sector boundary,
+ * calculate the offset.
+ */
+ if (blocksize != BLOCK_SIZE) {
+ logic_sb_block = (sb_block*BLOCK_SIZE) / blocksize;
+ offset = (sb_block*BLOCK_SIZE) % blocksize;
+ } else {
+ logic_sb_block = sb_block;
+ }
+
+ if (!(bh = sb_bread(sb, logic_sb_block))) {
+ printk ("EXT2-fs: unable to read superblock\n");
+ goto failed_sbi;
+ }
+ /*
+ * Note: s_es must be initialized as soon as possible because
+ * some ext2 macro-instructions depend on its value
+ */
+ es = (struct ext2_super_block *) (((char *)bh->b_data) + offset);
+ sbi->s_es = es;
+ sb->s_magic = le16_to_cpu(es->s_magic);
+
+ if (sb->s_magic != EXT2_SUPER_MAGIC)
+ goto cantfind_ext2;
+
+ /* Set defaults before we parse the mount options */
+ def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
+ if (def_mount_opts & EXT2_DEFM_DEBUG)
+ set_opt(sbi->s_mount_opt, DEBUG);
+ if (def_mount_opts & EXT2_DEFM_BSDGROUPS)
+ set_opt(sbi->s_mount_opt, GRPID);
+ if (def_mount_opts & EXT2_DEFM_UID16)
+ set_opt(sbi->s_mount_opt, NO_UID32);
+ if (def_mount_opts & EXT2_DEFM_XATTR_USER)
+ set_opt(sbi->s_mount_opt, XATTR_USER);
+ if (def_mount_opts & EXT2_DEFM_ACL)
+ set_opt(sbi->s_mount_opt, POSIX_ACL);
+
+ /* Default error behaviour comes from the superblock's s_errors field. */
+ if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_PANIC)
+ set_opt(sbi->s_mount_opt, ERRORS_PANIC);
+ else if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_RO)
+ set_opt(sbi->s_mount_opt, ERRORS_RO);
+
+ sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
+ sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
+
+ if (!parse_options ((char *) data, sbi))
+ goto failed_mount;
+
+ /* Mirror the POSIX_ACL mount option into the VFS flag MS_POSIXACL. */
+ sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
+ ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ?
+ MS_POSIXACL : 0);
+
+ if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV &&
+ (EXT2_HAS_COMPAT_FEATURE(sb, ~0U) ||
+ EXT2_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
+ EXT2_HAS_INCOMPAT_FEATURE(sb, ~0U)))
+ printk("EXT2-fs warning: feature flags set on rev 0 fs, "
+ "running e2fsck is recommended\n");
+ /*
+ * Check feature flags regardless of the revision level, since we
+ * previously didn't change the revision level when setting the flags,
+ * so there is a chance incompat flags are set on a rev 0 filesystem.
+ */
+ features = EXT2_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP);
+ if (features) {
+ printk("EXT2-fs: %s: couldn't mount because of "
+ "unsupported optional features (%x).\n",
+ sb->s_id, le32_to_cpu(features));
+ goto failed_mount;
+ }
+ /* Unsupported RO_COMPAT features only block a read-write mount. */
+ if (!(sb->s_flags & MS_RDONLY) &&
+ (features = EXT2_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))){
+ printk("EXT2-fs: %s: couldn't mount RDWR because of "
+ "unsupported optional features (%x).\n",
+ sb->s_id, le32_to_cpu(features));
+ goto failed_mount;
+ }
+
+ /* From here on 'blocksize' is the filesystem's own block size. */
+ blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
+
+ /* If the blocksize doesn't match, re-read the thing.. */
+ if (sb->s_blocksize != blocksize) {
+ /* bh is released here, so the failures below skip failed_mount's brelse(). */
+ brelse(bh);
+
+ if (!sb_set_blocksize(sb, blocksize)) {
+ printk(KERN_ERR "EXT2-fs: blocksize too small for device.\n");
+ goto failed_sbi;
+ }
+
+ logic_sb_block = (sb_block*BLOCK_SIZE) / blocksize;
+ offset = (sb_block*BLOCK_SIZE) % blocksize;
+ bh = sb_bread(sb, logic_sb_block);
+ if(!bh) {
+ printk("EXT2-fs: Couldn't read superblock on "
+ "2nd try.\n");
+ goto failed_sbi;
+ }
+ es = (struct ext2_super_block *) (((char *)bh->b_data) + offset);
+ sbi->s_es = es;
+ if (es->s_magic != cpu_to_le16(EXT2_SUPER_MAGIC)) {
+ printk ("EXT2-fs: Magic mismatch, very weird !\n");
+ goto failed_mount;
+ }
+ }
+
+ sb->s_maxbytes = ext2_max_size(sb->s_blocksize_bits);
+
+ if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV) {
+ sbi->s_inode_size = EXT2_GOOD_OLD_INODE_SIZE;
+ sbi->s_first_ino = EXT2_GOOD_OLD_FIRST_INO;
+ } else {
+ sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
+ sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
+ /* Inode size must be a power of two between the old size and the block size. */
+ if ((sbi->s_inode_size < EXT2_GOOD_OLD_INODE_SIZE) ||
+ (sbi->s_inode_size & (sbi->s_inode_size - 1)) ||
+ (sbi->s_inode_size > blocksize)) {
+ printk ("EXT2-fs: unsupported inode size: %d\n",
+ sbi->s_inode_size);
+ goto failed_mount;
+ }
+ }
+
+ sbi->s_frag_size = EXT2_MIN_FRAG_SIZE <<
+ le32_to_cpu(es->s_log_frag_size);
+ /* Guard the division below against a zero fragment size. */
+ if (sbi->s_frag_size == 0)
+ goto cantfind_ext2;
+ sbi->s_frags_per_block = sb->s_blocksize / sbi->s_frag_size;
+
+ sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
+ sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
+ sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
+
+ /* The zero checks below guard the divisions that follow them. */
+ if (EXT2_INODE_SIZE(sb) == 0)
+ goto cantfind_ext2;
+ sbi->s_inodes_per_block = sb->s_blocksize / EXT2_INODE_SIZE(sb);
+ if (sbi->s_inodes_per_block == 0)
+ goto cantfind_ext2;
+ sbi->s_itb_per_group = sbi->s_inodes_per_group /
+ sbi->s_inodes_per_block;
+ sbi->s_desc_per_block = sb->s_blocksize /
+ sizeof (struct ext2_group_desc);
+ sbi->s_sbh = bh;
+ sbi->s_mount_state = le16_to_cpu(es->s_state);
+ sbi->s_addr_per_block_bits =
+ log2 (EXT2_ADDR_PER_BLOCK(sb));
+ sbi->s_desc_per_block_bits =
+ log2 (EXT2_DESC_PER_BLOCK(sb));
+
+ /*
+ * NOTE(review): sb->s_magic is assigned only once above and was
+ * already compared against EXT2_SUPER_MAGIC there, so this test
+ * looks redundant -- confirm before removing.
+ */
+ if (sb->s_magic != EXT2_SUPER_MAGIC)
+ goto cantfind_ext2;
+
+ if (sb->s_blocksize != bh->b_size) {
+ if (!silent)
+ printk ("VFS: Unsupported blocksize on dev "
+ "%s.\n", sb->s_id);
+ goto failed_mount;
+ }
+
+ if (sb->s_blocksize != sbi->s_frag_size) {
+ printk ("EXT2-fs: fragsize %lu != blocksize %lu (not supported yet)\n",
+ sbi->s_frag_size, sb->s_blocksize);
+ goto failed_mount;
+ }
+
+ /* Per-group counts may not exceed the number of bits in one block. */
+ if (sbi->s_blocks_per_group > sb->s_blocksize * 8) {
+ printk ("EXT2-fs: #blocks per group too big: %lu\n",
+ sbi->s_blocks_per_group);
+ goto failed_mount;
+ }
+ if (sbi->s_frags_per_group > sb->s_blocksize * 8) {
+ printk ("EXT2-fs: #fragments per group too big: %lu\n",
+ sbi->s_frags_per_group);
+ goto failed_mount;
+ }
+ if (sbi->s_inodes_per_group > sb->s_blocksize * 8) {
+ printk ("EXT2-fs: #inodes per group too big: %lu\n",
+ sbi->s_inodes_per_group);
+ goto failed_mount;
+ }
+
+ if (EXT2_BLOCKS_PER_GROUP(sb) == 0)
+ goto cantfind_ext2;
+ /* Number of groups, rounded up; then number of descriptor blocks, rounded up. */
+ sbi->s_groups_count = (le32_to_cpu(es->s_blocks_count) -
+ le32_to_cpu(es->s_first_data_block) +
+ EXT2_BLOCKS_PER_GROUP(sb) - 1) /
+ EXT2_BLOCKS_PER_GROUP(sb);
+ db_count = (sbi->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) /
+ EXT2_DESC_PER_BLOCK(sb);
+ sbi->s_group_desc = kmalloc (db_count * sizeof (struct buffer_head *), GFP_KERNEL);
+ if (sbi->s_group_desc == NULL) {
+ printk ("EXT2-fs: not enough memory\n");
+ goto failed_mount;
+ }
+ percpu_counter_init(&sbi->s_freeblocks_counter);
+ percpu_counter_init(&sbi->s_freeinodes_counter);
+ percpu_counter_init(&sbi->s_dirs_counter);
+ bgl_lock_init(&sbi->s_blockgroup_lock);
+ sbi->s_debts = kmalloc(sbi->s_groups_count * sizeof(*sbi->s_debts),
+ GFP_KERNEL);
+ if (!sbi->s_debts) {
+ printk ("EXT2-fs: not enough memory\n");
+ goto failed_mount_group_desc;
+ }
+ memset(sbi->s_debts, 0, sbi->s_groups_count * sizeof(*sbi->s_debts));
+ /* Read every group descriptor block; on failure release the ones already read. */
+ for (i = 0; i < db_count; i++) {
+ block = descriptor_loc(sb, logic_sb_block, i);
+ sbi->s_group_desc[i] = sb_bread(sb, block);
+ if (!sbi->s_group_desc[i]) {
+ for (j = 0; j < i; j++)
+ brelse (sbi->s_group_desc[j]);
+ printk ("EXT2-fs: unable to read group descriptors\n");
+ goto failed_mount_group_desc;
+ }
+ }
+ if (!ext2_check_descriptors (sb)) {
+ printk ("EXT2-fs: group descriptors corrupted!\n");
+ /* The loop above ran to completion, so i already equals db_count. */
+ db_count = i;
+ goto failed_mount2;
+ }
+ sbi->s_gdb_count = db_count;
+ get_random_bytes(&sbi->s_next_generation, sizeof(u32));
+ spin_lock_init(&sbi->s_next_gen_lock);
+ /*
+ * set up enough so that it can read an inode
+ */
+ sb->s_op = &ext2_sops;
+ sb->s_export_op = &ext2_export_ops;
+ sb->s_xattr = ext2_xattr_handlers;
+ root = iget(sb, EXT2_ROOT_INO);
+ sb->s_root = d_alloc_root(root);
+ if (!sb->s_root) {
+ iput(root);
+ printk(KERN_ERR "EXT2-fs: get root inode failed\n");
+ goto failed_mount2;
+ }
+ /* The root must be a non-empty directory. */
+ if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
+ dput(sb->s_root);
+ sb->s_root = NULL;
+ printk(KERN_ERR "EXT2-fs: corrupt root inode, run e2fsck\n");
+ goto failed_mount2;
+ }
+ if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL))
+ ext2_warning(sb, __FUNCTION__,
+ "mounting ext3 filesystem as ext2\n");
+ ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY);
+ /* Seed the per-cpu counters with the current on-disk totals. */
+ percpu_counter_mod(&sbi->s_freeblocks_counter,
+ ext2_count_free_blocks(sb));
+ percpu_counter_mod(&sbi->s_freeinodes_counter,
+ ext2_count_free_inodes(sb));
+ percpu_counter_mod(&sbi->s_dirs_counter,
+ ext2_count_dirs(sb));
+ return 0;
+
+ /* Not (recognisably) an ext2 filesystem: report unless silent, then unwind. */
+cantfind_ext2:
+ if (!silent)
+ printk("VFS: Can't find an ext2 filesystem on dev %s.\n",
+ sb->s_id);
+ goto failed_mount;
+
+ /* Release the db_count descriptor buffers that were read. */
+failed_mount2:
+ for (i = 0; i < db_count; i++)
+ brelse(sbi->s_group_desc[i]);
+ /* s_debts is still NULL here if its allocation was what failed. */
+failed_mount_group_desc:
+ kfree(sbi->s_group_desc);
+ kfree(sbi->s_debts);
+ /* Release the superblock buffer. */
+failed_mount:
+ brelse(bh);
+ /* Detach and free the ext2_sb_info. */
+failed_sbi:
+ sb->s_fs_info = NULL;
+ kfree(sbi);
+ return -EINVAL;
+}
+
+/*
+ * Stamp the superblock with the current write time and mark its buffer
+ * dirty without waiting for it to reach disk; clears sb->s_dirt.
+ */
+static void ext2_commit_super (struct super_block * sb,
+			       struct ext2_super_block * es)
+{
+	struct buffer_head *sbh;
+
+	es->s_wtime = cpu_to_le32(get_seconds());
+
+	sbh = EXT2_SB(sb)->s_sbh;
+	mark_buffer_dirty(sbh);
+
+	sb->s_dirt = 0;
+}
+
+/*
+ * Refresh the free block/inode totals and the write time in the
+ * superblock, then mark its buffer dirty and write it out with
+ * sync_dirty_buffer(); clears sb->s_dirt.
+ */
+static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es)
+{
+	struct buffer_head *sbh;
+
+	es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb));
+	es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb));
+	es->s_wtime = cpu_to_le32(get_seconds());
+
+	sbh = EXT2_SB(sb)->s_sbh;
+	mark_buffer_dirty(sbh);
+	sync_dirty_buffer(sbh);
+
+	sb->s_dirt = 0;
+}
+
+/*
+ * In the second extended file system, it is not necessary to
+ * write the super block since we use a mapping of the
+ * disk super block in a buffer.
+ *
+ * However, this function is still used to set the fs valid
+ * flags to 0. We need to set this flag to 0 since the fs
+ * may have been checked while mounted and e2fsck may have
+ * set s_state to EXT2_VALID_FS after some corrections.
+ */
+
+void ext2_write_super (struct super_block * sb)
+{
+	struct ext2_super_block * es;
+
+	lock_kernel();
+
+	/* A read-only mount never touches the on-disk superblock. */
+	if (sb->s_flags & MS_RDONLY)
+		goto done;
+
+	es = EXT2_SB(sb)->s_es;
+
+	if (!(le16_to_cpu(es->s_state) & EXT2_VALID_FS)) {
+		/* Valid flag already clear: a plain commit is enough. */
+		ext2_commit_super (sb, es);
+		goto done;
+	}
+
+	/* Clear the valid flag and push the superblock out synchronously. */
+	ext2_debug ("setting valid to 0\n");
+	es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) &
+				  ~EXT2_VALID_FS);
+	es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb));
+	es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb));
+	es->s_mtime = cpu_to_le32(get_seconds());
+	ext2_sync_super(sb, es);
+
+done:
+	sb->s_dirt = 0;
+	unlock_kernel();
+}
+
+/*
+ * Remount handler: re-parse the mount options and, when the read-only
+ * state requested in *flags differs from the current one, switch the
+ * filesystem between read-only and read-write.
+ *
+ * Returns 0 on success, -EINVAL if the options do not parse, and -EROFS
+ * if a read-write remount is refused because of unsupported RO_COMPAT
+ * features.
+ */
+static int ext2_remount (struct super_block * sb, int * flags, char * data)
+{
+	struct ext2_sb_info * sbi = EXT2_SB(sb);
+	struct ext2_super_block * es;
+	__le32 unsupported;
+
+	/*
+	 * Allow the "check" option to be passed as a remount option.
+	 */
+	if (!parse_options (data, sbi))
+		return -EINVAL;
+
+	/* Keep MS_POSIXACL in step with the (possibly changed) ACL option. */
+	if (sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL)
+		sb->s_flags |= MS_POSIXACL;
+	else
+		sb->s_flags &= ~MS_POSIXACL;
+
+	es = sbi->s_es;
+
+	/* Nothing more to do unless the read-only state really changes. */
+	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
+		return 0;
+
+	if (*flags & MS_RDONLY) {
+		if ((le16_to_cpu(es->s_state) & EXT2_VALID_FS) ||
+		    !(sbi->s_mount_state & EXT2_VALID_FS))
+			return 0;
+		/*
+		 * OK, we are remounting a valid rw partition rdonly, so set
+		 * the rdonly flag and then mark the partition as valid again.
+		 */
+		es->s_state = cpu_to_le16(sbi->s_mount_state);
+		es->s_mtime = cpu_to_le32(get_seconds());
+		ext2_sync_super(sb, es);
+		return 0;
+	}
+
+	unsupported = EXT2_HAS_RO_COMPAT_FEATURE(sb,
+					~EXT2_FEATURE_RO_COMPAT_SUPP);
+	if (unsupported) {
+		printk("EXT2-fs: %s: couldn't remount RDWR because of "
+		       "unsupported optional features (%x).\n",
+		       sb->s_id, le32_to_cpu(unsupported));
+		return -EROFS;
+	}
+	/*
+	 * Mounting a RDONLY partition read-write, so reread and
+	 * store the current valid flag.  (It may have been changed
+	 * by e2fsck since we originally mounted the partition.)
+	 */
+	sbi->s_mount_state = le16_to_cpu(es->s_state);
+	if (!ext2_setup_super (sb, es, 0))
+		sb->s_flags &= ~MS_RDONLY;
+	ext2_sync_super(sb, es);
+	return 0;
+}
+
+/*
+ * Fill 'buf' with filesystem statistics.  Unless the MINIX_DF option is
+ * set, the blocks taken by filesystem structures are subtracted from the
+ * reported total.  Always returns 0.
+ */
+static int ext2_statfs (struct super_block * sb, struct kstatfs * buf)
+{
+	struct ext2_sb_info *sbi = EXT2_SB(sb);
+	unsigned long overhead = 0;
+	int group;
+
+	if (!test_opt (sb, MINIX_DF)) {
+		/* All of the blocks before first_data_block are overhead. */
+		overhead = le32_to_cpu(sbi->s_es->s_first_data_block);
+
+		/*
+		 * Superblock copies and group descriptor blocks; with the
+		 * sparse superblocks feature not every group has them.
+		 */
+		for (group = 0; group < sbi->s_groups_count; group++)
+			overhead += ext2_bg_has_super(sb, group) +
+				ext2_bg_num_gdb(sb, group);
+
+		/*
+		 * Every block group has an inode bitmap, a block
+		 * bitmap, and an inode table.
+		 */
+		overhead += (sbi->s_groups_count *
+			     (2 + sbi->s_itb_per_group));
+	}
+
+	buf->f_type = EXT2_SUPER_MAGIC;
+	buf->f_bsize = sb->s_blocksize;
+	buf->f_blocks = le32_to_cpu(sbi->s_es->s_blocks_count) - overhead;
+	buf->f_bfree = ext2_count_free_blocks(sb);
+	/* Blocks available to ordinary users exclude the reserved ones. */
+	if (buf->f_bfree < le32_to_cpu(sbi->s_es->s_r_blocks_count))
+		buf->f_bavail = 0;
+	else
+		buf->f_bavail = buf->f_bfree -
+			le32_to_cpu(sbi->s_es->s_r_blocks_count);
+	buf->f_files = le32_to_cpu(sbi->s_es->s_inodes_count);
+	buf->f_ffree = ext2_count_free_inodes (sb);
+	buf->f_namelen = EXT2_NAME_LEN;
+	return 0;
+}
+
+/*
+ * .get_sb callback of ext2_fs_type: delegates to get_sb_bdev(), handing it
+ * ext2_fill_super() as the routine that fills in the superblock.
+ */
+static struct super_block *ext2_get_sb(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
+{
+ return get_sb_bdev(fs_type, flags, dev_name, data, ext2_fill_super);
+}
+
+#ifdef CONFIG_QUOTA
+
+/* Read data from quotafile - avoid pagecache and such because we cannot afford
+ * acquiring the locks... As quota files are never truncated and quota code
+ * itself serializes the operations (and no one else should touch the files)
+ * we don't have to be afraid of races */
+static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data,
+			       size_t len, loff_t off)
+{
+	struct inode *inode = sb_dqopt(sb)->files[type];
+	loff_t i_size = i_size_read(inode);
+	sector_t blk = off >> EXT2_BLOCK_SIZE_BITS(sb);
+	int offset = off & (sb->s_blocksize - 1);	/* offset inside block 'blk' */
+	struct buffer_head map_bh;	/* scratch bh, only receives the mapping */
+	struct buffer_head *bh;
+	size_t remaining;
+	int chunk;
+	int err = 0;
+
+	/* Clamp the request to the current size of the quota file. */
+	if (off > i_size)
+		return 0;
+	if (off+len > i_size)
+		len = i_size-off;
+
+	remaining = len;
+	while (remaining > 0) {
+		/* Copy at most up to the end of the current block. */
+		if (sb->s_blocksize - offset < remaining)
+			chunk = sb->s_blocksize - offset;
+		else
+			chunk = remaining;
+
+		map_bh.b_state = 0;
+		err = ext2_get_block(inode, blk, &map_bh, 0);
+		if (err)
+			return err;
+
+		if (buffer_mapped(&map_bh)) {
+			bh = sb_bread(sb, map_bh.b_blocknr);
+			if (!bh)
+				return -EIO;
+			memcpy(data, bh->b_data+offset, chunk);
+			brelse(bh);
+		} else {
+			/* A hole? */
+			memset(data, 0, chunk);
+		}
+
+		/* Every block after the first is read from its start. */
+		offset = 0;
+		remaining -= chunk;
+		data += chunk;
+		blk++;
+	}
+	return len;
+}
+
+/*
+ * Write to quotafile
+ *
+ * Copies 'len' bytes from 'data' into quota file 'type' at offset 'off',
+ * one block at a time and directly through buffer heads, with
+ * inode->i_sem held for the duration.
+ *
+ * Returns the number of bytes written.  If nothing at all was written,
+ * returns the error from the first block (0 for a zero-length request).
+ * A partial write updates the inode size/times and reports the short
+ * count.
+ */
+static ssize_t ext2_quota_write(struct super_block *sb, int type,
+				const char *data, size_t len, loff_t off)
+{
+	struct inode *inode = sb_dqopt(sb)->files[type];
+	sector_t blk = off >> EXT2_BLOCK_SIZE_BITS(sb);
+	int err = 0;
+	int offset = off & (sb->s_blocksize - 1);
+	int tocopy;
+	size_t towrite = len;
+	struct buffer_head tmp_bh;
+	struct buffer_head *bh;
+
+	down(&inode->i_sem);
+	while (towrite > 0) {
+		/* Write at most up to the end of the current block. */
+		tocopy = sb->s_blocksize - offset < towrite ?
+				sb->s_blocksize - offset : towrite;
+
+		tmp_bh.b_state = 0;
+		err = ext2_get_block(inode, blk, &tmp_bh, 1);
+		if (err)
+			goto out;
+		/*
+		 * A partial block must be read first so the bytes we do not
+		 * overwrite are preserved; a full block can just be grabbed.
+		 */
+		if (offset || tocopy != EXT2_BLOCK_SIZE(sb))
+			bh = sb_bread(sb, tmp_bh.b_blocknr);
+		else
+			bh = sb_getblk(sb, tmp_bh.b_blocknr);
+		if (!bh) {
+			err = -EIO;
+			goto out;
+		}
+		lock_buffer(bh);
+		memcpy(bh->b_data+offset, data, tocopy);
+		flush_dcache_page(bh->b_page);
+		set_buffer_uptodate(bh);
+		mark_buffer_dirty(bh);
+		unlock_buffer(bh);
+		brelse(bh);
+		offset = 0;
+		towrite -= tocopy;
+		data += tocopy;
+		blk++;
+	}
+out:
+	if (len == towrite) {
+		/*
+		 * Nothing was written.  i_sem is still held at this point and
+		 * must be released before returning, otherwise the next
+		 * down() on this inode would block forever.
+		 */
+		up(&inode->i_sem);
+		return err;
+	}
+	if (inode->i_size < off+len-towrite)
+		i_size_write(inode, off+len-towrite);
+	inode->i_version++;
+	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	mark_inode_dirty(inode);
+	up(&inode->i_sem);
+	return len - towrite;
+}
+
+#endif
+
+/*
+ * Filesystem type descriptor for "ext2"; registered by init_ext2_fs() and
+ * unregistered by exit_ext2_fs().  Superblocks are obtained through
+ * ext2_get_sb() and torn down with kill_block_super().
+ */
+static struct file_system_type ext2_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "ext2",
+ .get_sb = ext2_get_sb,
+ .kill_sb = kill_block_super,
+ .fs_flags = FS_REQUIRES_DEV,
+};
+
+/*
+ * Module initialisation: set up the xattr support, then the inode cache,
+ * then register the filesystem type.  A failure undoes the steps already
+ * completed, in reverse order, and returns the error.
+ */
+static int __init init_ext2_fs(void)
+{
+	int err;
+
+	err = init_ext2_xattr();
+	if (err)
+		return err;
+
+	err = init_inodecache();
+	if (!err) {
+		err = register_filesystem(&ext2_fs_type);
+		if (!err)
+			return 0;
+		/* Registration failed: drop the inode cache again. */
+		destroy_inodecache();
+	}
+
+	exit_ext2_xattr();
+	return err;
+}
+
+/*
+ * Module exit: undo init_ext2_fs() in the reverse order of its setup
+ * (unregister the filesystem, destroy the inode cache, shut down xattr).
+ */
+static void __exit exit_ext2_fs(void)
+{
+ unregister_filesystem(&ext2_fs_type);
+ destroy_inodecache();
+ exit_ext2_xattr();
+}
+
+module_init(init_ext2_fs)
+module_exit(exit_ext2_fs)
diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c
new file mode 100644
index 000000000000..9f7bac01d557
--- /dev/null
+++ b/fs/ext2/symlink.c
@@ -0,0 +1,52 @@
+/*
+ * linux/fs/ext2/symlink.c
+ *
+ * Only fast symlinks left here - the rest is done by generic code. AV, 1999
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ * from
+ *
+ * linux/fs/minix/symlink.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * ext2 symlink handling code
+ */
+
+#include "ext2.h"
+#include "xattr.h"
+#include <linux/namei.h>
+
+/*
+ * follow_link for fast symlinks: hand the VFS the link target through
+ * nd_set_link(), pointing it at the in-core inode's i_data area.
+ * Always returns 0.
+ */
+static int ext2_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+ struct ext2_inode_info *ei = EXT2_I(dentry->d_inode);
+ nd_set_link(nd, (char *)ei->i_data);
+ return 0;
+}
+
+/*
+ * Inode operations for symlinks that are resolved by the generic
+ * page-based helpers (page_follow_link_light / page_put_link).  The xattr
+ * operations are only present with CONFIG_EXT2_FS_XATTR.
+ */
+struct inode_operations ext2_symlink_inode_operations = {
+ .readlink = generic_readlink,
+ .follow_link = page_follow_link_light,
+ .put_link = page_put_link,
+#ifdef CONFIG_EXT2_FS_XATTR
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .listxattr = ext2_listxattr,
+ .removexattr = generic_removexattr,
+#endif
+};
+
+/*
+ * Inode operations for fast symlinks, resolved by ext2_follow_link()
+ * straight from the in-core inode; no put_link is needed.  The xattr
+ * operations are only present with CONFIG_EXT2_FS_XATTR.
+ */
+struct inode_operations ext2_fast_symlink_inode_operations = {
+ .readlink = generic_readlink,
+ .follow_link = ext2_follow_link,
+#ifdef CONFIG_EXT2_FS_XATTR
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .listxattr = ext2_listxattr,
+ .removexattr = generic_removexattr,
+#endif
+};
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
new file mode 100644
index 000000000000..27982b500e84
--- /dev/null
+++ b/fs/ext2/xattr.c
@@ -0,0 +1,1043 @@
+/*
+ * linux/fs/ext2/xattr.c
+ *
+ * Copyright (C) 2001-2003 Andreas Gruenbacher <agruen@suse.de>
+ *
+ * Fix by Harrison Xing <harrison@mountainviewdata.com>.
+ * Extended attributes for symlinks and special files added per
+ * suggestion of Luka Renko <luka.renko@hermes.si>.
+ * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
+ * Red Hat Inc.
+ *
+ */
+
+/*
+ * Extended attributes are stored on disk blocks allocated outside of
+ * any inode. The i_file_acl field is then made to point to this allocated
+ * block. If all extended attributes of an inode are identical, these
+ * inodes may share the same extended attribute block. Such situations
+ * are automatically detected by keeping a cache of recent attribute block
+ * numbers and hashes over the block's contents in memory.
+ *
+ *
+ * Extended attribute block layout:
+ *
+ * +------------------+
+ * | header |
+ * | entry 1 | |
+ * | entry 2 | | growing downwards
+ * | entry 3 | v
+ * | four null bytes |
+ * | . . . |
+ * | value 1 | ^
+ * | value 3 | | growing upwards
+ * | value 2 | |
+ * +------------------+
+ *
+ * The block header is followed by multiple entry descriptors. These entry
+ * descriptors are variable in size, and aligned to EXT2_XATTR_PAD
+ * byte boundaries. The entry descriptors are sorted by attribute name,
+ * so that two extended attribute blocks can be compared efficiently.
+ *
+ * Attribute values are aligned to the end of the block, stored in
+ * no specific order. They are also padded to EXT2_XATTR_PAD byte
+ * boundaries. No additional gaps are left between them.
+ *
+ * Locking strategy
+ * ----------------
+ * EXT2_I(inode)->i_file_acl is protected by EXT2_I(inode)->xattr_sem.
+ * EA blocks are only changed if they are exclusive to an inode, so
+ * holding xattr_sem also means that nothing but the EA block's reference
+ * count will change. Multiple writers to an EA block are synchronized
+ * by the bh lock. No more than a single bh lock is held at any time
+ * to avoid deadlocks.
+ */
+
+#include <linux/buffer_head.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/mbcache.h>
+#include <linux/quotaops.h>
+#include <linux/rwsem.h>
+#include "ext2.h"
+#include "xattr.h"
+#include "acl.h"
+
+/* HDR: view the data of buffer head 'bh' as an xattr block header. */
+#define HDR(bh) ((struct ext2_xattr_header *)((bh)->b_data))
+/* ENTRY: view an arbitrary pointer as an xattr entry descriptor. */
+#define ENTRY(ptr) ((struct ext2_xattr_entry *)(ptr))
+/* FIRST_ENTRY: the entry descriptor located directly after the header. */
+#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1)
+/* IS_LAST_ENTRY: true when the first 32 bits at 'entry' are zero (the end marker). */
+#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
+
+/*
+ * Debug helpers.  With EXT2_XATTR_DEBUG defined, ea_idebug() prints a
+ * message prefixed with the inode's device id and inode number, and
+ * ea_bdebug() one prefixed with the buffer's device name and block number,
+ * both at KERN_DEBUG level.  Otherwise both expand to nothing.
+ */
+#ifdef EXT2_XATTR_DEBUG
+# define ea_idebug(inode, f...) do { \
+ printk(KERN_DEBUG "inode %s:%ld: ", \
+ inode->i_sb->s_id, inode->i_ino); \
+ printk(f); \
+ printk("\n"); \
+ } while (0)
+# define ea_bdebug(bh, f...) do { \
+ char b[BDEVNAME_SIZE]; \
+ printk(KERN_DEBUG "block %s:%lu: ", \
+ bdevname(bh->b_bdev, b), \
+ (unsigned long) bh->b_blocknr); \
+ printk(f); \
+ printk("\n"); \
+ } while (0)
+#else
+# define ea_idebug(f...)
+# define ea_bdebug(f...)
+#endif
+
+static int ext2_xattr_set2(struct inode *, struct buffer_head *,
+ struct ext2_xattr_header *);
+
+static int ext2_xattr_cache_insert(struct buffer_head *);
+static struct buffer_head *ext2_xattr_cache_find(struct inode *,
+ struct ext2_xattr_header *);
+static void ext2_xattr_rehash(struct ext2_xattr_header *,
+ struct ext2_xattr_entry *);
+
+static struct mb_cache *ext2_xattr_cache;
+
+/*
+ * Handler lookup table indexed by name index (see ext2_xattr_handler()).
+ * Slots without an initializer -- including those of features that are
+ * configured out -- are NULL.
+ */
+static struct xattr_handler *ext2_xattr_handler_map[] = {
+ [EXT2_XATTR_INDEX_USER] = &ext2_xattr_user_handler,
+#ifdef CONFIG_EXT2_FS_POSIX_ACL
+ [EXT2_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext2_xattr_acl_access_handler,
+ [EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext2_xattr_acl_default_handler,
+#endif
+ [EXT2_XATTR_INDEX_TRUSTED] = &ext2_xattr_trusted_handler,
+#ifdef CONFIG_EXT2_FS_SECURITY
+ [EXT2_XATTR_INDEX_SECURITY] = &ext2_xattr_security_handler,
+#endif
+};
+
+/*
+ * NULL-terminated list of all xattr handlers compiled in; installed as
+ * sb->s_xattr by ext2_fill_super().
+ */
+struct xattr_handler *ext2_xattr_handlers[] = {
+ &ext2_xattr_user_handler,
+ &ext2_xattr_trusted_handler,
+#ifdef CONFIG_EXT2_FS_POSIX_ACL
+ &ext2_xattr_acl_access_handler,
+ &ext2_xattr_acl_default_handler,
+#endif
+#ifdef CONFIG_EXT2_FS_SECURITY
+ &ext2_xattr_security_handler,
+#endif
+ NULL
+};
+
+/*
+ * Look up the handler for a name index.  Returns NULL when the index is
+ * not positive, lies beyond the map, or has no handler in its slot.
+ */
+static inline struct xattr_handler *
+ext2_xattr_handler(int name_index)
+{
+	if (name_index <= 0 ||
+	    name_index >= ARRAY_SIZE(ext2_xattr_handler_map))
+		return NULL;
+
+	return ext2_xattr_handler_map[name_index];
+}
+
+/*
+ * ext2_xattr_get()
+ *
+ * Copy an extended attribute into the buffer
+ * provided, or compute the buffer size required.
+ * Buffer is NULL to compute the size of the buffer required.
+ *
+ * Returns a negative error number on failure, or the number of bytes
+ * used / required on success.
+ */
+int
+ext2_xattr_get(struct inode *inode, int name_index, const char *name,
+ void *buffer, size_t buffer_size)
+{
+ struct buffer_head *bh = NULL;
+ struct ext2_xattr_entry *entry;
+ size_t name_len, size;
+ char *end;
+ int error;
+
+ ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
+ name_index, name, buffer, (long)buffer_size);
+
+ if (name == NULL)
+ return -EINVAL;
+ /* xattr_sem is held for reading until the cleanup label. */
+ down_read(&EXT2_I(inode)->xattr_sem);
+ /* No attribute block at all means the attribute cannot exist. */
+ error = -ENODATA;
+ if (!EXT2_I(inode)->i_file_acl)
+ goto cleanup;
+ ea_idebug(inode, "reading block %d", EXT2_I(inode)->i_file_acl);
+ bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl);
+ error = -EIO;
+ if (!bh)
+ goto cleanup;
+ ea_bdebug(bh, "b_count=%d, refcount=%d",
+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
+ end = bh->b_data + bh->b_size;
+ /* Validate the block header; bad_block is also jumped to from the checks below. */
+ if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
+ HDR(bh)->h_blocks != cpu_to_le32(1)) {
+bad_block: ext2_error(inode->i_sb, "ext2_xattr_get",
+ "inode %ld: bad block %d", inode->i_ino,
+ EXT2_I(inode)->i_file_acl);
+ error = -EIO;
+ goto cleanup;
+ }
+ /* find named attribute */
+ name_len = strlen(name);
+
+ error = -ERANGE;
+ if (name_len > 255)
+ goto cleanup;
+ /* Walk the entries, checking that each successor stays inside the block. */
+ entry = FIRST_ENTRY(bh);
+ while (!IS_LAST_ENTRY(entry)) {
+ struct ext2_xattr_entry *next =
+ EXT2_XATTR_NEXT(entry);
+ if ((char *)next >= end)
+ goto bad_block;
+ if (name_index == entry->e_name_index &&
+ name_len == entry->e_name_len &&
+ memcmp(name, entry->e_name, name_len) == 0)
+ goto found;
+ entry = next;
+ }
+ /*
+ * NOTE(review): the loop above is only left (other than by goto) once
+ * IS_LAST_ENTRY(entry) holds, so the loop below cannot iterate as the
+ * code stands -- confirm before relying on or removing it.
+ */
+ /* Check the remaining name entries */
+ while (!IS_LAST_ENTRY(entry)) {
+ struct ext2_xattr_entry *next =
+ EXT2_XATTR_NEXT(entry);
+ if ((char *)next >= end)
+ goto bad_block;
+ entry = next;
+ }
+ /* A failed cache insert is only reported through the debug macro. */
+ if (ext2_xattr_cache_insert(bh))
+ ea_idebug(inode, "cache insert failed");
+ error = -ENODATA;
+ goto cleanup;
+found:
+ /* check the buffer size */
+ if (entry->e_value_block != 0)
+ goto bad_block;
+ /* The value must lie entirely within one filesystem block. */
+ size = le32_to_cpu(entry->e_value_size);
+ if (size > inode->i_sb->s_blocksize ||
+ le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize)
+ goto bad_block;
+
+ if (ext2_xattr_cache_insert(bh))
+ ea_idebug(inode, "cache insert failed");
+ /* With a NULL buffer only the required size is reported. */
+ if (buffer) {
+ error = -ERANGE;
+ if (size > buffer_size)
+ goto cleanup;
+ /* return value of attribute */
+ memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
+ size);
+ }
+ error = size;
+
+cleanup:
+ /* bh is still NULL here when no block was read. */
+ brelse(bh);
+ up_read(&EXT2_I(inode)->xattr_sem);
+
+ return error;
+}
+
+/*
+ * ext2_xattr_list()
+ *
+ * Copy a list of attribute names into the buffer
+ * provided, or compute the buffer size required.
+ * Buffer is NULL to compute the size of the buffer required.
+ *
+ * Returns a negative error number on failure, or the number of bytes
+ * used / required on success.
+ */
+static int
+ext2_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
+{
+	struct buffer_head *bh = NULL;
+	struct ext2_xattr_entry *entry;
+	struct xattr_handler *handler;
+	char *end;
+	size_t rest = buffer_size;	/* space left; also used to derive the total */
+	size_t size;
+	int error;
+
+	ea_idebug(inode, "buffer=%p, buffer_size=%ld",
+		  buffer, (long)buffer_size);
+
+	down_read(&EXT2_I(inode)->xattr_sem);
+
+	/* No attribute block: the list is empty. */
+	error = 0;
+	if (!EXT2_I(inode)->i_file_acl)
+		goto cleanup;
+
+	ea_idebug(inode, "reading block %d", EXT2_I(inode)->i_file_acl);
+	bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl);
+	error = -EIO;
+	if (!bh)
+		goto cleanup;
+	ea_bdebug(bh, "b_count=%d, refcount=%d",
+		atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
+	end = bh->b_data + bh->b_size;
+	if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
+	    HDR(bh)->h_blocks != cpu_to_le32(1))
+		goto bad_block;
+
+	/* First pass: make sure the entry chain stays inside the block. */
+	for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); ) {
+		struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(entry);
+
+		if ((char *)next >= end)
+			goto bad_block;
+		entry = next;
+	}
+	if (ext2_xattr_cache_insert(bh))
+		ea_idebug(inode, "cache insert failed");
+
+	/* Second pass: let each entry's handler emit the attribute name. */
+	for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry);
+	     entry = EXT2_XATTR_NEXT(entry)) {
+		handler = ext2_xattr_handler(entry->e_name_index);
+		if (!handler)
+			continue;
+
+		size = handler->list(inode, buffer, rest,
+				     entry->e_name,
+				     entry->e_name_len);
+		if (buffer) {
+			if (size > rest) {
+				error = -ERANGE;
+				goto cleanup;
+			}
+			buffer += size;
+		}
+		rest -= size;
+	}
+	error = buffer_size - rest;  /* total size */
+
+cleanup:
+	brelse(bh);
+	up_read(&EXT2_I(inode)->xattr_sem);
+
+	return error;
+
+bad_block:
+	ext2_error(inode->i_sb, "ext2_xattr_list",
+		"inode %ld: bad block %d", inode->i_ino,
+		EXT2_I(inode)->i_file_acl);
+	error = -EIO;
+	goto cleanup;
+}
+
+/*
+ * Inode operation listxattr()
+ *
+ * dentry->d_inode->i_sem: don't care
+ */
+/* Thin wrapper: lists the attributes of the dentry's inode via ext2_xattr_list(). */
+ssize_t
+ext2_listxattr(struct dentry *dentry, char *buffer, size_t size)
+{
+ return ext2_xattr_list(dentry->d_inode, buffer, size);
+}
+
+/*
+ * If the EXT2_FEATURE_COMPAT_EXT_ATTR feature of this file system is
+ * not set, set it.
+ */
+static void ext2_xattr_update_super_block(struct super_block *sb)
+{
+	/* Only touch the superblock when the feature bit is still missing. */
+	if (!EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR)) {
+		lock_super(sb);
+		EXT2_SB(sb)->s_es->s_feature_compat |=
+			cpu_to_le32(EXT2_FEATURE_COMPAT_EXT_ATTR);
+		/* Flag both the VFS superblock and its buffer as modified. */
+		sb->s_dirt = 1;
+		mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
+		unlock_super(sb);
+	}
+}
+
+/*
+ * ext2_xattr_set()
+ *
+ * Create, replace or remove an extended attribute for this inode. value
+ * is NULL to remove an existing extended attribute, and non-NULL to
+ * either replace an existing extended attribute, or create a new extended
+ * attribute. The flags XATTR_REPLACE and XATTR_CREATE
+ * specify that an extended attribute must exist and must not exist
+ * previous to the call, respectively.
+ *
+ * The attribute block is searched and edited with the inode's xattr_sem
+ * held for writing. The edited block (either the existing buffer, modified
+ * in place, or a private kmalloc'ed copy) is then handed to
+ * ext2_xattr_set2(), which updates the file system.
+ *
+ * Errors produced directly by this function:
+ *   -EROFS   the inode is read-only
+ *   -EPERM   the inode is immutable or append-only
+ *   -EINVAL  name is NULL
+ *   -ERANGE  name longer than 255 bytes, or value larger than a block
+ *   -EIO     the attribute block cannot be read or fails validation
+ *   -ENODATA XATTR_REPLACE was given but the attribute does not exist
+ *   -EEXIST  XATTR_CREATE was given but the attribute already exists
+ *   -ENOSPC  the name and value do not fit into the block
+ *   -ENOMEM  the temporary block copy cannot be allocated
+ * Removing an attribute that does not exist is not an error.
+ *
+ * Returns 0, or a negative error number on failure.
+ */
+int
+ext2_xattr_set(struct inode *inode, int name_index, const char *name,
+ const void *value, size_t value_len, int flags)
+{
+ struct super_block *sb = inode->i_sb;
+ struct buffer_head *bh = NULL;
+ struct ext2_xattr_header *header = NULL;
+ struct ext2_xattr_entry *here, *last;
+ size_t name_len, free, min_offs = sb->s_blocksize;
+ int not_found = 1, error;
+ char *end;
+
+ /*
+ * header -- Points either into bh, or to a temporarily
+ * allocated buffer.
+ * here -- The named entry found, or the place for inserting, within
+ * the block pointed to by header.
+ * last -- Points right after the last named entry within the block
+ * pointed to by header.
+ * min_offs -- The offset of the first value (values are aligned
+ * towards the end of the block).
+ * end -- Points right after the block pointed to by header.
+ */
+
+ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
+ name_index, name, value, (long)value_len);
+
+ if (IS_RDONLY(inode))
+ return -EROFS;
+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+ return -EPERM;
+ /* A removal request carries no value, whatever length was passed. */
+ if (value == NULL)
+ value_len = 0;
+ if (name == NULL)
+ return -EINVAL;
+ name_len = strlen(name);
+ if (name_len > 255 || value_len > sb->s_blocksize)
+ return -ERANGE;
+ down_write(&EXT2_I(inode)->xattr_sem);
+ if (EXT2_I(inode)->i_file_acl) {
+ /* The inode already has an extended attribute block. */
+ bh = sb_bread(sb, EXT2_I(inode)->i_file_acl);
+ error = -EIO;
+ if (!bh)
+ goto cleanup;
+ ea_bdebug(bh, "b_count=%d, refcount=%d",
+ atomic_read(&(bh->b_count)),
+ le32_to_cpu(HDR(bh)->h_refcount));
+ header = HDR(bh);
+ end = bh->b_data + bh->b_size;
+ if (header->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
+ header->h_blocks != cpu_to_le32(1)) {
+ /* bad_block is also the target of gotos further down. */
+bad_block: ext2_error(sb, "ext2_xattr_set",
+ "inode %ld: bad block %d", inode->i_ino,
+ EXT2_I(inode)->i_file_acl);
+ error = -EIO;
+ goto cleanup;
+ }
+ /* Find the named attribute. */
+ here = FIRST_ENTRY(bh);
+ while (!IS_LAST_ENTRY(here)) {
+ struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(here);
+ if ((char *)next >= end)
+ goto bad_block;
+ if (!here->e_value_block && here->e_value_size) {
+ size_t offs = le16_to_cpu(here->e_value_offs);
+ if (offs < min_offs)
+ min_offs = offs;
+ }
+ /*
+ * Compare by name index, then name length, then name
+ * bytes. The scan stops at the first entry that does
+ * not sort before the requested name: not_found == 0
+ * is an exact match, not_found < 0 marks the insertion
+ * point.
+ */
+ not_found = name_index - here->e_name_index;
+ if (!not_found)
+ not_found = name_len - here->e_name_len;
+ if (!not_found)
+ not_found = memcmp(name, here->e_name,name_len);
+ if (not_found <= 0)
+ break;
+ here = next;
+ }
+ last = here;
+ /* We still need to compute min_offs and last. */
+ while (!IS_LAST_ENTRY(last)) {
+ struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(last);
+ if ((char *)next >= end)
+ goto bad_block;
+ if (!last->e_value_block && last->e_value_size) {
+ size_t offs = le16_to_cpu(last->e_value_offs);
+ if (offs < min_offs)
+ min_offs = offs;
+ }
+ last = next;
+ }
+
+ /* Check whether we have enough space left. */
+ /*
+ * Free space is the gap between the end of the entry table and
+ * the lowest value; sizeof(__u32) presumably reserves the zero
+ * word that IS_LAST_ENTRY() tests for -- TODO confirm.
+ */
+ free = min_offs - ((char*)last - (char*)header) - sizeof(__u32);
+ } else {
+ /* We will use a new extended attribute block. */
+ free = sb->s_blocksize -
+ sizeof(struct ext2_xattr_header) - sizeof(__u32);
+ here = last = NULL; /* avoid gcc uninitialized warning. */
+ }
+
+ if (not_found) {
+ /* Request to remove a nonexistent attribute? */
+ error = -ENODATA;
+ if (flags & XATTR_REPLACE)
+ goto cleanup;
+ error = 0;
+ if (value == NULL)
+ goto cleanup;
+ } else {
+ /* Request to create an existing attribute? */
+ error = -EEXIST;
+ if (flags & XATTR_CREATE)
+ goto cleanup;
+ /*
+ * The space occupied by the old value and the old name counts
+ * as free, because both are replaced or removed below.
+ */
+ if (!here->e_value_block && here->e_value_size) {
+ size_t size = le32_to_cpu(here->e_value_size);
+
+ if (le16_to_cpu(here->e_value_offs) + size >
+ sb->s_blocksize || size > sb->s_blocksize)
+ goto bad_block;
+ free += EXT2_XATTR_SIZE(size);
+ }
+ free += EXT2_XATTR_LEN(name_len);
+ }
+ error = -ENOSPC;
+ if (free < EXT2_XATTR_LEN(name_len) + EXT2_XATTR_SIZE(value_len))
+ goto cleanup;
+
+ /* Here we know that we can set the new attribute. */
+
+ if (header) {
+ struct mb_cache_entry *ce;
+
+ /* assert(header == HDR(bh)); */
+ ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev,
+ bh->b_blocknr);
+ lock_buffer(bh);
+ if (header->h_refcount == cpu_to_le32(1)) {
+ /*
+ * The block is not shared, so it is edited directly. Its
+ * cache entry is dropped first because the contents are
+ * about to change.
+ */
+ ea_bdebug(bh, "modifying in-place");
+ if (ce)
+ mb_cache_entry_free(ce);
+ /* keep the buffer locked while modifying it. */
+ } else {
+ int offset;
+
+ /*
+ * The block is shared: leave it untouched and work on a
+ * private copy with a reference count of 1.
+ */
+ if (ce)
+ mb_cache_entry_release(ce);
+ unlock_buffer(bh);
+ ea_bdebug(bh, "cloning");
+ header = kmalloc(bh->b_size, GFP_KERNEL);
+ error = -ENOMEM;
+ if (header == NULL)
+ goto cleanup;
+ memcpy(header, HDR(bh), bh->b_size);
+ header->h_refcount = cpu_to_le32(1);
+
+ /* Re-base here and last from the buffer onto the copy. */
+ offset = (char *)here - bh->b_data;
+ here = ENTRY((char *)header + offset);
+ offset = (char *)last - bh->b_data;
+ last = ENTRY((char *)header + offset);
+ }
+ } else {
+ /* Allocate a buffer where we construct the new block. */
+ header = kmalloc(sb->s_blocksize, GFP_KERNEL);
+ error = -ENOMEM;
+ if (header == NULL)
+ goto cleanup;
+ memset(header, 0, sb->s_blocksize);
+ end = (char *)header + sb->s_blocksize;
+ header->h_magic = cpu_to_le32(EXT2_XATTR_MAGIC);
+ header->h_blocks = header->h_refcount = cpu_to_le32(1);
+ last = here = ENTRY(header+1);
+ }
+
+ /* Iff we are modifying the block in-place, bh is locked here. */
+
+ if (not_found) {
+ /* Insert the new name. */
+ /* Shift the entries from here up to last to open a slot. */
+ size_t size = EXT2_XATTR_LEN(name_len);
+ size_t rest = (char *)last - (char *)here;
+ memmove((char *)here + size, here, rest);
+ memset(here, 0, size);
+ here->e_name_index = name_index;
+ here->e_name_len = name_len;
+ memcpy(here->e_name, name, name_len);
+ } else {
+ if (!here->e_value_block && here->e_value_size) {
+ char *first_val = (char *)header + min_offs;
+ size_t offs = le16_to_cpu(here->e_value_offs);
+ char *val = (char *)header + offs;
+ size_t size = EXT2_XATTR_SIZE(
+ le32_to_cpu(here->e_value_size));
+
+ if (size == EXT2_XATTR_SIZE(value_len)) {
+ /* The old and the new value have the same
+ size. Just replace. */
+ here->e_value_size = cpu_to_le32(value_len);
+ memset(val + size - EXT2_XATTR_PAD, 0,
+ EXT2_XATTR_PAD); /* Clear pad bytes. */
+ memcpy(val, value, value_len);
+ goto skip_replace;
+ }
+
+ /* Remove the old value. */
+ /*
+ * Values stored below the removed one slide up by its
+ * padded size, closing the hole it leaves.
+ */
+ memmove(first_val + size, first_val, val - first_val);
+ memset(first_val, 0, size);
+ here->e_value_offs = 0;
+ min_offs += size;
+
+ /* Adjust all value offsets. */
+ /* This walk also leaves last at the terminating entry. */
+ last = ENTRY(header+1);
+ while (!IS_LAST_ENTRY(last)) {
+ size_t o = le16_to_cpu(last->e_value_offs);
+ if (!last->e_value_block && o < offs)
+ last->e_value_offs =
+ cpu_to_le16(o + size);
+ last = EXT2_XATTR_NEXT(last);
+ }
+ }
+ if (value == NULL) {
+ /* Remove the old name. */
+ size_t size = EXT2_XATTR_LEN(name_len);
+ last = ENTRY((char *)last - size);
+ memmove(here, (char*)here + size,
+ (char*)last - (char*)here);
+ memset(last, 0, size);
+ }
+ }
+
+ if (value != NULL) {
+ /* Insert the new value. */
+ here->e_value_size = cpu_to_le32(value_len);
+ if (value_len) {
+ /* The new value goes directly below the lowest value. */
+ size_t size = EXT2_XATTR_SIZE(value_len);
+ char *val = (char *)header + min_offs - size;
+ here->e_value_offs =
+ cpu_to_le16((char *)val - (char *)header);
+ memset(val + size - EXT2_XATTR_PAD, 0,
+ EXT2_XATTR_PAD); /* Clear the pad bytes. */
+ memcpy(val, value, value_len);
+ }
+ }
+
+skip_replace:
+ if (IS_LAST_ENTRY(ENTRY(header+1))) {
+ /* This block is now empty. */
+ if (bh && header == HDR(bh))
+ unlock_buffer(bh); /* we were modifying in-place. */
+ error = ext2_xattr_set2(inode, bh, NULL);
+ } else {
+ ext2_xattr_rehash(header, here);
+ if (bh && header == HDR(bh))
+ unlock_buffer(bh); /* we were modifying in-place. */
+ error = ext2_xattr_set2(inode, bh, header);
+ }
+
+cleanup:
+ brelse(bh);
+ /* header is freed only when it is a private copy, not bh's data. */
+ if (!(bh && header == HDR(bh)))
+ kfree(header);
+ up_write(&EXT2_I(inode)->xattr_sem);
+
+ return error;
+}
+
+/*
+ * Second half of ext2_xattr_set(): Update the file system.
+ *
+ * inode  -- the inode whose attribute block pointer is updated.
+ * old_bh -- buffer of the attribute block the inode uses now, or NULL.
+ * header -- the new block contents, or NULL if the inode is to end up
+ *           without an attribute block.
+ *
+ * With a non-NULL header one of three things happens: an identical block
+ * found in the cache is shared (its reference count is raised unless it is
+ * old_bh itself), old_bh is kept because it was modified in place, or a
+ * new block is allocated and filled from header. The inode's i_file_acl
+ * is then pointed at the result, and an old block that is no longer used
+ * is freed or has its reference count dropped.
+ *
+ * Returns 0, or a negative error number on failure.
+ */
+static int
+ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
+ struct ext2_xattr_header *header)
+{
+ struct super_block *sb = inode->i_sb;
+ struct buffer_head *new_bh = NULL;
+ int error;
+
+ if (header) {
+ /* A non-NULL result from the cache lookup is a locked buffer. */
+ new_bh = ext2_xattr_cache_find(inode, header);
+ if (new_bh) {
+ /* We found an identical block in the cache. */
+ if (new_bh == old_bh) {
+ ea_bdebug(new_bh, "keeping this block");
+ } else {
+ /* The old block is released after updating
+ the inode. */
+ ea_bdebug(new_bh, "reusing block");
+
+ /* A shared block is still charged to this inode's quota. */
+ error = -EDQUOT;
+ if (DQUOT_ALLOC_BLOCK(inode, 1)) {
+ unlock_buffer(new_bh);
+ goto cleanup;
+ }
+ HDR(new_bh)->h_refcount = cpu_to_le32(1 +
+ le32_to_cpu(HDR(new_bh)->h_refcount));
+ ea_bdebug(new_bh, "refcount now=%d",
+ le32_to_cpu(HDR(new_bh)->h_refcount));
+ }
+ unlock_buffer(new_bh);
+ } else if (old_bh && header == HDR(old_bh)) {
+ /* Keep this block. No need to lock the block as we
+ don't need to change the reference count. */
+ new_bh = old_bh;
+ /* Extra reference, balanced by brelse(new_bh) at cleanup. */
+ get_bh(new_bh);
+ ext2_xattr_cache_insert(new_bh);
+ } else {
+ /* We need to allocate a new block */
+ /* The goal is the first block of the inode's block group. */
+ int goal = le32_to_cpu(EXT2_SB(sb)->s_es->
+ s_first_data_block) +
+ EXT2_I(inode)->i_block_group *
+ EXT2_BLOCKS_PER_GROUP(sb);
+ int block = ext2_new_block(inode, goal,
+ NULL, NULL, &error);
+ if (error)
+ goto cleanup;
+ ea_idebug(inode, "creating block %d", block);
+
+ new_bh = sb_getblk(sb, block);
+ if (!new_bh) {
+ ext2_free_blocks(inode, block, 1);
+ error = -EIO;
+ goto cleanup;
+ }
+ lock_buffer(new_bh);
+ memcpy(new_bh->b_data, header, new_bh->b_size);
+ set_buffer_uptodate(new_bh);
+ unlock_buffer(new_bh);
+ ext2_xattr_cache_insert(new_bh);
+
+ ext2_xattr_update_super_block(sb);
+ }
+ mark_buffer_dirty(new_bh);
+ if (IS_SYNC(inode)) {
+ sync_dirty_buffer(new_bh);
+ error = -EIO;
+ /*
+ * NOTE(review): on this failure path a quota charge or
+ * refcount increment made above is not undone -- confirm
+ * whether that is intended.
+ */
+ if (buffer_req(new_bh) && !buffer_uptodate(new_bh))
+ goto cleanup;
+ }
+ }
+
+ /* Update the inode. */
+ EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
+ inode->i_ctime = CURRENT_TIME_SEC;
+ if (IS_SYNC(inode)) {
+ error = ext2_sync_inode (inode);
+ /* In case sync failed due to ENOSPC the inode was actually
+ * written (only some dirty data were not) so we just proceed
+ * as if nothing happened and cleanup the unused block */
+ if (error && error != -ENOSPC) {
+ if (new_bh && new_bh != old_bh)
+ DQUOT_FREE_BLOCK(inode, 1);
+ goto cleanup;
+ }
+ } else
+ mark_inode_dirty(inode);
+
+ error = 0;
+ if (old_bh && old_bh != new_bh) {
+ struct mb_cache_entry *ce;
+
+ /*
+ * If there was an old block and we are no longer using it,
+ * release the old block.
+ */
+ ce = mb_cache_entry_get(ext2_xattr_cache, old_bh->b_bdev,
+ old_bh->b_blocknr);
+ lock_buffer(old_bh);
+ if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
+ /* Free the old block. */
+ if (ce)
+ mb_cache_entry_free(ce);
+ ea_bdebug(old_bh, "freeing");
+ ext2_free_blocks(inode, old_bh->b_blocknr, 1);
+ /* We let our caller release old_bh, so we
+ * need to duplicate the buffer before. */
+ get_bh(old_bh);
+ bforget(old_bh);
+ } else {
+ /* Decrement the refcount only. */
+ HDR(old_bh)->h_refcount = cpu_to_le32(
+ le32_to_cpu(HDR(old_bh)->h_refcount) - 1);
+ if (ce)
+ mb_cache_entry_release(ce);
+ DQUOT_FREE_BLOCK(inode, 1);
+ mark_buffer_dirty(old_bh);
+ ea_bdebug(old_bh, "refcount now=%d",
+ le32_to_cpu(HDR(old_bh)->h_refcount));
+ }
+ unlock_buffer(old_bh);
+ }
+
+cleanup:
+ /* Only new_bh is released here; old_bh stays with the caller. */
+ brelse(new_bh);
+
+ return error;
+}
+
+/*
+ * ext2_xattr_delete_inode()
+ *
+ * Free extended attribute resources associated with this inode. This
+ * is called immediately before an inode is freed.
+ *
+ * If the inode has an attribute block, the block is read and validated.
+ * A block with a reference count of 1 is freed together with its cache
+ * entry; a shared block only has its reference count decremented and the
+ * inode's quota charge released. In both cases i_file_acl is cleared.
+ * A read error or a bad block is reported through ext2_error() and the
+ * inode is left unchanged.
+ *
+ * The inode's xattr_sem is held for writing for the whole operation.
+ */
+void
+ext2_xattr_delete_inode(struct inode *inode)
+{
+	struct buffer_head *bh = NULL;
+	struct mb_cache_entry *ce;
+
+	down_write(&EXT2_I(inode)->xattr_sem);
+	if (!EXT2_I(inode)->i_file_acl)
+		goto cleanup;
+	bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl);
+	if (!bh) {
+		ext2_error(inode->i_sb, "ext2_xattr_delete_inode",
+			"inode %ld: block %d read error", inode->i_ino,
+			EXT2_I(inode)->i_file_acl);
+		goto cleanup;
+	}
+	ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count)));
+	if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
+	    HDR(bh)->h_blocks != cpu_to_le32(1)) {
+		ext2_error(inode->i_sb, "ext2_xattr_delete_inode",
+			"inode %ld: bad block %d", inode->i_ino,
+			EXT2_I(inode)->i_file_acl);
+		goto cleanup;
+	}
+	ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev, bh->b_blocknr);
+	lock_buffer(bh);
+	if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
+		/* Last user: drop the cache entry and free the block. */
+		if (ce)
+			mb_cache_entry_free(ce);
+		ea_bdebug(bh, "freeing");
+		ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1);
+		/* bforget() drops a reference; keep one for cleanup. */
+		get_bh(bh);
+		bforget(bh);
+		unlock_buffer(bh);
+	} else {
+		/* Block is shared: only give up this inode's reference. */
+		HDR(bh)->h_refcount = cpu_to_le32(
+			le32_to_cpu(HDR(bh)->h_refcount) - 1);
+		if (ce)
+			mb_cache_entry_release(ce);
+		ea_bdebug(bh, "refcount now=%d",
+			le32_to_cpu(HDR(bh)->h_refcount));
+		/*
+		 * The buffer lock must be dropped before syncing:
+		 * sync_dirty_buffer() takes the buffer lock itself, so
+		 * calling it with the lock held would deadlock.
+		 */
+		unlock_buffer(bh);
+		mark_buffer_dirty(bh);
+		if (IS_SYNC(inode))
+			sync_dirty_buffer(bh);
+		DQUOT_FREE_BLOCK(inode, 1);
+	}
+	EXT2_I(inode)->i_file_acl = 0;
+
+cleanup:
+	brelse(bh);
+	up_write(&EXT2_I(inode)->xattr_sem);
+}
+
+/*
+ * ext2_xattr_put_super()
+ *
+ * This is called when a file system is unmounted.
+ *
+ * Calls mb_cache_shrink() on the extended attribute cache for the
+ * superblock's block device (sb->s_bdev).
+ */
+void
+ext2_xattr_put_super(struct super_block *sb)
+{
+ mb_cache_shrink(ext2_xattr_cache, sb->s_bdev);
+}
+
+
+/*
+ * ext2_xattr_cache_insert()
+ *
+ * Register the attribute block held in bh with the extended attribute
+ * cache, keyed by the block's h_hash. A block that is already cached
+ * (-EBUSY from the insert) is not treated as an error.
+ *
+ * Returns 0, or a negative error number on failure.
+ */
+static int
+ext2_xattr_cache_insert(struct buffer_head *bh)
+{
+	__u32 hash = le32_to_cpu(HDR(bh)->h_hash);
+	struct mb_cache_entry *entry;
+	int err;
+
+	entry = mb_cache_entry_alloc(ext2_xattr_cache);
+	if (!entry)
+		return -ENOMEM;
+
+	err = mb_cache_entry_insert(entry, bh->b_bdev, bh->b_blocknr, &hash);
+	if (!err) {
+		ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash,
+			  atomic_read(&ext2_xattr_cache->c_entry_count));
+		mb_cache_entry_release(entry);
+		return 0;
+	}
+
+	/* The insert failed, so the unused entry goes straight back. */
+	mb_cache_entry_free(entry);
+	if (err != -EBUSY)
+		return err;
+
+	ea_bdebug(bh, "already in cache (%d cache entries)",
+		  atomic_read(&ext2_xattr_cache->c_entry_count));
+	return 0;
+}
+
+/*
+ * ext2_xattr_cmp()
+ *
+ * Walk the entry tables of two extended attribute blocks in step and
+ * decide whether the blocks hold the same attributes with the same
+ * values.
+ *
+ * Returns 0 if the blocks are equal, 1 if they differ, and -EIO if an
+ * entry with matching metadata has a non-zero e_value_block.
+ */
+static int
+ext2_xattr_cmp(struct ext2_xattr_header *header1,
+	       struct ext2_xattr_header *header2)
+{
+	struct ext2_xattr_entry *lhs = ENTRY(header1+1);
+	struct ext2_xattr_entry *rhs = ENTRY(header2+1);
+
+	for (; !IS_LAST_ENTRY(lhs);
+	     lhs = EXT2_XATTR_NEXT(lhs), rhs = EXT2_XATTR_NEXT(rhs)) {
+		/* The second block ran out of entries first. */
+		if (IS_LAST_ENTRY(rhs))
+			return 1;
+
+		/* Cheap metadata checks first, the name bytes last. */
+		if (lhs->e_hash != rhs->e_hash)
+			return 1;
+		if (lhs->e_name_index != rhs->e_name_index)
+			return 1;
+		if (lhs->e_name_len != rhs->e_name_len)
+			return 1;
+		if (lhs->e_value_size != rhs->e_value_size)
+			return 1;
+		if (memcmp(lhs->e_name, rhs->e_name, lhs->e_name_len))
+			return 1;
+
+		if (lhs->e_value_block != 0 || rhs->e_value_block != 0)
+			return -EIO;
+
+		if (memcmp((char *)header1 + le16_to_cpu(lhs->e_value_offs),
+			   (char *)header2 + le16_to_cpu(rhs->e_value_offs),
+			   le32_to_cpu(lhs->e_value_size)))
+			return 1;
+	}
+
+	/* Equal only if the second block ends here as well. */
+	return IS_LAST_ENTRY(rhs) ? 0 : 1;
+}
+
+/*
+ * ext2_xattr_cache_find()
+ *
+ * Look through the cache entries that carry the same hash as header for
+ * a block whose contents are identical to header. Blocks whose
+ * reference count already exceeds EXT2_XATTR_REFCOUNT_MAX are passed
+ * over. A header with a zero h_hash is never shared.
+ *
+ * Returns a locked buffer head to the block found, or NULL if such
+ * a block was not found or an error occurred.
+ */
+static struct buffer_head *
+ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header)
+{
+	__u32 hash = le32_to_cpu(header->h_hash);
+	struct mb_cache_entry *ce;
+	struct buffer_head *bh;
+
+	if (!header->h_hash)
+		return NULL;  /* never share */
+	ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
+
+again:
+	for (ce = mb_cache_entry_find_first(ext2_xattr_cache, 0,
+					    inode->i_sb->s_bdev, hash);
+	     ce;
+	     ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash)) {
+		if (IS_ERR(ce)) {
+			/* -EAGAIN restarts the search from the beginning. */
+			if (PTR_ERR(ce) == -EAGAIN)
+				goto again;
+			return NULL;
+		}
+
+		bh = sb_bread(inode->i_sb, ce->e_block);
+		if (!bh) {
+			ext2_error(inode->i_sb, "ext2_xattr_cache_find",
+				"inode %ld: block %ld read error",
+				inode->i_ino, (unsigned long) ce->e_block);
+			continue;
+		}
+
+		lock_buffer(bh);
+		if (le32_to_cpu(HDR(bh)->h_refcount) >
+		    EXT2_XATTR_REFCOUNT_MAX) {
+			ea_idebug(inode, "block %ld refcount %d>%d",
+				  (unsigned long) ce->e_block,
+				  le32_to_cpu(HDR(bh)->h_refcount),
+				  EXT2_XATTR_REFCOUNT_MAX);
+		} else if (!ext2_xattr_cmp(header, HDR(bh))) {
+			/* Match: hand the buffer back still locked. */
+			ea_bdebug(bh, "b_count=%d",
+				  atomic_read(&(bh->b_count)));
+			mb_cache_entry_release(ce);
+			return bh;
+		}
+		unlock_buffer(bh);
+		brelse(bh);
+	}
+	return NULL;
+}
+
+#define NAME_HASH_SHIFT 5
+#define VALUE_HASH_SHIFT 16
+
+/*
+ * ext2_xattr_hash_entry()
+ *
+ * Compute the hash of one extended attribute and store it in the
+ * entry's e_hash field. The name bytes are folded in first. If the
+ * value is stored in this block and is not empty, its 32-bit
+ * little-endian words (value size rounded up to the pad size) are
+ * folded in as well.
+ */
+static inline void ext2_xattr_hash_entry(struct ext2_xattr_header *header,
+					 struct ext2_xattr_entry *entry)
+{
+	__u32 acc = 0;
+	char *name = entry->e_name;
+	int i;
+
+	for (i = 0; i < entry->e_name_len; i++)
+		acc = (acc << NAME_HASH_SHIFT) ^
+		      (acc >> (8*sizeof(acc) - NAME_HASH_SHIFT)) ^
+		      name[i];
+
+	if (entry->e_value_block == 0 && entry->e_value_size != 0) {
+		__le32 *word = (__le32 *)((char *)header +
+			le16_to_cpu(entry->e_value_offs));
+		int words = (le32_to_cpu(entry->e_value_size) +
+			     EXT2_XATTR_ROUND) >> EXT2_XATTR_PAD_BITS;
+
+		while (words) {
+			acc = (acc << VALUE_HASH_SHIFT) ^
+			      (acc >> (8*sizeof(acc) - VALUE_HASH_SHIFT)) ^
+			      le32_to_cpu(*word++);
+			words--;
+		}
+	}
+	entry->e_hash = cpu_to_le32(acc);
+}
+
+#undef NAME_HASH_SHIFT
+#undef VALUE_HASH_SHIFT
+
+#define BLOCK_HASH_SHIFT 16
+
+/*
+ * ext2_xattr_rehash()
+ *
+ * Refresh the hash of the entry that changed, then rebuild the block
+ * hash in header->h_hash from the hashes of all entries. If any entry
+ * has a zero hash, the block hash becomes zero as well.
+ */
+static void ext2_xattr_rehash(struct ext2_xattr_header *header,
+			      struct ext2_xattr_entry *entry)
+{
+	struct ext2_xattr_entry *cur;
+	__u32 block_hash = 0;
+
+	ext2_xattr_hash_entry(header, entry);
+
+	for (cur = ENTRY(header+1); !IS_LAST_ENTRY(cur);
+	     cur = EXT2_XATTR_NEXT(cur)) {
+		if (!cur->e_hash) {
+			/* Block is not shared if an entry's hash value == 0 */
+			block_hash = 0;
+			break;
+		}
+		block_hash = (block_hash << BLOCK_HASH_SHIFT) ^
+			     (block_hash >> (8*sizeof(block_hash) - BLOCK_HASH_SHIFT)) ^
+			     le32_to_cpu(cur->e_hash);
+	}
+	header->h_hash = cpu_to_le32(block_hash);
+}
+
+#undef BLOCK_HASH_SHIFT
+
+/*
+ * Create the "ext2_xattr" meta block cache used for sharing attribute
+ * blocks. Each cache entry is sized to carry one e_indexes[] slot.
+ *
+ * Returns 0 on success, or -ENOMEM if the cache cannot be created.
+ */
+int __init
+init_ext2_xattr(void)
+{
+	const size_t entry_size = sizeof(struct mb_cache_entry) +
+		sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]);
+
+	ext2_xattr_cache = mb_cache_create("ext2_xattr", NULL,
+					   entry_size, 1, 6);
+	return ext2_xattr_cache ? 0 : -ENOMEM;
+}
+
+/* Destroy the extended attribute cache created by init_ext2_xattr(). */
+void
+exit_ext2_xattr(void)
+{
+ mb_cache_destroy(ext2_xattr_cache);
+}
diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h
new file mode 100644
index 000000000000..5f3bfde3b810
--- /dev/null
+++ b/fs/ext2/xattr.h
@@ -0,0 +1,118 @@
+/*
+ File: linux/fs/ext2/xattr.h
+
+ On-disk format of extended attributes for the ext2 filesystem.
+
+ (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
+*/
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/xattr.h>
+
+/* Magic value in attribute blocks */
+#define EXT2_XATTR_MAGIC 0xEA020000
+
+/* Maximum number of references to one attribute block */
+#define EXT2_XATTR_REFCOUNT_MAX 1024
+
+/* Name indexes */
+#define EXT2_XATTR_INDEX_USER 1
+#define EXT2_XATTR_INDEX_POSIX_ACL_ACCESS 2
+#define EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT 3
+#define EXT2_XATTR_INDEX_TRUSTED 4
+#define EXT2_XATTR_INDEX_LUSTRE 5
+#define EXT2_XATTR_INDEX_SECURITY 6
+
+/*
+ * Header found at the start of an extended attribute block. All fields
+ * except h_reserved are declared little-endian.
+ */
+struct ext2_xattr_header {
+ __le32 h_magic; /* magic number for identification */
+ __le32 h_refcount; /* reference count */
+ __le32 h_blocks; /* number of disk blocks used */
+ __le32 h_hash; /* hash value of all attributes */
+ __u32 h_reserved[4]; /* zero right now */
+};
+
+/*
+ * Describes one extended attribute. The fixed part is followed directly
+ * by e_name_len bytes of name; EXT2_XATTR_LEN() gives the padded size of
+ * a whole entry.
+ */
+struct ext2_xattr_entry {
+ __u8 e_name_len; /* length of name */
+ __u8 e_name_index; /* attribute name index */
+ __le16 e_value_offs; /* offset in disk block of value */
+ /* NOTE(review): "(n/i)" below presumably means "not implemented" -- confirm. */
+ __le32 e_value_block; /* disk block attribute is stored on (n/i) */
+ __le32 e_value_size; /* size of attribute value */
+ __le32 e_hash; /* hash value of name and value */
+ char e_name[0]; /* attribute name */
+};
+
+/*
+ * Size and alignment helpers. Entries and values are padded to
+ * EXT2_XATTR_PAD (1 << 2 = 4) bytes.
+ */
+#define EXT2_XATTR_PAD_BITS 2
+#define EXT2_XATTR_PAD (1<<EXT2_XATTR_PAD_BITS)
+/* Mask used to round a size up to the next multiple of EXT2_XATTR_PAD. */
+#define EXT2_XATTR_ROUND (EXT2_XATTR_PAD-1)
+/* Padded size of an entry (fixed part plus name) with a name_len-byte name. */
+#define EXT2_XATTR_LEN(name_len) \
+ (((name_len) + EXT2_XATTR_ROUND + \
+ sizeof(struct ext2_xattr_entry)) & ~EXT2_XATTR_ROUND)
+/* Address of the entry that follows the given entry. */
+#define EXT2_XATTR_NEXT(entry) \
+ ( (struct ext2_xattr_entry *)( \
+ (char *)(entry) + EXT2_XATTR_LEN((entry)->e_name_len)) )
+/* A value size rounded up to a multiple of EXT2_XATTR_PAD. */
+#define EXT2_XATTR_SIZE(size) \
+ (((size) + EXT2_XATTR_ROUND) & ~EXT2_XATTR_ROUND)
+
+# ifdef CONFIG_EXT2_FS_XATTR
+
+/* One handler per attribute name space. */
+extern struct xattr_handler ext2_xattr_user_handler;
+extern struct xattr_handler ext2_xattr_trusted_handler;
+extern struct xattr_handler ext2_xattr_acl_access_handler;
+extern struct xattr_handler ext2_xattr_acl_default_handler;
+extern struct xattr_handler ext2_xattr_security_handler;
+
+/* Inode operation listxattr(). */
+extern ssize_t ext2_listxattr(struct dentry *, char *, size_t);
+
+/* Read or modify one attribute; the int after the inode is the name index. */
+extern int ext2_xattr_get(struct inode *, int, const char *, void *, size_t);
+extern int ext2_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
+
+/* Hooks for inode deletion and file system unmount. */
+extern void ext2_xattr_delete_inode(struct inode *);
+extern void ext2_xattr_put_super(struct super_block *);
+
+/* Set up and tear down the attribute block cache. */
+extern int init_ext2_xattr(void);
+extern void exit_ext2_xattr(void);
+
+extern struct xattr_handler *ext2_xattr_handlers[];
+
+# else /* CONFIG_EXT2_FS_XATTR */
+
+/* Stub used without CONFIG_EXT2_FS_XATTR: reads always fail with -EOPNOTSUPP. */
+static inline int
+ext2_xattr_get(struct inode *inode, int name_index,
+ const char *name, void *buffer, size_t size)
+{
+ return -EOPNOTSUPP;
+}
+
+/* Stub used without CONFIG_EXT2_FS_XATTR: writes always fail with -EOPNOTSUPP. */
+static inline int
+ext2_xattr_set(struct inode *inode, int name_index, const char *name,
+ const void *value, size_t size, int flags)
+{
+ return -EOPNOTSUPP;
+}
+
+/* Stub used without CONFIG_EXT2_FS_XATTR: does nothing. */
+static inline void
+ext2_xattr_delete_inode(struct inode *inode)
+{
+}
+
+/* Stub used without CONFIG_EXT2_FS_XATTR: does nothing. */
+static inline void
+ext2_xattr_put_super(struct super_block *sb)
+{
+}
+
+/* Stub used without CONFIG_EXT2_FS_XATTR: nothing to set up, always returns 0. */
+static inline int
+init_ext2_xattr(void)
+{
+ return 0;
+}
+
+/* Stub used without CONFIG_EXT2_FS_XATTR: does nothing. */
+static inline void
+exit_ext2_xattr(void)
+{
+}
+
+#define ext2_xattr_handlers NULL
+
+# endif /* CONFIG_EXT2_FS_XATTR */
+
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
new file mode 100644
index 000000000000..6a6c59fbe599
--- /dev/null
+++ b/fs/ext2/xattr_security.c
@@ -0,0 +1,53 @@
+/*
+ * linux/fs/ext2/xattr_security.c
+ * Handler for storing security labels as extended attributes.
+ */
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/fs.h>
+#include <linux/smp_lock.h>
+#include <linux/ext2_fs.h>
+#include "xattr.h"
+
+/*
+ * Emit the full name (XATTR_SECURITY_PREFIX followed by name and a
+ * terminating NUL) into list, provided list is non-NULL and list_size
+ * is large enough.
+ *
+ * Returns the number of bytes the full name needs, including the NUL,
+ * whether or not anything was written.
+ */
+static size_t
+ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size,
+			 const char *name, size_t name_len)
+{
+	const int prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1;
+	const size_t needed = prefix_len + name_len + 1;
+	char *tail;
+
+	if (!list || needed > list_size)
+		return needed;
+
+	memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
+	tail = list + prefix_len;
+	memcpy(tail, name, name_len);
+	tail[name_len] = '\0';
+	return needed;
+}
+
+/*
+ * Read a security attribute. An empty name is rejected with -EINVAL;
+ * otherwise the result of ext2_xattr_get() for the security name index
+ * is returned.
+ */
+static int
+ext2_xattr_security_get(struct inode *inode, const char *name,
+			void *buffer, size_t size)
+{
+	if (name[0] == '\0')
+		return -EINVAL;
+
+	return ext2_xattr_get(inode, EXT2_XATTR_INDEX_SECURITY, name,
+			      buffer, size);
+}
+
+/*
+ * Write a security attribute. An empty name is rejected with -EINVAL;
+ * otherwise the result of ext2_xattr_set() for the security name index
+ * is returned.
+ */
+static int
+ext2_xattr_security_set(struct inode *inode, const char *name,
+			const void *value, size_t size, int flags)
+{
+	if (name[0] == '\0')
+		return -EINVAL;
+
+	return ext2_xattr_set(inode, EXT2_XATTR_INDEX_SECURITY, name,
+			      value, size, flags);
+}
+
+/* Binds the XATTR_SECURITY_PREFIX name space to the callbacks in this file. */
+struct xattr_handler ext2_xattr_security_handler = {
+ .prefix = XATTR_SECURITY_PREFIX,
+ .list = ext2_xattr_security_list,
+ .get = ext2_xattr_security_get,
+ .set = ext2_xattr_security_set,
+};
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c
new file mode 100644
index 000000000000..52b30ee6a25f
--- /dev/null
+++ b/fs/ext2/xattr_trusted.c
@@ -0,0 +1,64 @@
+/*
+ * linux/fs/ext2/xattr_trusted.c
+ * Handler for trusted extended attributes.
+ *
+ * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
+ */
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/fs.h>
+#include <linux/smp_lock.h>
+#include <linux/ext2_fs.h>
+#include "xattr.h"
+
+#define XATTR_TRUSTED_PREFIX "trusted."
+
+/*
+ * Emit the full name (XATTR_TRUSTED_PREFIX followed by name and a
+ * terminating NUL) into list, provided list is non-NULL and list_size
+ * is large enough.
+ *
+ * Callers without CAP_SYS_ADMIN get 0 and nothing is written. Otherwise
+ * returns the number of bytes the full name needs, including the NUL,
+ * whether or not anything was written.
+ */
+static size_t
+ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
+			const char *name, size_t name_len)
+{
+	const int prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1;
+	const size_t needed = prefix_len + name_len + 1;
+	char *tail;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return 0;
+
+	if (!list || needed > list_size)
+		return needed;
+
+	memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
+	tail = list + prefix_len;
+	memcpy(tail, name, name_len);
+	tail[name_len] = '\0';
+	return needed;
+}
+
+/*
+ * Read a trusted attribute. An empty name yields -EINVAL and a caller
+ * without CAP_SYS_ADMIN yields -EPERM, checked in that order; otherwise
+ * the result of ext2_xattr_get() for the trusted name index is returned.
+ */
+static int
+ext2_xattr_trusted_get(struct inode *inode, const char *name,
+		       void *buffer, size_t size)
+{
+	if (name[0] == '\0')
+		return -EINVAL;
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	return ext2_xattr_get(inode, EXT2_XATTR_INDEX_TRUSTED, name,
+			      buffer, size);
+}
+
+/*
+ * Write a trusted attribute. An empty name yields -EINVAL and a caller
+ * without CAP_SYS_ADMIN yields -EPERM, checked in that order; otherwise
+ * the result of ext2_xattr_set() for the trusted name index is returned.
+ */
+static int
+ext2_xattr_trusted_set(struct inode *inode, const char *name,
+		       const void *value, size_t size, int flags)
+{
+	if (name[0] == '\0')
+		return -EINVAL;
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	return ext2_xattr_set(inode, EXT2_XATTR_INDEX_TRUSTED, name,
+			      value, size, flags);
+}
+
+/* Binds the XATTR_TRUSTED_PREFIX name space to the callbacks in this file. */
+struct xattr_handler ext2_xattr_trusted_handler = {
+ .prefix = XATTR_TRUSTED_PREFIX,
+ .list = ext2_xattr_trusted_list,
+ .get = ext2_xattr_trusted_get,
+ .set = ext2_xattr_trusted_set,
+};
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c
new file mode 100644
index 000000000000..0c03ea131a94
--- /dev/null
+++ b/fs/ext2/xattr_user.c
@@ -0,0 +1,77 @@
+/*
+ * linux/fs/ext2/xattr_user.c
+ * Handler for extended user attributes.
+ *
+ * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include "ext2.h"
+#include "xattr.h"
+
+#define XATTR_USER_PREFIX "user."
+
+/*
+ * Emit the full name (XATTR_USER_PREFIX followed by name and a
+ * terminating NUL) into list, provided list is non-NULL and list_size
+ * is large enough.
+ *
+ * Returns 0 without writing anything when the XATTR_USER option is not
+ * set for the file system. Otherwise returns the number of bytes the
+ * full name needs, including the NUL, whether or not anything was
+ * written.
+ */
+static size_t
+ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size,
+		     const char *name, size_t name_len)
+{
+	const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1;
+	const size_t needed = prefix_len + name_len + 1;
+	char *tail;
+
+	if (!test_opt(inode->i_sb, XATTR_USER))
+		return 0;
+
+	if (!list || needed > list_size)
+		return needed;
+
+	memcpy(list, XATTR_USER_PREFIX, prefix_len);
+	tail = list + prefix_len;
+	memcpy(tail, name, name_len);
+	tail[name_len] = '\0';
+	return needed;
+}
+
+/*
+ * Read a user attribute. The checks run in this order: an empty name
+ * yields -EINVAL, a file system without the XATTR_USER option yields
+ * -EOPNOTSUPP, and a failed MAY_READ permission check returns that
+ * check's error. Otherwise the result of ext2_xattr_get() for the user
+ * name index is returned.
+ */
+static int
+ext2_xattr_user_get(struct inode *inode, const char *name,
+		    void *buffer, size_t size)
+{
+	int err;
+
+	if (name[0] == '\0')
+		return -EINVAL;
+	if (!test_opt(inode->i_sb, XATTR_USER))
+		return -EOPNOTSUPP;
+
+	err = permission(inode, MAY_READ, NULL);
+	if (err)
+		return err;
+
+	return ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, name, buffer, size);
+}
+
+/*
+ * Write a user attribute. The checks run in this order: an empty name
+ * yields -EINVAL, and a file system without the XATTR_USER option yields
+ * -EOPNOTSUPP. User attributes are accepted only on regular files and on
+ * directories without the sticky bit; anything else yields -EPERM. A
+ * failed MAY_WRITE permission check returns that check's error.
+ * Otherwise the result of ext2_xattr_set() for the user name index is
+ * returned.
+ */
+static int
+ext2_xattr_user_set(struct inode *inode, const char *name,
+		    const void *value, size_t size, int flags)
+{
+	int err;
+
+	if (name[0] == '\0')
+		return -EINVAL;
+	if (!test_opt(inode->i_sb, XATTR_USER))
+		return -EOPNOTSUPP;
+
+	if (!S_ISREG(inode->i_mode)) {
+		if (!S_ISDIR(inode->i_mode) || (inode->i_mode & S_ISVTX))
+			return -EPERM;
+	}
+
+	err = permission(inode, MAY_WRITE, NULL);
+	if (err)
+		return err;
+
+	return ext2_xattr_set(inode, EXT2_XATTR_INDEX_USER, name,
+			      value, size, flags);
+}
+
+/* Binds the XATTR_USER_PREFIX name space to the callbacks in this file. */
+struct xattr_handler ext2_xattr_user_handler = {
+ .prefix = XATTR_USER_PREFIX,
+ .list = ext2_xattr_user_list,
+ .get = ext2_xattr_user_get,
+ .set = ext2_xattr_user_set,
+};