From c002f42543e155dd2b5b5039ea2637ab26c82513 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Thu, 3 Feb 2005 12:02:56 +0000
Subject: NTFS: - Add disable_sparse mount option together with a per volume
 sparse enable bit which is set appropriately and a per inode sparse
 disable bit which is preset on some system file inodes as appropriate.
       - Enforce that sparse support is disabled on NTFS volumes pre 3.0.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 Documentation/filesystems/ntfs.txt | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'Documentation')
diff --git a/Documentation/filesystems/ntfs.txt b/Documentation/filesystems/ntfs.txt
index f89b440fad1d..cb3cb8c06e9d 100644
--- a/Documentation/filesystems/ntfs.txt
+++ b/Documentation/filesystems/ntfs.txt
@@ -21,7 +21,7 @@ Overview
 ========
 
 Linux-NTFS comes with a number of user-space programs known as ntfsprogs.
-These include mkntfs, a full-featured ntfs file system format utility,
+These include mkntfs, a full-featured ntfs filesystem format utility,
 ntfsundelete used for recovering files that were unintentionally deleted
 from an NTFS volume and ntfsresize which is used to resize an NTFS partition.
 See the web site for more information.
@@ -149,7 +149,14 @@ case_sensitive=<BOOL>	If case_sensitive is specified, treat all file names as
 			name, if it exists.  If case_sensitive, you will need
 			to provide the correct case of the short file name.
 
-errors=opt		What to do when critical file system errors are found.
+disable_sparse=<BOOL>	If disable_sparse is specified, creation of sparse
+			regions, i.e. holes, inside files is disabled for the
+			volume (for the duration of this mount only).  By
+			default, creation of sparse regions is enabled, which
+			is consistent with the behaviour of traditional Unix
+			filesystems.
+
+errors=opt		What to do when critical filesystem errors are found.
 			Following values can be used for "opt":
 			  continue: DEFAULT, try to clean-up as much as
 				    possible, e.g. marking a corrupt inode as
-- 
cgit v1.2.3


From af859a42d798f047fbfe198ed315a942662c39d2 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Sat, 25 Jun 2005 21:07:27 +0100
Subject: NTFS: Prepare for 2.1.23 release: Update documentation and bump
 version.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 Documentation/filesystems/ntfs.txt | 15 +++++++++++++++
 fs/ntfs/ChangeLog                  | 25 ++++++-------------------
 fs/ntfs/Makefile                   |  2 +-
 fs/ntfs/attrib.c                   |  2 +-
 4 files changed, 23 insertions(+), 21 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/filesystems/ntfs.txt b/Documentation/filesystems/ntfs.txt
index cb3cb8c06e9d..1415b96ed491 100644
--- a/Documentation/filesystems/ntfs.txt
+++ b/Documentation/filesystems/ntfs.txt
@@ -439,6 +439,21 @@ ChangeLog
 
 Note, a technical ChangeLog aimed at kernel hackers is in fs/ntfs/ChangeLog.
 
+2.1.23:
+	- Stamp the user space journal, aka transaction log, aka $UsnJrnl, if
+	  it is present and active thus telling Windows and applications using
+	  the transaction log that changes can have happened on the volume
+	  which are not recorded in $UsnJrnl.
+	- Detect the case when Windows has been hibernated (suspended to disk)
+	  and if this is the case do not allow (re)mounting read-write to
+	  prevent data corruption when you boot back into the suspended
+	  Windows session.
+	- Implement extension of resident files using the normal file write
+	  code paths, i.e. most very small files can be extended to be a little
+	  bit bigger but not by much.
+	- Improve handling of ntfs volumes with errors and strange boot sectors
+	  in particular.
+	- Fix various bugs.
 2.1.22:
 	- Improve handling of ntfs volumes with errors.
 	- Fix various bugs and race conditions.
diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index a6d2b943a148..3d2cac4061d6 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -22,35 +22,22 @@ ToDo/Notes:
 	- Enable the code for setting the NT4 compatibility flag when we start
 	  making NTFS 1.2 specific modifications.
 
-2.1.23-WIP
+2.1.23 - Implement extension of resident files and make writing safe as well as
+	 many bug fixes, cleanups, and enhancements...
 
 	- Add printk rate limiting for ntfs_warning() and ntfs_error() when
 	  compiled without debug.  This avoids a possible denial of service
 	  attack.  Thanks to Carl-Daniel Hailfinger from SuSE for pointing this
 	  out.
 	- Fix compilation warnings on ia64.  (Randy Dunlap)
-	- Use i_size_read() in fs/ntfs/attrib.c::ntfs_attr_set().
-	- Use i_size_read() in fs/ntfs/logfile.c::ntfs_{check,empty}_logfile().
-	- Use i_size_read() once and then use the cached value in
-	  fs/ntfs/lcnalloc.c::ntfs_cluster_alloc().
-	- Use i_size_read() in fs/ntfs/file.c::ntfs_file_open().
+	- Use i_size_{read,write}() instead of reading i_size by hand and cache
+	  the value where appropriate.
 	- Add size_lock to the ntfs_inode structure.  This is an rw spinlock
 	  and it locks against access to the inode sizes.  Note, ->size_lock
 	  is also accessed from irq context so you must use the _irqsave and
-	  _irqrestore lock and unlock functions, respectively.
-	- Use i_size_read() in fs/ntfs/compress.c at the start of the read and
-	  use the cached value afterwards.  Cache the initialized_size in the
-	  same way and protect access to the two sizes using the size_lock.
-	- Use i_size_read() in fs/ntfs/dir.c once and then use the cached
-	  value afterwards.
-	- Use i_size_read() in fs/ntfs/super.c once and then use the cached
-	  value afterwards.  Cache the initialized_size in the same way and
-	  protect access to the two sizes using the size_lock.
+	  _irqrestore lock and unlock functions, respectively.  Protect all
+	  accesses to allocated_size, initialized_size, and compressed_size.
 	- Minor optimization to fs/ntfs/super.c::ntfs_statfs() and its helpers.
-	- Use i_size_read() in fs/ntfs/inode.c once and then use the cached
-	  value afterwards when reading the size of the bitmap inode.
-	- Use i_size_{read,write}() in fs/ntfs/{aops.c,mft.c} and protect
-	  access to the i_size and other size fields using the size_lock.
 	- Implement extension of resident files in the regular file write code
 	  paths (fs/ntfs/aops.c::ntfs_{prepare,commit}_write()).  At present
 	  this only works until the data attribute becomes too big for the mft
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
index 59f9606a82a1..f083f27d8b69 100644
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@@ -6,7 +6,7 @@ ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
 	     index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \
 	     unistr.o upcase.o
 
-EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.23-WIP\"
+EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.23\"
 
 ifeq ($(CONFIG_NTFS_DEBUG),y)
 EXTRA_CFLAGS += -DDEBUG
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 543d47fa5fc9..cd0f9e740b14 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1324,7 +1324,7 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
 		if (IS_ERR(rl)) {
 			err = PTR_ERR(rl);
 			ntfs_debug("Failed to allocate cluster%s, error code "
-					"%i.\n", (new_size >>
+					"%i.", (new_size >>
 					vol->cluster_size_bits) > 1 ? "s" : "",
 					err);
 			goto page_err_out;
-- 
cgit v1.2.3


From ba6d2377c85c9b8a793f455d8c9b6cf31985d70f Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Sun, 26 Jun 2005 22:12:02 +0100
Subject: NTFS: Fix a nasty deadlock that appeared in recent kernels.

The situation: VFS inode X on a mounted ntfs volume is dirty.  For
same inode X, the ntfs_inode is dirty and thus corresponding on-disk
inode, i.e. mft record, which is in a dirty PAGE_CACHE_PAGE belonging
to the table of inodes, i.e. $MFT, inode 0.

What happens:

Process 1: sys_sync()/umount()/whatever...  calls
__sync_single_inode() for $MFT -> do_writepages() -> write_page for
the dirty page containing the on-disk inode X, the page is now locked
-> ntfs_write_mst_block() which clears PageUptodate() on the page to
prevent anyone else getting hold of it whilst it does the write out.
This is necessary as the on-disk inode needs "fixups" applied before
the write to disk which are removed again after the write and
PageUptodate is then set again.  It then analyses the page looking
for dirty on-disk inodes and when it finds one it calls
ntfs_may_write_mft_record() to see if it is safe to write this
on-disk inode.  This then calls ilookup5() to check if the
corresponding VFS inode is in icache().  This in turn calls ifind()
which waits on the inode lock via wait_on_inode whilst holding the
global inode_lock.

Process 2: pdflush results in a call to __sync_single_inode for the
same VFS inode X on the ntfs volume.  This locks the inode (I_LOCK)
then calls write-inode -> ntfs_write_inode -> map_mft_record() ->
read_cache_page() for the page (in page cache of table of inodes
$MFT, inode 0) containing the on-disk inode.  This page has
PageUptodate() clear because of Process 1 (see above) so
read_cache_page() blocks when it tries to take the page lock for the
page so it can call ntfs_read_page().

Thus Process 1 is holding the page lock on the page containing the
on-disk inode X and it is waiting on the inode X to be unlocked in
ifind() so it can write the page out and then unlock the page.

And Process 2 is holding the inode lock on inode X and is waiting for
the page to be unlocked so it can call ntfs_readpage() or discover
that Process 1 set PageUptodate() again and use the page.

Thus we have a deadlock due to ifind() waiting on the inode lock.

The solution: The fix is to use the newly introduced
ilookup5_nowait() which does not wait on the inode's lock and hence
avoids the deadlock.  This is safe as we do not care about the VFS
inode and only use the fact that it is in the VFS inode cache and the
fact that the vfs and ntfs inodes are one struct in memory to find
the ntfs inode in memory if present.  Also, the ntfs inode has its
own locking so it does not matter if the vfs inode is locked.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 Documentation/filesystems/ntfs.txt |  5 ++++-
 fs/ntfs/ChangeLog                  | 42 ++++++++++++++++++++++++++++++++++++++
 fs/ntfs/mft.c                      | 29 +++++++++++++++++---------
 3 files changed, 65 insertions(+), 11 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/filesystems/ntfs.txt b/Documentation/filesystems/ntfs.txt
index 1415b96ed491..eef4aca0c753 100644
--- a/Documentation/filesystems/ntfs.txt
+++ b/Documentation/filesystems/ntfs.txt
@@ -451,9 +451,12 @@ Note, a technical ChangeLog aimed at kernel hackers is in fs/ntfs/ChangeLog.
 	- Implement extension of resident files using the normal file write
 	  code paths, i.e. most very small files can be extended to be a little
 	  bit bigger but not by much.
+	- Add new mount option "disable_sparse".  (See list of mount options
+	  above for details.)
 	- Improve handling of ntfs volumes with errors and strange boot sectors
 	  in particular.
-	- Fix various bugs.
+	- Fix various bugs including a nasty deadlock that appeared in recent
+	  kernels (around 2.6.11-2.6.12 timeframe).
 2.1.22:
 	- Improve handling of ntfs volumes with errors.
 	- Fix various bugs and race conditions.
diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 3d2cac4061d6..9709fac6531d 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -132,6 +132,48 @@ ToDo/Notes:
 	  the already mapped runlist fragment which causes
 	  ntfs_mapping_pairs_decompress() to fail and return error.  Update
 	  ntfs_attr_find_vcn_nolock() accordingly.
+	- Fix a nasty deadlock that appeared in recent kernels.
+	  The situation: VFS inode X on a mounted ntfs volume is dirty.  For
+	  same inode X, the ntfs_inode is dirty and thus corresponding on-disk
+	  inode, i.e. mft record, which is in a dirty PAGE_CACHE_PAGE belonging
+	  to the table of inodes, i.e. $MFT, inode 0.
+	  What happens:
+	  Process 1: sys_sync()/umount()/whatever...  calls
+	  __sync_single_inode() for $MFT -> do_writepages() -> write_page for
+	  the dirty page containing the on-disk inode X, the page is now locked
+	  -> ntfs_write_mst_block() which clears PageUptodate() on the page to
+	  prevent anyone else getting hold of it whilst it does the write out.
+	  This is necessary as the on-disk inode needs "fixups" applied before
+	  the write to disk which are removed again after the write and
+	  PageUptodate is then set again.  It then analyses the page looking
+	  for dirty on-disk inodes and when it finds one it calls
+	  ntfs_may_write_mft_record() to see if it is safe to write this
+	  on-disk inode.  This then calls ilookup5() to check if the
+	  corresponding VFS inode is in icache().  This in turn calls ifind()
+	  which waits on the inode lock via wait_on_inode whilst holding the
+	  global inode_lock.
+	  Process 2: pdflush results in a call to __sync_single_inode for the
+	  same VFS inode X on the ntfs volume.  This locks the inode (I_LOCK)
+	  then calls write-inode -> ntfs_write_inode -> map_mft_record() ->
+	  read_cache_page() for the page (in page cache of table of inodes
+	  $MFT, inode 0) containing the on-disk inode.  This page has
+	  PageUptodate() clear because of Process 1 (see above) so
+	  read_cache_page() blocks when it tries to take the page lock for the
+	  page so it can call ntfs_read_page().
+	  Thus Process 1 is holding the page lock on the page containing the
+	  on-disk inode X and it is waiting on the inode X to be unlocked in
+	  ifind() so it can write the page out and then unlock the page.
+	  And Process 2 is holding the inode lock on inode X and is waiting for
+	  the page to be unlocked so it can call ntfs_readpage() or discover
+	  that Process 1 set PageUptodate() again and use the page.
+	  Thus we have a deadlock due to ifind() waiting on the inode lock.
+	  The solution: The fix is to use the newly introduced
+	  ilookup5_nowait() which does not wait on the inode's lock and hence
+	  avoids the deadlock.  This is safe as we do not care about the VFS
+	  inode and only use the fact that it is in the VFS inode cache and the
+	  fact that the vfs and ntfs inodes are one struct in memory to find
+	  the ntfs inode in memory if present.  Also, the ntfs inode has its
+	  own locking so it does not matter if the vfs inode is locked.
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 3d0ba8e60adc..ac9ff39aa834 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -948,20 +948,23 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no,
 	na.name_len = 0;
 	na.type = AT_UNUSED;
 	/*
-	 * For inode 0, i.e. $MFT itself, we cannot use ilookup5() from here or
-	 * we deadlock because the inode is already locked by the kernel
-	 * (fs/fs-writeback.c::__sync_single_inode()) and ilookup5() waits
-	 * until the inode is unlocked before returning it and it never gets
-	 * unlocked because ntfs_should_write_mft_record() never returns.  )-:
-	 * Fortunately, we have inode 0 pinned in icache for the duration of
-	 * the mount so we can access it directly.
+	 * Optimize inode 0, i.e. $MFT itself, since we have it in memory and
+	 * we get here for it rather often.
 	 */
 	if (!mft_no) {
 		/* Balance the below iput(). */
 		vi = igrab(mft_vi);
 		BUG_ON(vi != mft_vi);
-	} else
-		vi = ilookup5(sb, mft_no, (test_t)ntfs_test_inode, &na);
+	} else {
+		/*
+		 * Have to use ilookup5_nowait() since ilookup5() waits for the
+		 * inode lock, which causes ntfs to deadlock when an inode
+		 * write via the inode dirty code path runs concurrently with
+		 * the write-out, via the page dirty code path, of the $MFT
+		 * page containing that inode's mft record.
+		 */
+		vi = ilookup5_nowait(sb, mft_no, (test_t)ntfs_test_inode, &na);
+	}
 	if (vi) {
 		ntfs_debug("Base inode 0x%lx is in icache.", mft_no);
 		/* The inode is in icache. */
@@ -1016,7 +1019,13 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no,
 	na.mft_no = MREF_LE(m->base_mft_record);
 	ntfs_debug("Mft record 0x%lx is an extent record.  Looking for base "
 			"inode 0x%lx in icache.", mft_no, na.mft_no);
-	vi = ilookup5(sb, na.mft_no, (test_t)ntfs_test_inode, &na);
+	if (!na.mft_no) {
+		/* Balance the below iput(). */
+		vi = igrab(mft_vi);
+		BUG_ON(vi != mft_vi);
+	} else
+		vi = ilookup5_nowait(sb, na.mft_no, (test_t)ntfs_test_inode,
+				&na);
 	if (!vi) {
 		/*
 		 * The base inode is not in icache, write this extent mft
-- 
cgit v1.2.3